Example #1
/*
 * Convert our list of pools into the definitive set of configurations.  We
 * start by picking the best config for each toplevel vdev.  Once that's done,
 * we assemble the toplevel vdevs into a full config for the pool.  We make a
 * pass to fix up any incorrect paths, and then add it to the main list to
 * return to the user.
 */
static nvlist_t *
get_configs(libzfs_handle_t *hdl, pool_list_t *pl, boolean_t active_ok)
{
	pool_entry_t *pe;
	vdev_entry_t *ve;
	config_entry_t *ce;
	nvlist_t *ret = NULL, *config = NULL, *tmp = NULL, *nvtop, *nvroot;
	nvlist_t **spares, **l2cache;
	uint_t i, nspares, nl2cache;
	boolean_t config_seen;
	uint64_t best_txg;
	char *name, *hostname = NULL;
	uint64_t guid;
	uint_t children = 0;
	nvlist_t **child = NULL;
	uint_t holes;
	uint64_t *hole_array, max_id;
	uint_t c;
	boolean_t isactive;
	uint64_t hostid;
	nvlist_t *nvl;
	boolean_t valid_top_config = B_FALSE;

	if (nvlist_alloc(&ret, 0, 0) != 0)
		goto nomem;

	for (pe = pl->pools; pe != NULL; pe = pe->pe_next) {
		uint64_t id, max_txg = 0;

		if (nvlist_alloc(&config, NV_UNIQUE_NAME, 0) != 0)
			goto nomem;
		config_seen = B_FALSE;

		/*
		 * Iterate over all toplevel vdevs.  Grab the pool configuration
		 * from the first one we find, and then go through the rest and
		 * add them as necessary to the 'vdevs' member of the config.
		 */
		for (ve = pe->pe_vdevs; ve != NULL; ve = ve->ve_next) {

			/*
			 * Determine the best configuration for this vdev by
			 * selecting the config with the latest transaction
			 * group.
			 */
			best_txg = 0;
			for (ce = ve->ve_configs; ce != NULL;
			    ce = ce->ce_next) {

				if (ce->ce_txg > best_txg) {
					tmp = ce->ce_config;
					best_txg = ce->ce_txg;
				}
			}

			/*
			 * We rely on the fact that the max txg for the
			 * pool will contain the most up-to-date information
			 * about the valid top-levels in the vdev namespace.
			 */
			if (best_txg > max_txg) {
				(void) nvlist_remove(config,
				    ZPOOL_CONFIG_VDEV_CHILDREN,
				    DATA_TYPE_UINT64);
				(void) nvlist_remove(config,
				    ZPOOL_CONFIG_HOLE_ARRAY,
				    DATA_TYPE_UINT64_ARRAY);

				max_txg = best_txg;
				hole_array = NULL;
				holes = 0;
				max_id = 0;
				valid_top_config = B_FALSE;

				if (nvlist_lookup_uint64(tmp,
				    ZPOOL_CONFIG_VDEV_CHILDREN, &max_id) == 0) {
					verify(nvlist_add_uint64(config,
					    ZPOOL_CONFIG_VDEV_CHILDREN,
					    max_id) == 0);
					valid_top_config = B_TRUE;
				}

				if (nvlist_lookup_uint64_array(tmp,
				    ZPOOL_CONFIG_HOLE_ARRAY, &hole_array,
				    &holes) == 0) {
					verify(nvlist_add_uint64_array(config,
					    ZPOOL_CONFIG_HOLE_ARRAY,
					    hole_array, holes) == 0);
				}
			}

			if (!config_seen) {
				/*
				 * Copy the relevant pieces of data to the pool
				 * configuration:
				 *
				 *	version
				 *	pool guid
				 *	name
				 *	comment (if available)
				 *	pool state
				 *	hostid (if available)
				 *	hostname (if available)
				 */
				uint64_t state, version;
				char *comment = NULL;

				version = fnvlist_lookup_uint64(tmp,
				    ZPOOL_CONFIG_VERSION);
				fnvlist_add_uint64(config,
				    ZPOOL_CONFIG_VERSION, version);
				guid = fnvlist_lookup_uint64(tmp,
				    ZPOOL_CONFIG_POOL_GUID);
				fnvlist_add_uint64(config,
				    ZPOOL_CONFIG_POOL_GUID, guid);
				name = fnvlist_lookup_string(tmp,
				    ZPOOL_CONFIG_POOL_NAME);
				fnvlist_add_string(config,
				    ZPOOL_CONFIG_POOL_NAME, name);

				if (nvlist_lookup_string(tmp,
				    ZPOOL_CONFIG_COMMENT, &comment) == 0)
					fnvlist_add_string(config,
					    ZPOOL_CONFIG_COMMENT, comment);

				state = fnvlist_lookup_uint64(tmp,
				    ZPOOL_CONFIG_POOL_STATE);
				fnvlist_add_uint64(config,
				    ZPOOL_CONFIG_POOL_STATE, state);

				hostid = 0;
				if (nvlist_lookup_uint64(tmp,
				    ZPOOL_CONFIG_HOSTID, &hostid) == 0) {
					fnvlist_add_uint64(config,
					    ZPOOL_CONFIG_HOSTID, hostid);
					hostname = fnvlist_lookup_string(tmp,
					    ZPOOL_CONFIG_HOSTNAME);
					fnvlist_add_string(config,
					    ZPOOL_CONFIG_HOSTNAME, hostname);
				}

				config_seen = B_TRUE;
			}

			/*
			 * Add this top-level vdev to the child array.
			 */
			verify(nvlist_lookup_nvlist(tmp,
			    ZPOOL_CONFIG_VDEV_TREE, &nvtop) == 0);
			verify(nvlist_lookup_uint64(nvtop, ZPOOL_CONFIG_ID,
			    &id) == 0);

			if (id >= children) {
				nvlist_t **newchild;

				newchild = zfs_alloc(hdl, (id + 1) *
				    sizeof (nvlist_t *));
				if (newchild == NULL)
					goto nomem;

				for (c = 0; c < children; c++)
					newchild[c] = child[c];

				free(child);
				child = newchild;
				children = id + 1;
			}
			if (nvlist_dup(nvtop, &child[id], 0) != 0)
				goto nomem;

		}

		/*
		 * If we have information about all the top-levels then
		 * clean up the nvlist which we've constructed. This
		 * means removing any extraneous devices that are
		 * beyond the valid range or adding devices to the end
		 * of our array which appear to be missing.
		 */
		if (valid_top_config) {
			if (max_id < children) {
				for (c = max_id; c < children; c++)
					nvlist_free(child[c]);
				children = max_id;
			} else if (max_id > children) {
				nvlist_t **newchild;

				newchild = zfs_alloc(hdl, (max_id) *
				    sizeof (nvlist_t *));
				if (newchild == NULL)
					goto nomem;

				for (c = 0; c < children; c++)
					newchild[c] = child[c];

				free(child);
				child = newchild;
				children = max_id;
			}
		}

		verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
		    &guid) == 0);

		/*
		 * The vdev namespace may contain holes as a result of
		 * device removal. We must add them back into the vdev
		 * tree before we process any missing devices.
		 */
		if (holes > 0) {
			ASSERT(valid_top_config);

			for (c = 0; c < children; c++) {
				nvlist_t *holey;

				if (child[c] != NULL ||
				    !vdev_is_hole(hole_array, holes, c))
					continue;

				if (nvlist_alloc(&holey, NV_UNIQUE_NAME,
				    0) != 0)
					goto nomem;

				/*
				 * Holes in the namespace are treated as
				 * "hole" top-level vdevs and have a
				 * special flag set on them.
				 */
				if (nvlist_add_string(holey,
				    ZPOOL_CONFIG_TYPE,
				    VDEV_TYPE_HOLE) != 0 ||
				    nvlist_add_uint64(holey,
				    ZPOOL_CONFIG_ID, c) != 0 ||
				    nvlist_add_uint64(holey,
				    ZPOOL_CONFIG_GUID, 0ULL) != 0) {
					nvlist_free(holey);
					goto nomem;
				}
				child[c] = holey;
			}
		}

		/*
		 * Look for any missing top-level vdevs.  If this is the case,
		 * create a faked up 'missing' vdev as a placeholder.  We cannot
		 * simply compress the child array, because the kernel performs
		 * certain checks to make sure the vdev IDs match their location
		 * in the configuration.
		 */
		for (c = 0; c < children; c++) {
			if (child[c] == NULL) {
				nvlist_t *missing;
				if (nvlist_alloc(&missing, NV_UNIQUE_NAME,
				    0) != 0)
					goto nomem;
				if (nvlist_add_string(missing,
				    ZPOOL_CONFIG_TYPE,
				    VDEV_TYPE_MISSING) != 0 ||
				    nvlist_add_uint64(missing,
				    ZPOOL_CONFIG_ID, c) != 0 ||
				    nvlist_add_uint64(missing,
				    ZPOOL_CONFIG_GUID, 0ULL) != 0) {
					nvlist_free(missing);
					goto nomem;
				}
				child[c] = missing;
			}
		}

		/*
		 * Put all of this pool's top-level vdevs into a root vdev.
		 */
		if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0)
			goto nomem;
		if (nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE,
		    VDEV_TYPE_ROOT) != 0 ||
		    nvlist_add_uint64(nvroot, ZPOOL_CONFIG_ID, 0ULL) != 0 ||
		    nvlist_add_uint64(nvroot, ZPOOL_CONFIG_GUID, guid) != 0 ||
		    nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
		    child, children) != 0) {
			nvlist_free(nvroot);
			goto nomem;
		}

		for (c = 0; c < children; c++)
			nvlist_free(child[c]);
		free(child);
		children = 0;
		child = NULL;

		/*
		 * Go through and fix up any paths and/or devids based on our
		 * known list of vdev GUID -> path mappings.
		 */
		if (fix_paths(nvroot, pl->names) != 0) {
			nvlist_free(nvroot);
			goto nomem;
		}

		/*
		 * Add the root vdev to this pool's configuration.
		 */
		if (nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
		    nvroot) != 0) {
			nvlist_free(nvroot);
			goto nomem;
		}
		nvlist_free(nvroot);

		/*
		 * zdb uses this path to report on active pools that were
		 * imported or created using -R.
		 */
		if (active_ok)
			goto add_pool;

		/*
		 * Determine if this pool is currently active, in which case we
		 * can't actually import it.
		 */
		verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
		    &name) == 0);
		verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
		    &guid) == 0);

		if (pool_active(hdl, name, guid, &isactive) != 0)
			goto error;

		if (isactive) {
			nvlist_free(config);
			config = NULL;
			continue;
		}

		if ((nvl = refresh_config(hdl, config)) == NULL) {
			nvlist_free(config);
			config = NULL;
			continue;
		}

		nvlist_free(config);
		config = nvl;

		/*
		 * Go through and update the paths for spares, now that we have
		 * them.
		 */
		verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
		    &nvroot) == 0);
		if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
		    &spares, &nspares) == 0) {
			for (i = 0; i < nspares; i++) {
				if (fix_paths(spares[i], pl->names) != 0)
					goto nomem;
			}
		}

		/*
		 * Update the paths for l2cache devices.
		 */
		if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
		    &l2cache, &nl2cache) == 0) {
			for (i = 0; i < nl2cache; i++) {
				if (fix_paths(l2cache[i], pl->names) != 0)
					goto nomem;
			}
		}

		/*
		 * Restore the original information read from the actual label.
		 */
		(void) nvlist_remove(config, ZPOOL_CONFIG_HOSTID,
		    DATA_TYPE_UINT64);
		(void) nvlist_remove(config, ZPOOL_CONFIG_HOSTNAME,
		    DATA_TYPE_STRING);
		if (hostid != 0) {
			verify(nvlist_add_uint64(config, ZPOOL_CONFIG_HOSTID,
			    hostid) == 0);
			verify(nvlist_add_string(config, ZPOOL_CONFIG_HOSTNAME,
			    hostname) == 0);
		}

add_pool:
		/*
		 * Add this pool to the list of configs.
		 */
		verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
		    &name) == 0);
		if (nvlist_add_nvlist(ret, name, config) != 0)
			goto nomem;

		nvlist_free(config);
		config = NULL;
	}

	return (ret);

nomem:
	(void) no_memory(hdl);
error:
	nvlist_free(config);
	nvlist_free(ret);
	for (c = 0; c < children; c++)
		nvlist_free(child[c]);
	free(child);

	return (NULL);
}
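
get_configs() above assembles each pool's configuration by copying fields
between nvlists and looking them back up.  Below is a minimal stand-alone
sketch of that libnvpair pattern; the pool name "tank" and the guid value are
placeholders, and the ZPOOL_CONFIG_* name constants are assumed to come from
sys/fs/zfs.h as on illumos-derived systems.

#include <stdio.h>
#include <libnvpair.h>
#include <sys/fs/zfs.h>	/* ZPOOL_CONFIG_* name constants */

int
main(void)
{
	nvlist_t *config;
	char *name;
	uint64_t guid;

	/* NV_UNIQUE_NAME keeps one value per key, as get_configs() expects */
	if (nvlist_alloc(&config, NV_UNIQUE_NAME, 0) != 0)
		return (1);

	/* "tank" and the guid are placeholders, not a real pool */
	if (nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME, "tank") != 0 ||
	    nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_GUID, 0x1234ULL) != 0) {
		nvlist_free(config);
		return (1);
	}

	/* read the members back, mirroring the lookups in get_configs() */
	if (nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME, &name) == 0 &&
	    nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &guid) == 0)
		(void) printf("%s guid=%llx\n", name,
		    (unsigned long long)guid);

	nvlist_free(config);
	return (0);
}

These sketches build with something like cc file.c -lnvpair (plus -lzfs where
libzfs is used).
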
Example #2
File: zfs_iter.c  Project: Bingfeng/zfs
/*
 * Sort datasets by specified columns.
 *
 * o  Numeric types sort in ascending order.
 * o  String types sort in alphabetical order.
 * o  Types inappropriate for a row sort that row to the literal
 *    bottom, regardless of the specified ordering.
 *
 * If no sort columns are specified, or two datasets compare equally
 * across all specified columns, they are sorted alphabetically by name
 * with snapshots grouped under their parents.
 */
static int
zfs_sort(const void *larg, const void *rarg, void *data)
{
	zfs_handle_t *l = ((zfs_node_t *)larg)->zn_handle;
	zfs_handle_t *r = ((zfs_node_t *)rarg)->zn_handle;
	zfs_sort_column_t *sc = (zfs_sort_column_t *)data;
	zfs_sort_column_t *psc;

	for (psc = sc; psc != NULL; psc = psc->sc_next) {
		char lbuf[ZFS_MAXPROPLEN], rbuf[ZFS_MAXPROPLEN];
		char *lstr, *rstr;
		uint64_t lnum, rnum;
		boolean_t lvalid, rvalid;
		int ret = 0;

		/*
		 * We group the checks below the generic code.  If 'lstr' and
		 * 'rstr' are non-NULL, then we do a string based comparison.
		 * Otherwise, we compare 'lnum' and 'rnum'.
		 */
		lstr = rstr = NULL;
		if (psc->sc_prop == ZPROP_INVAL) {
			nvlist_t *luser, *ruser;
			nvlist_t *lval, *rval;

			luser = zfs_get_user_props(l);
			ruser = zfs_get_user_props(r);

			lvalid = (nvlist_lookup_nvlist(luser,
			    psc->sc_user_prop, &lval) == 0);
			rvalid = (nvlist_lookup_nvlist(ruser,
			    psc->sc_user_prop, &rval) == 0);

			if (lvalid)
				verify(nvlist_lookup_string(lval,
				    ZPROP_VALUE, &lstr) == 0);
			if (rvalid)
				verify(nvlist_lookup_string(rval,
				    ZPROP_VALUE, &rstr) == 0);
		} else if (psc->sc_prop == ZFS_PROP_NAME) {
			lvalid = rvalid = B_TRUE;

			(void) strlcpy(lbuf, zfs_get_name(l), sizeof (lbuf));
			(void) strlcpy(rbuf, zfs_get_name(r), sizeof (rbuf));

			lstr = lbuf;
			rstr = rbuf;
		} else if (zfs_prop_is_string(psc->sc_prop)) {
			lvalid = (zfs_prop_get(l, psc->sc_prop, lbuf,
			    sizeof (lbuf), NULL, NULL, 0, B_TRUE) == 0);
			rvalid = (zfs_prop_get(r, psc->sc_prop, rbuf,
			    sizeof (rbuf), NULL, NULL, 0, B_TRUE) == 0);

			lstr = lbuf;
			rstr = rbuf;
		} else {
			lvalid = zfs_prop_valid_for_type(psc->sc_prop,
			    zfs_get_type(l));
			rvalid = zfs_prop_valid_for_type(psc->sc_prop,
			    zfs_get_type(r));

			if (lvalid)
				(void) zfs_prop_get_numeric(l, psc->sc_prop,
				    &lnum, NULL, NULL, 0);
			if (rvalid)
				(void) zfs_prop_get_numeric(r, psc->sc_prop,
				    &rnum, NULL, NULL, 0);
		}

		if (!lvalid && !rvalid)
			continue;
		else if (!lvalid)
			return (1);
		else if (!rvalid)
			return (-1);

		if (lstr)
			ret = strcmp(lstr, rstr);
		else if (lnum < rnum)
			ret = -1;
		else if (lnum > rnum)
			ret = 1;

		if (ret != 0) {
			if (psc->sc_reverse == B_TRUE)
				ret = (ret < 0) ? 1 : -1;
			return (ret);
		}
	}

	return (zfs_compare(larg, rarg, NULL));
}
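
The ZPROP_INVAL branch above depends on the nested layout that
zfs_get_user_props() returns: an nvlist keyed by property name, where each
entry is itself an nvlist carrying the value under ZPROP_VALUE.  A small
sketch of that two-level lookup using libnvpair alone; the property
"com.example:tier" and its value are invented for the demo.

#include <stdio.h>
#include <libnvpair.h>
#include <sys/fs/zfs.h>	/* ZPROP_VALUE */

int
main(void)
{
	nvlist_t *user_props = fnvlist_alloc();
	nvlist_t *entry = fnvlist_alloc();
	nvlist_t *val;
	char *str;

	/* one mock user property: com.example:tier=gold */
	fnvlist_add_string(entry, ZPROP_VALUE, "gold");
	fnvlist_add_nvlist(user_props, "com.example:tier", entry);

	/* the two-level lookup zfs_sort() performs for ZPROP_INVAL columns */
	if (nvlist_lookup_nvlist(user_props, "com.example:tier", &val) == 0 &&
	    nvlist_lookup_string(val, ZPROP_VALUE, &str) == 0)
		(void) printf("com.example:tier=%s\n", str);

	fnvlist_free(entry);
	fnvlist_free(user_props);
	return (0);
}
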
Example #3
/*
 * Determines if the pool is in use.  If so, it returns true and the state of
 * the pool as well as the name of the pool.  The returned name string is
 * allocated and must be freed by the caller.
 */
int
zpool_in_use(libzfs_handle_t *hdl, int fd, pool_state_t *state, char **namestr,
    boolean_t *inuse)
{
	nvlist_t *config;
	char *name;
	boolean_t ret;
	uint64_t guid, vdev_guid;
	zpool_handle_t *zhp;
	nvlist_t *pool_config;
	uint64_t stateval, isspare;
	aux_cbdata_t cb = { 0 };
	boolean_t isactive;

	*inuse = B_FALSE;

	if (zpool_read_label(fd, &config, NULL) != 0) {
		(void) no_memory(hdl);
		return (-1);
	}

	if (config == NULL)
		return (0);

	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
	    &stateval) == 0);
	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID,
	    &vdev_guid) == 0);

	if (stateval != POOL_STATE_SPARE && stateval != POOL_STATE_L2CACHE) {
		verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
		    &name) == 0);
		verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
		    &guid) == 0);
	}

	switch (stateval) {
	case POOL_STATE_EXPORTED:
		/*
		 * A pool with an exported state may in fact be imported
		 * read-only, so check the in-core state to see if it's
		 * active and imported read-only.  If it is, set
		 * its state to active.
		 */
		if (pool_active(hdl, name, guid, &isactive) == 0 && isactive &&
		    (zhp = zpool_open_canfail(hdl, name)) != NULL) {
			if (zpool_get_prop_int(zhp, ZPOOL_PROP_READONLY, NULL))
				stateval = POOL_STATE_ACTIVE;

			/*
			 * All we needed the zpool handle for is the
			 * readonly prop check.
			 */
			zpool_close(zhp);
		}

		ret = B_TRUE;
		break;

	case POOL_STATE_ACTIVE:
		/*
		 * For an active pool, we have to determine if it's really part
		 * of a currently active pool (in which case the pool will exist
		 * and the guid will be the same), or whether it's part of an
		 * active pool that was disconnected without being explicitly
		 * exported.
		 */
		if (pool_active(hdl, name, guid, &isactive) != 0) {
			nvlist_free(config);
			return (-1);
		}

		if (isactive) {
			/*
			 * Because the device may have been removed while
			 * offlined, we only report it as active if the vdev is
			 * still present in the config.  Otherwise, pretend like
			 * it's not in use.
			 */
			if ((zhp = zpool_open_canfail(hdl, name)) != NULL &&
			    (pool_config = zpool_get_config(zhp, NULL))
			    != NULL) {
				nvlist_t *nvroot;

				verify(nvlist_lookup_nvlist(pool_config,
				    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
				ret = find_guid(nvroot, vdev_guid);
			} else {
				ret = B_FALSE;
			}

			/*
			 * If this is an active spare within another pool, we
			 * treat it like an unused hot spare.  This allows the
			 * user to create a pool with a hot spare that currently
			 * in use within another pool.  Since we return B_TRUE,
			 * libdiskmgt will continue to prevent generic consumers
			 * from using the device.
			 */
			if (ret && nvlist_lookup_uint64(config,
			    ZPOOL_CONFIG_IS_SPARE, &isspare) == 0 && isspare)
				stateval = POOL_STATE_SPARE;

			if (zhp != NULL)
				zpool_close(zhp);
		} else {
			stateval = POOL_STATE_POTENTIALLY_ACTIVE;
			ret = B_TRUE;
		}
		break;

	case POOL_STATE_SPARE:
		/*
		 * For a hot spare, it can be either definitively in use, or
		 * potentially active.  To determine if it's in use, we iterate
		 * over all pools in the system and search for one with a spare
		 * with a matching guid.
		 *
		 * Due to the shared nature of spares, we don't actually report
		 * the potentially active case as in use.  This means the user
		 * can freely create pools on the hot spares of exported pools,
		 * but to do otherwise makes the resulting code complicated, and
		 * we end up having to deal with this case anyway.
		 */
		cb.cb_zhp = NULL;
		cb.cb_guid = vdev_guid;
		cb.cb_type = ZPOOL_CONFIG_SPARES;
		if (zpool_iter(hdl, find_aux, &cb) == 1) {
			name = (char *)zpool_get_name(cb.cb_zhp);
			ret = B_TRUE;
		} else {
			ret = B_FALSE;
		}
		break;

	case POOL_STATE_L2CACHE:

		/*
		 * Check if any pool is currently using this l2cache device.
		 */
		cb.cb_zhp = NULL;
		cb.cb_guid = vdev_guid;
		cb.cb_type = ZPOOL_CONFIG_L2CACHE;
		if (zpool_iter(hdl, find_aux, &cb) == 1) {
			name = (char *)zpool_get_name(cb.cb_zhp);
			ret = B_TRUE;
		} else {
			ret = B_FALSE;
		}
		break;

	default:
		ret = B_FALSE;
	}


	if (ret) {
		if ((*namestr = zfs_strdup(hdl, name)) == NULL) {
			if (cb.cb_zhp)
				zpool_close(cb.cb_zhp);
			nvlist_free(config);
			return (-1);
		}
		*state = (pool_state_t)stateval;
	}

	if (cb.cb_zhp)
		zpool_close(cb.cb_zhp);

	nvlist_free(config);
	*inuse = ret;
	return (0);
}
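
A hedged sketch of a caller for zpool_in_use() with the prototype shown above;
the device path comes from argv, the program links against libzfs, and reading
a real device usually needs elevated privileges.  Freeing the returned name
with free() assumes zfs_strdup() allocates from the C heap, as it does in the
libzfs sources this example comes from.

#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#include <libzfs.h>

int
main(int argc, char **argv)
{
	libzfs_handle_t *hdl;
	pool_state_t state;
	boolean_t inuse;
	char *name = NULL;
	int fd;

	/* argv[1] names a disk or file that may carry a ZFS label */
	if (argc != 2 || (fd = open(argv[1], O_RDONLY)) < 0)
		return (1);

	if ((hdl = libzfs_init()) == NULL) {
		(void) close(fd);
		return (1);
	}

	if (zpool_in_use(hdl, fd, &state, &name, &inuse) == 0 && inuse) {
		(void) printf("%s belongs to pool '%s' (state %d)\n",
		    argv[1], name, (int)state);
		free(name);
	} else {
		(void) printf("%s: not part of an active pool\n", argv[1]);
	}

	libzfs_fini(hdl);
	(void) close(fd);
	return (0);
}
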
Example #4
/*
 * Linux adds ZFS_IOC_RECV_NEW for resumable streams and preserves the legacy
 * ZFS_IOC_RECV user/kernel interface.  The new interface supports all stream
 * options but is currently only used for resumable streams.  This way updated
 * user space utilities will interoperate with older kernel modules.
 *
 * Non-Linux OpenZFS platforms have opted to modify the legacy interface.
 */
static int
recv_impl(const char *snapname, nvlist_t *props, const char *origin,
    boolean_t force, boolean_t resumable, int input_fd,
    const dmu_replay_record_t *begin_record, int cleanup_fd,
    uint64_t *read_bytes, uint64_t *errflags, uint64_t *action_handle,
    nvlist_t **errors)
{
	dmu_replay_record_t drr;
	char fsname[MAXPATHLEN];
	char *atp;
	int error;

	ASSERT3S(g_refcount, >, 0);
	VERIFY3S(g_fd, !=, -1);

	/* Set 'fsname' to the name of containing filesystem */
	(void) strlcpy(fsname, snapname, sizeof (fsname));
	atp = strchr(fsname, '@');
	if (atp == NULL)
		return (EINVAL);
	*atp = '\0';

	/* If the fs does not exist, try its parent. */
	if (!lzc_exists(fsname)) {
		char *slashp = strrchr(fsname, '/');
		if (slashp == NULL)
			return (ENOENT);
		*slashp = '\0';
	}

	/*
	 * The begin_record is normally a non-byteswapped BEGIN record.
	 * For resumable streams it may be set to any non-byteswapped
	 * dmu_replay_record_t.
	 */
	if (begin_record == NULL) {
		error = recv_read(input_fd, &drr, sizeof (drr));
		if (error != 0)
			return (error);
	} else {
		drr = *begin_record;
	}

	if (resumable) {
		nvlist_t *outnvl = NULL;
		nvlist_t *innvl = fnvlist_alloc();

		fnvlist_add_string(innvl, "snapname", snapname);

		if (props != NULL)
			fnvlist_add_nvlist(innvl, "props", props);

		if (origin != NULL && strlen(origin))
			fnvlist_add_string(innvl, "origin", origin);

		fnvlist_add_byte_array(innvl, "begin_record",
		    (uchar_t *)&drr, sizeof (drr));

		fnvlist_add_int32(innvl, "input_fd", input_fd);

		if (force)
			fnvlist_add_boolean(innvl, "force");

		if (resumable)
			fnvlist_add_boolean(innvl, "resumable");

		if (cleanup_fd >= 0)
			fnvlist_add_int32(innvl, "cleanup_fd", cleanup_fd);

		if (action_handle != NULL)
			fnvlist_add_uint64(innvl, "action_handle",
			    *action_handle);

		error = lzc_ioctl(ZFS_IOC_RECV_NEW, fsname, innvl, &outnvl);

		if (error == 0 && read_bytes != NULL)
			error = nvlist_lookup_uint64(outnvl, "read_bytes",
			    read_bytes);

		if (error == 0 && errflags != NULL)
			error = nvlist_lookup_uint64(outnvl, "error_flags",
			    errflags);

		if (error == 0 && action_handle != NULL)
			error = nvlist_lookup_uint64(outnvl, "action_handle",
			    action_handle);

		if (error == 0 && errors != NULL) {
			nvlist_t *nvl;
			error = nvlist_lookup_nvlist(outnvl, "errors", &nvl);
			if (error == 0)
				*errors = fnvlist_dup(nvl);
		}

		fnvlist_free(innvl);
		fnvlist_free(outnvl);
	} else {
		zfs_cmd_t zc = {"\0"};
		char *packed = NULL;
		size_t size;

		ASSERT3S(g_refcount, >, 0);

		(void) strlcpy(zc.zc_name, fsname, sizeof (zc.zc_name));
		(void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value));

		if (props != NULL) {
			packed = fnvlist_pack(props, &size);
			zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed;
			zc.zc_nvlist_src_size = size;
		}

		if (origin != NULL)
			(void) strlcpy(zc.zc_string, origin,
			    sizeof (zc.zc_string));

		ASSERT3S(drr.drr_type, ==, DRR_BEGIN);
		zc.zc_begin_record = drr.drr_u.drr_begin;
		zc.zc_guid = force;
		zc.zc_cookie = input_fd;
		zc.zc_cleanup_fd = -1;
		zc.zc_action_handle = 0;

		if (cleanup_fd >= 0)
			zc.zc_cleanup_fd = cleanup_fd;

		if (action_handle != NULL)
			zc.zc_action_handle = *action_handle;

		zc.zc_nvlist_dst_size = 128 * 1024;
		zc.zc_nvlist_dst = (uint64_t)(uintptr_t)
		    malloc(zc.zc_nvlist_dst_size);

		error = ioctl(g_fd, ZFS_IOC_RECV, &zc);
		if (error != 0) {
			error = errno;
		} else {
			if (read_bytes != NULL)
				*read_bytes = zc.zc_cookie;

			if (errflags != NULL)
				*errflags = zc.zc_obj;

			if (action_handle != NULL)
				*action_handle = zc.zc_action_handle;

			if (errors != NULL)
				VERIFY0(nvlist_unpack(
				    (void *)(uintptr_t)zc.zc_nvlist_dst,
				    zc.zc_nvlist_dst_size, errors, KM_SLEEP));
		}

		if (packed != NULL)
			fnvlist_pack_free(packed, size);
		free((void *)(uintptr_t)zc.zc_nvlist_dst);
	}

	return (error);
}
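
In the legacy branch above the props nvlist reaches the kernel as a packed
buffer behind zc_nvlist_src.  Here is a self-contained sketch of that
pack/unpack round trip using libnvpair's fnvlist helpers; the "compression"
key and its value are invented stand-ins for real received properties.

#include <stdio.h>
#include <libnvpair.h>

int
main(void)
{
	nvlist_t *props = fnvlist_alloc();
	nvlist_t *copy;
	char *packed;
	size_t size;
	uint64_t val;

	/* a made-up property, standing in for real recv props */
	fnvlist_add_uint64(props, "compression", 2);

	/* flatten into a contiguous buffer, as the zc_nvlist_src path does */
	packed = fnvlist_pack(props, &size);

	/* the receiving side would unpack the same bytes */
	copy = fnvlist_unpack(packed, size);
	if (nvlist_lookup_uint64(copy, "compression", &val) == 0)
		(void) printf("compression=%llu (%zu packed bytes)\n",
		    (unsigned long long)val, size);

	fnvlist_free(copy);
	fnvlist_pack_free(packed, size);
	fnvlist_free(props);
	return (0);
}
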
Example #5
File: logfix.c  Project: pjjw/logfix
int
main(int argc, char **argv)
{
	int fd_pool;
	int fd_log;
	vdev_label_t vl_pool;
	vdev_label_t vl_log;
	nvlist_t *config_pool;
	nvlist_t *config_log;

	uint64_t guid;		// ZPOOL_CONFIG_GUID
	uint64_t is_log;	// ZPOOL_CONFIG_IS_LOG
	nvlist_t *vdev_tree;	// ZPOOL_CONFIG_VDEV_TREE

	char *buf;
	size_t buflen;

	VERIFY(argc == 4);
	VERIFY((fd_pool = open(argv[1], O_RDWR)) != -1);
	VERIFY((fd_log = open(argv[2], O_RDWR)) != -1);
	VERIFY(sscanf(argv[3], "%" SCNu64 , &guid) == 1);
	//guid = 9851295902337437618ULL;

	VERIFY(pread64(fd_pool, &vl_pool, sizeof (vdev_label_t), 0) ==
	    sizeof (vdev_label_t));
	VERIFY(nvlist_unpack(vl_pool.vl_vdev_phys.vp_nvlist,
	    sizeof (vl_pool.vl_vdev_phys.vp_nvlist), &config_pool, 0) == 0);
	VERIFY(pread64(fd_log, &vl_log, sizeof (vdev_label_t), 0) ==
	    sizeof (vdev_label_t));
	VERIFY(nvlist_unpack(vl_log.vl_vdev_phys.vp_nvlist,
	    sizeof (vl_log.vl_vdev_phys.vp_nvlist), &config_log, 0) == 0);

	// save what we want from config_log -- is_log, vdev_tree
	VERIFY(nvlist_lookup_uint64(config_log, ZPOOL_CONFIG_IS_LOG, &is_log) == 0);
	VERIFY(nvlist_lookup_nvlist(config_log, ZPOOL_CONFIG_VDEV_TREE, &vdev_tree) == 0);

	// fix guid for vdev_log
	VERIFY(nvlist_remove_all(vdev_tree, ZPOOL_CONFIG_GUID) == 0);
	VERIFY(nvlist_add_uint64(vdev_tree, ZPOOL_CONFIG_GUID, guid) == 0);

	// remove what we are going to replace on config_pool
	VERIFY(nvlist_remove_all(config_pool, ZPOOL_CONFIG_TOP_GUID) == 0);
	VERIFY(nvlist_remove_all(config_pool, ZPOOL_CONFIG_GUID) == 0);
	VERIFY(nvlist_remove_all(config_pool, ZPOOL_CONFIG_VDEV_TREE) == 0);

	// add back what we want 
	VERIFY(nvlist_add_uint64(config_pool, ZPOOL_CONFIG_TOP_GUID, guid) == 0);
	VERIFY(nvlist_add_uint64(config_pool, ZPOOL_CONFIG_GUID, guid) == 0);
	VERIFY(nvlist_add_uint64(config_pool, ZPOOL_CONFIG_IS_LOG, is_log) == 0);
	VERIFY(nvlist_add_nvlist(config_pool, ZPOOL_CONFIG_VDEV_TREE, vdev_tree) == 0);

	buf = vl_pool.vl_vdev_phys.vp_nvlist;
	buflen = sizeof (vl_pool.vl_vdev_phys.vp_nvlist);
	VERIFY(nvlist_pack(config_pool, &buf, &buflen, NV_ENCODE_XDR, 0) == 0);

	label_write(fd_log, offsetof(vdev_label_t, vl_vdev_phys),
	    VDEV_PHYS_SIZE, &vl_pool.vl_vdev_phys);

	fsync(fd_log);

	return (0);
}
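
The key step above is packing the edited config back into a fixed-size label
buffer with NV_ENCODE_XDR.  A sketch of that in-place pack, plus the oversized
unpack that label readers use, with an arbitrary 8 KB demo buffer and a
placeholder guid:

#include <stdio.h>
#include <libnvpair.h>

int
main(void)
{
	nvlist_t *config = fnvlist_alloc();
	nvlist_t *copy;
	char buf[8192] = { 0 };		/* arbitrary demo size, zero-filled */
	char *bufp = buf;
	size_t buflen = sizeof (buf);
	size_t encoded;
	uint64_t guid;

	fnvlist_add_uint64(config, "guid", 0x1234ULL);	/* placeholder */

	/* make sure the XDR encoding fits the region */
	if (nvlist_size(config, &encoded, NV_ENCODE_XDR) != 0 ||
	    encoded > sizeof (buf))
		return (1);

	/* pack into the caller-supplied buffer, as the label fix-up does */
	if (nvlist_pack(config, &bufp, &buflen, NV_ENCODE_XDR, 0) != 0)
		return (1);

	/* unpack the whole region, the way the label-reading code does */
	if (nvlist_unpack(buf, sizeof (buf), &copy, 0) == 0 &&
	    nvlist_lookup_uint64(copy, "guid", &guid) == 0) {
		(void) printf("guid=0x%llx (%zu encoded bytes)\n",
		    (unsigned long long)guid, encoded);
		nvlist_free(copy);
	}

	fnvlist_free(config);
	return (0);
}
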
Example #6
/*
 * Active pool health status.
 *
 * To determine the status for a pool, we make several passes over the config,
 * picking the most egregious error we find.  In order of importance, we do the
 * following:
 *
 *	- Check for a complete and valid configuration
 *	- Look for any faulted or missing devices in a non-replicated config
 *	- Check for any data errors
 *	- Check for any faulted or missing devices in a replicated config
 *	- Look for any devices showing errors
 *	- Check for any resilvering devices
 *
 * There can obviously be multiple errors within a single pool, so this routine
 * only picks the most damaging of all the current errors to report.
 */
static zpool_status_t
check_status(nvlist_t *config, boolean_t isimport)
{
	nvlist_t *nvroot;
	vdev_stat_t *vs;
	pool_scan_stat_t *ps = NULL;
	uint_t vsc, psc;
	uint64_t nerr;
	uint64_t version;
	uint64_t stateval;
	uint64_t suspended;
	uint64_t hostid = 0;
	unsigned long system_hostid = gethostid() & 0xffffffff;

	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION,
	    &version) == 0);
	verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
	    &nvroot) == 0);
	verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_VDEV_STATS,
	    (uint64_t **)&vs, &vsc) == 0);
	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
	    &stateval) == 0);

	/*
	 * Currently resilvering a vdev
	 */
	(void) nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_SCAN_STATS,
	    (uint64_t **)&ps, &psc);
	if (ps && ps->pss_func == POOL_SCAN_RESILVER &&
	    ps->pss_state == DSS_SCANNING)
		return (ZPOOL_STATUS_RESILVERING);

	/*
	 * Pool last accessed by another system.
	 */
	(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID, &hostid);
	if (hostid != 0 && (unsigned long)hostid != system_hostid &&
	    stateval == POOL_STATE_ACTIVE)
		return (ZPOOL_STATUS_HOSTID_MISMATCH);

	/*
	 * Newer on-disk version.
	 */
	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
	    vs->vs_aux == VDEV_AUX_VERSION_NEWER)
		return (ZPOOL_STATUS_VERSION_NEWER);

	/*
	 * Unsupported feature(s).
	 */
	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
	    vs->vs_aux == VDEV_AUX_UNSUP_FEAT) {
		nvlist_t *nvinfo;

		verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO,
		    &nvinfo) == 0);
		if (nvlist_exists(nvinfo, ZPOOL_CONFIG_CAN_RDONLY))
			return (ZPOOL_STATUS_UNSUP_FEAT_WRITE);
		return (ZPOOL_STATUS_UNSUP_FEAT_READ);
	}

	/*
	 * Check that the config is complete.
	 */
	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
	    vs->vs_aux == VDEV_AUX_BAD_GUID_SUM)
		return (ZPOOL_STATUS_BAD_GUID_SUM);

	/*
	 * Check whether the pool has suspended due to failed I/O.
	 */
	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_SUSPENDED,
	    &suspended) == 0) {
		if (suspended == ZIO_FAILURE_MODE_CONTINUE)
			return (ZPOOL_STATUS_IO_FAILURE_CONTINUE);
		return (ZPOOL_STATUS_IO_FAILURE_WAIT);
	}

	/*
	 * Could not read a log.
	 */
	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
	    vs->vs_aux == VDEV_AUX_BAD_LOG) {
		return (ZPOOL_STATUS_BAD_LOG);
	}

	/*
	 * Bad devices in non-replicated config.
	 */
	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
	    find_vdev_problem(nvroot, vdev_faulted))
		return (ZPOOL_STATUS_FAULTED_DEV_NR);

	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
	    find_vdev_problem(nvroot, vdev_missing))
		return (ZPOOL_STATUS_MISSING_DEV_NR);

	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
	    find_vdev_problem(nvroot, vdev_broken))
		return (ZPOOL_STATUS_CORRUPT_LABEL_NR);

	/*
	 * Corrupted pool metadata
	 */
	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
	    vs->vs_aux == VDEV_AUX_CORRUPT_DATA)
		return (ZPOOL_STATUS_CORRUPT_POOL);

	/*
	 * Persistent data errors.
	 */
	if (!isimport) {
		if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_ERRCOUNT,
		    &nerr) == 0 && nerr != 0)
			return (ZPOOL_STATUS_CORRUPT_DATA);
	}

	/*
	 * Missing devices in a replicated config.
	 */
	if (find_vdev_problem(nvroot, vdev_faulted))
		return (ZPOOL_STATUS_FAULTED_DEV_R);
	if (find_vdev_problem(nvroot, vdev_missing))
		return (ZPOOL_STATUS_MISSING_DEV_R);
	if (find_vdev_problem(nvroot, vdev_broken))
		return (ZPOOL_STATUS_CORRUPT_LABEL_R);

	/*
	 * Devices with errors
	 */
	if (!isimport && find_vdev_problem(nvroot, vdev_errors))
		return (ZPOOL_STATUS_FAILING_DEV);

	/*
	 * Offlined devices
	 */
	if (find_vdev_problem(nvroot, vdev_offlined))
		return (ZPOOL_STATUS_OFFLINE_DEV);

	/*
	 * Removed device
	 */
	if (find_vdev_problem(nvroot, vdev_removed))
		return (ZPOOL_STATUS_REMOVED_DEV);

	/*
	 * Outdated, but usable, version
	 */
	if (SPA_VERSION_IS_SUPPORTED(version) && version != SPA_VERSION)
		return (ZPOOL_STATUS_VERSION_OLDER);

	/*
	 * Usable pool with disabled features
	 */
	if (version >= SPA_VERSION_FEATURES) {
		int i;
		nvlist_t *feat;

		if (isimport) {
			feat = fnvlist_lookup_nvlist(config,
			    ZPOOL_CONFIG_LOAD_INFO);
			feat = fnvlist_lookup_nvlist(feat,
			    ZPOOL_CONFIG_ENABLED_FEAT);
		} else {
			feat = fnvlist_lookup_nvlist(config,
			    ZPOOL_CONFIG_FEATURE_STATS);
		}

		for (i = 0; i < SPA_FEATURES; i++) {
			zfeature_info_t *fi = &spa_feature_table[i];
			if (!nvlist_exists(feat, fi->fi_guid))
				return (ZPOOL_STATUS_FEAT_DISABLED);
		}
	}

	return (ZPOOL_STATUS_OK);
}
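
The stats lookups above fetch a uint64 array and reinterpret it as a
vdev_stat_t or pool_scan_stat_t.  A minimal sketch of that cast idiom using an
invented my_stat_t made purely of uint64_t fields, so it needs only libnvpair:

#include <stdio.h>
#include <libnvpair.h>

/* stand-in for vdev_stat_t: a struct that is just packed uint64_t words */
typedef struct my_stat {
	uint64_t ms_state;
	uint64_t ms_aux;
	uint64_t ms_errors;
} my_stat_t;

int
main(void)
{
	nvlist_t *nvroot = fnvlist_alloc();
	my_stat_t in = { 3, 0, 42 };	/* made-up numbers */
	my_stat_t *out;
	uint64_t *array;
	uint_t count;

	/* store the struct as a uint64 array, as the kernel does for stats */
	fnvlist_add_uint64_array(nvroot, "demo_stats", (uint64_t *)&in,
	    sizeof (in) / sizeof (uint64_t));

	/* look it up and cast back, mirroring the (uint64_t **)&vs idiom */
	if (nvlist_lookup_uint64_array(nvroot, "demo_stats",
	    &array, &count) == 0) {
		out = (my_stat_t *)array;
		(void) printf("state=%llu errors=%llu (%u words)\n",
		    (unsigned long long)out->ms_state,
		    (unsigned long long)out->ms_errors, count);
	}

	fnvlist_free(nvroot);
	return (0);
}
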
Example #7
/*
 * Active pool health status.
 *
 * To determine the status for a pool, we make several passes over the config,
 * picking the most egregious error we find.  In order of importance, we do the
 * following:
 *
 *	- Check for a complete and valid configuration
 *	- Look for any faulted or missing devices in a non-replicated config
 *	- Check for any data errors
 *	- Check for any faulted or missing devices in a replicated config
 *	- Look for any devices showing errors
 *	- Check for any resilvering devices
 *
 * There can obviously be multiple errors within a single pool, so this routine
 * only picks the most damaging of all the current errors to report.
 */
static zpool_status_t
check_status(zpool_handle_t *zhp, nvlist_t *config, boolean_t isimport)
{
	nvlist_t *nvroot;
	vdev_stat_t *vs;
	uint_t vsc;
	uint64_t nerr;
	uint64_t version;
	uint64_t stateval;
	uint64_t hostid = 0;

	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION,
	    &version) == 0);
	verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
	    &nvroot) == 0);
	verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_STATS,
	    (uint64_t **)&vs, &vsc) == 0);
	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
	    &stateval) == 0);
	(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID, &hostid);

	/*
	 * Pool last accessed by another system.
	 */
	if (hostid != 0 && (unsigned long)hostid != gethostid() &&
	    stateval == POOL_STATE_ACTIVE)
		return (ZPOOL_STATUS_HOSTID_MISMATCH);

	/*
	 * Newer on-disk version.
	 */
	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
	    vs->vs_aux == VDEV_AUX_VERSION_NEWER)
		return (ZPOOL_STATUS_VERSION_NEWER);

	/*
	 * Check that the config is complete.
	 */
	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
	    vs->vs_aux == VDEV_AUX_BAD_GUID_SUM)
		return (ZPOOL_STATUS_BAD_GUID_SUM);

	/*
	 * Pool has experienced failed I/O.
	 */
	if (stateval == POOL_STATE_IO_FAILURE) {
		zpool_handle_t *tmp_zhp = NULL;
		libzfs_handle_t *hdl = NULL;
		char property[ZPOOL_MAXPROPLEN];
		char *failmode = NULL;

		if (zhp == NULL) {
			char *poolname;

			verify(nvlist_lookup_string(config,
			    ZPOOL_CONFIG_POOL_NAME, &poolname) == 0);
			if ((hdl = libzfs_init()) == NULL)
				return (ZPOOL_STATUS_IO_FAILURE_WAIT);
			tmp_zhp = zpool_open_canfail(hdl, poolname);
			if (tmp_zhp == NULL) {
				libzfs_fini(hdl);
				return (ZPOOL_STATUS_IO_FAILURE_WAIT);
			}
		}
		if (zpool_get_prop(zhp ? zhp : tmp_zhp, ZPOOL_PROP_FAILUREMODE,
		    property, sizeof (property), NULL) == 0)
			failmode = property;
		if (tmp_zhp != NULL)
			zpool_close(tmp_zhp);
		if (hdl != NULL)
			libzfs_fini(hdl);
		if (failmode == NULL)
			return (ZPOOL_STATUS_IO_FAILURE_WAIT);

		if (strncmp(failmode, "continue", strlen("continue")) == 0)
			return (ZPOOL_STATUS_IO_FAILURE_CONTINUE);
		else
			return (ZPOOL_STATUS_IO_FAILURE_WAIT);
	}

	/*
	 * Could not read a log.
	 */
	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
	    vs->vs_aux == VDEV_AUX_BAD_LOG) {
		return (ZPOOL_STATUS_BAD_LOG);
	}

	/*
	 * Bad devices in non-replicated config.
	 */
	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
	    find_vdev_problem(nvroot, vdev_faulted))
		return (ZPOOL_STATUS_FAULTED_DEV_NR);

	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
	    find_vdev_problem(nvroot, vdev_missing))
		return (ZPOOL_STATUS_MISSING_DEV_NR);

	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
	    find_vdev_problem(nvroot, vdev_broken))
		return (ZPOOL_STATUS_CORRUPT_LABEL_NR);

	/*
	 * Corrupted pool metadata
	 */
	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
	    vs->vs_aux == VDEV_AUX_CORRUPT_DATA)
		return (ZPOOL_STATUS_CORRUPT_POOL);

	/*
	 * Persistent data errors.
	 */
	if (!isimport) {
		if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_ERRCOUNT,
		    &nerr) == 0 && nerr != 0)
			return (ZPOOL_STATUS_CORRUPT_DATA);
	}

	/*
	 * Missing devices in a replicated config.
	 */
	if (find_vdev_problem(nvroot, vdev_faulted))
		return (ZPOOL_STATUS_FAULTED_DEV_R);
	if (find_vdev_problem(nvroot, vdev_missing))
		return (ZPOOL_STATUS_MISSING_DEV_R);
	if (find_vdev_problem(nvroot, vdev_broken))
		return (ZPOOL_STATUS_CORRUPT_LABEL_R);

	/*
	 * Devices with errors
	 */
	if (!isimport && find_vdev_problem(nvroot, vdev_errors))
		return (ZPOOL_STATUS_FAILING_DEV);

	/*
	 * Offlined devices
	 */
	if (find_vdev_problem(nvroot, vdev_offlined))
		return (ZPOOL_STATUS_OFFLINE_DEV);

	/*
	 * Currently resilvering
	 */
	if (!vs->vs_scrub_complete && vs->vs_scrub_type == POOL_SCRUB_RESILVER)
		return (ZPOOL_STATUS_RESILVERING);

	/*
	 * Outdated, but usable, version
	 */
	if (version < SPA_VERSION)
		return (ZPOOL_STATUS_VERSION_OLDER);

	return (ZPOOL_STATUS_OK);
}
Example #8
File: getroot.c  Project: pendor/grub-zfs
static char *
find_root_device_from_libzfs (const char *dir)
{
  char *device = NULL;
  char *poolname;
  char *poolfs;

  grub_find_zpool_from_dir (dir, &poolname, &poolfs);
  if (! poolname)
    return NULL;

  {
    zpool_handle_t *zpool;
    libzfs_handle_t *libzfs;
    nvlist_t *config, *vdev_tree;
    nvlist_t **children, **path;
    unsigned int nvlist_count;
    unsigned int i;

    libzfs = grub_get_libzfs_handle ();
    if (! libzfs)
      return NULL;

    zpool = zpool_open (libzfs, poolname);
    config = zpool_get_config (zpool, NULL);

    if (nvlist_lookup_nvlist (config, "vdev_tree", &vdev_tree) != 0)
      error (1, errno, "nvlist_lookup_nvlist (\"vdev_tree\")");

    if (nvlist_lookup_nvlist_array (vdev_tree, "children", &children, &nvlist_count) != 0)
      error (1, errno, "nvlist_lookup_nvlist_array (\"children\")");
    assert (nvlist_count > 0);

    while (nvlist_lookup_nvlist_array (children[0], "children",
				       &children, &nvlist_count) == 0)
      assert (nvlist_count > 0);

    for (i = 0; i < nvlist_count; i++)
      {
	if (nvlist_lookup_string (children[i], "path", &device) != 0)
	  error (1, errno, "nvlist_lookup_string (\"path\")");

	struct stat st;
	if (stat (device, &st) == 0)
	  {
	    device = xstrdup (device);
	    break;
	  }

	device = NULL;
      }

    zpool_close (zpool);
  }

  free (poolname);
  if (poolfs)
    free (poolfs);

  return device;
}
/*
 * Attach new_disk (fully described by nvroot) to old_disk.
 * If 'replacing' is specified, the new disk will replace the old one.
 */
int
zpool_vdev_attach(zpool_handle_t *zhp,
    const char *old_disk, const char *new_disk, nvlist_t *nvroot, int replacing)
{
	zfs_cmd_t zc = { 0 };
	char msg[1024];
	int ret;
	nvlist_t *tgt;
	boolean_t avail_spare;
	uint64_t val;
	char *path;
	nvlist_t **child;
	uint_t children;
	nvlist_t *config_root;
	libzfs_handle_t *hdl = zhp->zpool_hdl;

	if (replacing)
		(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
		    "cannot replace %s with %s"), old_disk, new_disk);
	else
		(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
		    "cannot attach %s to %s"), new_disk, old_disk);

	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
	if ((tgt = zpool_find_vdev(zhp, old_disk, &avail_spare)) == 0)
		return (zfs_error(hdl, EZFS_NODEVICE, msg));

	if (avail_spare)
		return (zfs_error(hdl, EZFS_ISSPARE, msg));

	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
	zc.zc_cookie = replacing;

	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
	    &child, &children) != 0 || children != 1) {
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "new device must be a single disk"));
		return (zfs_error(hdl, EZFS_INVALCONFIG, msg));
	}

	verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
	    ZPOOL_CONFIG_VDEV_TREE, &config_root) == 0);

	/*
	 * If the target is a hot spare that has been swapped in, we can only
	 * replace it with another hot spare.
	 */
	if (replacing &&
	    nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_IS_SPARE, &val) == 0 &&
	    nvlist_lookup_string(child[0], ZPOOL_CONFIG_PATH, &path) == 0 &&
	    (zpool_find_vdev(zhp, path, &avail_spare) == NULL ||
	    !avail_spare) && is_replacing_spare(config_root, tgt, 1)) {
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "can only be replaced by another hot spare"));
		return (zfs_error(hdl, EZFS_BADTARGET, msg));
	}

	/*
	 * If we are attempting to replace a spare, it cannot be applied to an
	 * already spared device.
	 */
	if (replacing &&
	    nvlist_lookup_string(child[0], ZPOOL_CONFIG_PATH, &path) == 0 &&
	    zpool_find_vdev(zhp, path, &avail_spare) != NULL && avail_spare &&
	    is_replacing_spare(config_root, tgt, 0)) {
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "device has already been replaced with a spare"));
		return (zfs_error(hdl, EZFS_BADTARGET, msg));
	}

	if (zcmd_write_src_nvlist(hdl, &zc, nvroot, NULL) != 0)
		return (-1);

	ret = ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_ATTACH, &zc);

	zcmd_free_nvlists(&zc);

	if (ret == 0)
		return (0);

	switch (errno) {
	case ENOTSUP:
		/*
		 * Can't attach to or replace this type of vdev.
		 */
		if (replacing)
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "cannot replace a replacing device"));
		else
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "can only attach to mirrors and top-level "
			    "disks"));
		(void) zfs_error(hdl, EZFS_BADTARGET, msg);
		break;

	case EINVAL:
		/*
		 * The new device must be a single disk.
		 */
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "new device must be a single disk"));
		(void) zfs_error(hdl, EZFS_INVALCONFIG, msg);
		break;

	case EBUSY:
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "%s is busy"),
		    new_disk);
		(void) zfs_error(hdl, EZFS_BADDEV, msg);
		break;

	case EOVERFLOW:
		/*
		 * The new device is too small.
		 */
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "device is too small"));
		(void) zfs_error(hdl, EZFS_BADDEV, msg);
		break;

	case EDOM:
		/*
		 * The new device has a different alignment requirement.
		 */
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "devices have different sector alignment"));
		(void) zfs_error(hdl, EZFS_BADDEV, msg);
		break;

	case ENAMETOOLONG:
		/*
		 * The resulting top-level vdev spec won't fit in the label.
		 */
		(void) zfs_error(hdl, EZFS_DEVOVERFLOW, msg);
		break;

	default:
		(void) zpool_standard_error(hdl, errno, msg);
	}

	return (-1);
}
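
zpool_vdev_attach() expects nvroot to describe a root vdev with exactly one
disk child.  Below is a hedged sketch of hand-building such an nvlist with
libnvpair; "/dev/sdX" is a placeholder, and a real attach nvlist produced by
the zpool command carries additional members (whole_disk, ashift, and so on)
that are omitted here.

#include <stdio.h>
#include <libnvpair.h>
#include <sys/fs/zfs.h>	/* ZPOOL_CONFIG_*, VDEV_TYPE_* */

int
main(void)
{
	nvlist_t *disk = fnvlist_alloc();
	nvlist_t *root = fnvlist_alloc();

	/* a single bare disk child; "/dev/sdX" is a placeholder path */
	fnvlist_add_string(disk, ZPOOL_CONFIG_TYPE, VDEV_TYPE_DISK);
	fnvlist_add_string(disk, ZPOOL_CONFIG_PATH, "/dev/sdX");

	/* wrap it in a root vdev, the shape zpool_vdev_attach() expects */
	fnvlist_add_string(root, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT);
	fnvlist_add_nvlist_array(root, ZPOOL_CONFIG_CHILDREN, &disk, 1);

	nvlist_print(stdout, root);	/* dump what attach would receive */

	fnvlist_free(disk);
	fnvlist_free(root);
	return (0);
}
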
Example #10
/*ARGSUSED*/
static int
sw_fmri_nvl2str(topo_mod_t *mod, tnode_t *node, topo_version_t version,
    nvlist_t *nvl, nvlist_t **out)
{
	nvlist_t *object, *site = NULL, *anvl = NULL;
	char *file, *func, *token;
	uint8_t scheme_version;
	char *path, *root;
	nvlist_t *fmristr;
	size_t buflen = 0;
	int linevalid = 0;
	char *buf = NULL;
	ssize_t size = 0;
	char linebuf[32];
	int64_t line;
	int pass;
	int err;

	if (version > TOPO_METH_NVL2STR_VERSION)
		return (topo_mod_seterrno(mod, EMOD_VER_NEW));

	if (nvlist_lookup_uint8(nvl, FM_VERSION, &scheme_version) != 0 ||
	    scheme_version > FM_SW_SCHEME_VERSION)
		return (topo_mod_seterrno(mod, EMOD_FMRI_NVL));

	/* Get authority, if present */
	err = nvlist_lookup_nvlist(nvl, FM_FMRI_AUTHORITY, &anvl);
	if (err != 0 && err != ENOENT)
		return (topo_mod_seterrno(mod, EMOD_FMRI_NVL));

	/*
	 * The 'object' nvlist is required. It must include the path,
	 * but the root is optional.
	 */
	if (nvlist_lookup_nvlist(nvl, FM_FMRI_SW_OBJ, &object) != 0 ||
	    !lookup_string(object, FM_FMRI_SW_OBJ_PATH, &path, B_TRUE) ||
	    !lookup_string(object, FM_FMRI_SW_OBJ_ROOT, &root, B_FALSE))
		return (topo_mod_seterrno(mod, EMOD_FMRI_NVL));

	/* The 'site' nvlist is optional */
	file = func = token = NULL;
	linevalid = 0;
	if ((err = nvlist_lookup_nvlist(nvl, FM_FMRI_SW_SITE, &site)) == 0) {
		/*
		 * Prefer 'token' to file/func/line
		 */
		if (lookup_string(site, FM_FMRI_SW_SITE_TOKEN, &token,
		    B_FALSE) <= 0) {
			/*
			 * If no token then try file, func, line - but
			 * func and line are meaningless without file.
			 */
			if (lookup_string(site, FM_FMRI_SW_SITE_FILE,
			    &file, B_FALSE) == 1) {
				(void) lookup_string(site, FM_FMRI_SW_SITE_FUNC,
				    &func, B_FALSE);
				if (nvlist_lookup_int64(site,
				    FM_FMRI_SW_SITE_LINE, &line) == 0)
					linevalid = 1;
			}
		}
	} else if (err != ENOENT) {
		return (topo_mod_seterrno(mod, EMOD_FMRI_NVL));
	}

	/* On the first pass buf is NULL and size and buflen are 0 */
	pass = 1;
again:
	/*
	 * sw://[<authority>]/
 *	[:root=<object.root>]
	 *	:path=<object.path>
	 *	[#<fragment-identifier>]
	 *
	 *	<fragment-identifier> is one of
	 *
	 *		:token=<site.token>
	 *	or
	 *		:file=<site.file>[:func=<site.func>][:line=<site.line>]
	 */

	/* sw:// */
	topo_fmristr_build(&size, buf, buflen, FM_FMRI_SCHEME_SW,
	    NULL, "://");

	/* authority, if any */
	if (anvl != NULL) {
		nvpair_t *apair;
		char *aname, *aval;

		for (apair = nvlist_next_nvpair(anvl, NULL);
		    apair != NULL; apair = nvlist_next_nvpair(anvl, apair)) {
			if (nvpair_type(apair) != DATA_TYPE_STRING ||
			    nvpair_value_string(apair, &aval) != 0)
				continue;
			aname = nvpair_name(apair);
			topo_fmristr_build(&size, buf, buflen, ":", NULL, NULL);
			topo_fmristr_build(&size, buf, buflen, "=",
			    aname, aval);
		}
	}

	/* separating slash */
	topo_fmristr_build(&size, buf, buflen, "/", NULL, NULL);

	/* :root=... */
	if (root) {
		topo_fmristr_build(&size, buf, buflen, root,
		    ":" FM_FMRI_SW_OBJ_ROOT "=", NULL);
	}

	/* :path=... */
	topo_fmristr_build(&size, buf, buflen, path,
	    ":" FM_FMRI_SW_OBJ_PATH "=", NULL);

	if (token) {
		/* #:token=... */
		topo_fmristr_build(&size, buf, buflen, token,
		    "#:" FM_FMRI_SW_SITE_TOKEN "=", NULL);
	} else if (file) {
		/* #:file=... */
		topo_fmristr_build(&size, buf, buflen, file,
		    "#:" FM_FMRI_SW_SITE_FILE "=", NULL);

		/* :func=... */
		if (func) {
			topo_fmristr_build(&size, buf, buflen, func,
			    ":" FM_FMRI_SW_SITE_FUNC "=", NULL);
		}

		/* :line=... */
		if (linevalid) {
			if (pass == 1)
				(void) snprintf(linebuf, sizeof (linebuf),
				    "%lld", line);

			topo_fmristr_build(&size, buf, buflen, linebuf,
			    ":" FM_FMRI_SW_SITE_LINE "=", NULL);
		}
	}

	if (buf == NULL) {
		if ((buf = topo_mod_alloc(mod, size + 1)) == NULL)
			return (topo_mod_seterrno(mod, EMOD_NOMEM));

		buflen = size + 1;
		size = 0;
		pass = 2;
		goto again;
	}

	/*
	 * Construct the nvlist to return as the result.
	 */
	if (topo_mod_nvalloc(mod, &fmristr, NV_UNIQUE_NAME) != 0) {
		topo_mod_strfree(mod, buf);
		return (topo_mod_seterrno(mod, EMOD_NOMEM));
	}

	if (nvlist_add_string(fmristr, "fmri-string", buf) != 0) {
		topo_mod_strfree(mod, buf);
		nvlist_free(fmristr);
		return (topo_mod_seterrno(mod, EMOD_NOMEM));
	}
	topo_mod_strfree(mod, buf);
	*out = fmristr;

	return (0);
}
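
sw_fmri_nvl2str() sizes its output with a first pass in which buf is NULL, so
topo_fmristr_build() only accumulates the required length, then allocates and
formats on a second pass.  The same idiom with plain snprintf(), independent
of libtopo; the scheme and path strings are invented:

#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
	const char *scheme = "sw", *path = "/usr/bin/ls";	/* invented */
	char *buf = NULL;
	size_t buflen = 0;
	int pass;

	for (pass = 1; pass <= 2; pass++) {
		/* with a NULL buffer, snprintf() just reports the length */
		int size = snprintf(buf, buflen, "%s://:path=%s",
		    scheme, path);

		if (size < 0)
			return (1);
		if (pass == 1) {
			buflen = (size_t)size + 1;
			if ((buf = malloc(buflen)) == NULL)
				return (1);
		}
	}

	(void) printf("%s\n", buf);
	free(buf);
	return (0);
}
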
Example #11
/*ARGSUSED*/
static int
sw_fmri_create(topo_mod_t *mod, tnode_t *node, topo_version_t version,
    nvlist_t *in, nvlist_t **out)
{
	nvlist_t *args, *fmri = NULL, *obj = NULL, *site = NULL, *ctxt = NULL;
	topo_mod_errno_t moderr;
	int err = 0;

	char *obj_path, *obj_root;
	nvlist_t *obj_pkg;

	char *site_token, *site_module, *site_file, *site_func;
	int64_t site_line;

	char *ctxt_origin, *ctxt_execname, *ctxt_zone;
	int64_t ctxt_pid, ctxt_ctid;
	char **ctxt_stack;
	uint_t ctxt_stackdepth;


	if (version > TOPO_METH_FMRI_VERSION)
		return (topo_mod_seterrno(mod, EMOD_VER_NEW));

	if (nvlist_lookup_nvlist(in, TOPO_METH_FMRI_ARG_NVL, &args) != 0)
		return (topo_mod_seterrno(mod, EMOD_METHOD_INVAL));

	if (nvlist_lookup_string(args, "obj_path", &obj_path) != 0)
		return (topo_mod_seterrno(mod, EMOD_NVL_INVAL));
	err |= sw_get_optl_string(args, "obj_root", &obj_root);
	err |= sw_get_optl_nvlist(args, "obj-pkg", &obj_pkg);

	err |= sw_get_optl_string(args, "site_token", &site_token);
	err |= sw_get_optl_string(args, "site_module", &site_module);
	err |= sw_get_optl_string(args, "site_file", &site_file);
	err |= sw_get_optl_string(args, "site_func", &site_func);
	err |= sw_get_optl_int64(args, "site_line", &site_line);

	err |= sw_get_optl_string(args, "ctxt_origin", &ctxt_origin);
	err |= sw_get_optl_string(args, "ctxt_execname", &ctxt_execname);
	err |= sw_get_optl_string(args, "ctxt_zone", &ctxt_zone);
	err |= sw_get_optl_int64(args, "ctxt_pid", &ctxt_pid);
	err |= sw_get_optl_int64(args, "ctxt_ctid", &ctxt_ctid);

	if (nvlist_lookup_string_array(args, "stack", &ctxt_stack,
	    &ctxt_stackdepth) != 0) {
		if (errno == ENOENT)
			ctxt_stack = NULL;
		else
			err++;
	}

	if (err)
		(void) topo_mod_seterrno(mod, EMOD_FMRI_NVL);

	if (topo_mod_nvalloc(mod, &fmri, NV_UNIQUE_NAME) != 0 ||
	    topo_mod_nvalloc(mod, &obj, NV_UNIQUE_NAME) != 0) {
		moderr = EMOD_NOMEM;
		goto out;
	}

	/*
	 * Add standard FMRI members 'version' and 'scheme'.
	 */
	err |= nvlist_add_uint8(fmri, FM_VERSION, FM_SW_SCHEME_VERSION);
	err |= nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_SW);

	/*
	 * Build up the 'object' nvlist.
	 */
	err |= nvlist_add_string(obj, FM_FMRI_SW_OBJ_PATH, obj_path);
	err |= sw_add_optl_string(obj, FM_FMRI_SW_OBJ_ROOT, obj_root);
	if (obj_pkg)
		err |= nvlist_add_nvlist(obj, FM_FMRI_SW_OBJ_PKG, obj_pkg);

	/*
	 * Add 'object' to the fmri.
	 */
	if (err == 0)
		err |= nvlist_add_nvlist(fmri, FM_FMRI_SW_OBJ, obj);

	if (err) {
		moderr = EMOD_NOMEM;
		goto out;
	}

	/*
	 * Do we have anything for a 'site' nvlist?
	 */
	if (site_token == NULL && site_module == NULL && site_file == NULL &&
	    site_func == NULL && site_line == -1)
		goto context;

	/*
	 * Allocate and build 'site' nvlist.
	 */
	if (topo_mod_nvalloc(mod, &site, NV_UNIQUE_NAME) != 0) {
		moderr = EMOD_NOMEM;
		goto out;
	}

	err |= sw_add_optl_string(site, FM_FMRI_SW_SITE_TOKEN, site_token);
	err |= sw_add_optl_string(site, FM_FMRI_SW_SITE_MODULE, site_module);
	err |= sw_add_optl_string(site, FM_FMRI_SW_SITE_FILE, site_file);
	err |= sw_add_optl_string(site, FM_FMRI_SW_SITE_FUNC, site_func);
	if ((site_token || site_module || site_file || site_func) &&
	    site_line != -1)
		err |= nvlist_add_int64(site, FM_FMRI_SW_SITE_LINE, site_line);

	/*
	 * Add 'site' to the fmri.
	 */
	if (err == 0)
		err |= nvlist_add_nvlist(fmri, FM_FMRI_SW_SITE, site);

	if (err) {
		moderr = EMOD_NOMEM;
		goto out;
	}

context:
	/*
	 * Do we have anything for a 'context' nvlist?
	 */
	if (ctxt_origin == NULL && ctxt_execname == NULL &&
	    ctxt_zone == NULL && ctxt_pid == -1 && ctxt_ctid == -1 &&
	    ctxt_stack == NULL)
		goto out;

	/*
	 * Allocate and build 'context' nvlist.
	 */
	if (topo_mod_nvalloc(mod, &ctxt, NV_UNIQUE_NAME) != 0) {
		moderr = EMOD_NOMEM;
		goto out;
	}

	err |= sw_add_optl_string(ctxt, FM_FMRI_SW_CTXT_ORIGIN, ctxt_origin);
	err |= sw_add_optl_string(ctxt, FM_FMRI_SW_CTXT_EXECNAME,
	    ctxt_execname);
	err |= sw_add_optl_string(ctxt, FM_FMRI_SW_CTXT_ZONE, ctxt_zone);
	if (ctxt_pid != -1)
		err |= nvlist_add_int64(ctxt, FM_FMRI_SW_CTXT_PID, ctxt_pid);
	if (ctxt_ctid != -1)
		err |= nvlist_add_int64(ctxt, FM_FMRI_SW_CTXT_CTID, ctxt_ctid);
	if (ctxt_stack != NULL)
		err |= nvlist_add_string_array(ctxt, FM_FMRI_SW_CTXT_STACK,
		    ctxt_stack, ctxt_stackdepth);

	/*
	 * Add 'context' to the fmri.
	 */
	if (err == 0)
		err |= nvlist_add_nvlist(fmri, FM_FMRI_SW_CTXT, ctxt);

	moderr = err ? EMOD_NOMEM : 0;
out:
	if (moderr == 0)
		*out = fmri;

	if (moderr != 0 && fmri)
		nvlist_free(fmri);

	if (obj)
		nvlist_free(obj);

	if (site)
		nvlist_free(site);

	if (ctxt)
		nvlist_free(ctxt);

	return (moderr == 0 ? 0 : topo_mod_seterrno(mod, moderr));
}
Example #12
/*
 * Decodes share information in an nvlist format into a smb_kshare_t
 * structure.
 *
 * This is a temporary function and will be replaced by functions
 * provided by libsharev2 code after it's available.
 */
static smb_kshare_t *
smb_kshare_decode(nvlist_t *share)
{
	smb_kshare_t tmp;
	smb_kshare_t *shr;
	nvlist_t *smb;
	char *csc_name = NULL;
	int rc;

	ASSERT(share);

	bzero(&tmp, sizeof (smb_kshare_t));

	rc = nvlist_lookup_string(share, "name", &tmp.shr_name);
	rc |= nvlist_lookup_string(share, "path", &tmp.shr_path);
	(void) nvlist_lookup_string(share, "desc", &tmp.shr_cmnt);

	ASSERT(tmp.shr_name && tmp.shr_path);

	rc |= nvlist_lookup_nvlist(share, "smb", &smb);
	if (rc != 0) {
		cmn_err(CE_WARN, "kshare: failed looking up SMB properties"
		    " (%d)", rc);
		return (NULL);
	}

	rc = nvlist_lookup_uint32(smb, "type", &tmp.shr_type);
	if (rc != 0) {
		cmn_err(CE_WARN, "kshare[%s]: failed getting the share type"
		    " (%d)", tmp.shr_name, rc);
		return (NULL);
	}

	(void) nvlist_lookup_string(smb, SHOPT_AD_CONTAINER,
	    &tmp.shr_container);
	(void) nvlist_lookup_string(smb, SHOPT_NONE, &tmp.shr_access_none);
	(void) nvlist_lookup_string(smb, SHOPT_RO, &tmp.shr_access_ro);
	(void) nvlist_lookup_string(smb, SHOPT_RW, &tmp.shr_access_rw);

	tmp.shr_flags |= smb_kshare_decode_bool(smb, SHOPT_ABE, SMB_SHRF_ABE);
	tmp.shr_flags |= smb_kshare_decode_bool(smb, SHOPT_CATIA,
	    SMB_SHRF_CATIA);
	tmp.shr_flags |= smb_kshare_decode_bool(smb, SHOPT_GUEST,
	    SMB_SHRF_GUEST_OK);
	tmp.shr_flags |= smb_kshare_decode_bool(smb, SHOPT_DFSROOT,
	    SMB_SHRF_DFSROOT);
	tmp.shr_flags |= smb_kshare_decode_bool(smb, "Autohome",
	    SMB_SHRF_AUTOHOME);

	if ((tmp.shr_flags & SMB_SHRF_AUTOHOME) == SMB_SHRF_AUTOHOME) {
		rc = nvlist_lookup_uint32(smb, "uid", &tmp.shr_uid);
		rc |= nvlist_lookup_uint32(smb, "gid", &tmp.shr_gid);
		if (rc != 0) {
			cmn_err(CE_WARN, "kshare: failed looking up uid/gid"
			    " (%d)", rc);
			return (NULL);
		}
	}

	(void) nvlist_lookup_string(smb, SHOPT_CSC, &csc_name);
	smb_kshare_csc_flags(&tmp, csc_name);

	shr = kmem_cache_alloc(smb_kshare_cache_share, KM_SLEEP);
	bzero(shr, sizeof (smb_kshare_t));

	shr->shr_magic = SMB_SHARE_MAGIC;
	shr->shr_refcnt = 1;

	shr->shr_name = smb_mem_strdup(tmp.shr_name);
	shr->shr_path = smb_mem_strdup(tmp.shr_path);
	if (tmp.shr_cmnt)
		shr->shr_cmnt = smb_mem_strdup(tmp.shr_cmnt);
	if (tmp.shr_container)
		shr->shr_container = smb_mem_strdup(tmp.shr_container);
	if (tmp.shr_access_none)
		shr->shr_access_none = smb_mem_strdup(tmp.shr_access_none);
	if (tmp.shr_access_ro)
		shr->shr_access_ro = smb_mem_strdup(tmp.shr_access_ro);
	if (tmp.shr_access_rw)
		shr->shr_access_rw = smb_mem_strdup(tmp.shr_access_rw);

	shr->shr_oemname = smb_kshare_oemname(shr->shr_name);
	shr->shr_flags = tmp.shr_flags | smb_kshare_is_admin(shr->shr_name);
	shr->shr_type = tmp.shr_type | smb_kshare_is_special(shr->shr_name);

	shr->shr_uid = tmp.shr_uid;
	shr->shr_gid = tmp.shr_gid;

	if ((shr->shr_flags & SMB_SHRF_AUTOHOME) == SMB_SHRF_AUTOHOME)
		shr->shr_autocnt = 1;

	return (shr);
}
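
smb_kshare_decode() expects a share nvlist with top-level "name" and "path"
strings plus a nested "smb" nvlist carrying at least a uint32 "type".  A
userland sketch of building and re-reading such an nvlist; the share name,
path and type value are placeholders, not a real share definition.

#include <stdio.h>
#include <libnvpair.h>

int
main(void)
{
	nvlist_t *share = fnvlist_alloc();
	nvlist_t *smb = fnvlist_alloc();
	nvlist_t *got;
	char *name;
	uint32_t type;

	/* placeholder share definition */
	fnvlist_add_string(share, "name", "docs");
	fnvlist_add_string(share, "path", "/export/docs");
	fnvlist_add_uint32(smb, "type", 0);
	fnvlist_add_nvlist(share, "smb", smb);

	/* the same lookups smb_kshare_decode() performs */
	if (nvlist_lookup_string(share, "name", &name) == 0 &&
	    nvlist_lookup_nvlist(share, "smb", &got) == 0 &&
	    nvlist_lookup_uint32(got, "type", &type) == 0)
		(void) printf("share %s, type %u\n", name, type);

	fnvlist_free(smb);
	fnvlist_free(share);
	return (0);
}
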
Example #13
/*
 * Synchronize pool configuration to disk.  This must be called with the
 * namespace lock held. Synchronizing the pool cache is typically done after
 * the configuration has been synced to the MOS. This exposes a window where
 * the MOS config will have been updated but the cache file has not. If
 * the system were to crash at that instant then the cached config may not
 * contain the correct information to open the pool and an explicit import
 * would be required.
 */
void
spa_config_sync(spa_t *target, boolean_t removing, boolean_t postsysevent)
{
	spa_config_dirent_t *dp, *tdp;
	nvlist_t *nvl;
	boolean_t ccw_failure;
	int error;

	ASSERT(MUTEX_HELD(&spa_namespace_lock));

	if (rootdir == NULL || !(spa_mode_global & FWRITE))
		return;

	/*
	 * Iterate over all cachefiles for the pool, past or present.  When the
	 * cachefile is changed, the new one is pushed onto this list, allowing
	 * us to update previous cachefiles that no longer contain this pool.
	 */
	ccw_failure = B_FALSE;
	for (dp = list_head(&target->spa_config_list); dp != NULL;
	    dp = list_next(&target->spa_config_list, dp)) {
		spa_t *spa = NULL;
		if (dp->scd_path == NULL)
			continue;

		/*
		 * Iterate over all pools, adding any matching pools to 'nvl'.
		 */
		nvl = NULL;
		while ((spa = spa_next(spa)) != NULL) {
			nvlist_t *nvroot = NULL;
			/*
			 * Skip over our own pool if we're about to remove
			 * ourselves from the spa namespace or any pool that
			 * is readonly. Since we cannot guarantee that a
			 * readonly pool would successfully import upon reboot,
			 * we don't allow them to be written to the cache file.
			 */
			if ((spa == target && removing) ||
			    (spa_state(spa) == POOL_STATE_ACTIVE &&
			    !spa_writeable(spa)))
				continue;

			mutex_enter(&spa->spa_props_lock);
			tdp = list_head(&spa->spa_config_list);
			if (spa->spa_config == NULL ||
			    tdp->scd_path == NULL ||
			    strcmp(tdp->scd_path, dp->scd_path) != 0) {
				mutex_exit(&spa->spa_props_lock);
				continue;
			}

			if (nvl == NULL)
				VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME,
				    KM_SLEEP) == 0);

			VERIFY(nvlist_add_nvlist(nvl, spa->spa_name,
			    spa->spa_config) == 0);
			mutex_exit(&spa->spa_props_lock);

			if (nvlist_lookup_nvlist(nvl, spa->spa_name, &nvroot) == 0)
				spa_config_clean(nvroot);
		}

		error = spa_config_write(dp, nvl);
		if (error != 0)
			ccw_failure = B_TRUE;
		nvlist_free(nvl);
	}

	if (ccw_failure) {
		/*
		 * Keep trying so that configuration data is
		 * written if/when any temporary filesystem
		 * resource issues are resolved.
		 */
		if (target->spa_ccw_fail_time == 0) {
			zfs_ereport_post(FM_EREPORT_ZFS_CONFIG_CACHE_WRITE,
			    target, NULL, NULL, 0, 0);
		}
		target->spa_ccw_fail_time = gethrtime();
		spa_async_request(target, SPA_ASYNC_CONFIG_UPDATE);
	} else {
		/*
		 * Do not rate limit future attempts to update
		 * the config cache.
		 */
		target->spa_ccw_fail_time = 0;
	}

	/*
	 * Remove any config entries older than the current one.
	 */
	dp = list_head(&target->spa_config_list);
	while ((tdp = list_next(&target->spa_config_list, dp)) != NULL) {
		list_remove(&target->spa_config_list, tdp);
		if (tdp->scd_path != NULL)
			spa_strfree(tdp->scd_path);
		kmem_free(tdp, sizeof (spa_config_dirent_t));
	}

	spa_config_generation++;

	if (postsysevent)
		spa_event_notify(target, NULL, ESC_ZFS_CONFIG_SYNC);
}
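
The cache file written by spa_config_write() is a packed nvlist keyed by pool
name, with each value being that pool's config.  A userland sketch of
assembling and packing such a payload; the pool name and config contents are
invented, and the encoding and kernel allocation flags are left out.

#include <stdio.h>
#include <libnvpair.h>

int
main(void)
{
	nvlist_t *cache = fnvlist_alloc();
	nvlist_t *config = fnvlist_alloc();
	char *packed;
	size_t size;

	/* a toy per-pool config; real ones come from spa->spa_config */
	fnvlist_add_uint64(config, "pool_guid", 0x1234ULL);
	fnvlist_add_nvlist(cache, "tank", config);	/* keyed by pool name */

	/* spa_config_write() packs an nvlist like this into the cachefile */
	packed = fnvlist_pack(cache, &size);
	(void) printf("cachefile payload: %zu bytes\n", size);

	fnvlist_pack_free(packed, size);
	fnvlist_free(config);
	fnvlist_free(cache);
	return (0);
}
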
Example #14
/*ARGSUSED*/
int
cma_page_retire(fmd_hdl_t *hdl, nvlist_t *nvl, nvlist_t *asru,
                const char *uuid, boolean_t repair)
{
    cma_page_t *page;
    uint64_t pageaddr;
    const char *action = repair ? "unretire" : "retire";
    int rc;
    nvlist_t *rsrc = NULL, *asrucp = NULL, *hcsp;

    (void) nvlist_lookup_nvlist(nvl, FM_FAULT_RESOURCE, &rsrc);

    if (nvlist_dup(asru, &asrucp, 0) != 0) {
        fmd_hdl_debug(hdl, "page retire nvlist dup failed\n");
        return (CMA_RA_FAILURE);
    }

    /* It should already be expanded, but we'll do it again anyway */
    if (fmd_nvl_fmri_expand(hdl, asrucp) < 0) {
        fmd_hdl_debug(hdl, "failed to expand page asru\n");
        cma_stats.bad_flts.fmds_value.ui64++;
        nvlist_free(asrucp);
        return (CMA_RA_FAILURE);
    }

    if (!repair && !fmd_nvl_fmri_present(hdl, asrucp)) {
        fmd_hdl_debug(hdl, "page retire overtaken by events\n");
        cma_stats.page_nonent.fmds_value.ui64++;
        nvlist_free(asrucp);
        return (CMA_RA_SUCCESS);
    }

    /* Figure out physaddr from resource or asru */
    if (rsrc == NULL ||
            nvlist_lookup_nvlist(rsrc, FM_FMRI_HC_SPECIFIC, &hcsp) != 0 ||
            (nvlist_lookup_uint64(hcsp, "asru-" FM_FMRI_HC_SPECIFIC_PHYSADDR,
                                  &pageaddr) != 0 && nvlist_lookup_uint64(hcsp,
                                          FM_FMRI_HC_SPECIFIC_PHYSADDR, &pageaddr) != 0)) {
        if (nvlist_lookup_uint64(asrucp, FM_FMRI_MEM_PHYSADDR,
                                 &pageaddr) != 0) {
            fmd_hdl_debug(hdl, "mem fault missing 'physaddr'\n");
            cma_stats.bad_flts.fmds_value.ui64++;
            nvlist_free(asrucp);
            return (CMA_RA_FAILURE);
        }
    }

    if (repair) {
        if (!cma.cma_page_dounretire) {
            fmd_hdl_debug(hdl, "suppressed unretire of page %llx\n",
                          (u_longlong_t)pageaddr);
            cma_stats.page_supp.fmds_value.ui64++;
            nvlist_free(asrucp);
            return (CMA_RA_SUCCESS);
        }
        /* If unretire via topo fails, we fall back to legacy way */
        if (rsrc == NULL || (rc = fmd_nvl_fmri_unretire(hdl, rsrc)) < 0)
            rc = cma_fmri_page_unretire(hdl, asrucp);
    } else {
        if (!cma.cma_page_doretire) {
            fmd_hdl_debug(hdl, "suppressed retire of page %llx\n",
                          (u_longlong_t)pageaddr);
            cma_stats.page_supp.fmds_value.ui64++;
            nvlist_free(asrucp);
            return (CMA_RA_FAILURE);
        }
        /* If retire via topo fails, we fall back to legacy way */
        if (rsrc == NULL || (rc = fmd_nvl_fmri_retire(hdl, rsrc)) < 0)
            rc = cma_fmri_page_retire(hdl, asrucp);
    }

    if (rc == FMD_AGENT_RETIRE_DONE) {
        fmd_hdl_debug(hdl, "%sd page 0x%llx\n",
                      action, (u_longlong_t)pageaddr);
        if (repair)
            cma_stats.page_repairs.fmds_value.ui64++;
        else
            cma_stats.page_flts.fmds_value.ui64++;
        nvlist_free(asrucp);
        return (CMA_RA_SUCCESS);
    } else if (repair || rc != FMD_AGENT_RETIRE_ASYNC) {
        fmd_hdl_debug(hdl, "%s of page 0x%llx failed, will not "
                      "retry: %s\n", action, (u_longlong_t)pageaddr,
                      strerror(errno));

        cma_stats.page_fails.fmds_value.ui64++;
        nvlist_free(asrucp);
        return (CMA_RA_FAILURE);
    }

    /*
     * The page didn't immediately retire.  We'll need to periodically
     * check to see if it has been retired.
     */
    fmd_hdl_debug(hdl, "page didn't retire - sleeping\n");

    page = fmd_hdl_zalloc(hdl, sizeof (cma_page_t), FMD_SLEEP);
    page->pg_addr = pageaddr;
    if (rsrc != NULL)
        (void) nvlist_dup(rsrc, &page->pg_rsrc, 0);
    page->pg_asru = asrucp;
    if (uuid != NULL)
        page->pg_uuid = fmd_hdl_strdup(hdl, uuid, FMD_SLEEP);

    page->pg_next = cma.cma_pages;
    cma.cma_pages = page;

    if (cma.cma_page_timerid != 0)
        fmd_timer_remove(hdl, cma.cma_page_timerid);

    cma.cma_page_curdelay = cma.cma_page_mindelay;

    cma.cma_page_timerid =
        fmd_timer_install(hdl, NULL, NULL, cma.cma_page_curdelay);

    /* Don't free asrucp here.  This FMRI will be needed for retry. */
    return (CMA_RA_FAILURE);
}
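
The physical address above is recovered from the fault resource's hc-specific
nvlist when possible, and from the ASRU otherwise.  A stand-alone sketch of
that fallback lookup with libnvpair; the member names are written as literals
and the nvlists are hand-built stand-ins, whereas the real agent uses the
FM_FMRI_* constants and FMRIs delivered by fmd.

#include <stdio.h>
#include <libnvpair.h>

/*
 * Mimic the physaddr extraction in cma_page_retire(): prefer the resource's
 * hc-specific member, then fall back to the ASRU.
 */
static int
get_pageaddr(nvlist_t *rsrc, nvlist_t *asru, uint64_t *pageaddr)
{
	nvlist_t *hcsp;

	if (rsrc != NULL &&
	    nvlist_lookup_nvlist(rsrc, "hc-specific", &hcsp) == 0 &&
	    nvlist_lookup_uint64(hcsp, "physaddr", pageaddr) == 0)
		return (0);

	return (nvlist_lookup_uint64(asru, "physaddr", pageaddr));
}

int
main(void)
{
	nvlist_t *asru = fnvlist_alloc();	/* hand-built stand-in */
	uint64_t addr;

	fnvlist_add_uint64(asru, "physaddr", 0x7f000ULL);

	if (get_pageaddr(NULL, asru, &addr) == 0)
		(void) printf("page 0x%llx\n", (unsigned long long)addr);

	fnvlist_free(asru);
	return (0);
}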