Exemple #1
0
/*
 * Enable the processor set plugin.
 *
 * The caller must hold pool_lock and be running in the global zone; both
 * conditions are asserted.  Returns EEXIST when cp_numparts is greater
 * than one, otherwise 0 once the default pset has been made explicitly
 * visible and its property list has been filled in.
 */
int
pool_pset_enable(void)
{
	nvlist_t *props;
	int error;

	ASSERT(pool_lock_held());
	ASSERT(INGLOBALZONE(curproc));

	mutex_enter(&cpu_lock);

	/*
	 * Pools cannot be enabled while cpu partitions already exist.
	 */
	if (cp_numparts > 1) {
		mutex_exit(&cpu_lock);
		return (EEXIST);
	}

	/*
	 * Everything tagged with the special ALL_ZONES token has to become
	 * explicitly visible to all zones: individual zones are added to
	 * the visibility list first, then the ALL_ZONES token is removed.
	 * Only the default pset (PS_NONE) can be active while pools are
	 * being enabled, so it is the only one handled here.
	 *
	 * pool_pset_enabled() must start returning B_TRUE before any of the
	 * visibility update functions are called, hence the assignment
	 * comes first.
	 */
	global_zone->zone_psetid = PS_NONE;

	/*
	 * zone_pset_set() won't modify the global zone, so it is handled
	 * explicitly.
	 */
	pool_pset_visibility_add(PS_NONE, global_zone);

	/*
	 * Passing NULL designates the ALL_ZONES token.
	 */
	pool_pset_visibility_remove(PS_NONE, NULL);

	error = zone_walk(pool_pset_zone_pset_set, (void *)PS_NONE);
	ASSERT(error == 0);

	/*
	 * Dropping cpu_lock is safe at this point: pool_lock is still held,
	 * so no new cpu partitions can be created in the meantime.
	 */
	mutex_exit(&cpu_lock);

	/*
	 * Create and populate the property list of the default pset.
	 */
	(void) nvlist_alloc(&pool_pset_default->pset_props,
	    NV_UNIQUE_NAME, KM_SLEEP);
	props = pool_pset_default->pset_props;

	(void) nvlist_add_string(props, "pset.name", "pset_default");
	(void) nvlist_add_string(props, "pset.comment", "");
	(void) nvlist_add_int64(props, "pset.sys_id", PS_NONE);
	(void) nvlist_add_string(props, "pset.units", "population");
	(void) nvlist_add_byte(props, "pset.default", 1);
	(void) nvlist_add_uint64(props, "pset.max", 65536);
	(void) nvlist_add_uint64(props, "pset.min", 1);

	/* Stamp both modification times with the same value. */
	pool_cpu_mod = gethrtime();
	pool_pset_mod = pool_cpu_mod;

	return (0);
}
Exemple #2
0
/*
 * Post the PICL_FRU_ADDED/PICL_FRU_REMOVED event.
 *
 * ename	event name; a private copy is made with strdup()
 * parenth	parent node handle, added to the event only when non-zero
 * fruh		FRU node handle, added to the event only when non-zero
 *
 * When ptree_post_event() succeeds, the duplicated name and the nvlist are
 * not released here (presumably frudr_completion_handler owns them from
 * then on -- confirm against that handler).  On any failure both are
 * released before returning.
 */
static void
post_frudr_event(char *ename, picl_nodehdl_t parenth, picl_nodehdl_t fruh)
{
	nvlist_t	*nvl;
	char		*ev_name;

	ev_name = strdup(ename);
	if (ev_name == NULL)
		return;

	/* The last argument is an int allocation flag, not a pointer. */
	if (nvlist_alloc(&nvl, NV_UNIQUE_NAME_TYPE, 0) != 0) {
		free(ev_name);
		return;
	}

	if (parenth != 0L &&
	    nvlist_add_uint64(nvl, PICLEVENTARG_PARENTHANDLE, parenth) != 0)
		goto fail;

	if (fruh != 0L &&
	    nvlist_add_uint64(nvl, PICLEVENTARG_FRUHANDLE, fruh) != 0)
		goto fail;

	/*
	 * NOTE(review): the size passed is that of the nvlist pointer, as
	 * in the original code -- confirm this is what the event framework
	 * expects.
	 */
	if (ptree_post_event(ev_name, nvl, sizeof (nvl),
	    frudr_completion_handler) != 0)
		goto fail;

	return;

fail:
	free(ev_name);
	nvlist_free(nvl);
}
/**
 * Create the vdev leaf for the given path.
 * The function assumes that the path is a block device or a regular file.
 * Log devices and hot spares are not supported: the "is log" attribute is
 * always stored as 0.
 * @param psz_path: path to the device to use
 * @return the new vdev (to be released by the caller with nvlist_free())
 *         or NULL in case of error (stat failure, unsupported file type,
 *         or failure to build the nvlist).
 */
nvlist_t *lzwu_make_leaf_vdev(const char *psz_path)
{
        struct stat64 statbuf;
        nvlist_t *p_vdev;
        const char *psz_type;
        int b_is_disk;

        if(stat64(psz_path, &statbuf) != 0)
                return NULL;

        if(S_ISBLK(statbuf.st_mode))
        {
                psz_type = VDEV_TYPE_DISK;
                b_is_disk = 1;
        }
        else if(S_ISREG(statbuf.st_mode))
        {
                psz_type = VDEV_TYPE_FILE;
                b_is_disk = 0;
        }
        else
                return NULL;

        if(nvlist_alloc(&p_vdev, NV_UNIQUE_NAME, 0) != 0)
                return NULL;

        /* ZPOOL_CONFIG_IS_LOG is a numeric flag, so store it as a uint64. */
        if(nvlist_add_string(p_vdev, ZPOOL_CONFIG_PATH, psz_path) != 0 ||
           nvlist_add_string(p_vdev, ZPOOL_CONFIG_TYPE, psz_type) != 0 ||
           nvlist_add_uint64(p_vdev, ZPOOL_CONFIG_IS_LOG, 0) != 0 ||
           (b_is_disk &&
            nvlist_add_uint64(p_vdev, ZPOOL_CONFIG_WHOLE_DISK, 0) != 0))
        {
                /* Never hand a partially built vdev back to the caller. */
                nvlist_free(p_vdev);
                return NULL;
        }

        return p_vdev;
}
Exemple #4
0
/*
 * Walk the parameters of a self-test log page and record the relevant
 * result in sip->si_dsp->ds_testfail, a freshly allocated nvlist.
 *
 * The result logged is either parameter code 1 or the first parameter
 * whose self-test has completed; when a completed test with a result
 * other than SELFTEST_OK is found, DS_FAULT_TESTFAIL is set in ds_faults.
 *
 * Returns 0 on success, or the value of scsi_set_errno(sip, EDS_NOMEM)
 * when an nvlist operation fails.
 */
static int
logpage_selftest_analyze(ds_scsi_info_t *sip, scsi_log_parameter_header_t *lphp,
    int log_length)
{
	scsi_selftest_log_param_t *stp;
	nvlist_t *nvl;
	ushort_t param_code;
	int entries = 0;
	int plen = 0;
	int i;

	assert(sip->si_dsp->ds_testfail == NULL);
	if (nvlist_alloc(&sip->si_dsp->ds_testfail, NV_UNIQUE_NAME, 0) != 0)
		return (scsi_set_errno(sip, EDS_NOMEM));
	nvl = sip->si_dsp->ds_testfail;

	for (i = 0; i < log_length; i += plen, entries++) {
		/*
		 * Step over the previous parameter (plen is 0 on the first
		 * pass), then immediately compute the length of the current
		 * one so that the loop increment sees it on every path.
		 */
		lphp = (scsi_log_parameter_header_t *)((char *)lphp + plen);
		plen = lphp->lph_length +
		    sizeof (scsi_log_parameter_header_t);

		param_code = BE_16(lphp->lph_param);
		stp = (scsi_selftest_log_param_t *)lphp;

		/* Ignore parameters that are not self-test results. */
		if (param_code < LOGPAGE_SELFTEST_MIN_PARAM_CODE ||
		    param_code > LOGPAGE_SELFTEST_MAX_PARAM_CODE ||
		    lphp->lph_length < LOGPAGE_SELFTEST_PARAM_LEN)
			continue;

		/*
		 * Only the last result, or the result of the last completed
		 * test, is ever logged.
		 */
		if (param_code != 1 && !SELFTEST_COMPLETE(stp->st_results))
			continue;

		if (nvlist_add_uint8(nvl, FM_EREPORT_PAYLOAD_SCSI_RESULTCODE,
		    stp->st_results) != 0 ||
		    nvlist_add_uint16(nvl, FM_EREPORT_PAYLOAD_SCSI_TIMESTAMP,
		    BE_16(stp->st_timestamp)) != 0 ||
		    nvlist_add_uint8(nvl, FM_EREPORT_PAYLOAD_SCSI_SEGMENT,
		    stp->st_number) != 0 ||
		    nvlist_add_uint64(nvl, FM_EREPORT_PAYLOAD_SCSI_ADDRESS,
		    BE_64(stp->st_lba)) != 0)
			return (scsi_set_errno(sip, EDS_NOMEM));

		if (SELFTEST_COMPLETE(stp->st_results)) {
			/* A completed test ends the scan. */
			if (stp->st_results != SELFTEST_OK)
				sip->si_dsp->ds_faults |= DS_FAULT_TESTFAIL;
			return (0);
		}
	}

	return (0);
}
Exemple #5
0
/*
 * Allocate the pi_enum_fns and pi_meths nvlists and fill them with the
 * addresses of the builtin enumeration functions and methods, keyed by
 * their hc_name.
 *
 * Returns 0 on success and -1 on failure.  On failure both lists are
 * freed and the corresponding pointers are reset to NULL so that no stale
 * pointer is left behind for later code to use or free again.
 */
int
pi_walker_init(topo_mod_t *mod)
{
	int			result;
	pi_enum_functions_t	*fp;
	pi_methods_t		*mp;

	/*
	 * Start from a known state so the failure path never frees a
	 * stale value when one of the allocations does not succeed.
	 */
	pi_enum_fns = NULL;
	pi_meths = NULL;

	result = topo_mod_nvalloc(mod, &pi_enum_fns, NV_UNIQUE_NAME);
	result |= topo_mod_nvalloc(mod, &pi_meths, NV_UNIQUE_NAME);
	if (result != 0)
		goto fail;

	/* Add the builtin functions to the list */
	fp = pi_enum_fns_builtin;
	while (fp != NULL && fp->hc_name != NULL) {
		uint64_t	faddr;

		faddr = (uint64_t)(uintptr_t)*(fp->func);
		result |= nvlist_add_uint64(pi_enum_fns, fp->hc_name, faddr);
		fp++;
	}

	/* Add the builtin methods to the list */
	mp = pi_meths_builtin;
	while (mp != NULL && mp->hc_name != NULL) {
		uint64_t	maddr;

		maddr = (uint64_t)(uintptr_t)mp->meths;
		result |= nvlist_add_uint64(pi_meths, mp->hc_name, maddr);
		mp++;
	}

	if (result != 0)
		goto fail;

	return (0);

fail:
	topo_mod_dprintf(mod, "pi_walker_init failed\n");
	nvlist_free(pi_enum_fns);
	nvlist_free(pi_meths);
	pi_enum_fns = NULL;
	pi_meths = NULL;
	return (-1);
}
/*
 * Build and return a new nvlist describing one DIMM: its number, size
 * (both as a human readable "<n>G"/"<n>M" string and as a raw value),
 * geometry, per-rank data (added by inhm_rank()) and, when present, its
 * identification strings and label.
 */
static nvlist_t *
inhm_dimm(nhm_dimm_t *nhm_dimm, uint32_t node, uint8_t channel, uint32_t dimm)
{
	nvlist_t *nvl;
	char str[65];
	uint8_t len;

	(void) nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP);
	(void) nvlist_add_uint32(nvl, "dimm-number", dimm);

	/* Express the size in gigabytes when it is at least 1G. */
	if (nhm_dimm->dimm_size >= 1024*1024*1024) {
		(void) snprintf(str, sizeof (str), "%dG",
		    (int)(nhm_dimm->dimm_size / (1024*1024*1024)));
	} else {
		(void) snprintf(str, sizeof (str), "%dM",
		    (int)(nhm_dimm->dimm_size / (1024*1024)));
	}
	(void) nvlist_add_string(nvl, "dimm-size", str);
	(void) nvlist_add_uint64(nvl, "size", nhm_dimm->dimm_size);

	/* Geometry. */
	(void) nvlist_add_uint32(nvl, "nbanks", (uint32_t)nhm_dimm->nbanks);
	(void) nvlist_add_uint32(nvl, "ncolumn", (uint32_t)nhm_dimm->ncolumn);
	(void) nvlist_add_uint32(nvl, "nrow", (uint32_t)nhm_dimm->nrow);
	(void) nvlist_add_uint32(nvl, "width", (uint32_t)nhm_dimm->width);
	(void) nvlist_add_uint32(nvl, "ranks", (uint32_t)nhm_dimm->nranks);

	/* Each rank is handed an equal share of the DIMM size. */
	inhm_rank(nvl, nhm_dimm, node, channel, dimm,
	    nhm_dimm->dimm_size / nhm_dimm->nranks);

	/*
	 * Each identification string is copied into the local buffer and
	 * explicitly terminated before being added to the list.
	 */
	if (nhm_dimm->manufacturer && nhm_dimm->manufacturer[0]) {
		len = sizeof (nhm_dimm->manufacturer);
		(void) strncpy(str, nhm_dimm->manufacturer, len);
		str[len] = 0;
		(void) nvlist_add_string(nvl, "manufacturer", str);
	}

	if (nhm_dimm->serial_number && nhm_dimm->serial_number[0]) {
		len = sizeof (nhm_dimm->serial_number);
		(void) strncpy(str, nhm_dimm->serial_number, len);
		str[len] = 0;
		(void) nvlist_add_string(nvl, FM_FMRI_HC_SERIAL_ID, str);
	}

	if (nhm_dimm->part_number && nhm_dimm->part_number[0]) {
		len = sizeof (nhm_dimm->part_number);
		(void) strncpy(str, nhm_dimm->part_number, len);
		str[len] = 0;
		(void) nvlist_add_string(nvl, FM_FMRI_HC_PART, str);
	}

	if (nhm_dimm->revision && nhm_dimm->revision[0]) {
		len = sizeof (nhm_dimm->revision);
		(void) strncpy(str, nhm_dimm->revision, len);
		str[len] = 0;
		(void) nvlist_add_string(nvl, FM_FMRI_HC_REVISION, str);
	}

	/* The label is always added, even when it is empty. */
	len = sizeof (nhm_dimm->label);
	(void) strncpy(str, nhm_dimm->label, len);
	str[len] = 0;
	(void) nvlist_add_string(nvl, FM_FAULT_FRU_LABEL, str);

	return (nvl);
}
Exemple #7
0
/*
 * Expand a cpu-scheme FMRI in place.
 *
 * The topology's expand method is tried first; its result is returned
 * unless it reports ETOPO_METHOD_NOTSUP.  In that case the serial id is
 * filled in locally when it is missing from the FMRI: as a uint64 for
 * CPU_SCHEME_VERSION0 and as a string for CPU_SCHEME_VERSION1 (where it
 * is optional).
 *
 * Returns 0 on success; failures are reported through
 * fmd_fmri_set_errno() or by the serial id helper.
 */
int
fmd_fmri_expand(nvlist_t *nvl)
{
	uint8_t version;
	uint32_t cpuid;
	uint64_t serialid;
	char *serstr, serbuf[21]; /* sizeof (UINT64_MAX) + '\0' */
	int rc;
	/*
	 * Initialised because err is examined unconditionally below and
	 * topo_fmri_expand() may not write it when it succeeds.
	 */
	int err = 0;
	topo_hdl_t *thp;

	if (nvlist_lookup_uint8(nvl, FM_VERSION, &version) != 0 ||
	    nvlist_lookup_uint32(nvl, FM_FMRI_CPU_ID, &cpuid) != 0)
		return (fmd_fmri_set_errno(EINVAL));

	/*
	 * If the cpu-scheme topology exports this method expand(), invoke it.
	 */
	if ((thp = fmd_fmri_topo_hold(TOPO_VERSION)) == NULL)
		return (fmd_fmri_set_errno(EINVAL));

	rc = topo_fmri_expand(thp, nvl, &err);
	fmd_fmri_topo_rele(thp);
	if (err != ETOPO_METHOD_NOTSUP)
		return (rc);

	if (version == CPU_SCHEME_VERSION0) {
		if ((rc = nvlist_lookup_uint64(nvl, FM_FMRI_CPU_SERIAL_ID,
		    &serialid)) != 0) {
			if (rc != ENOENT)
				return (fmd_fmri_set_errno(rc));

			if (cpu_get_serialid_V0(cpuid, &serialid) != 0)
				return (-1); /* errno is set for us */

			if ((rc = nvlist_add_uint64(nvl, FM_FMRI_CPU_SERIAL_ID,
			    serialid)) != 0)
				return (fmd_fmri_set_errno(rc));
		}
	} else if (version == CPU_SCHEME_VERSION1) {
		if ((rc = nvlist_lookup_string(nvl, FM_FMRI_CPU_SERIAL_ID,
		    &serstr)) != 0) {
			if (rc != ENOENT)
				return (fmd_fmri_set_errno(rc));

			if (cpu_get_serialid_V1(cpuid, serbuf,
			    sizeof (serbuf)) != 0)
				return (0); /* Serial number is optional */

			if ((rc = nvlist_add_string(nvl, FM_FMRI_CPU_SERIAL_ID,
			    serbuf)) != 0)
				return (fmd_fmri_set_errno(rc));
		}
	} else {
		return (fmd_fmri_set_errno(EINVAL));
	}

	return (0);
}
Exemple #8
0
/*
 * Solve the given ZFS case with a single suspect named by 'faultname'.
 *
 * A detector FMRI in the ZFS scheme is built from the case data (pool
 * guid, plus vdev guid when it is non-zero) and used as both the ASRU and
 * the resource of the fault.  Any pending removal timer attached to the
 * case is cancelled, and the case is re-serialized when that happens.
 */
static void
zfs_case_solve(fmd_hdl_t *hdl, zfs_case_t *zcp, const char *faultname,
    boolean_t checkunusable)
{
	nvlist_t *fru = NULL;
	nvlist_t *detector;
	nvlist_t *fault;

	fmd_hdl_debug(hdl, "solving fault '%s'", faultname);

	/*
	 * The detector identifies the pool, or the vdev within the pool
	 * when the case concerns a specific vdev.
	 */
	detector = fmd_nvl_alloc(hdl, FMD_SLEEP);
	(void) nvlist_add_uint8(detector, FM_VERSION, ZFS_SCHEME_VERSION0);
	(void) nvlist_add_string(detector, FM_FMRI_SCHEME, FM_FMRI_SCHEME_ZFS);
	(void) nvlist_add_uint64(detector, FM_FMRI_ZFS_POOL,
	    zcp->zc_data.zc_pool_guid);
	if (zcp->zc_data.zc_vdev_guid != 0)
		(void) nvlist_add_uint64(detector, FM_FMRI_ZFS_VDEV,
		    zcp->zc_data.zc_vdev_guid);

	/* Certainty is 100; no FRU is supplied. */
	fault = fmd_nvl_create_fault(hdl, faultname, 100, detector,
	    fru, detector);
	fmd_case_add_suspect(hdl, zcp->zc_case, fault);
	nvlist_free(fru);

	fmd_case_solve(hdl, zcp->zc_case);

	/*
	 * Dropping the timer changes the persistent case data, so the case
	 * has to be written out again.
	 */
	if (zcp->zc_data.zc_has_remove_timer) {
		fmd_timer_remove(hdl, zcp->zc_remove_timer);
		zcp->zc_data.zc_has_remove_timer = 0;
		zfs_case_serialize(hdl, zcp);
	}

	nvlist_free(detector);
}
Exemple #9
0
/*
 * There can be more than one kstat value when we have multi-path drives
 * that are not under mpxio (since there is more than one kstat name for
 * the drive in this case).  So, we may have to merge all of the kstat
 * values to give an accurate set of stats for the drive.
 *
 * Adds 'value' to whatever is already stored under 'attr' in 'stats' and
 * stores the sum back.  The lookup uses the same data type as the store
 * (uint64): an nvlist lookup only matches a pair of the requested type,
 * so looking the value up as an int64 would never find what was stored.
 *
 * NOTE(review): this assumes 'stats' was allocated so that adding an
 * existing name replaces the previous pair (e.g. NV_UNIQUE_NAME) --
 * confirm at the allocation site.
 *
 * Returns the result of nvlist_add_uint64().
 */
static int
update_stat64(nvlist_t *stats, char *attr, uint64_t value)
{
	uint64_t	currval;

	if (nvlist_lookup_uint64(stats, attr, &currval) == 0) {
		value += currval;
	}
	return (nvlist_add_uint64(stats, attr, value));
}
Exemple #10
0
/*
 * Convert a Python dictionary into a newly allocated nvlist.
 *
 * Keys must be strings.  Values may be nested dictionaries (converted
 * recursively), None (stored as a valueless boolean), strings, integers
 * (stored as uint64) or booleans.
 *
 * Returns the new nvlist, which the caller must release with
 * nvlist_free(), or NULL with a Python exception set when 'd' is not a
 * dictionary or contains an unsupported key or value.
 */
static nvlist_t *
dict2nvl(PyObject *d)
{
	nvlist_t *nvl;
	int err;
	PyObject *key, *value;
	/* PyDict_Next() takes a Py_ssize_t cursor, which may be wider than int. */
	Py_ssize_t pos = 0;

	if (!PyDict_Check(d)) {
		PyErr_SetObject(PyExc_ValueError, d);
		return (NULL);
	}

	err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0);
	assert(err == 0);

	while (PyDict_Next(d, &pos, &key, &value)) {
		char *keystr = PyString_AsString(key);
		if (keystr == NULL) {
			PyErr_SetObject(PyExc_KeyError, key);
			nvlist_free(nvl);
			return (NULL);
		}

		if (PyDict_Check(value)) {
			nvlist_t *valnvl = dict2nvl(value);
			if (valnvl == NULL) {
				/*
				 * The nested conversion failed and has
				 * already set the Python exception.
				 */
				nvlist_free(nvl);
				return (NULL);
			}
			err = nvlist_add_nvlist(nvl, keystr, valnvl);
			nvlist_free(valnvl);
		} else if (value == Py_None) {
			err = nvlist_add_boolean(nvl, keystr);
		} else if (PyString_Check(value)) {
			char *valstr = PyString_AsString(value);
			err = nvlist_add_string(nvl, keystr, valstr);
		} else if (PyInt_Check(value)) {
			/*
			 * NOTE(review): Python booleans are a subtype of
			 * int, so they presumably take this branch rather
			 * than the one below -- confirm before relying on
			 * either encoding.
			 */
			uint64_t valint = PyInt_AsUnsignedLongLongMask(value);
			err = nvlist_add_uint64(nvl, keystr, valint);
		} else if (PyBool_Check(value)) {
			boolean_t valbool = value == Py_True ? B_TRUE : B_FALSE;
			err = nvlist_add_boolean_value(nvl, keystr, valbool);
		} else {
			PyErr_SetObject(PyExc_ValueError, value);
			nvlist_free(nvl);
			return (NULL);
		}
		assert(err == 0);
	}

	return (nvl);
}
Exemple #11
0
/*
 * Store the named uint64 in the given nvlist_t.
 *
 * @param       attrs
 *              the nvlist_t to modify
 *
 * @param       which
 *              the string key for this element in the list
 *
 * @param       val
 *              the value to set
 *
 * @return      0
 *              if successful
 *
 * @return      the non-zero error code of nvlist_add_uint64()
 *              otherwise (EINVAL for an invalid argument, ENOMEM when
 *              there is insufficient memory); the failure is also
 *              recorded through volume_set_error()
 */
int
set_uint64(
    nvlist_t *attrs,
    char *which,
    uint64_t val)
{
    int error = nvlist_add_uint64(attrs, which, val);

    if (error == 0) {
        return (0);
    }

    /*
     * NOTE(review): the message names nvlist_add_int64 although the call
     * above is nvlist_add_uint64; the text is kept as is because it is a
     * translation key.
     */
    volume_set_error(
        gettext("nvlist_add_int64(%s) failed: %d\n"), which, error);

    return (error);
}
/*
 * Add the description of virtual rank 'num' to the 'vrank' nvlist.
 *
 * The base and limit addresses are always added (the limit is stored as
 * dimm_base + limit).  For each of the socket, channel and rank levels,
 * the interleave value and its way are added only when the interleave is
 * greater than one.  Every property name carries 'num' as a suffix.
 */
static void
inhm_vrank(nvlist_t *vrank, int num, uint64_t dimm_base, uint64_t limit,
    uint32_t sinterleave, uint32_t cinterleave, uint32_t rinterleave,
    uint32_t sway, uint32_t cway, uint32_t rway)
{
	char name[128];

	/* Address range covered by this rank. */
	(void) snprintf(name, sizeof (name), "dimm-rank-base-%d", num);
	(void) nvlist_add_uint64(vrank, name, dimm_base);

	(void) snprintf(name, sizeof (name), "dimm-rank-limit-%d", num);
	(void) nvlist_add_uint64(vrank, name, dimm_base + limit);

	/* Socket interleave. */
	if (sinterleave > 1) {
		(void) snprintf(name, sizeof (name),
		    "dimm-socket-interleave-%d", num);
		(void) nvlist_add_uint32(vrank, name, sinterleave);

		(void) snprintf(name, sizeof (name),
		    "dimm-socket-interleave-way-%d", num);
		(void) nvlist_add_uint32(vrank, name, sway);
	}

	/* Channel interleave. */
	if (cinterleave > 1) {
		(void) snprintf(name, sizeof (name),
		    "dimm-channel-interleave-%d", num);
		(void) nvlist_add_uint32(vrank, name, cinterleave);

		(void) snprintf(name, sizeof (name),
		    "dimm-channel-interleave-way-%d", num);
		(void) nvlist_add_uint32(vrank, name, cway);
	}

	/* Rank interleave. */
	if (rinterleave > 1) {
		(void) snprintf(name, sizeof (name),
		    "dimm-rank-interleave-%d", num);
		(void) nvlist_add_uint32(vrank, name, rinterleave);

		(void) snprintf(name, sizeof (name),
		    "dimm-rank-interleave-way-%d", num);
		(void) nvlist_add_uint32(vrank, name, rway);
	}
}
Exemple #13
0
/*
 * Seed 'erpt' with the basic members taken from the source ereport 'nvl':
 * the ENA, the "__tod" array and a copy of the detector FMRI.
 *
 * When 'isRC' is set and fab_get_rcpath() yields a path, the device path
 * of the copied detector is replaced with that path.
 *
 * Returns 0 on success, or a non-zero value when one of the lookups in
 * 'nvl' or the duplication of the detector fails.  Failures while adding
 * to 'erpt' are deliberately ignored.
 */
/* ARGSUSED */
int
fab_prep_basic_erpt(fmd_hdl_t *hdl, nvlist_t *nvl, nvlist_t *erpt,
    boolean_t isRC)
{
	nvlist_t	*detector, *new_detector;
	uint64_t	*now;
	uint64_t	ena;
	uint_t		nelem;
	char		rcpath[255];
	int		err;

	/* Grab the tod, ena and detector(FMRI) */
	err = nvlist_lookup_uint64_array(nvl, "__tod", &now, &nelem);
	err |= nvlist_lookup_uint64(nvl, "ena", &ena);
	err |= nvlist_lookup_nvlist(nvl, FM_EREPORT_DETECTOR, &detector);
	if (err != 0)
		return (err);

	/* Work on a private copy of the detector */
	err = nvlist_dup(detector, &new_detector, NV_UNIQUE_NAME);
	if (err != 0)
		return (err);

	/* Copy the tod and ena to erpt */
	(void) nvlist_add_uint64(erpt, FM_EREPORT_ENA, ena);
	(void) nvlist_add_uint64_array(erpt, "__tod", now, nelem);

	/*
	 * Create the correct ROOT FMRI from PCIe leaf fabric ereports.  Used
	 * only by fab_prep_fake_rc_erpt.  See the fab_pciex_fake_rc_erpt_tbl
	 * comments for more information.
	 */
	if (isRC && fab_get_rcpath(hdl, nvl, rcpath)) {
		/* Swap the device path for the PCIe RC path */
		(void) nvlist_remove(new_detector, FM_FMRI_DEV_PATH,
		    DATA_TYPE_STRING);
		(void) nvlist_add_string(new_detector, FM_FMRI_DEV_PATH,
		    rcpath);
	}

	/* Copy the FMRI to erpt; the local copy is no longer needed */
	(void) nvlist_add_nvlist(erpt, FM_EREPORT_DETECTOR, new_detector);
	nvlist_free(new_detector);

	return (0);
}
Exemple #14
0
/*
 * Validate a proposed value against the iSER and/or iSCSI RFC's minimum and
 * maximum values, and set an alternate, if necessary.  Note that the value
 * 'iser_max_value' represents our implementation maximum (typically the max).
 *
 * Returns:
 *   KV_VALUE_ERROR          when 'value' lies outside [min_value, max_value];
 *                           none of the nvlists is touched in that case.
 *   KV_HANDLED_NO_TRANSIT   when 'value' exceeds iser_max_value; our maximum
 *                           is sent back in the response instead.
 *   idm_nvstat_to_kvstat()  of the nvlist status otherwise.
 */
static kv_status_t
iser_handle_numerical(nvpair_t *nvp, uint64_t value, const idm_kv_xlate_t *ikvx,
                      uint64_t min_value, uint64_t max_value, uint64_t iser_max_value,
                      nvlist_t *request_nvl, nvlist_t *response_nvl, nvlist_t *negotiated_nvl)
{
    boolean_t		no_transit = B_FALSE;
    boolean_t		respond;
    int			nvrc;

    /*
     * Validate against standard.  Returning here keeps the error status
     * from being replaced by a translation of an nvlist status that was
     * never set.
     */
    if ((value < min_value) || (value > max_value)) {
        return (KV_VALUE_ERROR);
    }

    if (value > iser_max_value) {
        /*
         * Respond back to initiator with our value, and
         * remember to unset the transit bit via the return value.
         */
        value = iser_max_value;
        no_transit = B_TRUE;
        nvrc = 0;
        respond = B_TRUE;
    } else {
        /* Add this to our negotiated values */
        nvrc = nvlist_add_nvpair(negotiated_nvl, nvp);
        /* Respond if this is not a declarative */
        respond = (ikvx->ik_declarative == B_FALSE);
    }

    /* Response of Simple-value Negotiation */
    if (nvrc == 0 && respond) {
        nvrc = nvlist_add_uint64(response_nvl,
                                 ikvx->ik_key_name, value);
        /* Remove from the request (we've handled it) */
        (void) nvlist_remove_all(request_nvl,
                                 ikvx->ik_key_name);
    }

    if (no_transit) {
        return (KV_HANDLED_NO_TRANSIT);
    }

    return (idm_nvstat_to_kvstat(nvrc));
}
Exemple #15
0
/*
 * Set the label property of node 'tn' from the result of its
 * TOPO_METH_LABEL method.
 *
 * A PCIEX_BUS node whose parent is a PCIEX_ROOT first tries to inherit
 * the label through use_predecessor_label(); when that succeeds nothing
 * else is done.
 *
 * Returns 0 on success (including when the method produced no label) and
 * the value of topo_mod_seterrno() on failure.  The method's output list
 * is released on every path.
 */
/*ARGSUSED*/
static int
label_set(tnode_t *tn, did_t *pd,
    const char *dpnm, const char *tpgrp, const char *tpnm)
{
	topo_mod_t *mp;
	nvlist_t *in;
	nvlist_t *out = NULL;
	char *label;
	int err;

	mp = did_mod(pd);
	/*
	 * If this is a PCIEX_BUS and its parent is a PCIEX_ROOT,
	 * check for a CPUBOARD predecessor.  If found, inherit its
	 * parent's Label.  Otherwise, continue with label set.
	 */
	if ((strcmp(topo_node_name(tn), PCIEX_BUS) == 0) &&
	    (strcmp(topo_node_name(topo_node_parent(tn)), PCIEX_ROOT) == 0)) {

		if (use_predecessor_label(mp, tn, CPUBOARD) == 0)
			return (0);
	}

	/* The method receives the did_t pointer packed into a uint64. */
	if (topo_mod_nvalloc(mp, &in, NV_UNIQUE_NAME) != 0)
		return (topo_mod_seterrno(mp, EMOD_FMRI_NVL));
	if (nvlist_add_uint64(in, TOPO_METH_LABEL_ARG_NVL, (uintptr_t)pd) !=
	    0) {
		nvlist_free(in);
		return (topo_mod_seterrno(mp, EMOD_NOMEM));
	}
	if (topo_method_invoke(tn,
	    TOPO_METH_LABEL, TOPO_METH_LABEL_VERSION, in, &out, &err) != 0) {
		nvlist_free(in);
		return (topo_mod_seterrno(mp, err));
	}
	nvlist_free(in);

	if (out == NULL)
		return (0);

	if (nvlist_lookup_string(out, TOPO_METH_LABEL_RET_STR, &label) == 0 &&
	    topo_prop_set_string(tn, TOPO_PGROUP_PROTOCOL,
	    TOPO_PROP_LABEL, TOPO_PROP_IMMUTABLE, label, &err) != 0) {
		nvlist_free(out);
		return (topo_mod_seterrno(mp, err));
	}

	/* Also reached when the result carries no label string. */
	nvlist_free(out);
	return (0);
}
Exemple #16
0
/*
 * Generate the pool's configuration based on the current in-core state.
 *
 * A NULL 'vd' stands for the root vdev.  When 'vd' is the root vdev a
 * complete config is produced; otherwise the config describes the
 * top-level vdev that contains 'vd' and additionally records the guids of
 * 'vd' and of that top-level vdev.  A 'txg' of -1 selects the pool's
 * current spa_config_txg.
 *
 * The caller must hold the spa config lock as reader (asserted) and is
 * handed the newly allocated nvlist.
 */
nvlist_t *
spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats)
{
	vdev_t *rvd = spa->spa_root_vdev;
	nvlist_t *config;
	nvlist_t *nvroot;

	ASSERT(spa_config_held(spa, RW_READER));

	if (vd == NULL)
		vd = rvd;

	/*
	 * A txg of -1 means "use the current value of spa_config_txg".
	 */
	if (txg == -1ULL)
		txg = spa->spa_config_txg;

	VERIFY(nvlist_alloc(&config, NV_UNIQUE_NAME, KM_SLEEP) == 0);

	/* Pool-wide members. */
	VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_VERSION,
	    spa_version(spa)) == 0);
	VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME,
	    spa_name(spa)) == 0);
	VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE,
	    spa_state(spa)) == 0);
	VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_TXG,
	    txg) == 0);
	VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_GUID,
	    spa_guid(spa)) == 0);

	if (vd != rvd) {
		vdev_t *tvd = vd->vdev_top;

		/* Members that identify the vdev within the pool. */
		VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_TOP_GUID,
		    tvd->vdev_guid) == 0);
		VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_GUID,
		    vd->vdev_guid) == 0);
		if (vd->vdev_isspare)
			VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_IS_SPARE,
			    1ULL) == 0);

		/* The label contains the top-level config. */
		vd = tvd;
	}

	/* The vdev tree is copied into the config; release our own copy. */
	nvroot = vdev_config_generate(spa, vd, getstats, B_FALSE);
	VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0);
	nvlist_free(nvroot);

	return (config);
}
Exemple #17
0
/*
 * Declare the iSER key/value pairs: when the configuration holds a value
 * for ISER_KV_KEY_NAME_MAX_OUTSTANDING_PDU and an outgoing list is
 * supplied, copy that value into the outgoing list.
 *
 * Returns the nvlist status of that copy (0 when nothing was copied)
 * translated by idm_nvstat_to_kvstat().
 */
/* ARGSUSED */
static kv_status_t
iser_declare_key_values(idm_conn_t *ic, nvlist_t *config_nvl,
                        nvlist_t *outgoing_nvl)
{
    uint64_t		max_pdu;
    int			nvrc = 0;
    int			rc;

    rc = nvlist_lookup_uint64(config_nvl,
                              ISER_KV_KEY_NAME_MAX_OUTSTANDING_PDU, &max_pdu);

    /* A missing key is not an error; any other failure is unexpected. */
    if (rc != ENOENT) {
        ASSERT(rc == 0);
        if (outgoing_nvl) {
            nvrc = nvlist_add_uint64(outgoing_nvl,
                                     ISER_KV_KEY_NAME_MAX_OUTSTANDING_PDU, max_pdu);
        }
    }

    return (idm_nvstat_to_kvstat(nvrc));
}
Exemple #18
0
/*
 * Import the pool that contains 'target' (the dataset whose pool we want
 * to open).
 *
 * Initializes the kernel emulation and libzfs, looks the pool's config up
 * with zpool_tryimport() and imports it with feature checks disabled.
 * When 'readonly' is set the pool is imported with the readonly property
 * and with ZFS_IMPORT_SKIP_MMP.  A pool that already exists (EEXIST) is
 * not treated as an error; any other failure is fatal.
 */
static void
zhack_import(char *target, boolean_t readonly)
{
	nvlist_t *config;
	nvlist_t *props;
	int error;

	kernel_init(readonly ? FREAD : (FREAD | FWRITE));
	g_zfs = libzfs_init();
	ASSERT(g_zfs != NULL);

	dmu_objset_register_type(DMU_OST_ZFS, space_delta_cb);

	g_readonly = readonly;
	g_importargs.unique = B_TRUE;
	g_importargs.can_be_active = readonly;
	g_pool = strdup(target);

	error = zpool_tryimport(g_zfs, target, &config, &g_importargs);
	if (error)
		fatal(NULL, FTAG, "cannot import '%s': %s", target,
		    libzfs_error_description(g_zfs));

	props = NULL;
	if (readonly) {
		VERIFY(nvlist_alloc(&props, NV_UNIQUE_NAME, 0) == 0);
		VERIFY(nvlist_add_uint64(props,
		    zpool_prop_to_name(ZPOOL_PROP_READONLY), 1) == 0);
	}

	zfeature_checks_disable = B_TRUE;
	error = spa_import(target, config, props,
	    (readonly ?  ZFS_IMPORT_SKIP_MMP : ZFS_IMPORT_NORMAL));
	zfeature_checks_disable = B_FALSE;

	/*
	 * Neither list is needed once the import attempt is over; release
	 * them here instead of leaking them.  props may still be NULL.
	 */
	nvlist_free(config);
	nvlist_free(props);

	if (error == EEXIST)
		error = 0;

	if (error)
		fatal(NULL, FTAG, "can't import '%s': %s", target,
		    strerror(error));
}
Exemple #19
0
/*
 * zfs_init_fs - Initialize the zfsvfs struct and the file system
 *	incore "master" object.  Verify version compatibility.
 *
 * NOTE(review): only the beginning of this function is visible here; the
 * comments below cover the visible part only.
 */
int
zfs_init_fs(zfsvfs_t *zfsvfs, znode_t **zpp, cred_t *cr)
{
	extern int zfsfstype;

	objset_t	*os = zfsvfs->z_os;
	int		i, error;
	dmu_object_info_t doi;
	uint64_t fsid_guid;
	uint64_t zval;

	/* No znode is handed back until one has been set up. */
	*zpp = NULL;

	/*
	 * XXX - hack to auto-create the pool root filesystem at
	 * the first attempted mount.
	 */
	if (dmu_object_info(os, MASTER_NODE_OBJ, &doi) == ENOENT) {
		dmu_tx_t *tx = dmu_tx_create(os);
		uint64_t zpl_version;
		nvlist_t *zprops;

		/* Reserve the objects the new file system will need. */
		dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, TRUE, NULL); /* master */
		dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, TRUE, NULL); /* del queue */
		dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); /* root node */
		error = dmu_tx_assign(tx, TXG_WAIT);
		ASSERT3U(error, ==, 0);
		/*
		 * Pools older than SPA_VERSION_FUID get the last ZPL
		 * version that precedes ZPL_VERSION_FUID.
		 */
		if (spa_version(dmu_objset_spa(os)) >= SPA_VERSION_FUID)
			zpl_version = ZPL_VERSION;
		else
			zpl_version = ZPL_VERSION_FUID - 1;

		/* The chosen version is passed on as a creation property. */
		VERIFY(nvlist_alloc(&zprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
		VERIFY(nvlist_add_uint64(zprops,
		    zfs_prop_to_name(ZFS_PROP_VERSION), zpl_version) == 0);
		zfs_create_fs(os, cr, zprops, tx);
		nvlist_free(zprops);
		dmu_tx_commit(tx);
	}
Exemple #20
0
/*
 * Get a dynamic property of a processor set.
 *
 * "pset.load" is the only dynamic property implemented; it is added to
 * 'nvl' as a uint64.  The caller must hold pool_lock (asserted).
 *
 * Returns EINVAL when the pset or its cpu partition cannot be found or
 * when 'name' is not a supported property; otherwise the result of
 * nvlist_add_uint64().
 */
int
pool_pset_propget(psetid_t psetid, char *name, nvlist_t *nvl)
{
	pool_pset_t *pset;
	cpupart_t *cpupart;
	int ret;

	ASSERT(pool_lock_held());

	mutex_enter(&cpu_lock);

	pset = pool_lookup_pset_by_id(psetid);
	cpupart = cpupart_find(psetid);

	if (pset != NULL && cpupart != NULL &&
	    strcmp(name, "pset.load") == 0) {
		ret = nvlist_add_uint64(nvl, "pset.load",
		    (uint64_t)PSET_LOAD(cpupart->cp_hp_avenrun[0]));
	} else {
		/* Unknown pset/partition or unsupported property. */
		ret = EINVAL;
	}

	mutex_exit(&cpu_lock);
	return (ret);
}
Exemple #21
0
static
#endif
/*
 * Return the index of 'domain' in the file system's FUID domain table,
 * adding the domain (and rewriting the on-disk table within 'tx') when it
 * is not present yet.
 *
 * 'retdomain' is optional; when it is not NULL it receives the domain
 * name string ("" for the empty domain, otherwise the name held by the
 * ksid domain entry).
 *
 * The empty domain always yields index 0.
 */
int
zfs_fuid_find_by_domain(zfsvfs_t *zfsvfs, const char *domain, char **retdomain,
    dmu_tx_t *tx)
{
	fuid_domain_t searchnode, *findnode;
	avl_index_t loc;

	/*
	 * If the dummy "nobody" domain then return an index of 0
	 * to cause the created FUID to be a standard POSIX id
	 * for the user nobody.
	 */
	if (domain[0] == '\0') {
		/* retdomain is optional here just as it is below. */
		if (retdomain) {
			*retdomain = "";
		}
		return (0);
	}

	searchnode.f_ksid = ksid_lookupdomain(domain);
	if (retdomain) {
		*retdomain = searchnode.f_ksid->kd_name;
	}
	if (!zfsvfs->z_fuid_loaded)
		zfs_fuid_init(zfsvfs, tx);

	rw_enter(&zfsvfs->z_fuid_lock, RW_READER);
	findnode = avl_find(&zfsvfs->z_fuid_domain, &searchnode, &loc);
	rw_exit(&zfsvfs->z_fuid_lock);

	if (findnode) {
		/* Already known: drop the hold taken by the lookup above. */
		ksiddomain_rele(searchnode.f_ksid);
		return (findnode->f_idx);
	} else {
		fuid_domain_t *domnode;
		nvlist_t *nvp;
		nvlist_t **fuids;
		uint64_t retidx;
		size_t nvsize = 0;
		char *packed;
		dmu_buf_t *db;
		int i = 0;

		/*
		 * NOTE(review): the reader lock is dropped before the
		 * writer lock is taken and the table is not searched
		 * again in between -- confirm that callers serialize
		 * additions of the same domain.
		 */
		domnode = kmem_alloc(sizeof (fuid_domain_t), KM_SLEEP);
		domnode->f_ksid = searchnode.f_ksid;

		rw_enter(&zfsvfs->z_fuid_lock, RW_WRITER);
		/* New entries get the next index after the existing ones. */
		retidx = domnode->f_idx = avl_numnodes(&zfsvfs->z_fuid_idx) + 1;

		avl_add(&zfsvfs->z_fuid_domain, domnode);
		avl_add(&zfsvfs->z_fuid_idx, domnode);
		/*
		 * Now resync the on-disk nvlist.
		 */
		VERIFY(nvlist_alloc(&nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0);

		/* One nvlist per domain: index, offset and name. */
		domnode = avl_first(&zfsvfs->z_fuid_domain);
		fuids = kmem_alloc(retidx * sizeof (void *), KM_SLEEP);
		while (domnode) {
			VERIFY(nvlist_alloc(&fuids[i],
			    NV_UNIQUE_NAME, KM_SLEEP) == 0);
			VERIFY(nvlist_add_uint64(fuids[i], FUID_IDX,
			    domnode->f_idx) == 0);
			VERIFY(nvlist_add_uint64(fuids[i],
			    FUID_OFFSET, 0) == 0);
			VERIFY(nvlist_add_string(fuids[i++], FUID_DOMAIN,
			    domnode->f_ksid->kd_name) == 0);
			domnode = AVL_NEXT(&zfsvfs->z_fuid_domain, domnode);
		}
		VERIFY(nvlist_add_nvlist_array(nvp, FUID_NVP_ARRAY,
		    fuids, retidx) == 0);
		/* The array was copied into nvp; release the originals. */
		for (i = 0; i != retidx; i++)
			nvlist_free(fuids[i]);
		kmem_free(fuids, retidx * sizeof (void *));

		/* Pack the table and write it to the FUID object. */
		VERIFY(nvlist_size(nvp, &nvsize, NV_ENCODE_XDR) == 0);
		packed = kmem_alloc(nvsize, KM_SLEEP);
		VERIFY(nvlist_pack(nvp, &packed, &nvsize,
		    NV_ENCODE_XDR, KM_SLEEP) == 0);
		nvlist_free(nvp);
		zfsvfs->z_fuid_size = nvsize;
		dmu_write(zfsvfs->z_os, zfsvfs->z_fuid_obj, 0,
		    zfsvfs->z_fuid_size, packed, tx);
		kmem_free(packed, zfsvfs->z_fuid_size);

		/* Record the packed size in the object's bonus buffer. */
		VERIFY(0 == dmu_bonus_hold(zfsvfs->z_os, zfsvfs->z_fuid_obj,
		    FTAG, &db));
		dmu_buf_will_dirty(db, tx);
		*(uint64_t *)db->db_data = zfsvfs->z_fuid_size;
		dmu_buf_rele(db, FTAG);

		rw_exit(&zfsvfs->z_fuid_lock);
		return (retidx);
	}
}
Exemple #22
0
/*
 * The device associated with the given vdev (either by devid or physical path)
 * has been added to the system.
 *
 * 'labeled' is B_FALSE when the device still has to be labeled by us before
 * it can be used as a replacement, and B_TRUE when the device is expected to
 * match a label request that was recorded by an earlier call to this function.
 *
 * First, we attempt to online the device (making sure to undo any spare
 * operation when finished).  If this succeeds, then we're done.  If it fails,
 * and the new state is VDEV_CANT_OPEN, it indicates that the device was opened,
 * but that the label was not what we expected.  If the 'autoreplace' property
 * is enabled, then we relabel the disk (if specified), and attempt a 'zpool
 * replace'.  If the online is successful, but the new state is something else
 * (REMOVED or FAULTED), it indicates that we're out of sync or in some sort of
 * race, and we should avoid attempting to relabel the disk.
 *
 * Whenever a step of the replacement fails, the vdev is onlined with
 * ZFS_ONLINE_FORCEFAULT so that a fault is posted for it.
 *
 * Also can arrive here from a ESC_ZFS_VDEV_CHECK event
 */
static void
zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
{
	char *path;
	vdev_state_t newstate;
	nvlist_t *nvroot, *newvd;
	pendingdev_t *device;
	uint64_t wholedisk = 0ULL;
	uint64_t offline = 0ULL;
	uint64_t guid = 0ULL;
	char *physpath = NULL, *new_devid = NULL, *enc_sysfs_path = NULL;
	char rawpath[PATH_MAX], fullpath[PATH_MAX];
	char devpath[PATH_MAX];
	int ret;
	int is_dm = 0;
	int is_sd = 0;
	uint_t c;
	vdev_stat_t *vs;

	if (nvlist_lookup_string(vdev, ZPOOL_CONFIG_PATH, &path) != 0)
		return;

	/* Skip healthy disks */
	verify(nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_VDEV_STATS,
	    (uint64_t **)&vs, &c) == 0);
	if (vs->vs_state == VDEV_STATE_HEALTHY) {
		zed_log_msg(LOG_INFO, "%s: %s is already healthy, skip it.",
		    __func__, path);
		return;
	}

	/* All of these are optional; the defaults above apply when absent. */
	(void) nvlist_lookup_string(vdev, ZPOOL_CONFIG_PHYS_PATH, &physpath);
	(void) nvlist_lookup_string(vdev, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH,
	    &enc_sysfs_path);
	(void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK, &wholedisk);
	(void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_OFFLINE, &offline);
	(void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_GUID, &guid);

	if (offline)
		return;  /* don't intervene if it was taken offline */

	is_dm = zfs_dev_is_dm(path);
	/*
	 * wholedisk is a uint64_t; convert it explicitly so the argument
	 * matches the %d conversion in the format string.
	 */
	zed_log_msg(LOG_INFO, "zfs_process_add: pool '%s' vdev '%s', phys '%s'"
	    " wholedisk %d, dm %d (%llu)", zpool_get_name(zhp), path,
	    physpath ? physpath : "NULL", (int)wholedisk, is_dm,
	    (long long unsigned int)guid);

	/*
	 * The VDEV guid is preferred for identification (gets passed in path)
	 */
	if (guid != 0) {
		(void) snprintf(fullpath, sizeof (fullpath), "%llu",
		    (long long unsigned int)guid);
	} else {
		/*
		 * otherwise use path sans partition suffix for whole disks
		 */
		(void) strlcpy(fullpath, path, sizeof (fullpath));
		if (wholedisk) {
			char *spath = zfs_strip_partition(fullpath);
			if (!spath) {
				zed_log_msg(LOG_INFO, "%s: Can't alloc",
				    __func__);
				return;
			}

			(void) strlcpy(fullpath, spath, sizeof (fullpath));
			free(spath);
		}
	}

	/*
	 * Attempt to online the device.
	 */
	if (zpool_vdev_online(zhp, fullpath,
	    ZFS_ONLINE_CHECKREMOVE | ZFS_ONLINE_UNSPARE, &newstate) == 0 &&
	    (newstate == VDEV_STATE_HEALTHY ||
	    newstate == VDEV_STATE_DEGRADED)) {
		zed_log_msg(LOG_INFO, "  zpool_vdev_online: vdev %s is %s",
		    fullpath, (newstate == VDEV_STATE_HEALTHY) ?
		    "HEALTHY" : "DEGRADED");
		return;
	}

	/*
	 * vdev_id alias rule for using scsi_debug devices (FMA automated
	 * testing)
	 */
	if (physpath != NULL && strcmp("scsidebug", physpath) == 0)
		is_sd = 1;

	/*
	 * If the pool doesn't have the autoreplace property set, then use
	 * vdev online to trigger a FMA fault by posting an ereport.
	 * Past this point physpath is known to be non-NULL.
	 */
	if (!zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOREPLACE, NULL) ||
	    !(wholedisk || is_dm) || (physpath == NULL)) {
		(void) zpool_vdev_online(zhp, fullpath, ZFS_ONLINE_FORCEFAULT,
		    &newstate);
		zed_log_msg(LOG_INFO, "Pool's autoreplace is not enabled or "
		    "not a whole disk for '%s'", fullpath);
		return;
	}

	/*
	 * Convert physical path into its current device node.  Rawpath
	 * needs to be /dev/disk/by-vdev for a scsi_debug device since
	 * /dev/disk/by-path will not be present.
	 */
	(void) snprintf(rawpath, sizeof (rawpath), "%s%s",
	    is_sd ? DEV_BYVDEV_PATH : DEV_BYPATH_PATH, physpath);

	if (realpath(rawpath, devpath) == NULL && !is_dm) {
		zed_log_msg(LOG_INFO, "  realpath: %s failed (%s)",
		    rawpath, strerror(errno));

		(void) zpool_vdev_online(zhp, fullpath, ZFS_ONLINE_FORCEFAULT,
		    &newstate);

		zed_log_msg(LOG_INFO, "  zpool_vdev_online: %s FORCEFAULT (%s)",
		    fullpath, libzfs_error_description(g_zfshdl));
		return;
	}

	/* Only autoreplace bad disks */
	if ((vs->vs_state != VDEV_STATE_DEGRADED) &&
	    (vs->vs_state != VDEV_STATE_FAULTED) &&
	    (vs->vs_state != VDEV_STATE_CANT_OPEN)) {
		return;
	}

	/* Optional: new_devid stays NULL when the pair is not present. */
	(void) nvlist_lookup_string(vdev, "new_devid", &new_devid);

	if (is_dm) {
		/* Don't label device mapper or multipath disks. */
	} else if (!labeled) {
		/*
		 * we're auto-replacing a raw disk, so label it first
		 */
		char *leafname;

		/*
		 * Before we can label the disk, we need to map the physical
		 * string that was matched on to the underlying device node;
		 * devpath holds the realpath() result computed above.
		 *
		 * If any part of this process fails, then do a force online
		 * to trigger a ZFS fault for the device (and any hot spare
		 * replacement).
		 */
		leafname = strrchr(devpath, '/');
		/* Fall back to the whole string if there is no '/'. */
		leafname = (leafname != NULL) ? leafname + 1 : devpath;

		/*
		 * Attempt to write out the label.
		 */
		if (zpool_label_disk(g_zfshdl, zhp, leafname) != 0) {
			zed_log_msg(LOG_INFO, "  zpool_label_disk: could not "
			    "label '%s' (%s)", leafname,
			    libzfs_error_description(g_zfshdl));

			(void) zpool_vdev_online(zhp, fullpath,
			    ZFS_ONLINE_FORCEFAULT, &newstate);
			return;
		}

		/*
		 * The disk labeling is asynchronous on Linux. Just record
		 * this label request and return as there will be another
		 * disk add event for the partition after the labeling is
		 * completed.
		 */
		device = malloc(sizeof (pendingdev_t));
		if (device == NULL) {
			/*
			 * Without a pending record the follow-up event cannot
			 * be matched, so fault the vdev now instead of
			 * dereferencing a NULL pointer.
			 */
			zed_log_msg(LOG_WARNING, "%s: out of memory",
			    __func__);
			(void) zpool_vdev_online(zhp, fullpath,
			    ZFS_ONLINE_FORCEFAULT, &newstate);
			return;
		}
		(void) strlcpy(device->pd_physpath, physpath,
		    sizeof (device->pd_physpath));
		list_insert_tail(&g_device_list, device);

		zed_log_msg(LOG_INFO, "  zpool_label_disk: async '%s' (%llu)",
		    leafname, (u_longlong_t)guid);

		return;	/* resumes at EC_DEV_ADD.ESC_DISK for partition */

	} else /* labeled */ {
		boolean_t found = B_FALSE;
		/*
		 * match up with request above to label the disk
		 */
		for (device = list_head(&g_device_list); device != NULL;
		    device = list_next(&g_device_list, device)) {
			if (strcmp(physpath, device->pd_physpath) == 0) {
				list_remove(&g_device_list, device);
				free(device);
				found = B_TRUE;
				break;
			}
			zed_log_msg(LOG_INFO, "zpool_label_disk: %s != %s",
			    physpath, device->pd_physpath);
		}
		if (!found) {
			/* unexpected partition slice encountered */
			zed_log_msg(LOG_INFO, "labeled disk %s unexpected here",
			    fullpath);
			(void) zpool_vdev_online(zhp, fullpath,
			    ZFS_ONLINE_FORCEFAULT, &newstate);
			return;
		}

		zed_log_msg(LOG_INFO, "  zpool_label_disk: resume '%s' (%llu)",
		    physpath, (u_longlong_t)guid);

		/* Never hand a NULL pointer to a %s conversion. */
		(void) snprintf(devpath, sizeof (devpath), "%s%s",
		    DEV_BYID_PATH, (new_devid != NULL) ? new_devid : "");
	}

	/*
	 * Construct the root vdev to pass to zpool_vdev_attach().  While adding
	 * the entire vdev structure is harmless, we construct a reduced set of
	 * path/physpath/wholedisk to keep it simple.
	 */
	if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0) {
		zed_log_msg(LOG_WARNING, "zfs_mod: nvlist_alloc out of memory");
		return;
	}
	if (nvlist_alloc(&newvd, NV_UNIQUE_NAME, 0) != 0) {
		zed_log_msg(LOG_WARNING, "zfs_mod: nvlist_alloc out of memory");
		nvlist_free(nvroot);
		return;
	}

	/*
	 * NOTE(review): new_devid may still be NULL here; this relies on
	 * nvlist_add_string() rejecting a NULL value so that we take the
	 * failure path below -- confirm against libnvpair.
	 */
	if (nvlist_add_string(newvd, ZPOOL_CONFIG_TYPE, VDEV_TYPE_DISK) != 0 ||
	    nvlist_add_string(newvd, ZPOOL_CONFIG_PATH, path) != 0 ||
	    nvlist_add_string(newvd, ZPOOL_CONFIG_DEVID, new_devid) != 0 ||
	    (physpath != NULL && nvlist_add_string(newvd,
	    ZPOOL_CONFIG_PHYS_PATH, physpath) != 0) ||
	    (enc_sysfs_path != NULL && nvlist_add_string(newvd,
	    ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH, enc_sysfs_path) != 0) ||
	    nvlist_add_uint64(newvd, ZPOOL_CONFIG_WHOLE_DISK, wholedisk) != 0 ||
	    nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) != 0 ||
	    nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, &newvd,
	    1) != 0) {
		zed_log_msg(LOG_WARNING, "zfs_mod: unable to add nvlist pairs");
		nvlist_free(newvd);
		nvlist_free(nvroot);
		return;
	}

	/* newvd is released here; only nvroot is used from now on. */
	nvlist_free(newvd);

	/*
	 * Wait for udev to verify the links exist, then auto-replace
	 * the leaf disk at same physical location.
	 */
	if (zpool_label_disk_wait(path, 3000) != 0) {
		zed_log_msg(LOG_WARNING, "zfs_mod: expected replacement "
		    "disk %s is missing", path);
		nvlist_free(nvroot);
		return;
	}

	ret = zpool_vdev_attach(zhp, fullpath, path, nvroot, B_TRUE);

	zed_log_msg(LOG_INFO, "  zpool_vdev_replace: %s with %s (%s)",
	    fullpath, path, (ret == 0) ? "no errors" :
	    libzfs_error_description(g_zfshdl));

	nvlist_free(nvroot);
}
Exemple #23
0
/*
 * Append one record to the pool's history object.
 *
 * arg1 is the spa_t and arg2 the history_arg_t describing the record.  The
 * record is built as an nvlist, packed with XDR encoding and written after a
 * little-endian length word.  The history_arg_t and the strings it carries
 * are freed before returning.
 */
/*ARGSUSED*/
static void
spa_history_log_sync(void *arg1, void *arg2, dmu_tx_t *tx)
{
	spa_t *spa = arg1;
	history_arg_t *hap = arg2;
	const char *msg = hap->ha_history_str;
	objset_t *mos = spa->spa_meta_objset;
	int is_create = (hap->ha_log_type == LOG_CMD_POOL_CREATE);
	int is_command = (is_create || hap->ha_log_type == LOG_CMD_NORMAL);
	dmu_buf_t *bonus;
	spa_history_phys_t *hist;
	nvlist_t *rec;
	char *packed = NULL;
	size_t packed_len;
	uint64_t len_le;
	int err;

	/*
	 * Older pools may not have a command history object yet; create it
	 * on demand.
	 */
	mutex_enter(&spa->spa_history_lock);
	if (!spa->spa_history) {
		spa_history_create_obj(spa, tx);
	}
	mutex_exit(&spa->spa_history_lock);

	/*
	 * The bonus buffer of the history object holds the write offsets;
	 * they are updated as the writes below complete.
	 */
	VERIFY(0 == dmu_bonus_hold(mos, spa->spa_history, FTAG, &bonus));
	hist = bonus->db_data;

	dmu_buf_will_dirty(bonus, tx);

#ifdef ZFS_DEBUG
	{
		dmu_object_info_t info;
		dmu_object_info_from_db(bonus, &info);
		ASSERT3U(info.doi_bonus_type, ==, DMU_OT_SPA_HISTORY_OFFSETS);
	}
#endif

	/* Fields common to every record: time, user, zone and host. */
	VERIFY(nvlist_alloc(&rec, NV_UNIQUE_NAME, KM_SLEEP) == 0);
	VERIFY(nvlist_add_uint64(rec, ZPOOL_HIST_TIME,
	    gethrestime_sec()) == 0);
	VERIFY(nvlist_add_uint64(rec, ZPOOL_HIST_WHO, hap->ha_uid) == 0);
	if (hap->ha_zone != NULL) {
		VERIFY(nvlist_add_string(rec, ZPOOL_HIST_ZONE,
		    hap->ha_zone) == 0);
	}
#ifdef _KERNEL
	VERIFY(nvlist_add_string(rec, ZPOOL_HIST_HOST,
	    utsname.nodename) == 0);
#endif

	if (!is_command) {
		/* Internal event: record the event id, txg and message. */
		VERIFY(nvlist_add_uint64(rec, ZPOOL_HIST_INT_EVENT,
		    hap->ha_event) == 0);
		VERIFY(nvlist_add_uint64(rec, ZPOOL_HIST_TXG,
		    tx->tx_txg) == 0);
		VERIFY(nvlist_add_string(rec, ZPOOL_HIST_INT_STR,
		    msg) == 0);

		zfs_dbgmsg("internal %s pool:%s txg:%llu %s",
		    zfs_history_event_names[hap->ha_event], spa_name(spa),
		    (longlong_t)tx->tx_txg, msg);
	} else {
		/* User command (including the pool-create command). */
		VERIFY(nvlist_add_string(rec, ZPOOL_HIST_CMD, msg) == 0);

		zfs_dbgmsg("command: %s", msg);
	}

	/* Size the record, then pack it into a buffer of that size. */
	VERIFY(nvlist_size(rec, &packed_len, NV_ENCODE_XDR) == 0);
	packed = kmem_alloc(packed_len, KM_SLEEP);

	VERIFY(nvlist_pack(rec, &packed, &packed_len,
	    NV_ENCODE_XDR, KM_SLEEP) == 0);

	mutex_enter(&spa->spa_history_lock);
	if (is_create) {
		VERIFY(hist->sh_eof == hist->sh_pool_create_len);
	}

	/* The length word is stored little endian, followed by the record. */
	len_le = LE_64((uint64_t)packed_len);
	err = spa_history_write(spa, &len_le, sizeof (len_le), hist, tx);
	if (err == 0) {
		err = spa_history_write(spa, packed, packed_len, hist, tx);
	}

	/* A successfully written create record extends the create region. */
	if (err == 0 && is_create) {
		hist->sh_pool_create_len += sizeof (len_le) + packed_len;
		hist->sh_bof = hist->sh_pool_create_len;
	}

	mutex_exit(&spa->spa_history_lock);
	nvlist_free(rec);
	kmem_free(packed, packed_len);
	dmu_buf_rele(bonus, FTAG);

	/* Release the argument structure and the strings hanging off it. */
	strfree(hap->ha_history_str);
	if (hap->ha_zone != NULL) {
		strfree(hap->ha_zone);
	}
	kmem_free(hap, sizeof (history_arg_t));
}
Exemple #24
0
/*
 * Convert our list of pools into the definitive set of configurations.  We
 * start by picking the best config for each toplevel vdev.  Once that's done,
 * we assemble the toplevel vdevs into a full config for the pool.  We make a
 * pass to fix up any incorrect paths, and then add it to the main list to
 * return to the user.
 *
 * hdl       - library handle, used for allocation and error reporting
 * pl        - the pools to process (pl->pools) and the known GUID -> path
 *             mappings (pl->names)
 * active_ok - when set, pools are added without the active-pool check and
 *             without being passed through refresh_config()
 * policy    - optional nvlist added to each config as ZPOOL_LOAD_POLICY
 *
 * Returns an nvlist of configs keyed by pool name, or NULL on failure.
 */
static nvlist_t *
get_configs(libzfs_handle_t *hdl, pool_list_t *pl, boolean_t active_ok,
    nvlist_t *policy)
{
	pool_entry_t *pe;
	vdev_entry_t *ve;
	config_entry_t *ce;
	nvlist_t *ret = NULL, *config = NULL, *tmp = NULL, *nvtop, *nvroot;
	nvlist_t **spares, **l2cache;
	uint_t i, nspares, nl2cache;
	boolean_t config_seen;
	uint64_t best_txg;
	char *name, *hostname = NULL;
	uint64_t guid;
	uint_t children = 0;
	nvlist_t **child = NULL;
	uint_t holes = 0;
	uint64_t *hole_array = NULL, max_id = 0;
	uint_t c;
	boolean_t isactive;
	uint64_t hostid = 0;
	nvlist_t *nvl;
	boolean_t valid_top_config = B_FALSE;

	if (nvlist_alloc(&ret, 0, 0) != 0)
		goto nomem;

	for (pe = pl->pools; pe != NULL; pe = pe->pe_next) {
		uint64_t id, max_txg = 0;

		if (nvlist_alloc(&config, NV_UNIQUE_NAME, 0) != 0)
			goto nomem;
		config_seen = B_FALSE;

		/*
		 * Start every pool from a clean slate so that nothing read
		 * below is left over from the previous pool (or is
		 * uninitialized) when no vdev supplies a newer txg.
		 */
		valid_top_config = B_FALSE;
		hole_array = NULL;
		holes = 0;
		max_id = 0;
		hostid = 0;
		hostname = NULL;

		/*
		 * Iterate over all toplevel vdevs.  Grab the pool configuration
		 * from the first one we find, and then go through the rest and
		 * add them as necessary to the 'vdevs' member of the config.
		 */
		for (ve = pe->pe_vdevs; ve != NULL; ve = ve->ve_next) {

			/*
			 * Determine the best configuration for this vdev by
			 * selecting the config with the latest transaction
			 * group.
			 */
			best_txg = 0;
			for (ce = ve->ve_configs; ce != NULL;
			    ce = ce->ce_next) {

				if (ce->ce_txg > best_txg) {
					tmp = ce->ce_config;
					best_txg = ce->ce_txg;
				}
			}

			/*
			 * We rely on the fact that the max txg for the
			 * pool will contain the most up-to-date information
			 * about the valid top-levels in the vdev namespace.
			 */
			if (best_txg > max_txg) {
				(void) nvlist_remove(config,
				    ZPOOL_CONFIG_VDEV_CHILDREN,
				    DATA_TYPE_UINT64);
				(void) nvlist_remove(config,
				    ZPOOL_CONFIG_HOLE_ARRAY,
				    DATA_TYPE_UINT64_ARRAY);

				max_txg = best_txg;
				hole_array = NULL;
				holes = 0;
				max_id = 0;
				valid_top_config = B_FALSE;

				if (nvlist_lookup_uint64(tmp,
				    ZPOOL_CONFIG_VDEV_CHILDREN, &max_id) == 0) {
					verify(nvlist_add_uint64(config,
					    ZPOOL_CONFIG_VDEV_CHILDREN,
					    max_id) == 0);
					valid_top_config = B_TRUE;
				}

				if (nvlist_lookup_uint64_array(tmp,
				    ZPOOL_CONFIG_HOLE_ARRAY, &hole_array,
				    &holes) == 0) {
					verify(nvlist_add_uint64_array(config,
					    ZPOOL_CONFIG_HOLE_ARRAY,
					    hole_array, holes) == 0);
				}
			}

			if (!config_seen) {
				/*
				 * Copy the relevant pieces of data to the pool
				 * configuration:
				 *
				 *	version
				 *	pool guid
				 *	name
				 *	pool txg (if available)
				 *	comment (if available)
				 *	pool state
				 *	hostid (if available)
				 *	hostname (if available)
				 */
				uint64_t state, version, pool_txg;
				char *comment = NULL;

				version = fnvlist_lookup_uint64(tmp,
				    ZPOOL_CONFIG_VERSION);
				fnvlist_add_uint64(config,
				    ZPOOL_CONFIG_VERSION, version);
				guid = fnvlist_lookup_uint64(tmp,
				    ZPOOL_CONFIG_POOL_GUID);
				fnvlist_add_uint64(config,
				    ZPOOL_CONFIG_POOL_GUID, guid);
				name = fnvlist_lookup_string(tmp,
				    ZPOOL_CONFIG_POOL_NAME);
				fnvlist_add_string(config,
				    ZPOOL_CONFIG_POOL_NAME, name);
				if (nvlist_lookup_uint64(tmp,
				    ZPOOL_CONFIG_POOL_TXG, &pool_txg) == 0)
					fnvlist_add_uint64(config,
					    ZPOOL_CONFIG_POOL_TXG, pool_txg);

				if (nvlist_lookup_string(tmp,
				    ZPOOL_CONFIG_COMMENT, &comment) == 0)
					fnvlist_add_string(config,
					    ZPOOL_CONFIG_COMMENT, comment);

				state = fnvlist_lookup_uint64(tmp,
				    ZPOOL_CONFIG_POOL_STATE);
				fnvlist_add_uint64(config,
				    ZPOOL_CONFIG_POOL_STATE, state);

				hostid = 0;
				if (nvlist_lookup_uint64(tmp,
				    ZPOOL_CONFIG_HOSTID, &hostid) == 0) {
					fnvlist_add_uint64(config,
					    ZPOOL_CONFIG_HOSTID, hostid);
					hostname = fnvlist_lookup_string(tmp,
					    ZPOOL_CONFIG_HOSTNAME);
					fnvlist_add_string(config,
					    ZPOOL_CONFIG_HOSTNAME, hostname);
				}

				config_seen = B_TRUE;
			}

			/*
			 * Add this top-level vdev to the child array.
			 */
			verify(nvlist_lookup_nvlist(tmp,
			    ZPOOL_CONFIG_VDEV_TREE, &nvtop) == 0);
			verify(nvlist_lookup_uint64(nvtop, ZPOOL_CONFIG_ID,
			    &id) == 0);

			if (id >= children) {
				nvlist_t **newchild;

				newchild = zfs_alloc(hdl, (id + 1) *
				    sizeof (nvlist_t *));
				if (newchild == NULL)
					goto nomem;

				for (c = 0; c < children; c++)
					newchild[c] = child[c];

				free(child);
				child = newchild;
				children = id + 1;
			}

			/*
			 * If an earlier vdev already claimed this slot, drop
			 * its copy first so it is not leaked.  The slot is
			 * cleared so the error path cannot free it twice if
			 * the dup below fails.
			 */
			nvlist_free(child[id]);
			child[id] = NULL;
			if (nvlist_dup(nvtop, &child[id], 0) != 0)
				goto nomem;

		}

		/*
		 * If we have information about all the top-levels then
		 * clean up the nvlist which we've constructed. This
		 * means removing any extraneous devices that are
		 * beyond the valid range or adding devices to the end
		 * of our array which appear to be missing.
		 */
		if (valid_top_config) {
			if (max_id < children) {
				for (c = max_id; c < children; c++)
					nvlist_free(child[c]);
				children = max_id;
			} else if (max_id > children) {
				nvlist_t **newchild;

				newchild = zfs_alloc(hdl, (max_id) *
				    sizeof (nvlist_t *));
				if (newchild == NULL)
					goto nomem;

				for (c = 0; c < children; c++)
					newchild[c] = child[c];

				free(child);
				child = newchild;
				children = max_id;
			}
		}

		verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
		    &guid) == 0);

		/*
		 * The vdev namespace may contain holes as a result of
		 * device removal. We must add them back into the vdev
		 * tree before we process any missing devices.
		 */
		if (holes > 0) {
			ASSERT(valid_top_config);

			for (c = 0; c < children; c++) {
				nvlist_t *holey;

				if (child[c] != NULL ||
				    !vdev_is_hole(hole_array, holes, c))
					continue;

				if (nvlist_alloc(&holey, NV_UNIQUE_NAME,
				    0) != 0)
					goto nomem;

				/*
				 * Holes in the namespace are treated as
				 * "hole" top-level vdevs and have a
				 * special flag set on them.
				 */
				if (nvlist_add_string(holey,
				    ZPOOL_CONFIG_TYPE,
				    VDEV_TYPE_HOLE) != 0 ||
				    nvlist_add_uint64(holey,
				    ZPOOL_CONFIG_ID, c) != 0 ||
				    nvlist_add_uint64(holey,
				    ZPOOL_CONFIG_GUID, 0ULL) != 0) {
					nvlist_free(holey);
					goto nomem;
				}
				child[c] = holey;
			}
		}

		/*
		 * Look for any missing top-level vdevs.  If this is the case,
		 * create a faked up 'missing' vdev as a placeholder.  We cannot
		 * simply compress the child array, because the kernel performs
		 * certain checks to make sure the vdev IDs match their location
		 * in the configuration.
		 */
		for (c = 0; c < children; c++) {
			if (child[c] == NULL) {
				nvlist_t *missing;
				if (nvlist_alloc(&missing, NV_UNIQUE_NAME,
				    0) != 0)
					goto nomem;
				if (nvlist_add_string(missing,
				    ZPOOL_CONFIG_TYPE,
				    VDEV_TYPE_MISSING) != 0 ||
				    nvlist_add_uint64(missing,
				    ZPOOL_CONFIG_ID, c) != 0 ||
				    nvlist_add_uint64(missing,
				    ZPOOL_CONFIG_GUID, 0ULL) != 0) {
					nvlist_free(missing);
					goto nomem;
				}
				child[c] = missing;
			}
		}

		/*
		 * Put all of this pool's top-level vdevs into a root vdev.
		 */
		if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0)
			goto nomem;
		if (nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE,
		    VDEV_TYPE_ROOT) != 0 ||
		    nvlist_add_uint64(nvroot, ZPOOL_CONFIG_ID, 0ULL) != 0 ||
		    nvlist_add_uint64(nvroot, ZPOOL_CONFIG_GUID, guid) != 0 ||
		    nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
		    child, children) != 0) {
			nvlist_free(nvroot);
			goto nomem;
		}

		/*
		 * The child array is released and reset here, so the error
		 * path below has nothing left to free for this pool.
		 */
		for (c = 0; c < children; c++)
			nvlist_free(child[c]);
		free(child);
		children = 0;
		child = NULL;

		/*
		 * Go through and fix up any paths and/or devids based on our
		 * known list of vdev GUID -> path mappings.
		 */
		if (fix_paths(nvroot, pl->names) != 0) {
			nvlist_free(nvroot);
			goto nomem;
		}

		/*
		 * Add the root vdev to this pool's configuration.
		 */
		if (nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
		    nvroot) != 0) {
			nvlist_free(nvroot);
			goto nomem;
		}
		nvlist_free(nvroot);

		/*
		 * zdb uses this path to report on active pools that were
		 * imported or created using -R.
		 */
		if (active_ok)
			goto add_pool;

		/*
		 * Determine if this pool is currently active, in which case we
		 * can't actually import it.
		 */
		verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
		    &name) == 0);
		verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
		    &guid) == 0);

		if (pool_active(hdl, name, guid, &isactive) != 0)
			goto error;

		if (isactive) {
			nvlist_free(config);
			config = NULL;
			continue;
		}

		if (policy != NULL) {
			if (nvlist_add_nvlist(config, ZPOOL_LOAD_POLICY,
			    policy) != 0)
				goto nomem;
		}

		/* A pool whose config cannot be refreshed is skipped. */
		if ((nvl = refresh_config(hdl, config)) == NULL) {
			nvlist_free(config);
			config = NULL;
			continue;
		}

		nvlist_free(config);
		config = nvl;

		/*
		 * Go through and update the paths for spares, now that we have
		 * them.
		 */
		verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
		    &nvroot) == 0);
		if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
		    &spares, &nspares) == 0) {
			for (i = 0; i < nspares; i++) {
				if (fix_paths(spares[i], pl->names) != 0)
					goto nomem;
			}
		}

		/*
		 * Update the paths for l2cache devices.
		 */
		if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
		    &l2cache, &nl2cache) == 0) {
			for (i = 0; i < nl2cache; i++) {
				if (fix_paths(l2cache[i], pl->names) != 0)
					goto nomem;
			}
		}

		/*
		 * Restore the original information read from the actual label.
		 */
		(void) nvlist_remove(config, ZPOOL_CONFIG_HOSTID,
		    DATA_TYPE_UINT64);
		(void) nvlist_remove(config, ZPOOL_CONFIG_HOSTNAME,
		    DATA_TYPE_STRING);
		if (hostid != 0) {
			verify(nvlist_add_uint64(config, ZPOOL_CONFIG_HOSTID,
			    hostid) == 0);
			verify(nvlist_add_string(config, ZPOOL_CONFIG_HOSTNAME,
			    hostname) == 0);
		}

add_pool:
		/*
		 * Add this pool to the list of configs.
		 */
		verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
		    &name) == 0);
		if (nvlist_add_nvlist(ret, name, config) != 0)
			goto nomem;

		nvlist_free(config);
		config = NULL;
	}

	return (ret);

nomem:
	(void) no_memory(hdl);
error:
	/* Release whatever was still owned when the failure occurred. */
	nvlist_free(config);
	nvlist_free(ret);
	for (c = 0; c < children; c++)
		nvlist_free(child[c]);
	free(child);

	return (NULL);
}
Exemple #25
0
/* Topo Methods */
/*
 * Topo method that computes the ASRU property value for a DIMM node.
 *
 * A mem FMRI is created from the node's label and the serial number found in
 * 'in' (TOPO_PROP_PARGS first, then TOPO_PROP_ARGS).  On success '*out' is a
 * newly allocated nvlist holding the property name, type and FMRI value and 0
 * is returned; otherwise the module errno is set through topo_mod_seterrno()
 * and its result is returned.
 */
static int
mem_asru_compute(topo_mod_t *mod, tnode_t *node, topo_version_t version,
    nvlist_t *in, nvlist_t **out)
{
	nvlist_t *fmri, *args, *hcsp;
	nvlist_t *pargs = NULL;
	char *serial = NULL;
	char *label = NULL;
	uint64_t pa, offset;
	int label_err;
	int err = 0;

	if (version > TOPO_METH_ASRU_COMPUTE_VERSION) {
		return (topo_mod_seterrno(mod, EMOD_VER_NEW));
	}

	/* This method only applies to DIMM nodes. */
	if (strcmp(topo_node_name(node), DIMM) != 0) {
		return (topo_mod_seterrno(mod, EMOD_METHOD_INVAL));
	}

	/* The serial in the private args wins over the one in the args. */
	if (nvlist_lookup_nvlist(in, TOPO_PROP_PARGS, &pargs) == 0) {
		(void) nvlist_lookup_string(pargs, FM_FMRI_HC_SERIAL_ID,
		    &serial);
	}
	if (serial == NULL &&
	    nvlist_lookup_nvlist(in, TOPO_PROP_ARGS, &args) == 0) {
		(void) nvlist_lookup_string(args, FM_FMRI_HC_SERIAL_ID,
		    &serial);
	}

	/* The label is only needed to build the FMRI. */
	(void) topo_node_label(node, &label, &label_err);

	fmri = mem_fmri_create(mod, serial, label);

	if (label != NULL) {
		topo_mod_strfree(mod, label);
	}

	if (fmri == NULL) {
		return (topo_mod_seterrno(mod, EMOD_NOMEM));
	}

	/*
	 * For a memory page, 'in' includes an hc-specific member which
	 * specifies physaddr and/or offset. Set them in the FMRI as well.
	 */
	if (pargs && nvlist_lookup_nvlist(pargs,
	    FM_FMRI_HC_SPECIFIC, &hcsp) == 0) {
		if (nvlist_lookup_uint64(hcsp,
		    FM_FMRI_HC_SPECIFIC_PHYSADDR, &pa) == 0) {
			err += nvlist_add_uint64(fmri, FM_FMRI_MEM_PHYSADDR,
			    pa);
		}
		if (nvlist_lookup_uint64(hcsp,
		    FM_FMRI_HC_SPECIFIC_OFFSET, &offset) == 0) {
			err += nvlist_add_uint64(fmri, FM_FMRI_MEM_OFFSET,
			    offset);
		}
	}

	/* The output list is only allocated if the adds above succeeded. */
	if (err != 0 || topo_mod_nvalloc(mod, out, NV_UNIQUE_NAME) < 0) {
		nvlist_free(fmri);
		return (topo_mod_seterrno(mod, EMOD_NOMEM));
	}

	/* All three adds are attempted; any failure is caught afterwards. */
	err = nvlist_add_string(*out, TOPO_PROP_VAL_NAME, TOPO_PROP_ASRU);
	err |= nvlist_add_uint32(*out, TOPO_PROP_VAL_TYPE, TOPO_TYPE_FMRI);
	err |= nvlist_add_nvlist(*out, TOPO_PROP_VAL_VAL, fmri);
	nvlist_free(fmri);

	if (err == 0) {
		return (0);
	}

	nvlist_free(*out);
	*out = NULL;
	return (topo_mod_seterrno(mod, EMOD_NVL_INVAL));
}
Exemple #26
0
/*
 * Add the uint64 value 'val' to 'nvl' under 'name'.
 *
 * The result of nvlist_add_uint64() is checked with VERIFY0() instead of
 * being handed back, so this wrapper has no error return of its own.
 */
void
fnvlist_add_uint64(nvlist_t *nvl, const char *name, uint64_t val)
{
	VERIFY0(nvlist_add_uint64(nvl, name, val));
}
Exemple #27
0
/*
 * Build the pool's configuration nvlist from the current in-core state.
 *
 * Whether a complete config or just one top-level config is generated is
 * inferred from vd: the root vdev gives the complete config.  Passing
 * vd == NULL selects the root vdev and makes this function take (and drop)
 * the SCL_CONFIG | SCL_STATE locks as reader itself; otherwise they are
 * asserted to be held already.  A txg of -1 means "use spa_config_txg".
 * When getstats is set and no load is in progress, the dedup statistics are
 * included as well.
 */
nvlist_t *
spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats)
{
	vdev_t *root_vd = spa->spa_root_vdev;
	boolean_t took_lock = (vd == NULL) ? B_TRUE : B_FALSE;
	unsigned long hostid = 0;
	uint64_t orig_guid;
	nvlist_t *cfg, *vdev_tree;

	if (took_lock) {
		vd = root_vd;
		spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_READER);
	}

	ASSERT(spa_config_held(spa, SCL_CONFIG | SCL_STATE, RW_READER) ==
	    (SCL_CONFIG | SCL_STATE));

	/*
	 * A txg of -1 stands for the current value of spa->spa_config_txg.
	 */
	if (txg == -1ULL) {
		txg = spa->spa_config_txg;
	}

	VERIFY(nvlist_alloc(&cfg, NV_UNIQUE_NAME, KM_SLEEP) == 0);

	/* Pool-wide identification. */
	VERIFY(nvlist_add_uint64(cfg, ZPOOL_CONFIG_VERSION,
	    spa_version(spa)) == 0);
	VERIFY(nvlist_add_string(cfg, ZPOOL_CONFIG_POOL_NAME,
	    spa_name(spa)) == 0);
	VERIFY(nvlist_add_uint64(cfg, ZPOOL_CONFIG_POOL_STATE,
	    spa_state(spa)) == 0);
	VERIFY(nvlist_add_uint64(cfg, ZPOOL_CONFIG_POOL_TXG,
	    txg) == 0);
	VERIFY(nvlist_add_uint64(cfg, ZPOOL_CONFIG_POOL_GUID,
	    spa_guid(spa)) == 0);
	if (spa->spa_comment != NULL) {
		VERIFY(nvlist_add_string(cfg, ZPOOL_CONFIG_COMMENT,
		    spa->spa_comment) == 0);
	}

#ifdef	_KERNEL
	hostid = zone_get_hostid(NULL);
#else	/* _KERNEL */
	/*
	 * In userland the system's hostid is emulated, so
	 * zone_get_hostid() cannot be used.
	 */
	(void) ddi_strtoul(hw_serial, NULL, 10, &hostid);
#endif	/* _KERNEL */
	/* The hostid is only recorded when non-zero; the hostname always. */
	if (hostid != 0) {
		VERIFY(nvlist_add_uint64(cfg, ZPOOL_CONFIG_HOSTID,
		    hostid) == 0);
	}
	VERIFY(nvlist_add_string(cfg, ZPOOL_CONFIG_HOSTNAME,
	    utsname.nodename) == 0);

	if (vd == root_vd) {
		/*
		 * Only add the (potentially large) split information
		 * in the mos config, and not in the vdev labels
		 */
		if (spa->spa_config_splitting != NULL) {
			VERIFY(nvlist_add_nvlist(cfg, ZPOOL_CONFIG_SPLIT,
			    spa->spa_config_splitting) == 0);
		}
	} else {
		/* Per-vdev identification for a single top-level config. */
		VERIFY(nvlist_add_uint64(cfg, ZPOOL_CONFIG_TOP_GUID,
		    vd->vdev_top->vdev_guid) == 0);
		VERIFY(nvlist_add_uint64(cfg, ZPOOL_CONFIG_GUID,
		    vd->vdev_guid) == 0);
		if (vd->vdev_isspare) {
			VERIFY(nvlist_add_uint64(cfg, ZPOOL_CONFIG_IS_SPARE,
			    1ULL) == 0);
		}
		if (vd->vdev_islog) {
			VERIFY(nvlist_add_uint64(cfg, ZPOOL_CONFIG_IS_LOG,
			    1ULL) == 0);
		}
		vd = vd->vdev_top;		/* label contains top config */
	}

	/*
	 * Add the top-level config.  We even add this on pools which
	 * don't support holes in the namespace.
	 */
	vdev_top_config_generate(spa, cfg);

	/*
	 * While a split is in progress, record the original pool's guid.
	 */
	if (spa->spa_config_splitting != NULL &&
	    nvlist_lookup_uint64(spa->spa_config_splitting,
	    ZPOOL_CONFIG_SPLIT_GUID, &orig_guid) == 0) {
		VERIFY(nvlist_add_uint64(cfg, ZPOOL_CONFIG_SPLIT_GUID,
		    orig_guid) == 0);
	}

	/* The vdev tree is added to cfg and then released here. */
	vdev_tree = vdev_config_generate(spa, vd, getstats, 0);
	VERIFY(nvlist_add_nvlist(cfg, ZPOOL_CONFIG_VDEV_TREE,
	    vdev_tree) == 0);
	nvlist_free(vdev_tree);

	/*
	 * Store what's necessary for reading the MOS in the label.
	 */
	VERIFY(nvlist_add_nvlist(cfg, ZPOOL_CONFIG_FEATURES_FOR_READ,
	    spa->spa_label_features) == 0);

	if (getstats && spa_load_state(spa) == SPA_LOAD_NONE) {
		ddt_histogram_t *hist;
		ddt_object_t *objstats;
		ddt_stat_t *stats;

		/* Each statistics structure is exported as a uint64 array. */
		hist = kmem_zalloc(sizeof (*hist), KM_SLEEP);
		ddt_get_dedup_histogram(spa, hist);
		VERIFY(nvlist_add_uint64_array(cfg,
		    ZPOOL_CONFIG_DDT_HISTOGRAM,
		    (uint64_t *)hist, sizeof (*hist) / sizeof (uint64_t)) == 0);
		kmem_free(hist, sizeof (*hist));

		objstats = kmem_zalloc(sizeof (*objstats), KM_SLEEP);
		ddt_get_dedup_object_stats(spa, objstats);
		VERIFY(nvlist_add_uint64_array(cfg,
		    ZPOOL_CONFIG_DDT_OBJ_STATS,
		    (uint64_t *)objstats,
		    sizeof (*objstats) / sizeof (uint64_t)) == 0);
		kmem_free(objstats, sizeof (*objstats));

		stats = kmem_zalloc(sizeof (*stats), KM_SLEEP);
		ddt_get_dedup_stats(spa, stats);
		VERIFY(nvlist_add_uint64_array(cfg,
		    ZPOOL_CONFIG_DDT_STATS,
		    (uint64_t *)stats,
		    sizeof (*stats) / sizeof (uint64_t)) == 0);
		kmem_free(stats, sizeof (*stats));
	}

	/* Drop the locks only if they were taken by this function. */
	if (took_lock) {
		spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG);
	}

	return (cfg);
}
Exemple #28
0
/*
 * Topology-walk handler for a bay node.
 *
 * If the node has no disk-monitor private property group it is ignored.
 * If a diskmon already exists for the node's FMRI, its cached FRU
 * information is discarded (subject to wdp->target) and nothing else is
 * rebuilt.  Otherwise the indicator, indicator-rule and miscellaneous
 * properties are gathered from the node and a new diskmon is created and
 * registered under the node's FMRI string.
 *
 * thp	- topology handle; used to release strings returned by libtopo
 * node	- the bay node being visited
 * wdp	- walk state; wdp->target (if non-NULL) restricts the FRU reset to
 *	  that diskmon, and wdp->pfmri receives the node's FMRI string
 *
 * Returns 0 when the node was processed or ignored, and -1 when the
 * attachment-point path property cannot be read.
 */
static int
topo_add_bay(topo_hdl_t *thp, tnode_t *node, walk_diskmon_t *wdp)
{
	diskmon_t *target_diskp = wdp->target;
	nvlist_t	*nvlp = find_disk_monitor_private_pgroup(node);
	nvlist_t	*prop_nvlp;
	nvpair_t	*nvp = NULL;
	char		*prop_name, *prop_value;
#define	PNAME_MAX 128
	char		pname[PNAME_MAX];
	char		msgbuf[MAX_CONF_MSG_LEN];
	char		*indicator_name, *indicator_action;
	char		*indrule_states, *indrule_actions;
	int		err = 0, i;
	conf_err_t	conferr;
	boolean_t	conf_failure = B_FALSE;
	char		*unadj_physid = NULL;
	char		physid[MAXPATHLEN];
	char		*label;
	nvlist_t	*diskprops = NULL;
	char		*cstr = NULL;
	indicator_t	*indp = NULL;
	indrule_t	*indrp = NULL;
	void		*p;
	diskmon_t	*diskp;
	/*
	 * Start out NULL so conf_error_msg() never sees an indeterminate
	 * pointer if a check routine fails without filling it in.
	 */
	void		*ptr = NULL;

	/* No private properties -- just ignore the port */
	if (nvlp == NULL)
		return (0);

	/*
	 * Look for a diskmon based on this node's FMRI string.
	 * Once a diskmon has been created, it's not re-created.  This is
	 * essential for the times when the tree-walk is called after a
	 * disk is inserted (or removed) -- in that case, the disk node
	 * handler simply updates the FRU information in the diskmon.
	 */
	if ((p = fmri2ptr(thp, node, &cstr, &err)) != NULL) {

		diskp = (diskmon_t *)p;

		/*
		 * Delete the FRU information from the diskmon.  If a disk
		 * is connected, its FRU information will be refreshed by
		 * the disk node code.
		 */
		if (diskp->frup && (target_diskp == NULL ||
		    diskp == target_diskp)) {
			dm_assert(pthread_mutex_lock(&diskp->fru_mutex) == 0);
			dmfru_free(diskp->frup);
			diskp->frup = NULL;
			dm_assert(pthread_mutex_unlock(&diskp->fru_mutex) == 0);
		}

		wdp->pfmri = cstr;
		nvlist_free(nvlp);
		return (0);
	}

	/*
	 * Determine the physical path to the attachment point
	 */
	if (topo_prop_get_string(node, TOPO_PGROUP_IO,
	    TOPO_IO_AP_PATH, &unadj_physid, &err) == 0) {

		adjust_dynamic_ap(unadj_physid, physid);
		topo_hdl_strfree(thp, unadj_physid);
	} else {

		/* unadj_physid cannot have been allocated */
		if (cstr)
			dstrfree(cstr);
		nvlist_free(nvlp);
		return (-1);
	}

	/*
	 * Process the properties.  If we encounter a property that
	 * is not an indicator name, action, or rule, add it to the
	 * disk's props list.
	 */

	/*
	 * Process indicators.  Properties are numbered consecutively from
	 * zero; the first missing name or action ends the scan.  Every
	 * string fetched in an iteration is released in that same
	 * iteration, so nothing is left allocated when the loop stops.
	 */
	for (i = 0; !conf_failure; i++) {
		(void) snprintf(pname, PNAME_MAX, BAY_IND_NAME "-%d", i);
		if (topo_prop_get_string(node, DISK_MONITOR_PROPERTIES,
		    pname, &indicator_name, &err) != 0)
			break;

		(void) snprintf(pname, PNAME_MAX, BAY_IND_ACTION "-%d", i);
		if (topo_prop_get_string(node, DISK_MONITOR_PROPERTIES,
		    pname, &indicator_action, &err) != 0) {
			/* A name with no matching action is dropped */
			topo_hdl_strfree(thp, indicator_name);
			break;
		}

		if (topoprop_indicator_add(&indp, indicator_name,
		    indicator_action) != 0)
			conf_failure = B_TRUE;

		topo_hdl_strfree(thp, indicator_name);
		topo_hdl_strfree(thp, indicator_action);
	}

	if (!conf_failure && indp != NULL &&
	    (conferr = check_inds(indp)) != E_NO_ERROR) {
		conf_error_msg(conferr, msgbuf, MAX_CONF_MSG_LEN, NULL);
		log_msg(MM_CONF, "%s: Not adding disk to list\n", msgbuf);
		conf_failure = B_TRUE;
	}

	/*
	 * Process state rules and indicator actions, using the same
	 * numbering and termination scheme as the indicators above.
	 */
	for (i = 0; !conf_failure; i++) {
		(void) snprintf(pname, PNAME_MAX, BAY_INDRULE_STATES "-%d", i);
		if (topo_prop_get_string(node, DISK_MONITOR_PROPERTIES,
		    pname, &indrule_states, &err) != 0)
			break;

		(void) snprintf(pname, PNAME_MAX, BAY_INDRULE_ACTIONS "-%d",
		    i);
		if (topo_prop_get_string(node, DISK_MONITOR_PROPERTIES,
		    pname, &indrule_actions, &err) != 0) {
			/* States with no matching actions are dropped */
			topo_hdl_strfree(thp, indrule_states);
			break;
		}

		if (topoprop_indrule_add(&indrp, indrule_states,
		    indrule_actions) != 0)
			conf_failure = B_TRUE;

		topo_hdl_strfree(thp, indrule_states);
		topo_hdl_strfree(thp, indrule_actions);
	}

	if (!conf_failure && indrp != NULL && indp != NULL &&
	    ((conferr = check_indrules(indrp, (state_transition_t **)&ptr))
	    != E_NO_ERROR ||
	    (conferr = check_consistent_ind_indrules(indp, indrp,
	    (ind_action_t **)&ptr)) != E_NO_ERROR)) {

		conf_error_msg(conferr, msgbuf, MAX_CONF_MSG_LEN, ptr);
		log_msg(MM_CONF, "%s: Not adding disk to list\n", msgbuf);
		conf_failure = B_TRUE;

	}

	/*
	 * Now collect miscellaneous properties.
	 * Each property is stored as an embedded nvlist named
	 * TOPO_PROP_VAL.  The property name is stored in the value for
	 * key=TOPO_PROP_VAL_NAME and the property's value is
	 * stored in the value for key=TOPO_PROP_VAL_VAL.  This is all
	 * necessary so we can subtractively decode the properties that
	 * we do not directly handle (so that these properties are added to
	 * the per-disk properties nvlist), increasing flexibility.
	 */
	(void) nvlist_alloc(&diskprops, NV_UNIQUE_NAME, 0);
	while ((nvp = nvlist_next_nvpair(nvlp, nvp)) != NULL) {
		/* Only care about embedded nvlists named TOPO_PROP_VAL */
		if (nvpair_type(nvp) != DATA_TYPE_NVLIST ||
		    strcmp(nvpair_name(nvp), TOPO_PROP_VAL) != 0 ||
		    nvpair_value_nvlist(nvp, &prop_nvlp) != 0)
			continue;

		if (nonunique_nvlist_lookup_string(prop_nvlp,
		    TOPO_PROP_VAL_NAME, &prop_name) != 0)
			continue;

		/* Filter out indicator properties */
		if (strstr(prop_name, BAY_IND_NAME) != NULL ||
		    strstr(prop_name, BAY_IND_ACTION) != NULL ||
		    strstr(prop_name, BAY_INDRULE_STATES) != NULL ||
		    strstr(prop_name, BAY_INDRULE_ACTIONS) != NULL)
			continue;

		if (nonunique_nvlist_lookup_string(prop_nvlp, TOPO_PROP_VAL_VAL,
		    &prop_value) != 0)
			continue;

		/* Add the property to the disk's prop list: */
		if (nvlist_add_string(diskprops, prop_name, prop_value) != 0)
			log_msg(MM_TOPO,
			    "Could not add disk property `%s' with "
			    "value `%s'\n", prop_name, prop_value);
	}

	nvlist_free(nvlp);

	if (cstr != NULL) {
		namevalpr_t nvpr;
		nvlist_t *dmap_nvl;

		/* Pass physid without the leading "/devices": */
		nvpr.name = DISK_AP_PROP_APID;
		nvpr.value = strncmp(physid, "/devices", 8) == 0 ?
		    (physid + 8) : physid;

		dmap_nvl = namevalpr_to_nvlist(&nvpr);

		diskp = new_diskmon(dmap_nvl, indp, indrp, diskprops);

		if (diskp == NULL) {
			/*
			 * No diskmon was created, so the pieces handed to
			 * new_diskmon() are still ours to release.
			 */
			if (dmap_nvl)
				nvlist_free(dmap_nvl);
			if (indp)
				ind_free(indp);
			if (indrp)
				indrule_free(indrp);
			if (diskprops)
				nvlist_free(diskprops);
		} else {
			/*
			 * Set the diskmon's location to the value in this
			 * port's label.  If there's a disk plugged in, the
			 * location will be updated to be the disk label
			 * (e.g. HD_ID_00).  Until a disk is inserted, though,
			 * there won't be a disk libtopo node created.
			 */
			if (topo_node_label(node, &label, &err) == 0) {
				diskp->location = dstrdup(label);
				topo_hdl_strfree(thp, label);
			} else {
				diskp->location = dstrdup("unknown location");
			}

			if (!conf_failure) {
				/* Add this diskmon to the disk list */
				cfgdata_add_diskmon(config_data, diskp);
				if (nvlist_add_uint64(g_topo2diskmon, cstr,
				    (uint64_t)(uintptr_t)diskp) != 0) {
					log_msg(MM_TOPO,
					    "Could not add pointer to nvlist "
					    "for `%s'!\n", cstr);
				}
			} else {
				diskmon_free(diskp);
			}
		}

		wdp->pfmri = cstr;
	} else {
		/*
		 * Without an FMRI string no diskmon is built, so nothing
		 * takes ownership of the lists gathered above.
		 */
		if (indp)
			ind_free(indp);
		if (indrp)
			indrule_free(indrp);
		if (diskprops)
			nvlist_free(diskprops);
	}


	return (0);
}
Exemple #29
0
/*
 * Build a configuration nvlist for the pool from its in-core state.
 *
 * The scope of the result is inferred from vd: NULL or the root vdev
 * yields the complete pool config, while any other vdev yields the config
 * of that vdev's top-level vdev, tagged with the vdev's own guids and
 * spare/log markers.
 *
 * When vd is NULL the SCL_CONFIG and SCL_STATE locks are taken here as
 * reader and dropped before returning; otherwise they are asserted to be
 * held already.
 *
 * A txg of -1 stands for spa->spa_config_txg.  A non-zero getstats is
 * passed on to vdev_config_generate() and, when the pool's load state is
 * SPA_LOAD_NONE, also adds the dedup histogram and statistics arrays.
 *
 * Returns the nvlist allocated here.
 */
nvlist_t *
spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats)
{
	vdev_t *root_vd = spa->spa_root_vdev;
	boolean_t took_lock = B_FALSE;
	unsigned long hostid = 0;
	uint64_t split_guid;
	nvlist_t *config, *nvroot;
	char *pool_name;

	/* No vdev given: describe the whole pool, locking it ourselves. */
	if (vd == NULL) {
		spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_READER);
		took_lock = B_TRUE;
		vd = root_vd;
	}

	ASSERT(spa_config_held(spa, SCL_CONFIG | SCL_STATE, RW_READER) ==
	    (SCL_CONFIG | SCL_STATE));

	/* A txg of -1 means "whatever spa->spa_config_txg currently is". */
	if (txg == -1ULL)
		txg = spa->spa_config_txg;

	/*
	 * Pools can be imported under a temporary name so that namespace
	 * collisions (e.g. root pools of virtual guests) can be resolved
	 * without exporting or permanently renaming anything.  Such imports
	 * carry ZFS_IMPORT_TEMP_NAME in spa->spa_import_flags, and for them
	 * the name recorded in spa->spa_config is reported instead of the
	 * name the pool currently goes by.
	 */
	if (!(spa->spa_import_flags & ZFS_IMPORT_TEMP_NAME)) {
		pool_name = spa_name(spa);
	} else {
		VERIFY0(nvlist_lookup_string(spa->spa_config,
		    ZPOOL_CONFIG_POOL_NAME, &pool_name));
	}

	VERIFY(nvlist_alloc(&config, NV_UNIQUE_NAME, KM_SLEEP) == 0);

	/* Pool-wide identity and state. */
	VERIFY(nvlist_add_uint64(config,
	    ZPOOL_CONFIG_VERSION, spa_version(spa)) == 0);
	VERIFY(nvlist_add_string(config,
	    ZPOOL_CONFIG_POOL_NAME, pool_name) == 0);
	VERIFY(nvlist_add_uint64(config,
	    ZPOOL_CONFIG_POOL_STATE, spa_state(spa)) == 0);
	VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_TXG, txg) == 0);
	VERIFY(nvlist_add_uint64(config,
	    ZPOOL_CONFIG_POOL_GUID, spa_guid(spa)) == 0);
	VERIFY(nvlist_add_uint64(config,
	    ZPOOL_CONFIG_ERRATA, spa->spa_errata) == 0);
	VERIFY(spa->spa_comment == NULL || nvlist_add_string(config,
	    ZPOOL_CONFIG_COMMENT, spa->spa_comment) == 0);

#ifdef	_KERNEL
	hostid = zone_get_hostid(NULL);
#else	/* _KERNEL */
	/*
	 * Userland emulates the system hostid and has no
	 * zone_get_hostid(); parse it out of hw_serial instead.
	 */
	(void) ddi_strtoul(hw_serial, NULL, 10, &hostid);
#endif	/* _KERNEL */

	/* The hostid is only recorded when it is non-zero. */
	if (hostid != 0) {
		VERIFY(nvlist_add_uint64(config,
		    ZPOOL_CONFIG_HOSTID, hostid) == 0);
	}
	VERIFY0(nvlist_add_string(config, ZPOOL_CONFIG_HOSTNAME,
	    utsname()->nodename));

	if (vd == root_vd) {
		/*
		 * The (potentially large) split information belongs in
		 * the MOS config only, never in the vdev labels.
		 */
		if (spa->spa_config_splitting != NULL)
			VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_SPLIT,
			    spa->spa_config_splitting) == 0);
	} else {
		/* Label config: identify the vdev, then describe its top. */
		VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_TOP_GUID,
		    vd->vdev_top->vdev_guid) == 0);
		VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_GUID,
		    vd->vdev_guid) == 0);
		if (vd->vdev_isspare)
			VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_IS_SPARE,
			    1ULL) == 0);
		if (vd->vdev_islog)
			VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_IS_LOG,
			    1ULL) == 0);
		vd = vd->vdev_top;
	}

	/*
	 * The top-level config is added unconditionally, including on
	 * pools which don't support holes in the namespace.
	 */
	vdev_top_config_generate(spa, config);

	/* While a split is in progress, note the original pool's guid. */
	if (spa->spa_config_splitting != NULL &&
	    nvlist_lookup_uint64(spa->spa_config_splitting,
	    ZPOOL_CONFIG_SPLIT_GUID, &split_guid) == 0) {
		VERIFY(nvlist_add_uint64(config,
		    ZPOOL_CONFIG_SPLIT_GUID, split_guid) == 0);
	}

	/* nvlist_add_nvlist() is given nvroot; we free ours afterwards. */
	nvroot = vdev_config_generate(spa, vd, getstats, 0);
	VERIFY(nvlist_add_nvlist(config,
	    ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0);
	nvlist_free(nvroot);

	/* The label must carry whatever is required to read the MOS. */
	VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_FEATURES_FOR_READ,
	    spa->spa_label_features) == 0);

	/*
	 * Dedup statistics.  Each structure is exported as a flat array of
	 * uint64_t, using a scratch buffer that is released straight away.
	 */
	if (getstats && spa_load_state(spa) == SPA_LOAD_NONE) {
		ddt_histogram_t *ddh;
		ddt_object_t *ddo;
		ddt_stat_t *dds;

		ddh = kmem_zalloc(sizeof (*ddh), KM_SLEEP);
		ddt_get_dedup_histogram(spa, ddh);
		VERIFY(nvlist_add_uint64_array(config,
		    ZPOOL_CONFIG_DDT_HISTOGRAM, (uint64_t *)ddh,
		    sizeof (*ddh) / sizeof (uint64_t)) == 0);
		kmem_free(ddh, sizeof (*ddh));

		ddo = kmem_zalloc(sizeof (*ddo), KM_SLEEP);
		ddt_get_dedup_object_stats(spa, ddo);
		VERIFY(nvlist_add_uint64_array(config,
		    ZPOOL_CONFIG_DDT_OBJ_STATS, (uint64_t *)ddo,
		    sizeof (*ddo) / sizeof (uint64_t)) == 0);
		kmem_free(ddo, sizeof (*ddo));

		dds = kmem_zalloc(sizeof (*dds), KM_SLEEP);
		ddt_get_dedup_stats(spa, dds);
		VERIFY(nvlist_add_uint64_array(config,
		    ZPOOL_CONFIG_DDT_STATS, (uint64_t *)dds,
		    sizeof (*dds) / sizeof (uint64_t)) == 0);
		kmem_free(dds, sizeof (*dds));
	}

	/* Drop the locks only if this call was the one that took them. */
	if (took_lock)
		spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG);

	return (config);
}
Exemple #30
0
/*
 * Write the in-core FUID domain table out to persistent storage.
 *
 * Nothing happens unless zfsvfs->z_fuid_dirty is set.  Otherwise, with
 * z_fuid_lock held as writer, the FUID object is created on first use,
 * every node of the z_fuid_domain AVL tree is encoded as an nvlist, the
 * resulting array is XDR-packed and written at offset 0 of the object,
 * and the packed size is stored in the object's bonus buffer.  The dirty
 * flag is cleared before the lock is dropped.
 *
 * The whole body is compiled only when HAVE_ZPL is defined.
 */
void
zfs_fuid_sync(zfsvfs_t *zfsvfs, dmu_tx_t *tx)
{
#ifdef HAVE_ZPL
	fuid_domain_t *domnode;
	nvlist_t **fuids;
	nvlist_t *nvp;
	dmu_buf_t *db;
	char *packed;
	size_t nvsize = 0;
	size_t fuids_size;
	int numnodes;
	int i;

	/* Clean table: nothing to write. */
	if (!zfsvfs->z_fuid_dirty)
		return;

	rw_enter(&zfsvfs->z_fuid_lock, RW_WRITER);

	/*
	 * Create the FUID object on first use and record it in the
	 * master node.
	 */
	if (zfsvfs->z_fuid_obj == 0) {
		zfsvfs->z_fuid_obj = dmu_object_alloc(zfsvfs->z_os,
		    DMU_OT_FUID, 1 << 14, DMU_OT_FUID_SIZE,
		    sizeof (uint64_t), tx);
		VERIFY(zap_add(zfsvfs->z_os, MASTER_NODE_OBJ, ZFS_FUID_TABLES,
		    sizeof (uint64_t), 1, &zfsvfs->z_fuid_obj, tx) == 0);
	}

	VERIFY(nvlist_alloc(&nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0);

	/*
	 * The array is sized from the index tree but filled by walking
	 * the domain tree.
	 */
	numnodes = avl_numnodes(&zfsvfs->z_fuid_idx);
	fuids_size = numnodes * sizeof (void *);
	fuids = kmem_alloc(fuids_size, KM_SLEEP);

	/* One nvlist (index, offset, domain name) per domain node. */
	i = 0;
	domnode = avl_first(&zfsvfs->z_fuid_domain);
	while (domnode) {
		VERIFY(nvlist_alloc(&fuids[i], NV_UNIQUE_NAME, KM_SLEEP) == 0);
		VERIFY(nvlist_add_uint64(fuids[i],
		    FUID_IDX, domnode->f_idx) == 0);
		VERIFY(nvlist_add_uint64(fuids[i], FUID_OFFSET, 0) == 0);
		VERIFY(nvlist_add_string(fuids[i],
		    FUID_DOMAIN, domnode->f_ksid->kd_name) == 0);

		domnode = AVL_NEXT(&zfsvfs->z_fuid_domain, domnode);
		i++;
	}

	/* Once the array is added to nvp, the per-node lists are freed. */
	VERIFY(nvlist_add_nvlist_array(nvp,
	    FUID_NVP_ARRAY, fuids, numnodes) == 0);
	i = 0;
	while (i != numnodes) {
		nvlist_free(fuids[i]);
		i++;
	}
	kmem_free(fuids, fuids_size);

	/* Pack into a buffer of exactly the XDR-encoded size. */
	VERIFY(nvlist_size(nvp, &nvsize, NV_ENCODE_XDR) == 0);
	packed = kmem_alloc(nvsize, KM_SLEEP);
	VERIFY(nvlist_pack(nvp,
	    &packed, &nvsize, NV_ENCODE_XDR, KM_SLEEP) == 0);
	nvlist_free(nvp);

	/* Write the packed table at offset 0 of the FUID object. */
	zfsvfs->z_fuid_size = nvsize;
	dmu_write(zfsvfs->z_os, zfsvfs->z_fuid_obj, 0,
	    zfsvfs->z_fuid_size, packed, tx);
	kmem_free(packed, zfsvfs->z_fuid_size);

	/* The packed size is kept in the object's bonus buffer. */
	VERIFY(0 == dmu_bonus_hold(zfsvfs->z_os,
	    zfsvfs->z_fuid_obj, FTAG, &db));
	dmu_buf_will_dirty(db, tx);
	*(uint64_t *)db->db_data = zfsvfs->z_fuid_size;
	dmu_buf_rele(db, FTAG);

	zfsvfs->z_fuid_dirty = B_FALSE;
	rw_exit(&zfsvfs->z_fuid_lock);
#endif /* HAVE_ZPL */
}