/* * Enable processor set plugin. */ int pool_pset_enable(void) { int error; nvlist_t *props; ASSERT(pool_lock_held()); ASSERT(INGLOBALZONE(curproc)); /* * Can't enable pools if there are existing cpu partitions. */ mutex_enter(&cpu_lock); if (cp_numparts > 1) { mutex_exit(&cpu_lock); return (EEXIST); } /* * We want to switch things such that everything that was tagged with * the special ALL_ZONES token now is explicitly visible to all zones: * first add individual zones to the visibility list then remove the * special "ALL_ZONES" token. There must only be the default pset * (PS_NONE) active if pools are being enabled, so we only need to * deal with it. * * We want to make pool_pset_enabled() start returning B_TRUE before * we call any of the visibility update functions. */ global_zone->zone_psetid = PS_NONE; /* * We need to explicitly handle the global zone since * zone_pset_set() won't modify it. */ pool_pset_visibility_add(PS_NONE, global_zone); /* * A NULL argument means the ALL_ZONES token. */ pool_pset_visibility_remove(PS_NONE, NULL); error = zone_walk(pool_pset_zone_pset_set, (void *)PS_NONE); ASSERT(error == 0); /* * It is safe to drop cpu_lock here. We're still * holding pool_lock so no new cpu partitions can * be created while we're here. */ mutex_exit(&cpu_lock); (void) nvlist_alloc(&pool_pset_default->pset_props, NV_UNIQUE_NAME, KM_SLEEP); props = pool_pset_default->pset_props; (void) nvlist_add_string(props, "pset.name", "pset_default"); (void) nvlist_add_string(props, "pset.comment", ""); (void) nvlist_add_int64(props, "pset.sys_id", PS_NONE); (void) nvlist_add_string(props, "pset.units", "population"); (void) nvlist_add_byte(props, "pset.default", 1); (void) nvlist_add_uint64(props, "pset.max", 65536); (void) nvlist_add_uint64(props, "pset.min", 1); pool_pset_mod = pool_cpu_mod = gethrtime(); return (0); }
/*
 * Post the PICL_FRU_ADDED/PICL_FRU_REMOVED event
 *
 * ename is duplicated and posted together with an nvlist that carries the
 * parent and FRU handles; a handle equal to 0 is simply left out.  Any
 * failure is silent: the function returns without posting.
 *
 * When ptree_post_event() fails, the duplicated name and the nvlist are
 * released here.  On success they are not touched again by this function
 * (presumably frudr_completion_handler releases them -- confirm).
 */
static void
post_frudr_event(char *ename, picl_nodehdl_t parenth, picl_nodehdl_t fruh)
{
	nvlist_t	*nvl;
	char		*ev_name;

	ev_name = strdup(ename);
	if (ev_name == NULL)
		return;

	/* The third argument is an int flag, so pass 0 rather than NULL. */
	if (nvlist_alloc(&nvl, NV_UNIQUE_NAME_TYPE, 0) != 0) {
		free(ev_name);
		return;
	}

	if (parenth != 0L &&
	    nvlist_add_uint64(nvl, PICLEVENTARG_PARENTHANDLE, parenth) != 0)
		goto fail;

	if (fruh != 0L &&
	    nvlist_add_uint64(nvl, PICLEVENTARG_FRUHANDLE, fruh) != 0)
		goto fail;

	/* sizeof (nvl) is kept exactly as the original passed it. */
	if (ptree_post_event(ev_name, nvl, sizeof (nvl),
	    frudr_completion_handler) == 0)
		return;

fail:
	free(ev_name);
	nvlist_free(nvl);
}
/** * Create the vdev leaf for the given path. * The function assume that the path is a block device or a file. * Log devices and hot spares are not supported * @param psz_path: path to the device to use * @return the new vdev or NULL in case of error. */ nvlist_t *lzwu_make_leaf_vdev(const char *psz_path) { struct stat64 statbuf; nvlist_t *p_vdev; const char *psz_type; if(stat64(psz_path, &statbuf) != 0) return NULL; if(S_ISBLK(statbuf.st_mode)) psz_type = VDEV_TYPE_DISK; else if(S_ISREG(statbuf.st_mode)) psz_type = VDEV_TYPE_FILE; else return NULL; nvlist_alloc(&p_vdev, NV_UNIQUE_NAME, 0); nvlist_add_string(p_vdev, ZPOOL_CONFIG_PATH, psz_path); nvlist_add_string(p_vdev, ZPOOL_CONFIG_TYPE, psz_type); nvlist_add_string(p_vdev, ZPOOL_CONFIG_IS_LOG, 0); if(!strcmp(psz_type, VDEV_TYPE_DISK)) nvlist_add_uint64(p_vdev, ZPOOL_CONFIG_WHOLE_DISK, 0); return p_vdev; }
/*
 * Walk the parameters of a self-test results log page and record one result
 * in sip->si_dsp->ds_testfail.
 *
 * lphp points at the first log parameter and log_length is the number of
 * bytes of parameter data to examine.  For every parameter whose code lies
 * in [LOGPAGE_SELFTEST_MIN_PARAM_CODE, LOGPAGE_SELFTEST_MAX_PARAM_CODE] and
 * whose length is at least LOGPAGE_SELFTEST_PARAM_LEN, the result is stored
 * if it is parameter 1 or describes a completed test.  Because the nvlist is
 * allocated with NV_UNIQUE_NAME, a later store replaces an earlier one.  The
 * walk stops at the first completed test; if that test did not finish with
 * SELFTEST_OK, DS_FAULT_TESTFAIL is set in ds_faults.
 *
 * Returns 0 on success, or the result of scsi_set_errno(sip, EDS_NOMEM) when
 * an nvlist operation fails.
 */
static int
logpage_selftest_analyze(ds_scsi_info_t *sip,
    scsi_log_parameter_header_t *lphp, int log_length)
{
	const int hdrlen = (int)sizeof (scsi_log_parameter_header_t);
	int i, plen = 0;
	ushort_t param_code;
	scsi_selftest_log_param_t *stp;
	nvlist_t *nvl;

	assert(sip->si_dsp->ds_testfail == NULL);
	if (nvlist_alloc(&sip->si_dsp->ds_testfail, NV_UNIQUE_NAME, 0) != 0)
		return (scsi_set_errno(sip, EDS_NOMEM));
	nvl = sip->si_dsp->ds_testfail;

	for (i = 0; i < log_length; i += plen) {
		lphp = (scsi_log_parameter_header_t *)((char *)lphp + plen);

		/*
		 * Never read a parameter header that is only partially
		 * inside the supplied data.
		 */
		if (log_length - i < hdrlen)
			break;

		param_code = BE_16(lphp->lph_param);
		stp = (scsi_selftest_log_param_t *)lphp;

		/*
		 * The last clause makes sure the parameter body claimed by
		 * lph_length is fully contained in the remaining data before
		 * any of its fields are read.
		 */
		if (param_code >= LOGPAGE_SELFTEST_MIN_PARAM_CODE &&
		    param_code <= LOGPAGE_SELFTEST_MAX_PARAM_CODE &&
		    lphp->lph_length >= LOGPAGE_SELFTEST_PARAM_LEN &&
		    (int)lphp->lph_length <= log_length - i - hdrlen) {
			/*
			 * We always log the last result, or the result of the
			 * last completed test.
			 */
			if ((param_code == 1 ||
			    SELFTEST_COMPLETE(stp->st_results))) {
				if (nvlist_add_uint8(nvl,
				    FM_EREPORT_PAYLOAD_SCSI_RESULTCODE,
				    stp->st_results) != 0 ||
				    nvlist_add_uint16(nvl,
				    FM_EREPORT_PAYLOAD_SCSI_TIMESTAMP,
				    BE_16(stp->st_timestamp)) != 0 ||
				    nvlist_add_uint8(nvl,
				    FM_EREPORT_PAYLOAD_SCSI_SEGMENT,
				    stp->st_number) != 0 ||
				    nvlist_add_uint64(nvl,
				    FM_EREPORT_PAYLOAD_SCSI_ADDRESS,
				    BE_64(stp->st_lba)) != 0)
					return (scsi_set_errno(sip,
					    EDS_NOMEM));

				if (SELFTEST_COMPLETE(stp->st_results)) {
					if (stp->st_results != SELFTEST_OK)
						sip->si_dsp->ds_faults |=
						    DS_FAULT_TESTFAIL;
					return (0);
				}
			}
		}

		/* Advance by this parameter's header plus body. */
		plen = lphp->lph_length + sizeof (scsi_log_parameter_header_t);
	}

	return (0);
}
/*
 * Allocate the pi_enum_fns and pi_meths nvlists and fill them from the
 * pi_enum_fns_builtin and pi_meths_builtin tables.  Each table entry is
 * stored under its hc_name with the function (or method table) address
 * encoded as a uint64.  The tables are walked until an entry with a NULL
 * hc_name is found.
 *
 * Returns 0 on success.  On any failure a debug message is printed, both
 * lists are freed, the two globals are reset to NULL so nothing can free
 * them a second time, and -1 is returned.
 */
int
pi_walker_init(topo_mod_t *mod)
{
	int			result;
	pi_enum_functions_t	*fp;
	pi_methods_t		*mp;

	/* Both allocations are always attempted, as before. */
	result = topo_mod_nvalloc(mod, &pi_enum_fns, NV_UNIQUE_NAME);
	result |= topo_mod_nvalloc(mod, &pi_meths, NV_UNIQUE_NAME);
	if (result != 0)
		goto fail;

	/* Add the builtin functions to the list */
	for (fp = pi_enum_fns_builtin; fp != NULL && fp->hc_name != NULL;
	    fp++) {
		uint64_t	faddr;

		faddr = (uint64_t)(uintptr_t)*(fp->func);
		result |= nvlist_add_uint64(pi_enum_fns, fp->hc_name, faddr);
	}

	/* Add the builtin methods to the list */
	for (mp = pi_meths_builtin; mp != NULL && mp->hc_name != NULL;
	    mp++) {
		uint64_t	maddr;

		maddr = (uint64_t)(uintptr_t)mp->meths;
		result |= nvlist_add_uint64(pi_meths, mp->hc_name, maddr);
	}

	if (result != 0)
		goto fail;

	return (0);

fail:
	topo_mod_dprintf(mod, "pi_walker_init failed\n");
	nvlist_free(pi_enum_fns);
	pi_enum_fns = NULL;
	nvlist_free(pi_meths);
	pi_meths = NULL;
	return (-1);
}
/*
 * Add a fixed-size, possibly unterminated character field to nvl under name.
 * At most 64 bytes of the field are copied into a scratch buffer, which is
 * always NUL-terminated before being handed to nvlist_add_string().
 */
static void
inhm_dimm_add_field(nvlist_t *nvl, const char *name, const char *field,
    size_t fieldsz)
{
	char sbuf[65];
	size_t len = fieldsz;

	if (len > sizeof (sbuf) - 1)
		len = sizeof (sbuf) - 1;
	(void) strncpy(sbuf, field, len);
	sbuf[len] = 0;
	(void) nvlist_add_string(nvl, name, sbuf);
}

/*
 * Build and return a new nvlist describing one DIMM.
 *
 * The list holds the DIMM number, a human readable size ("<n>G" for sizes of
 * 1GB and above, "<n>M" otherwise), the raw size, the geometry fields taken
 * from nhm_dimm, whatever inhm_rank() adds, and the identification strings.
 * manufacturer, serial number, part number and revision are only added when
 * their first character is non-zero; the label is always added.
 *
 * The nvlist return codes are deliberately ignored, as in the rest of this
 * function's family.
 */
static nvlist_t *
inhm_dimm(nhm_dimm_t *nhm_dimm, uint32_t node, uint8_t channel, uint32_t dimm)
{
	nvlist_t *newdimm;
	char sbuf[65];

	(void) nvlist_alloc(&newdimm, NV_UNIQUE_NAME, KM_SLEEP);
	(void) nvlist_add_uint32(newdimm, "dimm-number", dimm);

	if (nhm_dimm->dimm_size >= 1024*1024*1024) {
		(void) snprintf(sbuf, sizeof (sbuf), "%dG",
		    (int)(nhm_dimm->dimm_size / (1024*1024*1024)));
	} else {
		(void) snprintf(sbuf, sizeof (sbuf), "%dM",
		    (int)(nhm_dimm->dimm_size / (1024*1024)));
	}
	(void) nvlist_add_string(newdimm, "dimm-size", sbuf);
	(void) nvlist_add_uint64(newdimm, "size", nhm_dimm->dimm_size);
	(void) nvlist_add_uint32(newdimm, "nbanks", (uint32_t)nhm_dimm->nbanks);
	(void) nvlist_add_uint32(newdimm, "ncolumn",
	    (uint32_t)nhm_dimm->ncolumn);
	(void) nvlist_add_uint32(newdimm, "nrow", (uint32_t)nhm_dimm->nrow);
	(void) nvlist_add_uint32(newdimm, "width", (uint32_t)nhm_dimm->width);
	(void) nvlist_add_uint32(newdimm, "ranks", (uint32_t)nhm_dimm->nranks);

	/* NOTE(review): assumes nranks is non-zero -- confirm with callers. */
	inhm_rank(newdimm, nhm_dimm, node, channel, dimm,
	    nhm_dimm->dimm_size / nhm_dimm->nranks);

	if (nhm_dimm->manufacturer && nhm_dimm->manufacturer[0]) {
		inhm_dimm_add_field(newdimm, "manufacturer",
		    nhm_dimm->manufacturer, sizeof (nhm_dimm->manufacturer));
	}
	if (nhm_dimm->serial_number && nhm_dimm->serial_number[0]) {
		inhm_dimm_add_field(newdimm, FM_FMRI_HC_SERIAL_ID,
		    nhm_dimm->serial_number,
		    sizeof (nhm_dimm->serial_number));
	}
	if (nhm_dimm->part_number && nhm_dimm->part_number[0]) {
		inhm_dimm_add_field(newdimm, FM_FMRI_HC_PART,
		    nhm_dimm->part_number, sizeof (nhm_dimm->part_number));
	}
	if (nhm_dimm->revision && nhm_dimm->revision[0]) {
		inhm_dimm_add_field(newdimm, FM_FMRI_HC_REVISION,
		    nhm_dimm->revision, sizeof (nhm_dimm->revision));
	}
	inhm_dimm_add_field(newdimm, FM_FAULT_FRU_LABEL, nhm_dimm->label,
	    sizeof (nhm_dimm->label));

	return (newdimm);
}
/*
 * Expand a cpu-scheme FMRI in place by filling in its serial id when absent.
 *
 * The FMRI must carry FM_VERSION and FM_FMRI_CPU_ID, otherwise EINVAL is set.
 * topo_fmri_expand() is tried first; its result is returned unless it
 * reported ETOPO_METHOD_NOTSUP, in which case the serial id is obtained
 * locally: as a uint64 for CPU_SCHEME_VERSION0 and as a string for
 * CPU_SCHEME_VERSION1.  For version 1 a failure to obtain the serial number
 * is not an error.  Any other version yields EINVAL.
 *
 * Returns 0 on success, or the value of fmd_fmri_set_errno() / -1 on error.
 */
int
fmd_fmri_expand(nvlist_t *nvl)
{
	uint8_t version;
	uint32_t cpuid;
	uint64_t serialid;
	char *serstr, serbuf[21]; /* 20 decimal digits of UINT64_MAX + '\0' */
	int rc;
	/*
	 * Start from 0: err is only compared against ETOPO_METHOD_NOTSUP
	 * below and must not be read uninitialised if topo_fmri_expand()
	 * succeeds without storing to it.
	 */
	int err = 0;
	topo_hdl_t *thp;

	if (nvlist_lookup_uint8(nvl, FM_VERSION, &version) != 0 ||
	    nvlist_lookup_uint32(nvl, FM_FMRI_CPU_ID, &cpuid) != 0)
		return (fmd_fmri_set_errno(EINVAL));

	/*
	 * If the cpu-scheme topology exports this method expand(), invoke it.
	 */
	if ((thp = fmd_fmri_topo_hold(TOPO_VERSION)) == NULL)
		return (fmd_fmri_set_errno(EINVAL));

	rc = topo_fmri_expand(thp, nvl, &err);
	fmd_fmri_topo_rele(thp);
	if (err != ETOPO_METHOD_NOTSUP)
		return (rc);

	if (version == CPU_SCHEME_VERSION0) {
		if ((rc = nvlist_lookup_uint64(nvl, FM_FMRI_CPU_SERIAL_ID,
		    &serialid)) != 0) {
			if (rc != ENOENT)
				return (fmd_fmri_set_errno(rc));

			if (cpu_get_serialid_V0(cpuid, &serialid) != 0)
				return (-1); /* errno is set for us */

			if ((rc = nvlist_add_uint64(nvl, FM_FMRI_CPU_SERIAL_ID,
			    serialid)) != 0)
				return (fmd_fmri_set_errno(rc));
		}
	} else if (version == CPU_SCHEME_VERSION1) {
		if ((rc = nvlist_lookup_string(nvl, FM_FMRI_CPU_SERIAL_ID,
		    &serstr)) != 0) {
			if (rc != ENOENT)
				return (fmd_fmri_set_errno(rc));

			if (cpu_get_serialid_V1(cpuid, serbuf,
			    sizeof (serbuf)) != 0)
				return (0); /* Serial number is optional */

			if ((rc = nvlist_add_string(nvl, FM_FMRI_CPU_SERIAL_ID,
			    serbuf)) != 0)
				return (fmd_fmri_set_errno(rc));
		}
	} else {
		return (fmd_fmri_set_errno(EINVAL));
	}

	return (0);
}
/*
 * Solve a given ZFS case.
 *
 * A ZFS-scheme detector FMRI is built from the case data (pool guid, plus
 * the vdev guid when it is non-zero), a fault named faultname is created
 * against it with certainty 100, added to the case as a suspect, and the
 * case is solved.  Any pending removal timer is then cancelled and, when one
 * was cancelled, the case is serialized again.
 *
 * checkunusable is not consulted by this function.
 */
static void
zfs_case_solve(fmd_hdl_t *hdl, zfs_case_t *zcp, const char *faultname,
    boolean_t checkunusable)
{
	nvlist_t *zfs_fmri;
	nvlist_t *suspect;
	nvlist_t *fru = NULL;

	fmd_hdl_debug(hdl, "solving fault '%s'", faultname);

	/*
	 * The detector lives in the ZFS scheme and names either the pool or
	 * the vdev, depending on whether a vdev guid is recorded in the case.
	 */
	zfs_fmri = fmd_nvl_alloc(hdl, FMD_SLEEP);
	(void) nvlist_add_uint8(zfs_fmri, FM_VERSION, ZFS_SCHEME_VERSION0);
	(void) nvlist_add_string(zfs_fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_ZFS);
	(void) nvlist_add_uint64(zfs_fmri, FM_FMRI_ZFS_POOL,
	    zcp->zc_data.zc_pool_guid);
	if (zcp->zc_data.zc_vdev_guid != 0) {
		(void) nvlist_add_uint64(zfs_fmri, FM_FMRI_ZFS_VDEV,
		    zcp->zc_data.zc_vdev_guid);
	}

	/* The detector doubles as both ASRU and resource of the fault. */
	suspect = fmd_nvl_create_fault(hdl, faultname, 100, zfs_fmri, fru,
	    zfs_fmri);
	fmd_case_add_suspect(hdl, zcp->zc_case, suspect);
	nvlist_free(fru);

	fmd_case_solve(hdl, zcp->zc_case);

	/* Dropping the timer changes persistent state, so re-serialize. */
	if (zcp->zc_data.zc_has_remove_timer) {
		fmd_timer_remove(hdl, zcp->zc_remove_timer);
		zcp->zc_data.zc_has_remove_timer = 0;
		zfs_case_serialize(hdl, zcp);
	}

	nvlist_free(zfs_fmri);
}
/*
 * There can be more than one kstat value when we have multi-path drives
 * that are not under mpxio (since there is more than one kstat name for
 * the drive in this case).  So, we may have to merge all of the kstat values
 * to give an accurate set of stats for the drive.
 *
 * Adds value to whatever is already stored under attr in stats (or stores
 * value as-is when attr is not present yet) and returns the result of
 * nvlist_add_uint64().
 */
static int
update_stat64(nvlist_t *stats, char *attr, uint64_t value)
{
	uint64_t	currval;

	/*
	 * The pair is stored as a uint64 below, so it has to be looked up
	 * with the same type; a lookup with a different data type does not
	 * find it and the running total would be silently overwritten.
	 */
	if (nvlist_lookup_uint64(stats, attr, &currval) == 0) {
		value += currval;
	}

	return (nvlist_add_uint64(stats, attr, value));
}
/*
 * Convert a Python dictionary into a newly allocated nvlist.
 *
 * Keys must be strings.  Values are converted as follows: a dict becomes a
 * nested nvlist (recursively), None becomes a valueless boolean, a string
 * becomes a string, an int becomes a uint64.  Any other value type raises
 * ValueError.  A non-dict argument raises ValueError and a non-string key
 * raises KeyError.
 *
 * Returns the nvlist (owned by the caller), or NULL with a Python exception
 * set.
 */
static nvlist_t *
dict2nvl(PyObject *d)
{
	nvlist_t *nvl;
	int err;
	PyObject *key, *value;
	/*
	 * NOTE(review): PyDict_Next() takes a Py_ssize_t * on Python 2.5 and
	 * later; confirm the Python version this is built against before
	 * changing the type of pos.
	 */
	int pos = 0;

	if (!PyDict_Check(d)) {
		PyErr_SetObject(PyExc_ValueError, d);
		return (NULL);
	}

	err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0);
	assert(err == 0);

	while (PyDict_Next(d, &pos, &key, &value)) {
		char *keystr = PyString_AsString(key);
		if (keystr == NULL) {
			PyErr_SetObject(PyExc_KeyError, key);
			nvlist_free(nvl);
			return (NULL);
		}

		if (PyDict_Check(value)) {
			nvlist_t *valnvl = dict2nvl(value);
			if (valnvl == NULL) {
				/*
				 * The nested conversion failed and has
				 * already set the exception; propagate it
				 * instead of adding a NULL nvlist.
				 */
				nvlist_free(nvl);
				return (NULL);
			}
			err = nvlist_add_nvlist(nvl, keystr, valnvl);
			nvlist_free(valnvl);
		} else if (value == Py_None) {
			err = nvlist_add_boolean(nvl, keystr);
		} else if (PyString_Check(value)) {
			char *valstr = PyString_AsString(value);
			err = nvlist_add_string(nvl, keystr, valstr);
		} else if (PyInt_Check(value)) {
			uint64_t valint = PyInt_AsUnsignedLongLongMask(value);
			err = nvlist_add_uint64(nvl, keystr, valint);
		} else if (PyBool_Check(value)) {
			/*
			 * NOTE(review): if bool objects also satisfy
			 * PyInt_Check(), this branch is never reached and
			 * bools are stored as uint64 above.  The order is left
			 * alone because consumers may rely on it -- confirm.
			 */
			boolean_t valbool = value == Py_True ? B_TRUE : B_FALSE;
			err = nvlist_add_boolean_value(nvl, keystr, valbool);
		} else {
			PyErr_SetObject(PyExc_ValueError, value);
			nvlist_free(nvl);
			return (NULL);
		}
		assert(err == 0);
	}

	return (nvl);
}
/*
 * Set the named uint64 in the given nvlist_t.
 *
 * @param       attrs
 *              the nvlist_t to modify
 *
 * @param       which
 *              the string key for this element in the list
 *
 * @param       val
 *              the value to set
 *
 * @return      0
 *              if successful
 *
 * @return      EINVAL
 *              if there is an invalid argument
 *
 * @return      ENOMEM
 *              if there is insufficient memory
 *
 * On failure the error is also recorded through volume_set_error().
 */
int
set_uint64(
	nvlist_t *attrs,
	char *which,
	uint64_t val)
{
	int error = 0;

	if ((error = nvlist_add_uint64(attrs, which, val)) != 0) {
	    /* Name the call that actually failed (uint64, not int64). */
	    volume_set_error(
		gettext("nvlist_add_uint64(%s) failed: %d\n"),
		which, error);
	}

	return (error);
}
/*
 * Record the address range and interleave settings of rank number num in
 * the vrank nvlist.
 *
 * Every property name carries num as a suffix.  The base and limit
 * (dimm_base + limit) are always stored; each socket/channel/rank interleave
 * value and its matching "way" are stored only when the interleave value is
 * greater than 1.  nvlist return codes are ignored.
 */
static void
inhm_vrank(nvlist_t *vrank, int num, uint64_t dimm_base, uint64_t limit,
    uint32_t sinterleave, uint32_t cinterleave, uint32_t rinterleave,
    uint32_t sway, uint32_t cway, uint32_t rway)
{
	char key[128];
	const uint64_t rank_limit = dimm_base + limit;

	/* Address range covered by this rank. */
	(void) snprintf(key, sizeof (key), "dimm-rank-base-%d", num);
	(void) nvlist_add_uint64(vrank, key, dimm_base);
	(void) snprintf(key, sizeof (key), "dimm-rank-limit-%d", num);
	(void) nvlist_add_uint64(vrank, key, rank_limit);

	/* Socket interleave. */
	if (sinterleave > 1) {
		(void) snprintf(key, sizeof (key),
		    "dimm-socket-interleave-%d", num);
		(void) nvlist_add_uint32(vrank, key, sinterleave);
		(void) snprintf(key, sizeof (key),
		    "dimm-socket-interleave-way-%d", num);
		(void) nvlist_add_uint32(vrank, key, sway);
	}

	/* Channel interleave. */
	if (cinterleave > 1) {
		(void) snprintf(key, sizeof (key),
		    "dimm-channel-interleave-%d", num);
		(void) nvlist_add_uint32(vrank, key, cinterleave);
		(void) snprintf(key, sizeof (key),
		    "dimm-channel-interleave-way-%d", num);
		(void) nvlist_add_uint32(vrank, key, cway);
	}

	/* Rank interleave. */
	if (rinterleave > 1) {
		(void) snprintf(key, sizeof (key),
		    "dimm-rank-interleave-%d", num);
		(void) nvlist_add_uint32(vrank, key, rinterleave);
		(void) snprintf(key, sizeof (key),
		    "dimm-rank-interleave-way-%d", num);
		(void) nvlist_add_uint32(vrank, key, rway);
	}
}
/*
 * Seed erpt with the basic members of the source ereport nvl: its ENA, its
 * "__tod" array and a copy of its detector FMRI.
 *
 * When isRC is set and fab_get_rcpath() yields a path, the device path in
 * the copied detector is replaced with that root complex path before the
 * detector is attached to erpt.
 *
 * Returns 0 on success, or a non-zero value when one of the three lookups
 * or the detector duplication fails (erpt is untouched in that case).
 */
/* ARGSUSED */
int
fab_prep_basic_erpt(fmd_hdl_t *hdl, nvlist_t *nvl, nvlist_t *erpt,
    boolean_t isRC)
{
	uint64_t	*tod;
	uint_t		tod_cnt;
	uint64_t	ena;
	nvlist_t	*src_detector;
	nvlist_t	*det_copy;
	char		rcpath[255];
	int		rv;

	/* Grab the tod, ena and detector(FMRI); all three are attempted. */
	rv = nvlist_lookup_uint64_array(nvl, "__tod", &tod, &tod_cnt);
	rv |= nvlist_lookup_uint64(nvl, "ena", &ena);
	rv |= nvlist_lookup_nvlist(nvl, FM_EREPORT_DETECTOR, &src_detector);
	if (rv != 0)
		return (rv);

	/* Work on a private copy of the detector */
	if ((rv = nvlist_dup(src_detector, &det_copy, NV_UNIQUE_NAME)) != 0)
		return (rv);

	/* Copy the tod and ena to erpt */
	(void) nvlist_add_uint64(erpt, FM_EREPORT_ENA, ena);
	(void) nvlist_add_uint64_array(erpt, "__tod", tod, tod_cnt);

	/*
	 * Create the correct ROOT FMRI from PCIe leaf fabric ereports.	 Used
	 * only by fab_prep_fake_rc_erpt.  See the fab_pciex_fake_rc_erpt_tbl
	 * comments for more information.
	 */
	if (isRC && fab_get_rcpath(hdl, nvl, rcpath)) {
		/* Swap the device path for the root complex path */
		(void) nvlist_remove(det_copy, FM_FMRI_DEV_PATH,
		    DATA_TYPE_STRING);
		(void) nvlist_add_string(det_copy, FM_FMRI_DEV_PATH, rcpath);
	}

	/* Attach the FMRI to erpt, then drop our copy */
	(void) nvlist_add_nvlist(erpt, FM_EREPORT_DETECTOR, det_copy);
	nvlist_free(det_copy);

	return (rv);
}
/*
 * Validate a proposed value against the iSER and/or iSCSI RFC's minimum and
 * maximum values, and set an alternate, if necessary.  Note that the value
 * 'iser_max_value" represents our implementation maximum (typically the max).
 *
 * Returns:
 *   KV_VALUE_ERROR		value lies outside [min_value, max_value];
 *				none of the nvlists is touched.
 *   KV_HANDLED_NO_TRANSIT	value exceeded iser_max_value; a response
 *				carrying iser_max_value was attempted.
 *   idm_nvstat_to_kvstat(rc)	otherwise, where rc is the result of the
 *				last nvlist operation performed.
 */
static kv_status_t
iser_handle_numerical(nvpair_t *nvp, uint64_t value,
    const idm_kv_xlate_t *ikvx, uint64_t min_value, uint64_t max_value,
    uint64_t iser_max_value, nvlist_t *request_nvl, nvlist_t *response_nvl,
    nvlist_t *negotiated_nvl)
{
	int		nvrc = 0;
	boolean_t	respond;
	boolean_t	no_transit = B_FALSE;

	/*
	 * Validate against standard.  Return straight away so the error
	 * status is actually reported to the caller rather than being
	 * replaced by a status derived from an unset nvlist return code.
	 */
	if ((value < min_value) || (value > max_value))
		return (KV_VALUE_ERROR);

	if (value > iser_max_value) {
		/*
		 * Respond back to initiator with our value, and
		 * set the return value to unset the transit bit.
		 */
		value = iser_max_value;
		no_transit = B_TRUE;
		respond = B_TRUE;
	} else {
		/* Add this to our negotiated values */
		nvrc = nvlist_add_nvpair(negotiated_nvl, nvp);
		/* Respond if this is not a declarative */
		respond = (ikvx->ik_declarative == B_FALSE);
	}

	/* Response of Simple-value Negotiation */
	if (nvrc == 0 && respond) {
		nvrc = nvlist_add_uint64(response_nvl, ikvx->ik_key_name,
		    value);
		/* Remove from the request (we've handled it) */
		(void) nvlist_remove_all(request_nvl, ikvx->ik_key_name);
	}

	/* As before, the no-transit status wins over the nvlist result. */
	if (no_transit)
		return (KV_HANDLED_NO_TRANSIT);

	return (idm_nvstat_to_kvstat(nvrc));
}
/*
 * Set the protocol label property of topology node tn.
 *
 * A PCIEX_BUS node whose parent is a PCIEX_ROOT first tries to inherit the
 * label of a CPUBOARD predecessor; if that works nothing else is done.
 * Otherwise the node's TOPO_METH_LABEL method is invoked with pd passed in
 * the input nvlist, and when the result carries TOPO_METH_LABEL_RET_STR that
 * string is stored as an immutable TOPO_PROP_LABEL property.  A result
 * without a label string is not an error.
 *
 * Returns 0 on success or the value of topo_mod_seterrno() on failure.  The
 * input and output nvlists are released on every path.
 */
/*ARGSUSED*/
static int
label_set(tnode_t *tn, did_t *pd,
    const char *dpnm, const char *tpgrp, const char *tpnm)
{
	topo_mod_t	*mp;
	nvlist_t	*in, *out = NULL;
	char		*label;
	int		err;

	mp = did_mod(pd);

	/*
	 * If this is a PCIEX_BUS and its parent is a PCIEX_ROOT,
	 * check for a CPUBOARD predecessor.  If found, inherit its
	 * parent's Label.  Otherwise, continue with label set.
	 */
	if ((strcmp(topo_node_name(tn), PCIEX_BUS) == 0) &&
	    (strcmp(topo_node_name(topo_node_parent(tn)), PCIEX_ROOT) == 0)) {
		if (use_predecessor_label(mp, tn, CPUBOARD) == 0)
			return (0);
	}

	if (topo_mod_nvalloc(mp, &in, NV_UNIQUE_NAME) != 0)
		return (topo_mod_seterrno(mp, EMOD_FMRI_NVL));
	if (nvlist_add_uint64(in, TOPO_METH_LABEL_ARG_NVL, (uintptr_t)pd) !=
	    0) {
		nvlist_free(in);
		return (topo_mod_seterrno(mp, EMOD_NOMEM));
	}
	if (topo_method_invoke(tn,
	    TOPO_METH_LABEL, TOPO_METH_LABEL_VERSION, in, &out, &err) != 0) {
		nvlist_free(in);
		return (topo_mod_seterrno(mp, err));
	}
	nvlist_free(in);

	if (out != NULL &&
	    nvlist_lookup_string(out, TOPO_METH_LABEL_RET_STR, &label) == 0) {
		if (topo_prop_set_string(tn, TOPO_PGROUP_PROTOCOL,
		    TOPO_PROP_LABEL, TOPO_PROP_IMMUTABLE, label, &err) != 0) {
			nvlist_free(out);
			return (topo_mod_seterrno(mp, err));
		}
	}

	/*
	 * Release the method output whether or not it contained a label;
	 * label points into out, so this must come after its last use.
	 */
	nvlist_free(out);
	return (0);
}
/*
 * Generate the pool's configuration based on the current in-core state.
 * Whether a complete config or just one top-level config is produced is
 * inferred from whether vd is the root vdev (a NULL vd means the root).
 *
 * The caller must hold the spa config lock and owns the returned nvlist.
 */
nvlist_t *
spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats)
{
	vdev_t *root_vd = spa->spa_root_vdev;
	nvlist_t *cfg;
	nvlist_t *vdev_tree;

	ASSERT(spa_config_held(spa, RW_READER));

	if (vd == NULL) {
		vd = root_vd;
	}

	/*
	 * A txg of -1 asks for the current value of spa->spa_config_txg.
	 */
	if (txg == -1ULL) {
		txg = spa->spa_config_txg;
	}

	/* Pool-wide identification. */
	VERIFY(nvlist_alloc(&cfg, NV_UNIQUE_NAME, KM_SLEEP) == 0);
	VERIFY(nvlist_add_uint64(cfg, ZPOOL_CONFIG_VERSION,
	    spa_version(spa)) == 0);
	VERIFY(nvlist_add_string(cfg, ZPOOL_CONFIG_POOL_NAME,
	    spa_name(spa)) == 0);
	VERIFY(nvlist_add_uint64(cfg, ZPOOL_CONFIG_POOL_STATE,
	    spa_state(spa)) == 0);
	VERIFY(nvlist_add_uint64(cfg, ZPOOL_CONFIG_POOL_TXG,
	    txg) == 0);
	VERIFY(nvlist_add_uint64(cfg, ZPOOL_CONFIG_POOL_GUID,
	    spa_guid(spa)) == 0);

	/* Per-vdev identification, only when not describing the root. */
	if (vd != root_vd) {
		VERIFY(nvlist_add_uint64(cfg, ZPOOL_CONFIG_TOP_GUID,
		    vd->vdev_top->vdev_guid) == 0);
		VERIFY(nvlist_add_uint64(cfg, ZPOOL_CONFIG_GUID,
		    vd->vdev_guid) == 0);
		if (vd->vdev_isspare) {
			VERIFY(nvlist_add_uint64(cfg, ZPOOL_CONFIG_IS_SPARE,
			    1ULL) == 0);
		}
		vd = vd->vdev_top;		/* label contains top config */
	}

	/* Attach the vdev tree, then drop our reference to it. */
	vdev_tree = vdev_config_generate(spa, vd, getstats, B_FALSE);
	VERIFY(nvlist_add_nvlist(cfg, ZPOOL_CONFIG_VDEV_TREE,
	    vdev_tree) == 0);
	nvlist_free(vdev_tree);

	return (cfg);
}
/*
 * Declare the iSER-specific key/value pairs for a connection.
 *
 * If config_nvl holds ISER_KV_KEY_NAME_MAX_OUTSTANDING_PDU and outgoing_nvl
 * is not NULL, the value is copied into outgoing_nvl.  A missing key is not
 * an error.  Any other lookup failure trips the ASSERT on DEBUG builds and is
 * otherwise reported through the returned status instead of copying an
 * unset value.
 *
 * Returns the nvlist status translated by idm_nvstat_to_kvstat().
 */
/* ARGSUSED */
static kv_status_t
iser_declare_key_values(idm_conn_t *ic, nvlist_t *config_nvl,
    nvlist_t *outgoing_nvl)
{
	int		nvrc = 0;
	int		rc;
	uint64_t	uint64_val;

	rc = nvlist_lookup_uint64(config_nvl,
	    ISER_KV_KEY_NAME_MAX_OUTSTANDING_PDU, &uint64_val);
	if (rc == 0) {
		if (outgoing_nvl) {
			nvrc = nvlist_add_uint64(outgoing_nvl,
			    ISER_KV_KEY_NAME_MAX_OUTSTANDING_PDU, uint64_val);
		}
	} else if (rc != ENOENT) {
		/* Unexpected lookup failure: uint64_val was never set. */
		ASSERT(rc == 0);
		nvrc = rc;
	}

	return (idm_nvstat_to_kvstat(nvrc));
}
/*
 * Target is the dataset whose pool we want to open.
 *
 * Initializes the userland kernel emulation and libzfs, looks up the pool
 * configuration with zpool_tryimport() and imports the pool through
 * spa_import(), read-only when requested.  An EEXIST result from
 * spa_import() is treated as success.  Every other failure is reported
 * through fatal().
 *
 * The configuration and the property list are only needed for the duration
 * of spa_import() and are released right after it returns.
 */
static void
zhack_import(char *target, boolean_t readonly)
{
	nvlist_t *config;
	nvlist_t *props;
	int error;

	kernel_init(readonly ? FREAD : (FREAD | FWRITE));
	g_zfs = libzfs_init();
	ASSERT(g_zfs != NULL);

	dmu_objset_register_type(DMU_OST_ZFS, space_delta_cb);

	g_readonly = readonly;
	g_importargs.unique = B_TRUE;
	g_importargs.can_be_active = readonly;
	g_pool = strdup(target);

	error = zpool_tryimport(g_zfs, target, &config, &g_importargs);
	if (error)
		fatal(NULL, FTAG, "cannot import '%s': %s", target,
		    libzfs_error_description(g_zfs));

	props = NULL;
	if (readonly) {
		VERIFY(nvlist_alloc(&props, NV_UNIQUE_NAME, 0) == 0);
		VERIFY(nvlist_add_uint64(props,
		    zpool_prop_to_name(ZPOOL_PROP_READONLY), 1) == 0);
	}

	/* Feature checks are switched off only around the import itself. */
	zfeature_checks_disable = B_TRUE;
	error = spa_import(target, config, props,
	    (readonly ?  ZFS_IMPORT_SKIP_MMP : ZFS_IMPORT_NORMAL));
	zfeature_checks_disable = B_FALSE;

	/* Neither list is used past this point; props may be NULL. */
	nvlist_free(config);
	nvlist_free(props);

	if (error == EEXIST)
		error = 0;

	if (error)
		fatal(NULL, FTAG, "can't import '%s': %s", target,
		    strerror(error));
}
/*
 * zfs_init_fs - Initialize the zfsvfs struct and the file system
 *	incore "master" object.  Verify version compatibility.
 *
 * NOTE(review): only the beginning of this function is present here; the
 * comments below cover the visible part only.
 */
int
zfs_init_fs(zfsvfs_t *zfsvfs, znode_t **zpp, cred_t *cr)
{
	extern int zfsfstype;

	objset_t	*os = zfsvfs->z_os;
	int		i, error;
	dmu_object_info_t doi;
	uint64_t fsid_guid;
	uint64_t zval;

	/* Start with no znode handed back to the caller. */
	*zpp = NULL;

	/*
	 * XXX - hack to auto-create the pool root filesystem at
	 * the first attempted mount.
	 */
	if (dmu_object_info(os, MASTER_NODE_OBJ, &doi) == ENOENT) {
		dmu_tx_t *tx = dmu_tx_create(os);
		uint64_t zpl_version;
		nvlist_t *zprops;

		dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, TRUE, NULL); /* master */
		dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, TRUE, NULL); /* del queue */
		dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); /* root node */
		error = dmu_tx_assign(tx, TXG_WAIT);
		ASSERT3U(error, ==, 0);
		/*
		 * Pick the ZPL version from the pool version: the current
		 * one when the pool is at SPA_VERSION_FUID or later,
		 * otherwise the one just below ZPL_VERSION_FUID.
		 */
		if (spa_version(dmu_objset_spa(os)) >= SPA_VERSION_FUID)
			zpl_version = ZPL_VERSION;
		else
			zpl_version = ZPL_VERSION_FUID - 1;

		/* Pass the chosen version as the only creation property. */
		VERIFY(nvlist_alloc(&zprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
		VERIFY(nvlist_add_uint64(zprops,
		    zfs_prop_to_name(ZFS_PROP_VERSION), zpl_version) == 0);
		zfs_create_fs(os, cr, zprops, tx);
		nvlist_free(zprops);
		dmu_tx_commit(tx);
	}
/*
 * Get dynamic property for processor sets.
 * The only dynamic property currently implemented is "pset.load".
 *
 * The caller must hold pool_lock.  On success the property is added to nvl
 * and the result of nvlist_add_uint64() is returned.  EINVAL is returned
 * when psetid does not name both a pool pset and a cpu partition, or when
 * name is not a supported property.
 */
int
pool_pset_propget(psetid_t psetid, char *name, nvlist_t *nvl)
{
	pool_pset_t *ps;
	cpupart_t *cp;
	int rv;

	ASSERT(pool_lock_held());

	mutex_enter(&cpu_lock);

	/* Both lookups are always performed, under cpu_lock. */
	ps = pool_lookup_pset_by_id(psetid);
	cp = cpupart_find(psetid);

	if (cp == NULL || ps == NULL) {
		rv = EINVAL;
	} else if (strcmp(name, "pset.load") != 0) {
		rv = EINVAL;
	} else {
		rv = nvlist_add_uint64(nvl, "pset.load",
		    (uint64_t)PSET_LOAD(cp->cp_hp_avenrun[0]));
	}

	mutex_exit(&cpu_lock);
	return (rv);
}
static #endif int zfs_fuid_find_by_domain(zfsvfs_t *zfsvfs, const char *domain, char **retdomain, dmu_tx_t *tx) { fuid_domain_t searchnode, *findnode; avl_index_t loc; /* * If the dummy "nobody" domain then return an index of 0 * to cause the created FUID to be a standard POSIX id * for the user nobody. */ if (domain[0] == '\0') { *retdomain = ""; return (0); } searchnode.f_ksid = ksid_lookupdomain(domain); if (retdomain) { *retdomain = searchnode.f_ksid->kd_name; } if (!zfsvfs->z_fuid_loaded) zfs_fuid_init(zfsvfs, tx); rw_enter(&zfsvfs->z_fuid_lock, RW_READER); findnode = avl_find(&zfsvfs->z_fuid_domain, &searchnode, &loc); rw_exit(&zfsvfs->z_fuid_lock); if (findnode) { ksiddomain_rele(searchnode.f_ksid); return (findnode->f_idx); } else { fuid_domain_t *domnode; nvlist_t *nvp; nvlist_t **fuids; uint64_t retidx; size_t nvsize = 0; char *packed; dmu_buf_t *db; int i = 0; domnode = kmem_alloc(sizeof (fuid_domain_t), KM_SLEEP); domnode->f_ksid = searchnode.f_ksid; rw_enter(&zfsvfs->z_fuid_lock, RW_WRITER); retidx = domnode->f_idx = avl_numnodes(&zfsvfs->z_fuid_idx) + 1; avl_add(&zfsvfs->z_fuid_domain, domnode); avl_add(&zfsvfs->z_fuid_idx, domnode); /* * Now resync the on-disk nvlist. 
*/ VERIFY(nvlist_alloc(&nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0); domnode = avl_first(&zfsvfs->z_fuid_domain); fuids = kmem_alloc(retidx * sizeof (void *), KM_SLEEP); while (domnode) { VERIFY(nvlist_alloc(&fuids[i], NV_UNIQUE_NAME, KM_SLEEP) == 0); VERIFY(nvlist_add_uint64(fuids[i], FUID_IDX, domnode->f_idx) == 0); VERIFY(nvlist_add_uint64(fuids[i], FUID_OFFSET, 0) == 0); VERIFY(nvlist_add_string(fuids[i++], FUID_DOMAIN, domnode->f_ksid->kd_name) == 0); domnode = AVL_NEXT(&zfsvfs->z_fuid_domain, domnode); } VERIFY(nvlist_add_nvlist_array(nvp, FUID_NVP_ARRAY, fuids, retidx) == 0); for (i = 0; i != retidx; i++) nvlist_free(fuids[i]); kmem_free(fuids, retidx * sizeof (void *)); VERIFY(nvlist_size(nvp, &nvsize, NV_ENCODE_XDR) == 0); packed = kmem_alloc(nvsize, KM_SLEEP); VERIFY(nvlist_pack(nvp, &packed, &nvsize, NV_ENCODE_XDR, KM_SLEEP) == 0); nvlist_free(nvp); zfsvfs->z_fuid_size = nvsize; dmu_write(zfsvfs->z_os, zfsvfs->z_fuid_obj, 0, zfsvfs->z_fuid_size, packed, tx); kmem_free(packed, zfsvfs->z_fuid_size); VERIFY(0 == dmu_bonus_hold(zfsvfs->z_os, zfsvfs->z_fuid_obj, FTAG, &db)); dmu_buf_will_dirty(db, tx); *(uint64_t *)db->db_data = zfsvfs->z_fuid_size; dmu_buf_rele(db, FTAG); rw_exit(&zfsvfs->z_fuid_lock); return (retidx); } }
/*
 * The device associated with the given vdev (either by devid or physical path)
 * has been added to the system.  If 'isdisk' is set, then we only attempt a
 * replacement if it's a whole disk.  This also implies that we should label the
 * disk first.
 *
 * First, we attempt to online the device (making sure to undo any spare
 * operation when finished).  If this succeeds, then we're done.  If it fails,
 * and the new state is VDEV_CANT_OPEN, it indicates that the device was opened,
 * but that the label was not what we expected.  If the 'autoreplace' property
 * is enabled, then we relabel the disk (if specified), and attempt a 'zpool
 * replace'.  If the online is successful, but the new state is something else
 * (REMOVED or FAULTED), it indicates that we're out of sync or in some sort of
 * race, and we should avoid attempting to relabel the disk.
 *
 * Also can arrive here from a ESC_ZFS_VDEV_CHECK event
 *
 * 'labeled' tells whether this call is for a disk that was already labeled
 * by an earlier pass through this function (B_TRUE) or for a raw disk that
 * still has to be labeled (B_FALSE).  Nothing is returned; every outcome is
 * reported through zed_log_msg().
 */
static void
zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
{
	char *path;
	vdev_state_t newstate;
	nvlist_t *nvroot, *newvd;
	pendingdev_t *device;
	uint64_t wholedisk = 0ULL;
	uint64_t offline = 0ULL;
	uint64_t guid = 0ULL;
	char *physpath = NULL, *new_devid = NULL, *enc_sysfs_path = NULL;
	char rawpath[PATH_MAX], fullpath[PATH_MAX];
	char devpath[PATH_MAX];
	int ret;
	int is_dm = 0;
	int is_sd = 0;
	uint_t c;
	vdev_stat_t *vs;

	if (nvlist_lookup_string(vdev, ZPOOL_CONFIG_PATH, &path) != 0)
		return;

	/* Skip healthy disks */
	verify(nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_VDEV_STATS,
	    (uint64_t **)&vs, &c) == 0);
	if (vs->vs_state == VDEV_STATE_HEALTHY) {
		zed_log_msg(LOG_INFO, "%s: %s is already healthy, skip it.",
		    __func__, path);
		return;
	}

	/* All of these are optional; the defaults above apply if absent. */
	(void) nvlist_lookup_string(vdev, ZPOOL_CONFIG_PHYS_PATH, &physpath);
	(void) nvlist_lookup_string(vdev, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH,
	    &enc_sysfs_path);
	(void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK, &wholedisk);
	(void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_OFFLINE, &offline);
	(void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_GUID, &guid);

	if (offline)
		return;  /* don't intervene if it was taken offline */

	is_dm = zfs_dev_is_dm(path);
	/* wholedisk is a uint64_t; cast it to match the %d conversion. */
	zed_log_msg(LOG_INFO, "zfs_process_add: pool '%s' vdev '%s', phys '%s'"
	    " wholedisk %d, dm %d (%llu)", zpool_get_name(zhp), path,
	    physpath ? physpath : "NULL", (int)wholedisk, is_dm,
	    (long long unsigned int)guid);

	/*
	 * The VDEV guid is preferred for identification (gets passed in path)
	 */
	if (guid != 0) {
		(void) snprintf(fullpath, sizeof (fullpath), "%llu",
		    (long long unsigned int)guid);
	} else {
		/*
		 * otherwise use path sans partition suffix for whole disks
		 */
		(void) strlcpy(fullpath, path, sizeof (fullpath));
		if (wholedisk) {
			char *spath = zfs_strip_partition(fullpath);
			if (!spath) {
				zed_log_msg(LOG_INFO, "%s: Can't alloc",
				    __func__);
				return;
			}

			(void) strlcpy(fullpath, spath, sizeof (fullpath));
			free(spath);
		}
	}

	/*
	 * Attempt to online the device.
	 */
	if (zpool_vdev_online(zhp, fullpath,
	    ZFS_ONLINE_CHECKREMOVE | ZFS_ONLINE_UNSPARE, &newstate) == 0 &&
	    (newstate == VDEV_STATE_HEALTHY ||
	    newstate == VDEV_STATE_DEGRADED)) {
		zed_log_msg(LOG_INFO, " zpool_vdev_online: vdev %s is %s",
		    fullpath, (newstate == VDEV_STATE_HEALTHY) ?
		    "HEALTHY" : "DEGRADED");
		return;
	}

	/*
	 * vdev_id alias rule for using scsi_debug devices (FMA automated
	 * testing)
	 */
	if (physpath != NULL && strcmp("scsidebug", physpath) == 0)
		is_sd = 1;

	/*
	 * If the pool doesn't have the autoreplace property set, then use
	 * vdev online to trigger a FMA fault by posting an ereport.
	 */
	if (!zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOREPLACE, NULL) ||
	    !(wholedisk || is_dm) || (physpath == NULL)) {
		(void) zpool_vdev_online(zhp, fullpath, ZFS_ONLINE_FORCEFAULT,
		    &newstate);
		zed_log_msg(LOG_INFO, "Pool's autoreplace is not enabled or "
		    "not a whole disk for '%s'", fullpath);
		return;
	}

	/*
	 * Convert physical path into its current device node.  Rawpath
	 * needs to be /dev/disk/by-vdev for a scsi_debug device since
	 * /dev/disk/by-path will not be present.
	 */
	(void) snprintf(rawpath, sizeof (rawpath), "%s%s",
	    is_sd ? DEV_BYVDEV_PATH : DEV_BYPATH_PATH, physpath);

	if (realpath(rawpath, devpath) == NULL && !is_dm) {
		zed_log_msg(LOG_INFO, " realpath: %s failed (%s)",
		    rawpath, strerror(errno));

		(void) zpool_vdev_online(zhp, fullpath, ZFS_ONLINE_FORCEFAULT,
		    &newstate);

		zed_log_msg(LOG_INFO, " zpool_vdev_online: %s FORCEFAULT (%s)",
		    fullpath, libzfs_error_description(g_zfshdl));
		return;
	}

	/* Only autoreplace bad disks */
	if ((vs->vs_state != VDEV_STATE_DEGRADED) &&
	    (vs->vs_state != VDEV_STATE_FAULTED) &&
	    (vs->vs_state != VDEV_STATE_CANT_OPEN)) {
		return;
	}

	/* Optional; new_devid stays NULL when the vdev does not carry it. */
	(void) nvlist_lookup_string(vdev, "new_devid", &new_devid);

	if (is_dm) {
		/* Don't label device mapper or multipath disks. */
	} else if (!labeled) {
		/*
		 * we're auto-replacing a raw disk, so label it first
		 */
		char *leafname;

		/*
		 * If this is a request to label a whole disk, then attempt to
		 * write out the label.  Before we can label the disk, we need
		 * to map the physical string that was matched on to the under
		 * lying device node.
		 *
		 * If any part of this process fails, then do a force online
		 * to trigger a ZFS fault for the device (and any hot spare
		 * replacement).
		 */
		leafname = strrchr(devpath, '/') + 1;

		/*
		 * If this is a request to label a whole disk, then attempt to
		 * write out the label.
		 */
		if (zpool_label_disk(g_zfshdl, zhp, leafname) != 0) {
			zed_log_msg(LOG_INFO, " zpool_label_disk: could not "
			    "label '%s' (%s)", leafname,
			    libzfs_error_description(g_zfshdl));

			(void) zpool_vdev_online(zhp, fullpath,
			    ZFS_ONLINE_FORCEFAULT, &newstate);
			return;
		}

		/*
		 * The disk labeling is asynchronous on Linux. Just record
		 * this label request and return as there will be another
		 * disk add event for the partition after the labeling is
		 * completed.
		 */
		device = malloc(sizeof (pendingdev_t));
		if (device == NULL) {
			/* Cannot record the request without memory. */
			zed_log_msg(LOG_INFO, "%s: Can't alloc", __func__);
			return;
		}
		(void) strlcpy(device->pd_physpath, physpath,
		    sizeof (device->pd_physpath));
		list_insert_tail(&g_device_list, device);

		zed_log_msg(LOG_INFO, " zpool_label_disk: async '%s' (%llu)",
		    leafname, (u_longlong_t)guid);

		return;	/* resumes at EC_DEV_ADD.ESC_DISK for partition */

	} else /* labeled */ {
		boolean_t found = B_FALSE;
		/*
		 * match up with request above to label the disk
		 */
		for (device = list_head(&g_device_list); device != NULL;
		    device = list_next(&g_device_list, device)) {
			if (strcmp(physpath, device->pd_physpath) == 0) {
				list_remove(&g_device_list, device);
				free(device);
				found = B_TRUE;
				break;
			}
			zed_log_msg(LOG_INFO, "zpool_label_disk: %s != %s",
			    physpath, device->pd_physpath);
		}
		if (!found) {
			/* unexpected partition slice encountered */
			zed_log_msg(LOG_INFO, "labeled disk %s unexpected here",
			    fullpath);
			(void) zpool_vdev_online(zhp, fullpath,
			    ZFS_ONLINE_FORCEFAULT, &newstate);
			return;
		}

		zed_log_msg(LOG_INFO, " zpool_label_disk: resume '%s' (%llu)",
		    physpath, (u_longlong_t)guid);

		(void) snprintf(devpath, sizeof (devpath), "%s%s",
		    DEV_BYID_PATH, new_devid);
	}

	/*
	 * Construct the root vdev to pass to zpool_vdev_attach().  While adding
	 * the entire vdev structure is harmless, we construct a reduced set of
	 * path/physpath/wholedisk to keep it simple.
	 */
	if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0) {
		zed_log_msg(LOG_WARNING, "zfs_mod: nvlist_alloc out of memory");
		return;
	}
	if (nvlist_alloc(&newvd, NV_UNIQUE_NAME, 0) != 0) {
		zed_log_msg(LOG_WARNING, "zfs_mod: nvlist_alloc out of memory");
		nvlist_free(nvroot);
		return;
	}

	if (nvlist_add_string(newvd, ZPOOL_CONFIG_TYPE, VDEV_TYPE_DISK) != 0 ||
	    nvlist_add_string(newvd, ZPOOL_CONFIG_PATH, path) != 0 ||
	    nvlist_add_string(newvd, ZPOOL_CONFIG_DEVID, new_devid) != 0 ||
	    (physpath != NULL && nvlist_add_string(newvd,
	    ZPOOL_CONFIG_PHYS_PATH, physpath) != 0) ||
	    (enc_sysfs_path != NULL && nvlist_add_string(newvd,
	    ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH, enc_sysfs_path) != 0) ||
	    nvlist_add_uint64(newvd, ZPOOL_CONFIG_WHOLE_DISK, wholedisk) != 0 ||
	    nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) != 0 ||
	    nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, &newvd,
	    1) != 0) {
		zed_log_msg(LOG_WARNING, "zfs_mod: unable to add nvlist pairs");
		nvlist_free(newvd);
		nvlist_free(nvroot);
		return;
	}

	/* nvroot holds the child now; our own reference can go. */
	nvlist_free(newvd);

	/*
	 * Wait for udev to verify the links exist, then auto-replace
	 * the leaf disk at same physical location.
	 */
	if (zpool_label_disk_wait(path, 3000) != 0) {
		zed_log_msg(LOG_WARNING, "zfs_mod: expected replacement "
		    "disk %s is missing", path);
		nvlist_free(nvroot);
		return;
	}

	ret = zpool_vdev_attach(zhp, fullpath, path, nvroot, B_TRUE);

	zed_log_msg(LOG_INFO, " zpool_vdev_replace: %s with %s (%s)",
	    fullpath, path, (ret == 0) ? "no errors" :
	    libzfs_error_description(g_zfshdl));

	nvlist_free(nvroot);
}
/*
 * Append one record to the pool's on-disk history object.
 *
 * arg1 is the spa_t of the pool; arg2 is a history_arg_t describing the
 * record.  The record is built as an nvlist, packed with XDR encoding and
 * written as an 8-byte little-endian length followed by the packed bytes.
 *
 * This function consumes arg2: the history string, the optional zone
 * string and the history_arg_t itself are all freed before returning.
 *
 * NOTE(review): the name and the dmu_tx_t argument suggest this runs as a
 * sync task; confirm against the caller.
 */
/*ARGSUSED*/
static void
spa_history_log_sync(void *arg1, void *arg2, dmu_tx_t *tx)
{
	spa_t		*spa = arg1;
	history_arg_t	*hap = arg2;
	const char	*history_str = hap->ha_history_str;
	objset_t	*mos = spa->spa_meta_objset;
	dmu_buf_t	*dbp;
	spa_history_phys_t *shpp;
	size_t		reclen;
	uint64_t	le_len;
	nvlist_t	*nvrecord;
	char		*record_packed = NULL;
	int		ret;

	/*
	 * If we have an older pool that doesn't have a command
	 * history object, create it now.
	 */
	mutex_enter(&spa->spa_history_lock);
	if (!spa->spa_history)
		spa_history_create_obj(spa, tx);
	mutex_exit(&spa->spa_history_lock);

	/*
	 * Get the offset of where we need to write via the bonus buffer.
	 * Update the offset when the write completes.
	 */
	VERIFY(0 == dmu_bonus_hold(mos, spa->spa_history, FTAG, &dbp));
	shpp = dbp->db_data;

	dmu_buf_will_dirty(dbp, tx);

#ifdef ZFS_DEBUG
	{
		/* Debug-only check that the bonus buffer has the expected type. */
		dmu_object_info_t doi;
		dmu_object_info_from_db(dbp, &doi);
		ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_SPA_HISTORY_OFFSETS);
	}
#endif

	/* Fields common to every record: time, uid, and optionally zone. */
	VERIFY(nvlist_alloc(&nvrecord, NV_UNIQUE_NAME, KM_SLEEP) == 0);
	VERIFY(nvlist_add_uint64(nvrecord, ZPOOL_HIST_TIME,
	    gethrestime_sec()) == 0);
	VERIFY(nvlist_add_uint64(nvrecord, ZPOOL_HIST_WHO, hap->ha_uid) == 0);
	if (hap->ha_zone != NULL)
		VERIFY(nvlist_add_string(nvrecord, ZPOOL_HIST_ZONE,
		    hap->ha_zone) == 0);
#ifdef _KERNEL
	VERIFY(nvlist_add_string(nvrecord, ZPOOL_HIST_HOST,
	    utsname.nodename) == 0);
#endif
	if (hap->ha_log_type == LOG_CMD_POOL_CREATE ||
	    hap->ha_log_type == LOG_CMD_NORMAL) {
		/* Command record: only the command string is stored. */
		VERIFY(nvlist_add_string(nvrecord, ZPOOL_HIST_CMD,
		    history_str) == 0);

		zfs_dbgmsg("command: %s", history_str);
	} else {
		/* Any other log type: store event number, txg and string. */
		VERIFY(nvlist_add_uint64(nvrecord, ZPOOL_HIST_INT_EVENT,
		    hap->ha_event) == 0);
		VERIFY(nvlist_add_uint64(nvrecord, ZPOOL_HIST_TXG,
		    tx->tx_txg) == 0);
		VERIFY(nvlist_add_string(nvrecord, ZPOOL_HIST_INT_STR,
		    history_str) == 0);

		zfs_dbgmsg("internal %s pool:%s txg:%llu %s",
		    zfs_history_event_names[hap->ha_event], spa_name(spa),
		    (longlong_t)tx->tx_txg, history_str);
	}

	/* Size the packed form first so nvlist_pack() fills our own buffer. */
	VERIFY(nvlist_size(nvrecord, &reclen, NV_ENCODE_XDR) == 0);
	record_packed = kmem_alloc(reclen, KM_SLEEP);

	VERIFY(nvlist_pack(nvrecord, &record_packed, &reclen,
	    NV_ENCODE_XDR, KM_SLEEP) == 0);

	mutex_enter(&spa->spa_history_lock);
	/* A pool-create record must be the first thing appended. */
	if (hap->ha_log_type == LOG_CMD_POOL_CREATE)
		VERIFY(shpp->sh_eof == shpp->sh_pool_create_len);

	/* write out the packed length as little endian */
	le_len = LE_64((uint64_t)reclen);
	ret = spa_history_write(spa, &le_len, sizeof (le_len), shpp, tx);
	/* The record body is written only if the length prefix went out. */
	if (!ret)
		ret = spa_history_write(spa, record_packed, reclen, shpp, tx);

	/*
	 * After a fully written pool-create record, grow the recorded
	 * pool-create length and set sh_bof to it.  A non-zero ret is not
	 * reported to anyone; it only suppresses this bookkeeping.
	 */
	if (!ret && hap->ha_log_type == LOG_CMD_POOL_CREATE) {
		shpp->sh_pool_create_len += sizeof (le_len) + reclen;
		shpp->sh_bof = shpp->sh_pool_create_len;
	}

	mutex_exit(&spa->spa_history_lock);
	nvlist_free(nvrecord);
	kmem_free(record_packed, reclen);
	dmu_buf_rele(dbp, FTAG);

	/* Release the argument structure handed to us by the caller. */
	strfree(hap->ha_history_str);
	if (hap->ha_zone != NULL)
		strfree(hap->ha_zone);
	kmem_free(hap, sizeof (history_arg_t));
}
/*
 * Convert our list of pools into the definitive set of configurations.  We
 * start by picking the best config for each toplevel vdev.  Once that's done,
 * we assemble the toplevel vdevs into a full config for the pool.  We make a
 * pass to fix up any incorrect paths, and then add it to the main list to
 * return to the user.
 *
 * hdl        library handle, used for allocation and error reporting
 * pl         discovered pools (pl->pools) and known names (pl->names)
 * active_ok  when set, each assembled config is added as-is: the active-pool
 *            check, the load policy, refresh_config() and the spare/l2cache
 *            path fixups are all skipped
 * policy     if non-NULL, added to each config under ZPOOL_LOAD_POLICY
 *            before refresh_config() is called
 *
 * Returns an nvlist mapping pool name to pool config, or NULL on failure.
 * Pools reported active by pool_active(), and pools for which
 * refresh_config() returns NULL, are silently left out of the result.
 */
static nvlist_t *
get_configs(libzfs_handle_t *hdl, pool_list_t *pl, boolean_t active_ok,
    nvlist_t *policy)
{
	pool_entry_t *pe;
	vdev_entry_t *ve;
	config_entry_t *ce;
	nvlist_t *ret = NULL, *config = NULL, *tmp = NULL, *nvtop, *nvroot;
	nvlist_t **spares, **l2cache;
	uint_t i, nspares, nl2cache;
	boolean_t config_seen;
	uint64_t best_txg;
	char *name, *hostname = NULL;
	uint64_t guid;
	uint_t children = 0;
	nvlist_t **child = NULL;
	uint_t holes;
	uint64_t *hole_array, max_id;
	uint_t c;
	boolean_t isactive;
	uint64_t hostid;
	nvlist_t *nvl;
	boolean_t valid_top_config = B_FALSE;

	if (nvlist_alloc(&ret, 0, 0) != 0)
		goto nomem;

	for (pe = pl->pools; pe != NULL; pe = pe->pe_next) {
		uint64_t id, max_txg = 0;

		if (nvlist_alloc(&config, NV_UNIQUE_NAME, 0) != 0)
			goto nomem;
		config_seen = B_FALSE;

		/*
		 * Iterate over all toplevel vdevs.  Grab the pool configuration
		 * from the first one we find, and then go through the rest and
		 * add them as necessary to the 'vdevs' member of the config.
		 */
		for (ve = pe->pe_vdevs; ve != NULL; ve = ve->ve_next) {

			/*
			 * Determine the best configuration for this vdev by
			 * selecting the config with the latest transaction
			 * group.
			 */
			best_txg = 0;
			for (ce = ve->ve_configs; ce != NULL;
			    ce = ce->ce_next) {

				if (ce->ce_txg > best_txg) {
					tmp = ce->ce_config;
					best_txg = ce->ce_txg;
				}
			}

			/*
			 * We rely on the fact that the max txg for the
			 * pool will contain the most up-to-date information
			 * about the valid top-levels in the vdev namespace.
			 */
			if (best_txg > max_txg) {
				/*
				 * A newer label supersedes whatever child
				 * count and hole array were recorded so far.
				 */
				(void) nvlist_remove(config,
				    ZPOOL_CONFIG_VDEV_CHILDREN,
				    DATA_TYPE_UINT64);
				(void) nvlist_remove(config,
				    ZPOOL_CONFIG_HOLE_ARRAY,
				    DATA_TYPE_UINT64_ARRAY);

				max_txg = best_txg;
				hole_array = NULL;
				holes = 0;
				max_id = 0;
				valid_top_config = B_FALSE;

				if (nvlist_lookup_uint64(tmp,
				    ZPOOL_CONFIG_VDEV_CHILDREN, &max_id) == 0) {
					verify(nvlist_add_uint64(config,
					    ZPOOL_CONFIG_VDEV_CHILDREN,
					    max_id) == 0);
					valid_top_config = B_TRUE;
				}

				if (nvlist_lookup_uint64_array(tmp,
				    ZPOOL_CONFIG_HOLE_ARRAY, &hole_array,
				    &holes) == 0) {
					verify(nvlist_add_uint64_array(config,
					    ZPOOL_CONFIG_HOLE_ARRAY,
					    hole_array, holes) == 0);
				}
			}

			if (!config_seen) {
				/*
				 * Copy the relevant pieces of data to the pool
				 * configuration:
				 *
				 *	version
				 *	pool guid
				 *	name
				 *	pool txg (if available)
				 *	comment (if available)
				 *	pool state
				 *	hostid (if available)
				 *	hostname (if available)
				 */
				uint64_t state, version, pool_txg;
				char *comment = NULL;

				version = fnvlist_lookup_uint64(tmp,
				    ZPOOL_CONFIG_VERSION);
				fnvlist_add_uint64(config,
				    ZPOOL_CONFIG_VERSION, version);
				guid = fnvlist_lookup_uint64(tmp,
				    ZPOOL_CONFIG_POOL_GUID);
				fnvlist_add_uint64(config,
				    ZPOOL_CONFIG_POOL_GUID, guid);
				name = fnvlist_lookup_string(tmp,
				    ZPOOL_CONFIG_POOL_NAME);
				fnvlist_add_string(config,
				    ZPOOL_CONFIG_POOL_NAME, name);

				if (nvlist_lookup_uint64(tmp,
				    ZPOOL_CONFIG_POOL_TXG, &pool_txg) == 0)
					fnvlist_add_uint64(config,
					    ZPOOL_CONFIG_POOL_TXG, pool_txg);

				if (nvlist_lookup_string(tmp,
				    ZPOOL_CONFIG_COMMENT, &comment) == 0)
					fnvlist_add_string(config,
					    ZPOOL_CONFIG_COMMENT, comment);

				state = fnvlist_lookup_uint64(tmp,
				    ZPOOL_CONFIG_POOL_STATE);
				fnvlist_add_uint64(config,
				    ZPOOL_CONFIG_POOL_STATE, state);

				/*
				 * hostid/hostname are remembered in locals so
				 * they can be restored after refresh_config()
				 * further down.
				 */
				hostid = 0;
				if (nvlist_lookup_uint64(tmp,
				    ZPOOL_CONFIG_HOSTID, &hostid) == 0) {
					fnvlist_add_uint64(config,
					    ZPOOL_CONFIG_HOSTID, hostid);
					hostname = fnvlist_lookup_string(tmp,
					    ZPOOL_CONFIG_HOSTNAME);
					fnvlist_add_string(config,
					    ZPOOL_CONFIG_HOSTNAME, hostname);
				}

				config_seen = B_TRUE;
			}

			/*
			 * Add this top-level vdev to the child array.
			 */
			verify(nvlist_lookup_nvlist(tmp,
			    ZPOOL_CONFIG_VDEV_TREE, &nvtop) == 0);
			verify(nvlist_lookup_uint64(nvtop, ZPOOL_CONFIG_ID,
			    &id) == 0);

			/* Grow the array so that index 'id' is valid. */
			if (id >= children) {
				nvlist_t **newchild;

				newchild = zfs_alloc(hdl, (id + 1) *
				    sizeof (nvlist_t *));
				if (newchild == NULL)
					goto nomem;

				for (c = 0; c < children; c++)
					newchild[c] = child[c];

				free(child);
				child = newchild;
				children = id + 1;
			}
			if (nvlist_dup(nvtop, &child[id], 0) != 0)
				goto nomem;

		}

		/*
		 * If we have information about all the top-levels then
		 * clean up the nvlist which we've constructed. This
		 * means removing any extraneous devices that are
		 * beyond the valid range or adding devices to the end
		 * of our array which appear to be missing.
		 */
		if (valid_top_config) {
			if (max_id < children) {
				for (c = max_id; c < children; c++)
					nvlist_free(child[c]);
				children = max_id;
			} else if (max_id > children) {
				nvlist_t **newchild;

				newchild = zfs_alloc(hdl, (max_id) *
				    sizeof (nvlist_t *));
				if (newchild == NULL)
					goto nomem;

				for (c = 0; c < children; c++)
					newchild[c] = child[c];

				free(child);
				child = newchild;
				children = max_id;
			}
		}

		verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
		    &guid) == 0);

		/*
		 * The vdev namespace may contain holes as a result of
		 * device removal. We must add them back into the vdev
		 * tree before we process any missing devices.
		 */
		if (holes > 0) {
			ASSERT(valid_top_config);

			for (c = 0; c < children; c++) {
				nvlist_t *holey;

				if (child[c] != NULL ||
				    !vdev_is_hole(hole_array, holes, c))
					continue;

				if (nvlist_alloc(&holey, NV_UNIQUE_NAME,
				    0) != 0)
					goto nomem;

				/*
				 * Holes in the namespace are treated as
				 * "hole" top-level vdevs and have a
				 * special flag set on them.
				 */
				if (nvlist_add_string(holey,
				    ZPOOL_CONFIG_TYPE,
				    VDEV_TYPE_HOLE) != 0 ||
				    nvlist_add_uint64(holey,
				    ZPOOL_CONFIG_ID, c) != 0 ||
				    nvlist_add_uint64(holey,
				    ZPOOL_CONFIG_GUID, 0ULL) != 0) {
					nvlist_free(holey);
					goto nomem;
				}
				child[c] = holey;
			}
		}

		/*
		 * Look for any missing top-level vdevs.  If this is the case,
		 * create a faked up 'missing' vdev as a placeholder.  We cannot
		 * simply compress the child array, because the kernel performs
		 * certain checks to make sure the vdev IDs match their location
		 * in the configuration.
		 */
		for (c = 0; c < children; c++) {
			if (child[c] == NULL) {
				nvlist_t *missing;
				if (nvlist_alloc(&missing, NV_UNIQUE_NAME,
				    0) != 0)
					goto nomem;
				if (nvlist_add_string(missing,
				    ZPOOL_CONFIG_TYPE,
				    VDEV_TYPE_MISSING) != 0 ||
				    nvlist_add_uint64(missing,
				    ZPOOL_CONFIG_ID, c) != 0 ||
				    nvlist_add_uint64(missing,
				    ZPOOL_CONFIG_GUID, 0ULL) != 0) {
					nvlist_free(missing);
					goto nomem;
				}
				child[c] = missing;
			}
		}

		/*
		 * Put all of this pool's top-level vdevs into a root vdev.
		 */
		if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0)
			goto nomem;
		if (nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE,
		    VDEV_TYPE_ROOT) != 0 ||
		    nvlist_add_uint64(nvroot, ZPOOL_CONFIG_ID, 0ULL) != 0 ||
		    nvlist_add_uint64(nvroot, ZPOOL_CONFIG_GUID, guid) != 0 ||
		    nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
		    child, children) != 0) {
			nvlist_free(nvroot);
			goto nomem;
		}

		/*
		 * The child array is released and reset here so that the next
		 * pool starts empty and the error path has nothing stale to
		 * free.
		 */
		for (c = 0; c < children; c++)
			nvlist_free(child[c]);
		free(child);
		children = 0;
		child = NULL;

		/*
		 * Go through and fix up any paths and/or devids based on our
		 * known list of vdev GUID -> path mappings.
		 */
		if (fix_paths(nvroot, pl->names) != 0) {
			nvlist_free(nvroot);
			goto nomem;
		}

		/*
		 * Add the root vdev to this pool's configuration.
		 */
		if (nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
		    nvroot) != 0) {
			nvlist_free(nvroot);
			goto nomem;
		}
		nvlist_free(nvroot);

		/*
		 * zdb uses this path to report on active pools that were
		 * imported or created using -R.
		 */
		if (active_ok)
			goto add_pool;

		/*
		 * Determine if this pool is currently active, in which case we
		 * can't actually import it.
		 */
		verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
		    &name) == 0);
		verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
		    &guid) == 0);

		if (pool_active(hdl, name, guid, &isactive) != 0)
			goto error;

		if (isactive) {
			nvlist_free(config);
			config = NULL;
			continue;
		}

		if (policy != NULL) {
			if (nvlist_add_nvlist(config, ZPOOL_LOAD_POLICY,
			    policy) != 0)
				goto nomem;
		}

		/* Swap the assembled config for the refreshed one. */
		if ((nvl = refresh_config(hdl, config)) == NULL) {
			nvlist_free(config);
			config = NULL;
			continue;
		}

		nvlist_free(config);
		config = nvl;

		/*
		 * Go through and update the paths for spares, now that we have
		 * them.
		 */
		verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
		    &nvroot) == 0);
		if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
		    &spares, &nspares) == 0) {
			for (i = 0; i < nspares; i++) {
				if (fix_paths(spares[i], pl->names) != 0)
					goto nomem;
			}
		}

		/*
		 * Update the paths for l2cache devices.
		 */
		if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
		    &l2cache, &nl2cache) == 0) {
			for (i = 0; i < nl2cache; i++) {
				if (fix_paths(l2cache[i], pl->names) != 0)
					goto nomem;
			}
		}

		/*
		 * Restore the original information read from the actual label.
		 */
		(void) nvlist_remove(config, ZPOOL_CONFIG_HOSTID,
		    DATA_TYPE_UINT64);
		(void) nvlist_remove(config, ZPOOL_CONFIG_HOSTNAME,
		    DATA_TYPE_STRING);
		if (hostid != 0) {
			verify(nvlist_add_uint64(config, ZPOOL_CONFIG_HOSTID,
			    hostid) == 0);
			verify(nvlist_add_string(config, ZPOOL_CONFIG_HOSTNAME,
			    hostname) == 0);
		}

add_pool:
		/*
		 * Add this pool to the list of configs.
		 */
		verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
		    &name) == 0);
		if (nvlist_add_nvlist(ret, name, config) != 0)
			goto nomem;

		nvlist_free(config);
		config = NULL;
	}

	return (ret);

nomem:
	(void) no_memory(hdl);
error:
	/* Shared cleanup: drop the partial config, result and child array. */
	nvlist_free(config);
	nvlist_free(ret);
	for (c = 0; c < children; c++)
		nvlist_free(child[c]);
	free(child);

	return (NULL);
}
/* Topo Methods */

/*
 * ASRU-compute method for DIMM nodes.
 *
 * Builds a mem-scheme FMRI from the DIMM's serial number and label and
 * returns it in *out as a property value nvlist (name, type, value).
 * The serial number is taken from the TOPO_PROP_PARGS list in 'in' when
 * present there, otherwise from TOPO_PROP_ARGS.  If the parent args carry
 * an hc-specific member, its physaddr and/or offset are copied into the
 * FMRI as well.
 *
 * Returns 0 on success.  On failure the module errno is set through
 * topo_mod_seterrno() (EMOD_VER_NEW, EMOD_METHOD_INVAL, EMOD_NOMEM or
 * EMOD_NVL_INVAL) and that call's result is returned.
 */
static int
mem_asru_compute(topo_mod_t *mod, tnode_t *node, topo_version_t version,
    nvlist_t *in, nvlist_t **out)
{
	nvlist_t *parent_args = NULL;
	nvlist_t *method_args, *hc_specific, *fmri;
	char *serial_id = NULL;
	char *dimm_label = NULL;
	uint64_t physaddr, page_offset;
	int pa_rc = 0, off_rc = 0;
	int name_rc, type_rc, val_rc;
	int label_err;

	if (version > TOPO_METH_ASRU_COMPUTE_VERSION)
		return (topo_mod_seterrno(mod, EMOD_VER_NEW));

	/* This method is only meaningful on DIMM nodes. */
	if (strcmp(topo_node_name(node), DIMM) != 0)
		return (topo_mod_seterrno(mod, EMOD_METHOD_INVAL));

	/* Prefer the serial id from the parent args, then the plain args. */
	if (nvlist_lookup_nvlist(in, TOPO_PROP_PARGS, &parent_args) == 0) {
		(void) nvlist_lookup_string(parent_args,
		    FM_FMRI_HC_SERIAL_ID, &serial_id);
	}
	if (serial_id == NULL) {
		if (nvlist_lookup_nvlist(in, TOPO_PROP_ARGS,
		    &method_args) == 0) {
			(void) nvlist_lookup_string(method_args,
			    FM_FMRI_HC_SERIAL_ID, &serial_id);
		}
	}

	/* A missing label is tolerated; dimm_label then stays NULL. */
	(void) topo_node_label(node, &dimm_label, &label_err);

	fmri = mem_fmri_create(mod, serial_id, dimm_label);

	if (dimm_label != NULL)
		topo_mod_strfree(mod, dimm_label);

	if (fmri == NULL)
		return (topo_mod_seterrno(mod, EMOD_NOMEM));

	/*
	 * For a memory page, 'in' includes an hc-specific member which
	 * specifies physaddr and/or offset.  Set them in the FMRI as well.
	 */
	if (parent_args != NULL &&
	    nvlist_lookup_nvlist(parent_args, FM_FMRI_HC_SPECIFIC,
	    &hc_specific) == 0) {
		if (nvlist_lookup_uint64(hc_specific,
		    FM_FMRI_HC_SPECIFIC_PHYSADDR, &physaddr) == 0) {
			pa_rc = nvlist_add_uint64(fmri,
			    FM_FMRI_MEM_PHYSADDR, physaddr);
		}
		if (nvlist_lookup_uint64(hc_specific,
		    FM_FMRI_HC_SPECIFIC_OFFSET, &page_offset) == 0) {
			off_rc = nvlist_add_uint64(fmri,
			    FM_FMRI_MEM_OFFSET, page_offset);
		}
	}

	/* Do not allocate the output list if the FMRI is incomplete. */
	if (pa_rc + off_rc != 0) {
		nvlist_free(fmri);
		return (topo_mod_seterrno(mod, EMOD_NOMEM));
	}
	if (topo_mod_nvalloc(mod, out, NV_UNIQUE_NAME) < 0) {
		nvlist_free(fmri);
		return (topo_mod_seterrno(mod, EMOD_NOMEM));
	}

	/* All three adds are attempted before any result is examined. */
	name_rc = nvlist_add_string(*out, TOPO_PROP_VAL_NAME, TOPO_PROP_ASRU);
	type_rc = nvlist_add_uint32(*out, TOPO_PROP_VAL_TYPE, TOPO_TYPE_FMRI);
	val_rc = nvlist_add_nvlist(*out, TOPO_PROP_VAL_VAL, fmri);

	/* The local FMRI is no longer needed once the add was attempted. */
	nvlist_free(fmri);

	if ((name_rc | type_rc | val_rc) != 0) {
		nvlist_free(*out);
		*out = NULL;
		return (topo_mod_seterrno(mod, EMOD_NVL_INVAL));
	}

	return (0);
}
/*
 * Add a uint64 pair (name, val) to nvl via nvlist_add_uint64(), asserting
 * through VERIFY0 that the call returned zero.  Nothing is returned, so
 * callers have no error to check.
 */
void
fnvlist_add_uint64(nvlist_t *nvl, const char *name, uint64_t val)
{
	VERIFY0(nvlist_add_uint64(nvl, name, val));
}
/*
 * Generate the pool's configuration based on the current in-core state.
 *
 * We infer whether to generate a complete config or just one top-level config
 * based on whether vd is the root vdev.
 *
 * spa       pool to describe
 * vd        vdev to describe; NULL selects the root vdev and makes this
 *           function take and drop SCL_CONFIG | SCL_STATE as reader itself.
 *           For a non-NULL vd the ASSERT below expects those locks to be
 *           held already.
 * txg       value stored as ZPOOL_CONFIG_POOL_TXG; -1 means "use
 *           spa->spa_config_txg"
 * getstats  passed on to vdev_config_generate(); when set and the pool's
 *           load state is SPA_LOAD_NONE, dedup statistics are added too
 *
 * Returns a newly allocated nvlist, which this function does not free.
 * Every nvlist operation is wrapped in VERIFY.
 */
nvlist_t *
spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats)
{
	nvlist_t *config, *nvroot;
	vdev_t *rvd = spa->spa_root_vdev;
	unsigned long hostid = 0;
	boolean_t locked = B_FALSE;
	uint64_t split_guid;

	if (vd == NULL) {
		vd = rvd;
		locked = B_TRUE;
		spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_READER);
	}

	ASSERT(spa_config_held(spa, SCL_CONFIG | SCL_STATE, RW_READER) ==
	    (SCL_CONFIG | SCL_STATE));

	/*
	 * If txg is -1, report the current value of spa->spa_config_txg.
	 */
	if (txg == -1ULL)
		txg = spa->spa_config_txg;

	/* Pool-wide identification fields. */
	VERIFY(nvlist_alloc(&config, NV_UNIQUE_NAME, KM_SLEEP) == 0);

	VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_VERSION,
	    spa_version(spa)) == 0);
	VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME,
	    spa_name(spa)) == 0);
	VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE,
	    spa_state(spa)) == 0);
	VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_TXG,
	    txg) == 0);
	VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_GUID,
	    spa_guid(spa)) == 0);
	/* The comment is optional; it is only added when one is set. */
	VERIFY(spa->spa_comment == NULL || nvlist_add_string(config,
	    ZPOOL_CONFIG_COMMENT, spa->spa_comment) == 0);


#ifdef	_KERNEL
	hostid = zone_get_hostid(NULL);
#else	/* _KERNEL */
	/*
	 * We're emulating the system's hostid in userland, so we can't use
	 * zone_get_hostid().
	 */
	(void) ddi_strtoul(hw_serial, NULL, 10, &hostid);
#endif	/* _KERNEL */
	/* A hostid of zero is not recorded. */
	if (hostid != 0) {
		VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_HOSTID,
		    hostid) == 0);
	}
	VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_HOSTNAME,
	    utsname.nodename) == 0);

	if (vd != rvd) {
		/* Per-vdev (label) config: identify the vdev and its top. */
		VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_TOP_GUID,
		    vd->vdev_top->vdev_guid) == 0);
		VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_GUID,
		    vd->vdev_guid) == 0);
		if (vd->vdev_isspare)
			VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_IS_SPARE,
			    1ULL) == 0);
		if (vd->vdev_islog)
			VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_IS_LOG,
			    1ULL) == 0);
		vd = vd->vdev_top;		/* label contains top config */
	} else {
		/*
		 * Only add the (potentially large) split information
		 * in the mos config, and not in the vdev labels
		 */
		if (spa->spa_config_splitting != NULL)
			VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_SPLIT,
			    spa->spa_config_splitting) == 0);
	}

	/*
	 * Add the top-level config.  We even add this on pools which
	 * don't support holes in the namespace.
	 */
	vdev_top_config_generate(spa, config);

	/*
	 * If we're splitting, record the original pool's guid.
	 */
	if (spa->spa_config_splitting != NULL &&
	    nvlist_lookup_uint64(spa->spa_config_splitting,
	    ZPOOL_CONFIG_SPLIT_GUID, &split_guid) == 0) {
		VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_SPLIT_GUID,
		    split_guid) == 0);
	}

	/* The vdev tree is added to config, then the local list is freed. */
	nvroot = vdev_config_generate(spa, vd, getstats, 0);
	VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0);
	nvlist_free(nvroot);

	/*
	 * Store what's necessary for reading the MOS in the label.
	 */
	VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_FEATURES_FOR_READ,
	    spa->spa_label_features) == 0);

	if (getstats && spa_load_state(spa) == SPA_LOAD_NONE) {
		ddt_histogram_t *ddh;
		ddt_stat_t *dds;
		ddt_object_t *ddo;

		/*
		 * Each dedup structure is exported as a flat uint64 array
		 * whose length is the structure size divided by
		 * sizeof (uint64_t).
		 */
		ddh = kmem_zalloc(sizeof (ddt_histogram_t), KM_SLEEP);
		ddt_get_dedup_histogram(spa, ddh);
		VERIFY(nvlist_add_uint64_array(config,
		    ZPOOL_CONFIG_DDT_HISTOGRAM,
		    (uint64_t *)ddh, sizeof (*ddh) / sizeof (uint64_t)) == 0);
		kmem_free(ddh, sizeof (ddt_histogram_t));

		ddo = kmem_zalloc(sizeof (ddt_object_t), KM_SLEEP);
		ddt_get_dedup_object_stats(spa, ddo);
		VERIFY(nvlist_add_uint64_array(config,
		    ZPOOL_CONFIG_DDT_OBJ_STATS,
		    (uint64_t *)ddo, sizeof (*ddo) / sizeof (uint64_t)) == 0);
		kmem_free(ddo, sizeof (ddt_object_t));

		dds = kmem_zalloc(sizeof (ddt_stat_t), KM_SLEEP);
		ddt_get_dedup_stats(spa, dds);
		VERIFY(nvlist_add_uint64_array(config,
		    ZPOOL_CONFIG_DDT_STATS,
		    (uint64_t *)dds, sizeof (*dds) / sizeof (uint64_t)) == 0);
		kmem_free(dds, sizeof (ddt_stat_t));
	}

	/* Drop the config lock only if it was taken here. */
	if (locked)
		spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG);

	return (config);
}
/*
 * Handle one bay node during the topology walk.
 *
 * If the node has no disk-monitor private property group it is ignored.
 * If a diskmon already exists for the node's FMRI, only its cached FRU
 * information is dropped (when it is the walk's target, or the walk has no
 * target) so that the disk node code can refresh it.  Otherwise the node's
 * indicator, indicator-rule and miscellaneous properties are collected and
 * a new diskmon is created and registered.
 *
 * thp   topology handle, used to free strings returned by libtopo
 * node  the bay node being visited
 * wdp   walk state; wdp->target optionally restricts the FRU reset, and
 *       wdp->pfmri receives the node's FMRI string (may be NULL)
 *
 * Returns 0 in every case except when the attachment-point path cannot be
 * read, where it returns -1.
 */
static int
topo_add_bay(topo_hdl_t *thp, tnode_t *node, walk_diskmon_t *wdp)
{
	diskmon_t *target_diskp = wdp->target;
	nvlist_t	*nvlp = find_disk_monitor_private_pgroup(node);
	nvlist_t	*prop_nvlp;
	nvpair_t	*nvp = NULL;
	char		*prop_name, *prop_value;
#define	PNAME_MAX 128
	char		pname[PNAME_MAX];
	char		msgbuf[MAX_CONF_MSG_LEN];
	char		*indicator_name, *indicator_action;
	char		*indrule_states, *indrule_actions;
	int		err = 0, i;
	conf_err_t	conferr;
	boolean_t	conf_failure = B_FALSE;
	char		*unadj_physid = NULL;
	char		physid[MAXPATHLEN];
	char		*label;
	nvlist_t	*diskprops = NULL;
	char		*cstr = NULL;
	indicator_t	*indp = NULL;
	indrule_t	*indrp = NULL;
	void		*p;
	diskmon_t	*diskp;
	/* Initialized so conf_error_msg() never sees an indeterminate value. */
	void		*ptr = NULL;

	/* No private properties -- just ignore the port */
	if (nvlp == NULL)
		return (0);

	/*
	 * Look for a diskmon based on this node's FMRI string.
	 * Once a diskmon has been created, it's not re-created.  This is
	 * essential for the times when the tree-walk is called after a
	 * disk is inserted (or removed) -- in that case, the disk node
	 * handler simply updates the FRU information in the diskmon.
	 */
	if ((p = fmri2ptr(thp, node, &cstr, &err)) != NULL) {

		diskp = (diskmon_t *)p;

		/*
		 * Delete the FRU information from the diskmon.  If a disk
		 * is connected, its FRU information will be refreshed by
		 * the disk node code.
		 */
		if (diskp->frup && (target_diskp == NULL ||
		    diskp == target_diskp)) {
			dm_assert(pthread_mutex_lock(&diskp->fru_mutex) == 0);
			dmfru_free(diskp->frup);
			diskp->frup = NULL;
			dm_assert(pthread_mutex_unlock(&diskp->fru_mutex) == 0);
		}

		wdp->pfmri = cstr;
		nvlist_free(nvlp);
		return (0);
	}

	/*
	 * Determine the physical path to the attachment point
	 */
	if (topo_prop_get_string(node, TOPO_PGROUP_IO,
	    TOPO_IO_AP_PATH, &unadj_physid, &err) == 0) {

		adjust_dynamic_ap(unadj_physid, physid);
		topo_hdl_strfree(thp, unadj_physid);
	} else {

		/* unadj_physid cannot have been allocated */
		if (cstr)
			dstrfree(cstr);
		nvlist_free(nvlp);
		return (-1);
	}

	/*
	 * Process the properties.  If we encounter a property that
	 * is not an indicator name, action, or rule, add it to the
	 * disk's props list.
	 */

	/*
	 * Process indicators.  Each pass consumes the pair fetched by the
	 * previous pass and then fetches the next numbered pair.  The string
	 * pointers are reset to NULL once freed so that, after the loop, a
	 * non-NULL pointer always means "fetched but not yet released".
	 */
	i = 0;
	indicator_name = NULL;
	indicator_action = NULL;
	do {
		if (indicator_name != NULL && indicator_action != NULL) {

			if (topoprop_indicator_add(&indp, indicator_name,
			    indicator_action) != 0) {

				conf_failure = B_TRUE;
			}

			topo_hdl_strfree(thp, indicator_name);
			topo_hdl_strfree(thp, indicator_action);
			indicator_name = NULL;
			indicator_action = NULL;
		}

		(void) snprintf(pname, PNAME_MAX, BAY_IND_NAME "-%d", i);
		if (topo_prop_get_string(node, DISK_MONITOR_PROPERTIES,
		    pname, &indicator_name, &err) != 0) {
			indicator_name = NULL;
			break;
		}

		(void) snprintf(pname, PNAME_MAX, BAY_IND_ACTION "-%d", i);
		if (topo_prop_get_string(node, DISK_MONITOR_PROPERTIES,
		    pname, &indicator_action, &err) != 0) {
			indicator_action = NULL;
			break;
		}

		i++;
	} while (!conf_failure && indicator_name != NULL &&
	    indicator_action != NULL);

	/*
	 * Release strings that were fetched but never consumed: the name of
	 * a pair whose action lookup failed, or the pair fetched right after
	 * a configuration failure.
	 */
	if (indicator_name != NULL)
		topo_hdl_strfree(thp, indicator_name);
	if (indicator_action != NULL)
		topo_hdl_strfree(thp, indicator_action);

	if (!conf_failure && indp != NULL &&
	    (conferr = check_inds(indp)) != E_NO_ERROR) {
		conf_error_msg(conferr, msgbuf, MAX_CONF_MSG_LEN, NULL);
		log_msg(MM_CONF, "%s: Not adding disk to list\n", msgbuf);
		conf_failure = B_TRUE;
	}

	/*
	 * Process state rules and indicator actions; same scheme as the
	 * indicator loop above.
	 */
	i = 0;
	indrule_states = NULL;
	indrule_actions = NULL;
	do {
		if (indrule_states != NULL && indrule_actions != NULL) {

			if (topoprop_indrule_add(&indrp, indrule_states,
			    indrule_actions) != 0) {

				conf_failure = B_TRUE;
			}

			topo_hdl_strfree(thp, indrule_states);
			topo_hdl_strfree(thp, indrule_actions);
			indrule_states = NULL;
			indrule_actions = NULL;
		}

		(void) snprintf(pname, PNAME_MAX, BAY_INDRULE_STATES "-%d", i);
		if (topo_prop_get_string(node, DISK_MONITOR_PROPERTIES,
		    pname, &indrule_states, &err) != 0) {
			indrule_states = NULL;
			break;
		}

		(void) snprintf(pname, PNAME_MAX, BAY_INDRULE_ACTIONS "-%d",
		    i);
		if (topo_prop_get_string(node, DISK_MONITOR_PROPERTIES,
		    pname, &indrule_actions, &err) != 0) {
			indrule_actions = NULL;
			break;
		}

		i++;
	} while (!conf_failure && indrule_states != NULL &&
	    indrule_actions != NULL);

	if (indrule_states != NULL)
		topo_hdl_strfree(thp, indrule_states);
	if (indrule_actions != NULL)
		topo_hdl_strfree(thp, indrule_actions);

	if (!conf_failure && indrp != NULL && indp != NULL &&
	    ((conferr = check_indrules(indrp, (state_transition_t **)&ptr))
	    != E_NO_ERROR ||
	    (conferr = check_consistent_ind_indrules(indp, indrp,
	    (ind_action_t **)&ptr)) != E_NO_ERROR)) {

		conf_error_msg(conferr, msgbuf, MAX_CONF_MSG_LEN, ptr);
		log_msg(MM_CONF, "%s: Not adding disk to list\n", msgbuf);
		conf_failure = B_TRUE;

	}

	/*
	 * Now collect miscellaneous properties.
	 * Each property is stored as an embedded nvlist named
	 * TOPO_PROP_VAL.  The property name is stored in the value for
	 * key=TOPO_PROP_VAL_NAME and the property's value is
	 * stored in the value for key=TOPO_PROP_VAL_VAL.  This is all
	 * necessary so we can subtractively decode the properties that
	 * we do not directly handle (so that these properties are added to
	 * the per-disk properties nvlist), increasing flexibility.
	 */
	(void) nvlist_alloc(&diskprops, NV_UNIQUE_NAME, 0);
	while ((nvp = nvlist_next_nvpair(nvlp, nvp)) != NULL) {
		/* Only care about embedded nvlists named TOPO_PROP_VAL */
		if (nvpair_type(nvp) != DATA_TYPE_NVLIST ||
		    strcmp(nvpair_name(nvp), TOPO_PROP_VAL) != 0 ||
		    nvpair_value_nvlist(nvp, &prop_nvlp) != 0)
			continue;

		if (nonunique_nvlist_lookup_string(prop_nvlp,
		    TOPO_PROP_VAL_NAME, &prop_name) != 0)
			continue;

		/* Filter out indicator properties */
		if (strstr(prop_name, BAY_IND_NAME) != NULL ||
		    strstr(prop_name, BAY_IND_ACTION) != NULL ||
		    strstr(prop_name, BAY_INDRULE_STATES) != NULL ||
		    strstr(prop_name, BAY_INDRULE_ACTIONS) != NULL)
			continue;

		if (nonunique_nvlist_lookup_string(prop_nvlp,
		    TOPO_PROP_VAL_VAL, &prop_value) != 0)
			continue;

		/* Add the property to the disk's prop list: */
		if (nvlist_add_string(diskprops, prop_name, prop_value) != 0)
			log_msg(MM_TOPO,
			    "Could not add disk property `%s' with "
			    "value `%s'\n", prop_name, prop_value);
	}

	nvlist_free(nvlp);

	if (cstr != NULL) {
		namevalpr_t nvpr;
		nvlist_t *dmap_nvl;

		/* Pass physid without the leading "/devices": */
		nvpr.name = DISK_AP_PROP_APID;
		nvpr.value = strncmp(physid, "/devices", 8) == 0 ?
		    (physid + 8) : physid;

		dmap_nvl = namevalpr_to_nvlist(&nvpr);

		diskp = new_diskmon(dmap_nvl, indp, indrp, diskprops);

		/*
		 * Set the diskmon's location to the value in this port's label.
		 * If there's a disk plugged in, the location will be updated
		 * to be the disk label (e.g. HD_ID_00).  Until a disk is
		 * inserted, though, there won't be a disk libtopo node
		 * created.
		 *
		 * The code below treats a NULL diskp as possible, so the
		 * location must not be stored before checking for it.
		 */
		if (diskp != NULL) {
			if (topo_node_label(node, &label, &err) == 0) {
				diskp->location = dstrdup(label);
				topo_hdl_strfree(thp, label);
			} else {
				diskp->location = dstrdup("unknown location");
			}
		}

		if (!conf_failure && diskp != NULL) {
			/* Add this diskmon to the disk list */
			cfgdata_add_diskmon(config_data, diskp);
			if (nvlist_add_uint64(g_topo2diskmon, cstr,
			    (uint64_t)(uintptr_t)diskp) != 0) {
				log_msg(MM_TOPO,
				    "Could not add pointer to nvlist "
				    "for `%s'!\n", cstr);
			}
		} else if (diskp != NULL) {
			diskmon_free(diskp);
		} else {
			/* No diskmon was created, so release its inputs. */
			if (dmap_nvl)
				nvlist_free(dmap_nvl);
			if (indp)
				ind_free(indp);
			if (indrp)
				indrule_free(indrp);
			if (diskprops)
				nvlist_free(diskprops);
		}

		wdp->pfmri = cstr;
	} else {
		/*
		 * Without an FMRI string no diskmon is created, so nothing
		 * takes ownership of the collected configuration; free it
		 * here rather than leaking it.
		 */
		if (indp)
			ind_free(indp);
		if (indrp)
			indrule_free(indrp);
		if (diskprops)
			nvlist_free(diskprops);
	}

	return (0);
}
/*
 * Generate the pool's configuration based on the current in-core state.
 *
 * We infer whether to generate a complete config or just one top-level config
 * based on whether vd is the root vdev.
 *
 * spa       pool to describe
 * vd        vdev to describe; NULL selects the root vdev and makes this
 *           function take and drop SCL_CONFIG | SCL_STATE as reader itself.
 *           For a non-NULL vd the ASSERT below expects those locks to be
 *           held already.
 * txg       value stored as ZPOOL_CONFIG_POOL_TXG; -1 means "use
 *           spa->spa_config_txg"
 * getstats  passed on to vdev_config_generate(); when set and the pool's
 *           load state is SPA_LOAD_NONE, dedup statistics are added too
 *
 * Returns a newly allocated nvlist, which this function does not free.
 * Every nvlist operation is wrapped in VERIFY/VERIFY0.
 */
nvlist_t *
spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats)
{
	nvlist_t *config, *nvroot;
	vdev_t *rvd = spa->spa_root_vdev;
	unsigned long hostid = 0;
	boolean_t locked = B_FALSE;
	uint64_t split_guid;
	char *pool_name;

	if (vd == NULL) {
		vd = rvd;
		locked = B_TRUE;
		spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_READER);
	}

	ASSERT(spa_config_held(spa, SCL_CONFIG | SCL_STATE, RW_READER) ==
	    (SCL_CONFIG | SCL_STATE));

	/*
	 * If txg is -1, report the current value of spa->spa_config_txg.
	 */
	if (txg == -1ULL)
		txg = spa->spa_config_txg;

	/*
	 * Originally, users had to handle spa namespace collisions by either
	 * exporting the already imported pool or by specifying a new name for
	 * the pool with a conflicting name. In the case of root pools from
	 * virtual guests, neither approach to collision resolution is
	 * reasonable. This is addressed by extending the new name syntax with
	 * an option to specify that the new name is temporary. When specified,
	 * ZFS_IMPORT_TEMP_NAME will be set in spa->spa_import_flags to tell us
	 * to use the previous name, which we do below.
	 */
	if (spa->spa_import_flags & ZFS_IMPORT_TEMP_NAME) {
		/* The previous name is read from the existing spa_config. */
		VERIFY0(nvlist_lookup_string(spa->spa_config,
		    ZPOOL_CONFIG_POOL_NAME, &pool_name));
	} else
		pool_name = spa_name(spa);

	/* Pool-wide identification fields. */
	VERIFY(nvlist_alloc(&config, NV_UNIQUE_NAME, KM_SLEEP) == 0);

	VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_VERSION,
	    spa_version(spa)) == 0);
	VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME,
	    pool_name) == 0);
	VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE,
	    spa_state(spa)) == 0);
	VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_TXG,
	    txg) == 0);
	VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_GUID,
	    spa_guid(spa)) == 0);
	VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_ERRATA,
	    spa->spa_errata) == 0);
	/* The comment is optional; it is only added when one is set. */
	VERIFY(spa->spa_comment == NULL || nvlist_add_string(config,
	    ZPOOL_CONFIG_COMMENT, spa->spa_comment) == 0);


#ifdef	_KERNEL
	hostid = zone_get_hostid(NULL);
#else	/* _KERNEL */
	/*
	 * We're emulating the system's hostid in userland, so we can't use
	 * zone_get_hostid().
	 */
	(void) ddi_strtoul(hw_serial, NULL, 10, &hostid);
#endif	/* _KERNEL */
	/* A hostid of zero is not recorded. */
	if (hostid != 0) {
		VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_HOSTID,
		    hostid) == 0);
	}
	VERIFY0(nvlist_add_string(config, ZPOOL_CONFIG_HOSTNAME,
	    utsname()->nodename));

	if (vd != rvd) {
		/* Per-vdev (label) config: identify the vdev and its top. */
		VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_TOP_GUID,
		    vd->vdev_top->vdev_guid) == 0);
		VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_GUID,
		    vd->vdev_guid) == 0);
		if (vd->vdev_isspare)
			VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_IS_SPARE,
			    1ULL) == 0);
		if (vd->vdev_islog)
			VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_IS_LOG,
			    1ULL) == 0);
		vd = vd->vdev_top;		/* label contains top config */
	} else {
		/*
		 * Only add the (potentially large) split information
		 * in the mos config, and not in the vdev labels
		 */
		if (spa->spa_config_splitting != NULL)
			VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_SPLIT,
			    spa->spa_config_splitting) == 0);
	}

	/*
	 * Add the top-level config.  We even add this on pools which
	 * don't support holes in the namespace.
	 */
	vdev_top_config_generate(spa, config);

	/*
	 * If we're splitting, record the original pool's guid.
	 */
	if (spa->spa_config_splitting != NULL &&
	    nvlist_lookup_uint64(spa->spa_config_splitting,
	    ZPOOL_CONFIG_SPLIT_GUID, &split_guid) == 0) {
		VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_SPLIT_GUID,
		    split_guid) == 0);
	}

	/* The vdev tree is added to config, then the local list is freed. */
	nvroot = vdev_config_generate(spa, vd, getstats, 0);
	VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0);
	nvlist_free(nvroot);

	/*
	 * Store what's necessary for reading the MOS in the label.
	 */
	VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_FEATURES_FOR_READ,
	    spa->spa_label_features) == 0);

	if (getstats && spa_load_state(spa) == SPA_LOAD_NONE) {
		ddt_histogram_t *ddh;
		ddt_stat_t *dds;
		ddt_object_t *ddo;

		/*
		 * Each dedup structure is exported as a flat uint64 array
		 * whose length is the structure size divided by
		 * sizeof (uint64_t).
		 */
		ddh = kmem_zalloc(sizeof (ddt_histogram_t), KM_SLEEP);
		ddt_get_dedup_histogram(spa, ddh);
		VERIFY(nvlist_add_uint64_array(config,
		    ZPOOL_CONFIG_DDT_HISTOGRAM,
		    (uint64_t *)ddh, sizeof (*ddh) / sizeof (uint64_t)) == 0);
		kmem_free(ddh, sizeof (ddt_histogram_t));

		ddo = kmem_zalloc(sizeof (ddt_object_t), KM_SLEEP);
		ddt_get_dedup_object_stats(spa, ddo);
		VERIFY(nvlist_add_uint64_array(config,
		    ZPOOL_CONFIG_DDT_OBJ_STATS,
		    (uint64_t *)ddo, sizeof (*ddo) / sizeof (uint64_t)) == 0);
		kmem_free(ddo, sizeof (ddt_object_t));

		dds = kmem_zalloc(sizeof (ddt_stat_t), KM_SLEEP);
		ddt_get_dedup_stats(spa, dds);
		VERIFY(nvlist_add_uint64_array(config,
		    ZPOOL_CONFIG_DDT_STATS,
		    (uint64_t *)dds, sizeof (*dds) / sizeof (uint64_t)) == 0);
		kmem_free(dds, sizeof (ddt_stat_t));
	}

	/* Drop the config lock only if it was taken here. */
	if (locked)
		spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG);

	return (config);
}
/*
 * sync out AVL trees to persistent storage.
 *
 * Does nothing unless zfsvfs->z_fuid_dirty is set.  Otherwise every entry
 * of the z_fuid_domain tree is turned into an nvlist (index, offset 0,
 * domain name), the array of those lists is packed with XDR encoding and
 * written at offset 0 of the FUID object, and the packed size is stored in
 * that object's bonus buffer.  The FUID object is allocated and registered
 * under MASTER_NODE_OBJ on first use.
 *
 * The whole body is compiled only when HAVE_ZPL is defined; without it the
 * function is empty.
 */
void
zfs_fuid_sync(zfsvfs_t *zfsvfs, dmu_tx_t *tx)
{
#ifdef HAVE_ZPL
	nvlist_t *nvp;
	nvlist_t **fuids;
	size_t nvsize = 0;
	char *packed;
	dmu_buf_t *db;
	fuid_domain_t *domnode;
	int numnodes;
	int i;

	/* The dirty flag is tested before z_fuid_lock is taken. */
	if (!zfsvfs->z_fuid_dirty) {
		return;
	}

	rw_enter(&zfsvfs->z_fuid_lock, RW_WRITER);

	/*
	 * First see if table needs to be created?
	 */
	if (zfsvfs->z_fuid_obj == 0) {
		zfsvfs->z_fuid_obj = dmu_object_alloc(zfsvfs->z_os,
		    DMU_OT_FUID, 1 << 14, DMU_OT_FUID_SIZE,
		    sizeof (uint64_t), tx);
		VERIFY(zap_add(zfsvfs->z_os, MASTER_NODE_OBJ,
		    ZFS_FUID_TABLES, sizeof (uint64_t), 1,
		    &zfsvfs->z_fuid_obj, tx) == 0);
	}

	VERIFY(nvlist_alloc(&nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0);

	/*
	 * The array is sized from the z_fuid_idx tree but filled by walking
	 * the z_fuid_domain tree.
	 * NOTE(review): this relies on both trees holding the same number
	 * of nodes -- confirm.
	 */
	numnodes = avl_numnodes(&zfsvfs->z_fuid_idx);
	fuids = kmem_alloc(numnodes * sizeof (void *), KM_SLEEP);
	for (i = 0, domnode = avl_first(&zfsvfs->z_fuid_domain);
	    domnode; i++, domnode = AVL_NEXT(&zfsvfs->z_fuid_domain, domnode)) {
		VERIFY(nvlist_alloc(&fuids[i], NV_UNIQUE_NAME, KM_SLEEP) == 0);
		VERIFY(nvlist_add_uint64(fuids[i], FUID_IDX,
		    domnode->f_idx) == 0);
		VERIFY(nvlist_add_uint64(fuids[i], FUID_OFFSET, 0) == 0);
		VERIFY(nvlist_add_string(fuids[i], FUID_DOMAIN,
		    domnode->f_ksid->kd_name) == 0);
	}
	VERIFY(nvlist_add_nvlist_array(nvp, FUID_NVP_ARRAY,
	    fuids, numnodes) == 0);
	/* The per-domain lists and the array are freed right after the add. */
	for (i = 0; i != numnodes; i++)
		nvlist_free(fuids[i]);
	kmem_free(fuids, numnodes * sizeof (void *));

	/* Size first, then pack into a buffer of exactly that size. */
	VERIFY(nvlist_size(nvp, &nvsize, NV_ENCODE_XDR) == 0);
	packed = kmem_alloc(nvsize, KM_SLEEP);
	VERIFY(nvlist_pack(nvp, &packed, &nvsize,
	    NV_ENCODE_XDR, KM_SLEEP) == 0);
	nvlist_free(nvp);
	zfsvfs->z_fuid_size = nvsize;
	dmu_write(zfsvfs->z_os, zfsvfs->z_fuid_obj, 0,
	    zfsvfs->z_fuid_size, packed, tx);
	kmem_free(packed, zfsvfs->z_fuid_size);

	/* Record the packed size in the object's bonus buffer. */
	VERIFY(0 == dmu_bonus_hold(zfsvfs->z_os, zfsvfs->z_fuid_obj,
	    FTAG, &db));
	dmu_buf_will_dirty(db, tx);
	*(uint64_t *)db->db_data = zfsvfs->z_fuid_size;
	dmu_buf_rele(db, FTAG);

	zfsvfs->z_fuid_dirty = B_FALSE;
	rw_exit(&zfsvfs->z_fuid_lock);
#endif /* HAVE_ZPL */
}