Example #1
/*ARGSUSED*/
static void
disklights_topo(fmd_hdl_t *hdl, topo_hdl_t *thp)
{
	disk_lights_t *dl = fmd_hdl_getspecific(hdl);

	dl_trigger_enum(dl);
}
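
All of these snippets share one pattern: the module allocates its private state in _fmd_init(), attaches it to the handle with fmd_hdl_setspecific(), and every later entry point retrieves it with fmd_hdl_getspecific() (see Examples #3 and #8 below). A minimal sketch of that pairing follows; my_state_t is a hypothetical type, fmd_info is assumed to be the module's static fmd_hdl_info_t descriptor as in Example #8, and headers are omitted just as in the snippets above.

typedef struct my_state {
	id_t ms_timer;	/* illustrative field; the layout is up to the module */
} my_state_t;

void
_fmd_init(fmd_hdl_t *hdl)
{
	my_state_t *msp;

	if (fmd_hdl_register(hdl, FMD_API_VERSION, &fmd_info) != 0)
		return;

	/* Allocate the per-module state and attach it to the handle. */
	msp = fmd_hdl_zalloc(hdl, sizeof (*msp), FMD_SLEEP);
	fmd_hdl_setspecific(hdl, msp);
}

void
_fmd_fini(fmd_hdl_t *hdl)
{
	my_state_t *msp = fmd_hdl_getspecific(hdl);

	/* Registration may have failed before any state was attached. */
	if (msp != NULL)
		fmd_hdl_free(hdl, msp, sizeof (my_state_t));
}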
Example #2
/*ARGSUSED*/
static void
sp_timeout(fmd_hdl_t *hdl, id_t id, void *data)
{
	sp_monitor_t *smp = fmd_hdl_getspecific(hdl);
	uint32_t seconds, generation;

	if (ipmi_sunoem_uptime(smp->sm_hdl, &seconds, &generation) != 0) {
		/*
		 * Ignore uptime failures.  We will generate the appropriate
		 * event when it comes back online.
		 */
		fmd_hdl_debug(hdl, "failed to get uptime: %s",
		    ipmi_errmsg(smp->sm_hdl));
	} else {
		/*
		 * We want to catch cases where the generation number is
		 * explicitly reset, or when the SP configuration is reset after
		 * a reboot (and the generation number is 0).  We also post a
		 * sysevent when the module initially loads, since we can't be
	 * sure whether we missed an SP reset.
		 */
		if (seconds < smp->sm_seconds ||
		    generation != smp->sm_generation ||
		    smp->sm_seconds == 0)
			sp_post_sysevent(hdl);

		smp->sm_seconds = seconds;
		smp->sm_generation = generation;
	}

	(void) fmd_timer_install(hdl, NULL, NULL, smp->sm_interval);
}
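
fmd timers fire only once, which is why sp_timeout() finishes by installing the next one itself (disklights_timeout() in Example #4 does the same). The step that arms the very first timer is not part of this snippet; the sketch below is only an illustration of how it might be done, with a hypothetical helper name and an assumed module property called "interval".

/*
 * Hypothetical helper (not the actual sp module code): arm the first
 * poll.  From then on, sp_timeout() above re-arms the timer each pass.
 */
static void
sp_arm_first_timer(fmd_hdl_t *hdl, sp_monitor_t *smp)
{
	/* Assumption: the poll interval is a module property named "interval". */
	smp->sm_interval = fmd_prop_get_int64(hdl, "interval");

	/*
	 * Starting sm_seconds at 0 makes the first successful uptime read
	 * post a sysevent, matching the "module initially loads" case in
	 * the comment above.
	 */
	smp->sm_seconds = 0;
	smp->sm_generation = 0;

	(void) fmd_timer_install(hdl, NULL, NULL, smp->sm_interval);
}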
Example #3
void
_fmd_fini(fmd_hdl_t *hdl)
{
	sp_monitor_t *smp = fmd_hdl_getspecific(hdl);

	if (smp) {
		ipmi_close(smp->sm_hdl);
		fmd_hdl_free(hdl, smp, sizeof (sp_monitor_t));
	}
}
Example #4
/*ARGSUSED*/
static void
disklights_timeout(fmd_hdl_t *hdl, id_t id, void *data)
{
	disk_lights_t *dl = fmd_hdl_getspecific(hdl);

	dl->dl_triggered = B_FALSE;

	dl_examine_topo(dl);

	/*
	 * Install the long-interval timer for the next poll.
	 */
	dl->dl_timer = fmd_timer_install(hdl, NULL, NULL, dl->dl_poll_interval);
}
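
dl_trigger_enum(), called from the topo-change and event callbacks in Examples #1 and #8, is not included in this listing. Judging from how dl_triggered and dl_timer are used above, its job is to coalesce enumeration requests onto a short timer. The sketch below only illustrates that idea; the dl_hdl and dl_coalesce_interval fields are assumptions, not the module's actual layout.

/* Illustrative sketch only; the real dl_trigger_enum() may differ. */
static void
dl_trigger_enum(disk_lights_t *dl)
{
	/* An enumeration is already pending; let the pending timer handle it. */
	if (dl->dl_triggered)
		return;

	dl->dl_triggered = B_TRUE;

	/*
	 * Swap the long-interval poll timer for a short coalescing timer;
	 * disklights_timeout() clears dl_triggered and re-installs the
	 * long timer when it fires.
	 */
	fmd_timer_remove(dl->dl_hdl, dl->dl_timer);
	dl->dl_timer = fmd_timer_install(dl->dl_hdl, NULL, NULL,
	    dl->dl_coalesce_interval);
}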
Example #5
static void
zfs_purge_cases(fmd_hdl_t *hdl)
{
	zfs_case_t *zcp;
	uu_list_walk_t *walk;
	libzfs_handle_t *zhdl = fmd_hdl_getspecific(hdl);

	/*
	 * There is no way to open a pool by GUID, or look up a vdev by GUID.  No
	 * matter what we do, we're going to have to stomach an O(vdevs * cases)
	 * algorithm.  In reality, both quantities are likely so small that
	 * neither will matter. Given that iterating over pools is more
	 * expensive than iterating over the in-memory case list, we opt for a
	 * 'present' flag in each case that starts off cleared.  We then iterate
	 * over all pools, marking those that are still present, and removing
	 * those that aren't found.
	 *
	 * Note that we could also construct an FMRI and rely on
	 * fmd_nvl_fmri_present(), but this would end up doing the same search.
	 */

	/*
	 * Mark the cases as not present.
	 */
	for (zcp = uu_list_first(zfs_cases); zcp != NULL;
	    zcp = uu_list_next(zfs_cases, zcp))
		zcp->zc_present = B_FALSE;

	/*
	 * Iterate over all pools and mark the pools and vdevs found.  If this
	 * fails (most probably because we're out of memory), don't close any
	 * of the cases, since we cannot be sure their state is accurate.
	 */
	if (zpool_iter(zhdl, zfs_mark_pool, NULL) != 0)
		return;

	/*
	 * Remove those cases which were not found.
	 */
	walk = uu_list_walk_start(zfs_cases, UU_WALK_ROBUST);
	while ((zcp = uu_list_walk_next(walk)) != NULL) {
		if (!zcp->zc_present)
			fmd_case_close(hdl, zcp->zc_case);
	}
	uu_list_walk_end(walk);
}
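
The "mark" half referenced in the comments above, zfs_mark_pool(), is not shown in this example. The simplified sketch below shows the shape of such a zpool_iter() callback; the real zfs-diagnosis callback also walks the pool's vdev tree so that per-vdev cases get marked, which is omitted here, and the _sketch name is purely illustrative.

/*
 * Simplified sketch of a zpool_iter() callback that marks the cases
 * belonging to one pool as still present.  The real zfs_mark_pool()
 * additionally descends into the vdev tree for per-vdev cases.
 */
/*ARGSUSED*/
static int
zfs_mark_pool_sketch(zpool_handle_t *zhp, void *unused)
{
	zfs_case_t *zcp;
	uint64_t pool_guid = zpool_get_prop_int(zhp, ZPOOL_PROP_GUID, NULL);

	for (zcp = uu_list_first(zfs_cases); zcp != NULL;
	    zcp = uu_list_next(zfs_cases, zcp)) {
		if (zcp->zc_data.zc_pool_guid == pool_guid)
			zcp->zc_present = B_TRUE;
	}

	/* Like the real callback, close the pool handle before returning. */
	zpool_close(zhp);
	return (0);
}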
Example #6
static void
sp_post_sysevent(fmd_hdl_t *hdl)
{
	sp_monitor_t *smp = fmd_hdl_getspecific(hdl);
	sysevent_id_t eid;

	fmd_hdl_debug(hdl, "SP reset detected, posting sysevent");

	if (sysevent_post_event(EC_PLATFORM, ESC_PLATFORM_SP_RESET,
	    SUNW_VENDOR, "fmd", NULL, &eid) != 0) {
		fmd_hdl_debug(hdl, "failed to send sysevent: %s",
		    strerror(errno));
		/*
		 * We reset the seconds and generation so that the next time
		 * through we will try to post the sysevent again.
		 */
		smp->sm_seconds = -1U;
		smp->sm_generation = -1U;
	}
}
Example #7
/*
 * The fmdo_recv entry point.  See which subsidiary DE/response agents have a
 * matching subscription and invoke the callback for the first match from each.
 * The subsidiary DE/response agents should dispatch *all* their subscriptions
 * via their registered dispatch table, including things like list.repaired.
 */
void
sw_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class)
{
	struct sw_modspecific *msinfo;
	int calls = 0;
	int mod;

	BUMPSTAT(sw_recv_total);

	msinfo = (struct sw_modspecific *)fmd_hdl_getspecific(hdl);

	/*
	 * For each sub module that has a matching class pattern, call the
	 * registered callback for that sub DE.  Only one match per sub module
	 * is allowed (the first match in its table, others are not checked).
	 */
	for (mod = 0; mod < msinfo->swms_dispcnt; mod++) {
		const struct sw_disp *dp;
		sw_dispfunc_t *dispf = NULL;

		for (dp = (*msinfo->swms_disptbl)[mod];
		    dp != NULL && dp->swd_classpat != NULL; dp++) {
			if (fmd_nvl_class_match(hdl, nvl, dp->swd_classpat)) {
				dispf = dp->swd_func;
				break;
Example #8
/*ARGSUSED*/
static void
disklights_topo(fmd_hdl_t *hdl, topo_hdl_t *thp)
{
	disk_lights_t *dl = fmd_hdl_getspecific(hdl);

	dl_trigger_enum(dl);
}

/*ARGSUSED*/
static void
disklights_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
    const char *class)
{
	disk_lights_t *dl = fmd_hdl_getspecific(hdl);

	dl_trigger_enum(dl);
}

void
_fmd_init(fmd_hdl_t *hdl)
{
	disk_lights_t *dl;

	if (fmd_hdl_register(hdl, FMD_API_VERSION, &fmd_info) != 0)
		return;

	dl = fmd_hdl_zalloc(hdl, sizeof (*dl), FMD_SLEEP);
	fmd_hdl_setspecific(hdl, dl);
Example #9
/*ARGSUSED*/
static void
zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
    const char *class)
{
	uint64_t pool_guid, vdev_guid;
	char *dev_name;
	zpool_handle_t *zhp;
	nvlist_t *resource, *config, *nvroot;
	nvlist_t *vdev;
	nvlist_t **spares, **faults;
	uint_t s, nspares, f, nfaults;
	nvlist_t *replacement;
	find_cbdata_t cb;
	libzfs_handle_t *zhdl = fmd_hdl_getspecific(hdl);

	/*
	 * Get information from the fault.
	 */
	if (nvlist_lookup_nvlist_array(nvl, FM_SUSPECT_FAULT_LIST,
	    &faults, &nfaults) != 0)
		return;

	for (f = 0; f < nfaults; f++) {
		if (nvlist_lookup_nvlist(faults[f], FM_FAULT_RESOURCE,
		    &resource) != 0 ||
		    nvlist_lookup_uint64(resource, FM_FMRI_ZFS_POOL,
		    &pool_guid) != 0 ||
		    nvlist_lookup_uint64(resource, FM_FMRI_ZFS_VDEV,
		    &vdev_guid) != 0)
Example #10
/*
 * Solve a given ZFS case.  This first checks to make sure the diagnosis is
 * still valid, as well as cleaning up any pending timer associated with the
 * case.
 */
static void
zfs_case_solve(fmd_hdl_t *hdl, zfs_case_t *zcp, const char *faultname,
    boolean_t checkunusable)
{
	libzfs_handle_t *zhdl = fmd_hdl_getspecific(hdl);
	nvlist_t *detector, *fault;
	boolean_t serialize;
	nvlist_t *fmri, *fru;
	topo_hdl_t *thp;
	int err;

	/*
	 * Construct the detector from the case data.  The detector is in the
	 * ZFS scheme, and is either the pool or the vdev, depending on whether
	 * this is a vdev or pool fault.
	 */
	detector = fmd_nvl_alloc(hdl, FMD_SLEEP);

	(void) nvlist_add_uint8(detector, FM_VERSION, ZFS_SCHEME_VERSION0);
	(void) nvlist_add_string(detector, FM_FMRI_SCHEME, FM_FMRI_SCHEME_ZFS);
	(void) nvlist_add_uint64(detector, FM_FMRI_ZFS_POOL,
	    zcp->zc_data.zc_pool_guid);
	if (zcp->zc_data.zc_vdev_guid != 0) {
		(void) nvlist_add_uint64(detector, FM_FMRI_ZFS_VDEV,
		    zcp->zc_data.zc_vdev_guid);
	}

	/*
	 * We also want to make sure that the detector (pool or vdev) properly
	 * reflects the diagnosed state, when the fault corresponds to internal
	 * ZFS state (i.e. not checksum or I/O error-induced).  Otherwise, a
	 * device which was unavailable early in boot (because the driver/file
	 * wasn't available) and is now healthy will be mis-diagnosed.
	 */
	if (!fmd_nvl_fmri_present(hdl, detector) ||
	    (checkunusable && !fmd_nvl_fmri_unusable(hdl, detector))) {
		fmd_case_close(hdl, zcp->zc_case);
		nvlist_free(detector);
		return;
	}


	fru = NULL;
	if (zcp->zc_fru != NULL &&
	    (thp = fmd_hdl_topo_hold(hdl, TOPO_VERSION)) != NULL) {
		/*
		 * If the vdev had an associated FRU, then get the FRU nvlist
		 * from the topo handle and use that in the suspect list.  We
		 * explicitly look up the FRU because the FMRI reported from the
		 * kernel may not have up-to-date details about the disk itself
		 * (serial, part number, etc.).
		 */
		if (topo_fmri_str2nvl(thp, zcp->zc_fru, &fmri, &err) == 0) {
			/*
			 * If the disk is part of the system chassis, but the
			 * FRU indicates a different chassis ID than our
			 * current system, then ignore the error.  This
			 * indicates that the device was part of another
			 * cluster head, and for obvious reasons cannot be
			 * imported on this system.
			 */
			if (libzfs_fru_notself(zhdl, zcp->zc_fru)) {
				fmd_case_close(hdl, zcp->zc_case);
				nvlist_free(fmri);
				fmd_hdl_topo_rele(hdl, thp);
				nvlist_free(detector);
				return;
			}

			/*
			 * If the device is no longer present on the system, or
			 * topo_fmri_fru() fails for other reasons, then fall
			 * back to the fmri specified in the vdev.
			 */
			if (topo_fmri_fru(thp, fmri, &fru, &err) != 0)
				fru = fmd_nvl_dup(hdl, fmri, FMD_SLEEP);
			nvlist_free(fmri);
		}

		fmd_hdl_topo_rele(hdl, thp);
	}

	fault = fmd_nvl_create_fault(hdl, faultname, 100, detector,
	    fru, detector);
	fmd_case_add_suspect(hdl, zcp->zc_case, fault);

	nvlist_free(fru);

	fmd_case_solve(hdl, zcp->zc_case);

	serialize = B_FALSE;
	if (zcp->zc_data.zc_has_remove_timer) {
		fmd_timer_remove(hdl, zcp->zc_remove_timer);
		zcp->zc_data.zc_has_remove_timer = 0;
		serialize = B_TRUE;
	}
	if (serialize)
		zfs_case_serialize(hdl, zcp);

	nvlist_free(detector);
}