Esempio n. 1
0
/*
 * Shut down the RPC service: ask the svc_run() threads to exit, then
 * unregister the administrative and API programs using the program numbers
 * currently stored in the rpc.adm.prog and rpc.api.prog properties.
 */
void
fmd_rpc_fini(void)
{
	rpcprog_t prognum;

	/* Make every thread sitting in svc_run() return. */
	svc_exit();

	/* Withdraw the administrative program registration. */
	(void) fmd_conf_getprop(fmd.d_conf, "rpc.adm.prog", &prognum);
	svc_unreg(prognum, FMD_ADM_VERSION_1);

	/* Withdraw the API program registration. */
	(void) fmd_conf_getprop(fmd.d_conf, "rpc.api.prog", &prognum);
	svc_unreg(prognum, FMD_API_VERSION_1);
}
Esempio n. 2
0
/*
 * Recompute the diagnosis code for a case and cache it in ci_code, replacing
 * any previously cached code.  The FM_CLASS string of each suspect is
 * collected into a NULL-terminated key array that is handed to
 * fmd_module_dc_key2code().  If the case has no suspects, or the lookup
 * fails, ci_code is set to a copy of the "nodiagcode" property instead.
 * The caller must hold ci_lock and the case must be in FMD_CASE_SOLVED or a
 * later state.  Returns the cached ci_code pointer.
 */
static const char *
fmd_case_mkcode(fmd_case_t *cp)
{
	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
	fmd_case_susp_t *susp;
	const char *dflt;
	char **keyv;
	int nkeys = 0;

	ASSERT(MUTEX_HELD(&cip->ci_lock));
	ASSERT(cip->ci_state >= FMD_CASE_SOLVED);

	/* Discard the old code and start with a module-sized zeroed buffer. */
	fmd_free(cip->ci_code, cip->ci_codelen);
	cip->ci_codelen = cip->ci_mod->mod_codelen;
	cip->ci_code = fmd_zalloc(cip->ci_codelen, FMD_SLEEP);

	/* One slot per suspect plus one for the terminating NULL. */
	keyv = alloca(sizeof (char *) * (cip->ci_nsuspects + 1));

	susp = cip->ci_suspects;
	while (susp != NULL) {
		/* Suspects lacking a class string contribute no key. */
		if (nvlist_lookup_string(susp->cis_nvl, FM_CLASS,
		    &keyv[nkeys]) == 0)
			nkeys++;
		susp = susp->cis_next;
	}

	keyv[nkeys] = NULL; /* libdiagcode expects a NULL-terminated array */

	if (cip->ci_nsuspects != 0 && fmd_module_dc_key2code(cip->ci_mod,
	    keyv, cip->ci_code, cip->ci_codelen) == 0)
		return (cip->ci_code);

	/*
	 * No suspects or no dictionary match: swap the buffer for one that
	 * holds exactly the precomputed default code string.
	 */
	(void) fmd_conf_getprop(fmd.d_conf, "nodiagcode", &dflt);
	fmd_free(cip->ci_code, cip->ci_codelen);
	cip->ci_codelen = strlen(dflt) + 1;
	cip->ci_code = fmd_zalloc(cip->ci_codelen, FMD_SLEEP);
	(void) strcpy(cip->ci_code, dflt);

	return (cip->ci_code);
}
Esempio n. 3
0
/*
 * Allocate and return a trace buffer whose geometry is taken from the
 * "trace.frames" and "trace.recs" configuration properties.  The buffer is
 * a zeroed array of tb_recs records of tb_size bytes each; tb_ptr starts at
 * the first record and tb_end addresses the last one.
 */
fmd_tracebuf_t *
fmd_trace_create(void)
{
	fmd_tracebuf_t *tb = fmd_zalloc(sizeof (fmd_tracebuf_t), FMD_SLEEP);
	size_t nbytes;

	(void) fmd_conf_getprop(fmd.d_conf, "trace.frames", &tb->tb_frames);
	(void) fmd_conf_getprop(fmd.d_conf, "trace.recs", &tb->tb_recs);

	/*
	 * Each fmd_tracerec_t holds hrtime_t values and therefore has to
	 * start on an 8-byte boundary.  The record ends in a flexible array
	 * of uintptr_t, which is only 4 bytes wide in a 32-bit build, so the
	 * frame count may have to be bumped to keep the record size a
	 * multiple of 8.  In a 64-bit build uintptr_t is already 8 bytes and
	 * every frame count works.
	 *
	 * sizeof (fmd_tracerec_t) already accounts for the first frame, and
	 * tb_frames counts that frame too.  So when the structure size is
	 * not 0 mod 8 an extra frame is needed and the total must be even;
	 * when it is 0 mod 8 the total must be odd.  The test is on the
	 * structure size itself, so it keeps working if other members change.
	 */
#ifdef _ILP32
	/*CONSTCOND*/
	if (sizeof (fmd_tracerec_t) % sizeof (hrtime_t) != 0)
		tb->tb_frames = P2ROUNDUP(tb->tb_frames, 2);
	else
		tb->tb_frames = (tb->tb_frames & ~1UL) + 1;
#endif
	/* Record size: the base structure plus every frame after the first. */
	tb->tb_size = sizeof (fmd_tracerec_t) +
	    sizeof (uintptr_t) * (MAX(tb->tb_frames, 1) - 1);

	nbytes = tb->tb_size * tb->tb_recs;
	tb->tb_buf = fmd_zalloc(nbytes, FMD_SLEEP);

	tb->tb_ptr = tb->tb_buf;
	tb->tb_end = (void *)((uintptr_t)tb->tb_buf + nbytes - tb->tb_size);

	return (tb);
}
Esempio n. 4
0
void
fmd_rpc_init(void)
{
	int err, prog, mode = RPC_SVC_MT_USER;
	uint64_t sndsize = 0, rcvsize = 0;
	const char *s;

	if (rpc_control(RPC_SVC_MTMODE_SET, &mode) == FALSE)
		fmd_panic("failed to enable user-MT rpc mode");

	(void) fmd_conf_getprop(fmd.d_conf, "rpc.sndsize", &sndsize);
	(void) fmd_conf_getprop(fmd.d_conf, "rpc.rcvsize", &rcvsize);

	/*
	 * Infer whether we are the "default" fault manager or an alternate one
	 * based on whether the initial setting of rpc.adm.prog is non-zero.
	 */
	(void) fmd_conf_getprop(fmd.d_conf, "rpc.adm.prog", &prog);
	(void) fmd_conf_getprop(fmd.d_conf, "rpc.adm.path", &s);

	if (prog != 0) {
		err = fmd_rpc_svc_init(fmd_adm_1, "FMD_ADM", s, "rpc.adm.prog",
		    FMD_ADM, FMD_ADM, FMD_ADM_VERSION_1,
		    (uint_t)sndsize, (uint_t)rcvsize, TRUE);
	} else {
		err = fmd_rpc_svc_init(fmd_adm_1, "FMD_ADM", s, "rpc.adm.prog",
		    RPC_TRANS_MIN, RPC_TRANS_MAX, FMD_ADM_VERSION_1,
		    (uint_t)sndsize, (uint_t)rcvsize, FALSE);
	}

	if (err != 0)
		fmd_error(EFMD_EXIT, "failed to create rpc server bindings");

	if (fmd_thread_create(fmd.d_rmod, (fmd_thread_f *)svc_run, 0) == NULL)
		fmd_error(EFMD_EXIT, "failed to create rpc server thread");
}
Esempio n. 5
0
/*
 * Periodic garbage-collection callback.  When invoked with a non-zero timer
 * id, fmd_module_gc() is applied to every module in d_mod_hash and the error
 * log is updated while d_log_lock is held as a reader.  In every case
 * (including id == 0, which skips the collection pass) the function then
 * installs itself again on the timer queue using the "gc_interval" property
 * as the delay.  The hrt argument is not used.
 */
/*ARGSUSED*/
static void
fmd_gc(fmd_t *dp, id_t id, hrtime_t hrt)
{
	hrtime_t interval;

	if (id != 0) {
		/* Run the per-module collectors, bracketed by trace records. */
		TRACE((FMD_DBG_MOD, "garbage collect start"));
		fmd_modhash_apply(dp->d_mod_hash, fmd_module_gc);
		TRACE((FMD_DBG_MOD, "garbage collect end"));

		/* The error log is only touched with d_log_lock held. */
		(void) pthread_rwlock_rdlock(&dp->d_log_lock);
		fmd_log_update(dp->d_errlog);
		(void) pthread_rwlock_unlock(&dp->d_log_lock);
	}

	/* Schedule the next pass. */
	(void) fmd_conf_getprop(dp->d_conf, "gc_interval", &interval);
	(void) fmd_timerq_install(dp->d_timers, dp->d_rmod->mod_timerids,
	    (fmd_timer_f *)fmd_gc, dp, NULL, interval);
}
Esempio n. 6
0
/*
 * Prepare 'ckp' for writing a checkpoint of module 'mp'.  The structure is
 * zeroed and seeded with the header size and one string-table byte, the
 * temporary path "<mod_ckpt>/<mod_name>+" and the final path
 * "<mod_ckpt>/<mod_name>" are formatted, any existing temporary file is
 * removed, and the temporary file is created exclusively using the
 * "ckpt.mode" property as its permissions.  Returns the descriptor stored in
 * ckp_fd, which is whatever open64() returned.
 */
static int
fmd_ckpt_create(fmd_ckpt_t *ckp, fmd_module_t *mp)
{
	mode_t perms;

	bzero(ckp, sizeof (fmd_ckpt_t));

	ckp->ckp_mp = mp;
	ckp->ckp_size = sizeof (fcf_hdr_t);
	ckp->ckp_strn = 1; /* for \0 */

	/* Temporary (source) name carries a trailing '+'. */
	(void) snprintf(ckp->ckp_src, PATH_MAX, "%s/%s+",
	    mp->mod_ckpt, mp->mod_name);
	(void) snprintf(ckp->ckp_dst, PATH_MAX, "%s/%s",
	    mp->mod_ckpt, mp->mod_name);

	/* Clear out a stale temporary file so O_EXCL can succeed. */
	(void) unlink(ckp->ckp_src);

	(void) fmd_conf_getprop(fmd.d_conf, "ckpt.mode", &perms);
	ckp->ckp_fd = open64(ckp->ckp_src, O_WRONLY | O_CREAT | O_EXCL, perms);

	return (ckp->ckp_fd);
}
Esempio n. 7
0
/*
 * Bring the fault manager described by 'dp' to its running state.  In order,
 * this caches the debug output settings, opens the topology library, creates
 * the remaining global subsystems (clock, id spaces, timers, dispatch queue,
 * case hash, root module event queue), installs the client.thrsig signal
 * handler, opens the scheme, ASRU, error and fault log state, loads the
 * built-in modules, starts the RPC service, loads plug-in and agent modules,
 * and finally replays the ASRU cache and error log before releasing the
 * module barrier and resuming transports.
 *
 * If 'pfd' is non-negative, an FMD_EXIT_SUCCESS status word is written to it
 * right after d_running is set, before the non-built-in modules are loaded.
 */
void
fmd_run(fmd_t *dp, int pfd)
{
	char *nodc_key[] = { FMD_FLT_NODC, NULL };
	char nodc_str[128];
	struct sigaction act;

	int status = FMD_EXIT_SUCCESS;
	const char *name;
	fmd_conf_path_t *pap;
	fmd_event_t *e;
	int dbout, err;

	/*
	 * Cache all the current debug property settings in d_fmd_debug,
	 * d_fmd_dbout, d_hdl_debug, and d_hdl_dbout.  If a given debug mask
	 * is non-zero and the corresponding dbout mask is zero, set dbout
	 * to a sensible default value based on whether we have daemonized.
	 */
	(void) fmd_conf_getprop(dp->d_conf, "dbout", &dbout);

	if (dp->d_fmd_debug != 0 && dbout == 0)
		dp->d_fmd_dbout = dp->d_fg? FMD_DBOUT_STDERR : FMD_DBOUT_SYSLOG;
	else
		dp->d_fmd_dbout = dbout;

	(void) fmd_conf_getprop(dp->d_conf, "client.debug", &dp->d_hdl_debug);
	(void) fmd_conf_getprop(dp->d_conf, "client.dbout", &dbout);

	if (dp->d_hdl_debug != 0 && dbout == 0)
		dp->d_hdl_dbout = dp->d_fg? FMD_DBOUT_STDERR : FMD_DBOUT_SYSLOG;
	else
		dp->d_hdl_dbout = dbout;

	/*
	 * Initialize remaining major program data structures such as the
	 * clock, dispatch queues, log files, module hash collections, etc.
	 * This work is done here rather than in fmd_create() to permit the -o
	 * command-line option to modify properties after fmd_create() is done.
	 */
	/* topo_open() is given NULL when d_rootdir is NULL or empty. */
	name = dp->d_rootdir != NULL &&
	    *dp->d_rootdir != '\0' ? dp->d_rootdir : NULL;

	if ((dp->d_topo = topo_open(TOPO_VERSION, name, &err)) == NULL) {
		fmd_error(EFMD_EXIT, "failed to initialize "
		    "topology library: %s\n", topo_strerror(err));
	}

	dp->d_clockptr = dp->d_clockops->fto_init();
	dp->d_xprt_ids = fmd_idspace_create("xprt_ids", 1, INT_MAX);
	fmd_xprt_suspend_all();

	(void) door_server_create(fmd_door);
	fmd_dr_init();

	dp->d_rmod->mod_timerids = fmd_idspace_create(dp->d_pname, 1, 16);
	dp->d_timers = fmd_timerq_create();
	dp->d_disp = fmd_dispq_create();
	dp->d_cases = fmd_case_hash_create();

	/*
	 * The root module's mod_queue is created with limit zero, making it
	 * act like /dev/null; anything inserted here is simply ignored.
	 */
	dp->d_rmod->mod_queue = fmd_eventq_create(dp->d_rmod,
	    &dp->d_rmod->mod_stats->ms_evqstat, &dp->d_rmod->mod_stats_lock, 0);

	/*
	 * Once our subsystems that use signals have been set up, install the
	 * signal handler for the fmd_thr_signal() API.  Verify that the signal
	 * being used for this purpose doesn't conflict with something else.
	 */
	(void) fmd_conf_getprop(dp->d_conf, "client.thrsig", &dp->d_thr_sig);

	/* Query the current disposition only; nothing is changed yet. */
	if (sigaction(dp->d_thr_sig, NULL, &act) != 0) {
		fmd_error(EFMD_EXIT, "invalid signal selected for "
		    "client.thrsig property: %d\n", dp->d_thr_sig);
	}

	if (act.sa_handler != SIG_IGN && act.sa_handler != SIG_DFL) {
		fmd_error(EFMD_EXIT, "signal selected for client.thrsig "
		    "property is already in use: %d\n", dp->d_thr_sig);
	}

	act.sa_handler = fmd_signal;
	act.sa_flags = 0;

	(void) sigemptyset(&act.sa_mask);
	(void) sigaction(dp->d_thr_sig, &act, NULL);

	/* 'name' is reused below as scratch storage for each path property. */
	(void) fmd_conf_getprop(dp->d_conf, "schemedir", &name);
	dp->d_schemes = fmd_scheme_hash_create(dp->d_rootdir, name);

	(void) fmd_conf_getprop(dp->d_conf, "log.rsrc", &name);
	dp->d_asrus = fmd_asru_hash_create(dp->d_rootdir, name);

	(void) fmd_conf_getprop(dp->d_conf, "log.error", &name);
	dp->d_errlog = fmd_log_open(dp->d_rootdir, name, FMD_LOG_ERROR);

	(void) fmd_conf_getprop(dp->d_conf, "log.fault", &name);
	dp->d_fltlog = fmd_log_open(dp->d_rootdir, name, FMD_LOG_FAULT);

	if (dp->d_asrus == NULL || dp->d_errlog == NULL || dp->d_fltlog == NULL)
		fmd_error(EFMD_EXIT, "failed to initialize log files\n");

	/*
	 * Before loading modules, create an empty control event which will act
	 * as a global barrier for module event processing.  Each module we
	 * load successfully will insert it at the head of its event queue,
	 * and then pause inside of fmd_ctl_rele() after dequeuing the event.
	 * This module barrier is required for two reasons:
	 *
	 * (a) During module loading, the restoration of case checkpoints may
	 *    result in a list.* event being recreated for which the intended
	 *    subscriber has not yet loaded depending on the load order. Such
	 *    events could then result in spurious "no subscriber" errors.
	 *
	 * (b) During errlog replay, a sequence of errors from a long time ago
	 *    may be replayed, and the module may attempt to install relative
	 *    timers associated with one or more of these events.  If errlog
	 *    replay were "racing" with active module threads, an event E1
	 *    that resulted in a relative timer T at time E1 + N nsec could
	 *    fire prior to an event E2 being enqueued, even if the relative
	 *    time ordering was E1 < E2 < E1 + N, causing mis-diagnosis.
	 */
	dp->d_mod_event = e = fmd_event_create(FMD_EVT_CTL,
	    FMD_HRT_NOW, NULL, fmd_ctl_init(NULL));

	/* This hold is dropped by the fmd_event_rele(e) after errlog replay. */
	fmd_event_hold(e);

	/*
	 * Once all data structures are initialized, we load all of our modules
	 * in order according to class in order to load up any subscriptions.
	 * Once built-in modules are loaded, we detach from our waiting parent.
	 */
	dp->d_mod_hash = fmd_modhash_create();

	/* A built-in load failure is only fatal when not running with d_fg. */
	if (fmd_builtin_loadall(dp->d_mod_hash) != 0 && !dp->d_fg)
		fmd_error(EFMD_EXIT, "failed to initialize fault manager\n");

	(void) fmd_conf_getprop(dp->d_conf, "self.name", &name);
	dp->d_self = fmd_modhash_lookup(dp->d_mod_hash, name);

	/*
	 * If the self-diagnosis module is present and its dictionary can map
	 * the FMD_FLT_NODC key to a code, store that code as "nodiagcode".
	 */
	if (dp->d_self != NULL && fmd_module_dc_key2code(dp->d_self,
	    nodc_key, nodc_str, sizeof (nodc_str)) == 0)
		(void) fmd_conf_setprop(dp->d_conf, "nodiagcode", nodc_str);

	fmd_rpc_init();
	dp->d_running = 1; /* we are now officially an active fmd */

	/*
	 * Now that we're running, if a pipe fd was specified, write an exit
	 * status to it to indicate that our parent process can safely detach.
	 * Then proceed to loading the remaining non-built-in modules.
	 */
	if (pfd >= 0)
		(void) write(pfd, &status, sizeof (status));

	/*
	 * Before loading all modules, repopulate the ASRU cache from its
	 * persistent repository on disk.  Then during module loading, the
	 * restoration of checkpoint files will reparent any active cases.
	 */
	fmd_asru_hash_refresh(dp->d_asrus);

	(void) fmd_conf_getprop(dp->d_conf, "plugin.path", &pap);
	fmd_modhash_loadall(dp->d_mod_hash, pap, &fmd_rtld_ops, ".so");

	(void) fmd_conf_getprop(dp->d_conf, "agent.path", &pap);
	fmd_modhash_loadall(dp->d_mod_hash, pap, &fmd_proc_ops, NULL);

	/*
	 * With all modules loaded, replay fault events from the ASRU cache for
	 * any ASRUs that must be retired, replay error events from the errlog
	 * that did not finish processing the last time fmd ran, and then
	 * release the global module barrier by executing a final rele on
	 * d_mod_event.
	 */
	fmd_asru_hash_replay(dp->d_asrus);

	(void) pthread_rwlock_rdlock(&dp->d_log_lock);
	fmd_log_replay(dp->d_errlog, (fmd_log_f *)fmd_err_replay, dp);
	fmd_log_update(dp->d_errlog);
	(void) pthread_rwlock_unlock(&dp->d_log_lock);

	dp->d_mod_event = NULL;
	fmd_event_rele(e);

	/*
	 * Finally, awaken any threads associated with receiving events from
	 * open transports and tell them to proceed with fmd_xprt_recv().
	 */
	fmd_xprt_resume_all();
	/* An id of 0 makes fmd_gc() skip collection and just arm its timer. */
	fmd_gc(dp, 0, 0);

	dp->d_booted = 1;
}
Esempio n. 8
0
/*
 * Tear down the fault manager state in 'dp'.  The sequence is: call
 * fmd_rpc_fini() and fmd_dr_fini(), suspend transports, unload the
 * self-diagnosis module, unload every other module except the root module,
 * release the logs and the ASRU cache, destroy the root module, destroy the
 * remaining global data structures, and finally zero *dp.
 *
 * The "core" property is read on entry, before the configuration is closed;
 * if it is non-zero, fmd_panic() is called as the very last step.
 */
void
fmd_destroy(fmd_t *dp)
{
	fmd_module_t *mp;
	fmd_case_t *cp;
	int core;

	/* Must be fetched now: d_conf is closed further down. */
	(void) fmd_conf_getprop(fmd.d_conf, "core", &core);

	fmd_rpc_fini();
	fmd_dr_fini();

	if (dp->d_xprt_ids != NULL)
		fmd_xprt_suspend_all();

	/*
	 * Unload the self-diagnosis module first.  This ensures that it does
	 * not get confused as we start unloading other modules, etc.  We must
	 * hold the dispq lock as a writer while doing so since it uses d_self.
	 */
	if (dp->d_self != NULL) {
		fmd_module_t *self;

		/* Only the pointer swap is done under the lock. */
		(void) pthread_rwlock_wrlock(&dp->d_disp->dq_lock);
		self = dp->d_self;
		dp->d_self = NULL;
		(void) pthread_rwlock_unlock(&dp->d_disp->dq_lock);

		fmd_module_unload(self);
		fmd_module_rele(self);
	}

	/*
	 * Unload modules in reverse order *except* for the root module, which
	 * is first in the list.  This allows it to keep its thread and trace.
	 */
	for (mp = fmd_list_prev(&dp->d_mod_list); mp != dp->d_rmod; ) {
		fmd_module_unload(mp);
		mp = fmd_list_prev(mp);
	}

	if (dp->d_mod_hash != NULL) {
		fmd_modhash_destroy(dp->d_mod_hash);
		dp->d_mod_hash = NULL;
	}

	/*
	 * Close both log files now that modules are no longer active.  We must
	 * set these pointers to NULL in case any subsequent errors occur.
	 */
	if (dp->d_errlog != NULL) {
		fmd_log_rele(dp->d_errlog);
		dp->d_errlog = NULL;
	}

	if (dp->d_fltlog != NULL) {
		fmd_log_rele(dp->d_fltlog);
		dp->d_fltlog = NULL;
	}

	/*
	 * Now destroy the resource cache: each ASRU contains a case reference,
	 * which may in turn contain a pointer to a referenced owning module.
	 */
	if (dp->d_asrus != NULL) {
		fmd_asru_hash_destroy(dp->d_asrus);
		dp->d_asrus = NULL;
	}

	/*
	 * Now that all data structures that refer to modules are torn down,
	 * no modules should be remaining on the module list except for d_rmod.
	 * If we trip one of these assertions, we're missing a rele somewhere.
	 */
	ASSERT(fmd_list_prev(&dp->d_mod_list) == dp->d_rmod);
	ASSERT(fmd_list_next(&dp->d_mod_list) == dp->d_rmod);

	/*
	 * Now destroy the root module.  We clear its thread key first so any
	 * calls to fmd_trace() inside of the module code will be ignored.
	 */
	(void) pthread_setspecific(dp->d_key, NULL);
	fmd_module_lock(dp->d_rmod);

	/* Discard cases from the head of the list until none remain. */
	while ((cp = fmd_list_next(&dp->d_rmod->mod_cases)) != NULL)
		fmd_case_discard(cp);

	fmd_module_unlock(dp->d_rmod);
	fmd_free(dp->d_rmod->mod_stats, sizeof (fmd_modstat_t));
	dp->d_rmod->mod_stats = NULL;

	(void) pthread_mutex_lock(&dp->d_rmod->mod_lock);
	dp->d_rmod->mod_flags |= FMD_MOD_FINI;
	(void) pthread_mutex_unlock(&dp->d_rmod->mod_lock);

	/* After this rele the module list is expected to be empty. */
	fmd_module_rele(dp->d_rmod);
	ASSERT(fmd_list_next(&dp->d_mod_list) == NULL);

	/*
	 * Now destroy the remaining global data structures.  If 'core' was
	 * set to true, force a core dump so we can check for memory leaks.
	 */
	if (dp->d_cases != NULL)
		fmd_case_hash_destroy(dp->d_cases);
	if (dp->d_disp != NULL)
		fmd_dispq_destroy(dp->d_disp);
	if (dp->d_timers != NULL)
		fmd_timerq_destroy(dp->d_timers);
	if (dp->d_schemes != NULL)
		fmd_scheme_hash_destroy(dp->d_schemes);
	if (dp->d_xprt_ids != NULL)
		fmd_idspace_destroy(dp->d_xprt_ids);

	/* Same element count that fmd_create() uses for d_errstats. */
	if (dp->d_errstats != NULL) {
		fmd_free(dp->d_errstats,
		    sizeof (fmd_stat_t) * (EFMD_END - EFMD_UNKNOWN));
	}

	if (dp->d_conf != NULL)
		fmd_conf_close(dp->d_conf);

	if (dp->d_topo != NULL)
		topo_close(dp->d_topo);

	nvlist_free(dp->d_auth);
	(void) nv_alloc_fini(&dp->d_nva);
	dp->d_clockops->fto_fini(dp->d_clockptr);

	(void) pthread_key_delete(dp->d_key);
	bzero(dp, sizeof (fmd_t));

	/* 'core' is a local, so it survives the bzero() of *dp above. */
	if (core)
		fmd_panic("forcing core dump at user request\n");
}
Esempio n. 9
0
/*
 * Initialize the fault manager state in 'dp' up to the point where
 * configuration properties can still be overridden.  This records platform
 * identity strings, zeroes and seeds *dp, creates the thread key and global
 * locks, opens the configuration given by 'conf' and merges any additional
 * conf files found on $conf_path, sets up the nvlist allocator and authority,
 * and hand-builds the root module along with its statistics.
 *
 * 'arg0' supplies the program name (its basename becomes d_pname).  'root',
 * when non-NULL, overrides the "rootdir" property; when NULL the property
 * value from the configuration is used instead.
 */
void
fmd_create(fmd_t *dp, const char *arg0, const char *root, const char *conf)
{
	fmd_conf_path_t *pap;
	char file[PATH_MAX];
	const char *name;
	fmd_stat_t *sp;
	int i;

	smbios_hdl_t *shp;
	smbios_system_t s1;
	smbios_info_t s2;
	id_t id;

	di_prom_handle_t promh = DI_PROM_HANDLE_NIL;
	di_node_t rooth = DI_NODE_NIL;
	char *bufp;

	(void) sysinfo(SI_PLATFORM, _fmd_plat, sizeof (_fmd_plat));
	(void) sysinfo(SI_ARCHITECTURE, _fmd_isa, sizeof (_fmd_isa));
	(void) uname(&_fmd_uts);

	/*
	 * If SMBIOS can be opened, take the product name and serial number
	 * from it.  Otherwise fall back to the "chassis-sn" PROM property,
	 * which supplies only the serial number.
	 */
	if ((shp = smbios_open(NULL, SMB_VERSION, 0, NULL)) != NULL) {
		if ((id = smbios_info_system(shp, &s1)) != SMB_ERR &&
		    smbios_info_common(shp, id, &s2) != SMB_ERR) {
			(void) strlcpy(_fmd_prod, s2.smbi_product, MAXNAMELEN);
			(void) strlcpy(_fmd_csn, s2.smbi_serial, MAXNAMELEN);
		}
		smbios_close(shp);
	} else if ((rooth = di_init("/", DINFOPROP)) != DI_NODE_NIL &&
	    (promh = di_prom_init()) != DI_PROM_HANDLE_NIL) {
		if (di_prom_prop_lookup_bytes(promh, rooth, "chassis-sn",
		    (unsigned char **)&bufp) != -1) {
			(void) strlcpy(_fmd_csn, bufp, MAXNAMELEN);
		}
	}

	/*
	 * Release whichever handles were obtained; di_init() may have
	 * succeeded even if di_prom_init() then failed.
	 */
	if (promh != DI_PROM_HANDLE_NIL)
		di_prom_fini(promh);
	if (rooth != DI_NODE_NIL)
		di_fini(rooth);

	bzero(dp, sizeof (fmd_t));

	dp->d_version = _fmd_version;
	dp->d_pname = fmd_strbasename(arg0);
	dp->d_pid = getpid();

	if (pthread_key_create(&dp->d_key, NULL) != 0)
		fmd_error(EFMD_EXIT, "failed to create pthread key");

	(void) pthread_mutex_init(&dp->d_xprt_lock, NULL);
	(void) pthread_mutex_init(&dp->d_err_lock, NULL);
	(void) pthread_mutex_init(&dp->d_thr_lock, NULL);
	(void) pthread_mutex_init(&dp->d_mod_lock, NULL);
	(void) pthread_mutex_init(&dp->d_stats_lock, NULL);
	(void) pthread_rwlock_init(&dp->d_log_lock, NULL);

	/*
	 * A small number of properties must be set manually before we open
	 * the root configuration file.  These include any settings for our
	 * memory allocator and path expansion token values, because these
	 * values are needed by the routines in fmd_conf.c itself.  After
	 * the root configuration file is processed, we reset these properties
	 * based upon the latest values from the configuration file.
	 */
	dp->d_alloc_msecs = 10;
	dp->d_alloc_tries = 3;
	dp->d_str_buckets = 211;

	dp->d_rootdir = root ? root : "";
	dp->d_platform = _fmd_plat;
	dp->d_machine = _fmd_uts.machine;
	dp->d_isaname = _fmd_isa;

	dp->d_conf = fmd_conf_open(conf, sizeof (_fmd_conf) /
	    sizeof (_fmd_conf[0]), _fmd_conf, FMD_CONF_DEFER);

	if (dp->d_conf == NULL) {
		fmd_error(EFMD_EXIT,
		    "failed to load required configuration properties\n");
	}

	/* Replace the bootstrap values above with the configured ones. */
	(void) fmd_conf_getprop(dp->d_conf, "alloc.msecs", &dp->d_alloc_msecs);
	(void) fmd_conf_getprop(dp->d_conf, "alloc.tries", &dp->d_alloc_tries);
	(void) fmd_conf_getprop(dp->d_conf, "strbuckets", &dp->d_str_buckets);

	(void) fmd_conf_getprop(dp->d_conf, "platform", &dp->d_platform);
	(void) fmd_conf_getprop(dp->d_conf, "machine", &dp->d_machine);
	(void) fmd_conf_getprop(dp->d_conf, "isaname", &dp->d_isaname);

	/*
	 * Manually specified rootdirs override config files, so only update
	 * d_rootdir based on the config files we parsed if no 'root' was set.
	 */
	if (root == NULL)
		(void) fmd_conf_getprop(dp->d_conf, "rootdir", &dp->d_rootdir);
	else
		(void) fmd_conf_setprop(dp->d_conf, "rootdir", dp->d_rootdir);

	/*
	 * Once the base conf file properties are loaded, lookup the values
	 * of $conf_path and $conf_file and merge in any other conf files.
	 */
	(void) fmd_conf_getprop(dp->d_conf, "conf_path", &pap);
	(void) fmd_conf_getprop(dp->d_conf, "conf_file", &name);

	/* Merge the named file from each path directory where it exists. */
	for (i = 0; i < pap->cpa_argc; i++) {
		(void) snprintf(file, sizeof (file),
		    "%s/%s", pap->cpa_argv[i], name);
		if (access(file, F_OK) == 0)
			fmd_conf_merge(dp->d_conf, file);
	}

	/*
	 * Update the value of fmd.d_fg based on "fg".  We cache this property
	 * because it must be accessed deep within fmd at fmd_verror() time.
	 * Update any other properties that must be cached for performance.
	 *
	 * NOTE(review): these two lookups go through the global 'fmd' rather
	 * than 'dp'; presumably callers always pass &fmd -- confirm.
	 */
	(void) fmd_conf_getprop(fmd.d_conf, "fg", &fmd.d_fg);
	(void) fmd_conf_getprop(fmd.d_conf, "xprt.ttl", &fmd.d_xprt_ttl);

	/*
	 * Initialize our custom libnvpair allocator and create an nvlist for
	 * authority elements corresponding to this instance of the daemon.
	 */
	(void) nv_alloc_init(&dp->d_nva, &fmd_nv_alloc_ops);
	dp->d_auth = fmd_protocol_authority();

	/*
	 * The fmd_module_t for the root module must be created manually.  Most
	 * of it remains unused and zero, except for the few things we fill in.
	 */
	dp->d_rmod = fmd_zalloc(sizeof (fmd_module_t), FMD_SLEEP);
	dp->d_rmod->mod_name = fmd_strdup(dp->d_pname, FMD_SLEEP);
	dp->d_rmod->mod_fmri = fmd_protocol_fmri_module(dp->d_rmod);

	fmd_list_append(&dp->d_mod_list, dp->d_rmod);
	fmd_module_hold(dp->d_rmod);

	(void) pthread_mutex_init(&dp->d_rmod->mod_lock, NULL);
	(void) pthread_cond_init(&dp->d_rmod->mod_cv, NULL);
	(void) pthread_mutex_init(&dp->d_rmod->mod_stats_lock, NULL);

	/* The calling thread becomes the root module's thread. */
	dp->d_rmod->mod_thread = fmd_thread_xcreate(dp->d_rmod, pthread_self());
	dp->d_rmod->mod_stats = fmd_zalloc(sizeof (fmd_modstat_t), FMD_SLEEP);
	dp->d_rmod->mod_ustat = fmd_ustat_create();

	if (pthread_setspecific(dp->d_key, dp->d_rmod->mod_thread) != 0)
		fmd_error(EFMD_EXIT, "failed to attach main thread key");

	if ((dp->d_stats = (fmd_statistics_t *)fmd_ustat_insert(
	    dp->d_rmod->mod_ustat, FMD_USTAT_NOALLOC, sizeof (_fmd_stats) /
	    sizeof (fmd_stat_t), (fmd_stat_t *)&_fmd_stats, NULL)) == NULL)
		fmd_error(EFMD_EXIT, "failed to initialize statistics");

	(void) pthread_mutex_lock(&dp->d_rmod->mod_lock);
	dp->d_rmod->mod_flags |= FMD_MOD_INIT;
	(void) pthread_mutex_unlock(&dp->d_rmod->mod_lock);

	/*
	 * In addition to inserting the _fmd_stats collection of program-wide
	 * statistics, we also insert a statistic named after each of our
	 * errors and update these counts in fmd_verror() (see fmd_subr.c).
	 */
	dp->d_errstats = sp = fmd_zalloc(sizeof (fmd_stat_t) *
	    (EFMD_END - EFMD_UNKNOWN), FMD_SLEEP);

	/*
	 * Each statistic is named "err." followed by the text after the last
	 * '.' in the error class string.  NOTE(review): this assumes every
	 * string returned by fmd_errclass() contains a '.' -- confirm.
	 */
	for (i = 0; i < EFMD_END - EFMD_UNKNOWN; i++, sp++) {
		(void) snprintf(sp->fmds_name, sizeof (sp->fmds_name), "err.%s",
		    strrchr(fmd_errclass(EFMD_UNKNOWN + i), '.') + 1);
		sp->fmds_type = FMD_TYPE_UINT64;
	}

	(void) fmd_ustat_insert(dp->d_rmod->mod_ustat, FMD_USTAT_NOALLOC,
	    EFMD_END - EFMD_UNKNOWN, dp->d_errstats, NULL);
}
Esempio n. 10
0
/*
 * Write a new checkpoint for module 'mp' when checkpointing is enabled for
 * it and it has dirty module or case state.  The caller must hold the module
 * lock.  On success the module state is committed and the checkpoint count
 * and cumulative time statistics are updated.  On any failure an error is
 * reported and the function returns without calling fmd_module_commit().
 */
void
fmd_ckpt_save(fmd_module_t *mp)
{
	hrtime_t start = gethrtime();
	struct stat64 sb;
	char aside[PATH_MAX];
	mode_t dmode;
	fmd_ckpt_t ck;
	int rc;

	ASSERT(fmd_module_locked(mp));

	/*
	 * With checkpointing switched off for this module there is nothing
	 * to write, but the module state still has to be committed so that
	 * pending log events make their transition.
	 */
	if (mp->mod_stats->ms_ckpt_save.fmds_value.b == FMD_B_FALSE) {
		fmd_module_commit(mp);
		return;
	}

	/* Neither module nor case state is dirty: nothing to save. */
	if ((mp->mod_flags & (FMD_MOD_MDIRTY | FMD_MOD_CDIRTY)) == 0)
		return;

	TRACE((FMD_DBG_CKPT, "ckpt save begin %s %llu",
	    mp->mod_name, mp->mod_gen + 1));

	/*
	 * Make sure the module's checkpoint directory exists.  Anything
	 * else sitting at that path is renamed out of the way (to the same
	 * name with a trailing '-') and a fresh directory is made.  If the
	 * mkdir(2) fails, give up for now and retry on a later save.
	 */
	if (stat64(mp->mod_ckpt, &sb) != 0 || !S_ISDIR(sb.st_mode)) {
		(void) snprintf(aside, sizeof (aside), "%s-", mp->mod_ckpt);
		(void) rename(mp->mod_ckpt, aside);
		(void) fmd_conf_getprop(fmd.d_conf, "ckpt.dirmode", &dmode);

		if (mkdir(mp->mod_ckpt, dmode) != 0) {
			fmd_error(EFMD_CKPT_MKDIR,
			    "failed to mkdir %s", mp->mod_ckpt);
			return; /* dirty bits remain set */
		}
	}

	/*
	 * Open the temporary checkpoint file and initialize the fmd_ckpt_t
	 * that tracks construction, then reserve and allocate the space the
	 * module's checkpoint will need.
	 */
	if (fmd_ckpt_create(&ck, mp) == -1) {
		fmd_error(EFMD_CKPT_CREATE, "failed to create %s", ck.ckp_src);
		return;
	}

	fmd_ckpt_resv_module(&ck, mp);

	if (fmd_ckpt_alloc(&ck, mp->mod_gen + 1) != 0) {
		fmd_error(EFMD_CKPT_NOMEM, "failed to build %s", ck.ckp_src);
		fmd_ckpt_destroy(&ck);
		return;
	}

	/*
	 * Populate the checkpoint, commit it to its destination path, and
	 * release the construction state.  A failed commit leaves the dirty
	 * bits alone so the save is attempted again later.
	 */
	fmd_ckpt_save_module(&ck, mp);
	rc = fmd_ckpt_commit(&ck);
	fmd_ckpt_destroy(&ck);

	if (rc != 0) {
		fmd_error(EFMD_CKPT_COMMIT, "failed to commit %s", ck.ckp_dst);
		return; /* dirty bits remain set */
	}

	fmd_module_commit(mp);
	TRACE((FMD_DBG_CKPT, "ckpt save end %s", mp->mod_name));

	/* Account for this checkpoint and the time it took. */
	mp->mod_stats->ms_ckpt_cnt.fmds_value.ui64++;
	mp->mod_stats->ms_ckpt_time.fmds_value.ui64 += gethrtime() - start;

	fmd_dprintf(FMD_DBG_CKPT, "saved checkpoint of %s (%llu)\n",
	    mp->mod_name, mp->mod_gen);
}