Exemple #1
0
/*
 * void method_remove_contract()
 *   Forget one of inst's contracts: optionally abandon it, delete its id
 *   from the repository, call contract_hash_remove() for it when it is the
 *   primary contract, and zero the id cached in inst.
 *
 *   primary selects inst->ri_i.i_primary_ctid (true) or
 *   inst->ri_i.i_transient_ctid (false); the selected id must be nonzero.
 *   abandon requests a contract_abandon() call before the repository update.
 *
 *   Nothing is returned.  If the repository reports ECANCELED,
 *   inst->ri_mi_deleted is set; while that flag is set the repository is
 *   not touched at all.  ECONNABORTED causes the handle to be rebound and,
 *   like EBADF, the instance to be re-fetched before another attempt.
 *   ENOMEM, EPERM, EACCES and EROFS are logged and otherwise ignored.
 */
void
method_remove_contract(restarter_inst_t *inst, boolean_t primary,
    boolean_t abandon)
{
	ctid_t * const slot = primary ? &inst->ri_i.i_primary_ctid :
	    &inst->ri_i.i_transient_ctid;
	int retry = 1;
	int err;

	assert(*slot != 0);

	log_framework(LOG_DEBUG, "Removing %s contract %lu for %s.\n",
	    primary ? "primary" : "transient", *slot, inst->ri_i.i_fmri);

	if (abandon)
		contract_abandon(*slot);

	/*
	 * Keep trying until the repository gives a final answer or the
	 * instance turns out to have been deleted.
	 */
	while (retry && !inst->ri_mi_deleted) {
		retry = 0;

		err = restarter_remove_contract(inst->ri_m_inst, *slot,
		    primary ? RESTARTER_CONTRACT_PRIMARY :
		    RESTARTER_CONTRACT_TRANSIENT);

		switch (err) {
		case 0:
			break;

		case ECANCELED:
			inst->ri_mi_deleted = B_TRUE;
			break;

		case ECONNABORTED:
			libscf_handle_rebind(
			    scf_instance_handle(inst->ri_m_inst));
			/* FALLTHROUGH */

		case EBADF:
			libscf_reget_instance(inst);
			retry = 1;
			break;

		case ENOMEM:
		case EPERM:
		case EACCES:
		case EROFS:
			log_error(LOG_INFO, "%s: Couldn't remove contract id "
			    "%ld: %s.\n", inst->ri_i.i_fmri, *slot,
			    strerror(err));
			break;

		case EINVAL:
		default:
			bad_error("restarter_remove_contract", err);
		}
	}

	/* The in-memory bookkeeping is cleared whatever the repository said. */
	if (primary)
		contract_hash_remove(*slot);

	*slot = 0;
}
Exemple #2
0
/*
 * Record stat as the status of inst's mname method by way of
 * libscf_write_method_status().
 *
 * Nothing is written once inst->ri_mi_deleted is set, and ECANCELED from the
 * write sets that flag.  After ECONNABORTED the instance is re-fetched with
 * libscf_reget_instance() and the write is attempted again.  EPERM, EACCES
 * and EROFS are logged and dropped; any other error goes to bad_error().
 */
static void
write_status(restarter_inst_t *inst, const char *mname, int stat)
{
	int retry = 1;
	int err;

	while (retry && !inst->ri_mi_deleted) {
		retry = 0;

		err = libscf_write_method_status(inst->ri_m_inst, mname, stat);

		switch (err) {
		case 0:
			break;

		case ECANCELED:
			inst->ri_mi_deleted = 1;
			break;

		case ECONNABORTED:
			libscf_reget_instance(inst);
			retry = 1;
			break;

		case EPERM:
		case EACCES:
		case EROFS:
			log_framework(LOG_INFO, "Could not write exit status "
			    "for %s method of %s: %s.\n", mname,
			    inst->ri_i.i_fmri, strerror(err));
			break;

		case ENAMETOOLONG:
		default:
			bad_error("libscf_write_method_status", err);
		}
	}
}
Exemple #3
0
/*
 * void utmpx_clear_old(void)
 *   At boot and only at boot, truncate the utmpx file.
 *
 */
void
utmpx_clear_old(void)
{
	int fd;
	mode_t mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH;

	if (!st->st_initial || utmpx_truncated)
		return;

	MUTEX_LOCK(&utmpx_lock);

	if ((fd = open(_UTMPX_FILE,
	    O_WRONLY | O_CREAT | O_TRUNC, mode)) != -1) {
		(void) fchmod(fd, mode); /* force mode regardless of umask() */
		(void) fchown(fd, 0, 2); /* force owner to root/bin */
		(void) close(fd);
	} else {
		log_framework(LOG_NOTICE, "Unable to create %s: %s\n",
		    _UTMPX_FILE, strerror(errno));
	}

	utmpx_truncated = 1;

	MUTEX_UNLOCK(&utmpx_lock);
}
Exemple #4
0
/*
 * void utmpx_mark_dead(pid_t, int, boolean_t)
 *   Scan the utmpx database for entries belonging to pid and rewrite the
 *   first live one as DEAD_PROCESS, filling in the termination signal and
 *   exit code taken from status and the current time.  On a successful
 *   rewrite the entry is also appended to WTMPX_FILE.
 *
 *   Returns quietly if no entry matches or a matching entry is already
 *   DEAD_PROCESS.  If entries matched but none could be written, the whole
 *   scan is repeated once a second for as long as blocking is true;
 *   otherwise the function gives up after one pass.
 */
void
utmpx_mark_dead(pid_t pid, int status, boolean_t blocking)
{
	struct utmpx *ent;
	int announced = 0;

	for (;;) {
		int matched = 0;
		int finished = 0;

		MUTEX_LOCK(&utmpx_lock);
		setutxent();

		while ((ent = getutxent()) != NULL) {
			if (ent->ut_pid != pid)
				continue;

			matched = 1;

			if (ent->ut_type == DEAD_PROCESS) {
				/*
				 * Cleaned up elsewhere.
				 */
				finished = 1;
				break;
			}

			ent->ut_type = DEAD_PROCESS;
			ent->ut_exit.e_termination = WTERMSIG(status);
			ent->ut_exit.e_exit = WEXITSTATUS(status);
			(void) time(&ent->ut_tv.tv_sec);

			if (pututxline(ent) != NULL) {
				/*
				 * Now attempt to add to the end of the
				 * wtmp and wtmpx files.  Do not create
				 * if they don't already exist.
				 */
				updwtmpx(WTMPX_FILE, ent);
				finished = 1;
				break;
			}
		}

		endutxent();
		MUTEX_UNLOCK(&utmpx_lock);

		if (finished || !matched || !blocking)
			return;

		/* Mention the retry only the first time around. */
		if (!announced) {
			log_framework(LOG_INFO, "retrying utmpx_dead on PID "
			    "%ld\n", pid);
			announced = 1;
		}

		(void) sleep(1);
	}
}
Exemple #5
0
/*
 * int wait_register(pid_t, char *, int, int)
 *   Called after fork(2), once the pid to monitor is known.  A wait_info_t
 *   describing the child is appended to wait_info_list, the child's
 *   /proc/<pid>/psinfo file is opened, and the resulting descriptor is
 *   associated with port_fd so that events on it can be picked up later.
 *
 *   The child may already be gone by the time we get here, so ENOENT from
 *   open(2) is not treated as a failure: the record is handed straight to
 *   wait_remove(), with the direct flag passed through.  The am_parent flag
 *   is stored in the record as wi_parent.
 *
 *   Returns 0 if registration succeeded, 1 if the child pid did not exist,
 *   and -1 on any other error.
 *
 *   NOTE(review): on the -1 paths after the list insertion (open failing
 *   with something other than ENOENT, or port_associate failing) the record
 *   stays on wait_info_list -- confirm that this is intended.
 */
int
wait_register(pid_t pid, const char *inst_fmri, int am_parent, int direct)
{
	char *path = uu_msprintf("/proc/%ld/psinfo", pid);
	wait_info_t *entry;
	int pfd;

	assert(pid != 0);

	if (path == NULL)
		return (-1);

	entry = startd_alloc(sizeof (wait_info_t));

	uu_list_node_init(entry, &entry->wi_link, wait_info_pool);

	entry->wi_fd = -1;
	entry->wi_pid = pid;
	entry->wi_fmri = inst_fmri;
	entry->wi_parent = am_parent;
	entry->wi_ignore = 0;

	MUTEX_LOCK(&wait_info_lock);
	(void) uu_list_insert_before(wait_info_list, NULL, entry);
	MUTEX_UNLOCK(&wait_info_lock);

	pfd = open(path, O_RDONLY);
	if (pfd == -1) {
		int rv;

		if (errno == ENOENT) {
			/*
			 * Child has already exited.
			 */
			wait_remove(entry, direct);
			rv = 1;
		} else {
			log_error(LOG_WARNING,
			    "open %s failed; not monitoring %s: %s\n", path,
			    inst_fmri, strerror(errno));
			rv = -1;
		}

		uu_free(path);
		return (rv);
	}

	uu_free(path);

	entry->wi_fd = pfd;

	if (port_associate(port_fd, PORT_SOURCE_FD, pfd, 0, entry)) {
		log_error(LOG_WARNING,
		    "initial port_association of %d / %s failed: %s\n", pfd,
		    inst_fmri, strerror(errno));
		return (-1);
	}

	log_framework(LOG_DEBUG, "monitoring PID %ld on fd %d (%s)\n", pid,
	    pfd, inst_fmri);

	return (0);
}
Exemple #6
0
/*
 * The method thread executes a service method to effect a state transition.
 * The next_state of info->sf_id should be non-_NONE on entrance, and it will
 * be _NONE on exit (state will either be what next_state was (on success), or
 * it will be _MAINT (on error)).
 *
 * There are six classes of methods to consider: start & other (stop, refresh)
 * for each of "normal" services, wait services, and transient services.  For
 * each, the method must be fetched from the repository & executed.  fork()ed
 * methods must be waited on, except for the start method of wait services
 * (which must be registered with the wait subsystem via wait_register()).  If
 * the method succeeded (returned 0), then for start methods its contract
 * should be recorded as the primary contract for the service.  For other
 * methods, it should be abandoned.  If the method fails, then depending on
 * the failure, either the method should be reexecuted or the service should
 * be put into maintenance.  Either way the contract should be abandoned.
 */
void *
method_thread(void *arg)
{
	fork_info_t *info = arg;
	restarter_inst_t *inst;
	scf_handle_t	*local_handle;
	scf_instance_t	*s_inst = NULL;
	int r, exit_code;
	boolean_t retryable;
	const char *aux;

	assert(0 <= info->sf_method_type && info->sf_method_type <= 2);

	/* Get (and lock) the restarter_inst_t. */
	inst = inst_lookup_by_id(info->sf_id);

	assert(inst->ri_method_thread != 0);
	assert(instance_in_transition(inst) == 1);

	/*
	 * We cannot leave this function with inst in transition, because
	 * protocol.c withholds messages for inst otherwise.
	 */

	log_framework(LOG_DEBUG, "method_thread() running %s method for %s.\n",
	    method_names[info->sf_method_type], inst->ri_i.i_fmri);

	local_handle = libscf_handle_create_bound_loop();

rebind_retry:
	/* get scf_instance_t */
	switch (r = libscf_fmri_get_instance(local_handle, inst->ri_i.i_fmri,
	    &s_inst)) {
	case 0:
		break;

	case ECONNABORTED:
		libscf_handle_rebind(local_handle);
		goto rebind_retry;

	case ENOENT:
		/*
		 * It's not there, but we need to call this so protocol.c
		 * doesn't think it's in transition anymore.
		 */
		(void) restarter_instance_update_states(local_handle, inst,
		    inst->ri_i.i_state, RESTARTER_STATE_NONE, RERR_NONE,
		    NULL);
		goto out;

	case EINVAL:
	case ENOTSUP:
	default:
		bad_error("libscf_fmri_get_instance", r);
	}

	inst->ri_m_inst = s_inst;
	inst->ri_mi_deleted = B_FALSE;

retry:
	if (info->sf_method_type == METHOD_START)
		log_transition(inst, START_REQUESTED);

	r = method_run(&inst, info->sf_method_type, &exit_code);

	if (r == 0 && exit_code == 0) {
		/* Success! */
		assert(inst->ri_i.i_next_state != RESTARTER_STATE_NONE);

		/*
		 * When a stop method succeeds, remove the primary contract of
		 * the service, unless we're going to offline, in which case
		 * retain the contract so we can transfer inherited contracts to
		 * the replacement service.
		 */

		if (info->sf_method_type == METHOD_STOP &&
		    inst->ri_i.i_primary_ctid != 0) {
			if (inst->ri_i.i_next_state == RESTARTER_STATE_OFFLINE)
				inst->ri_i.i_primary_ctid_stopped = 1;
			else
				method_remove_contract(inst, B_TRUE, B_TRUE);
		}
		/*
		 * We don't care whether the handle was rebound because this is
		 * the last thing we do with it.
		 */
		(void) restarter_instance_update_states(local_handle, inst,
		    inst->ri_i.i_next_state, RESTARTER_STATE_NONE,
		    info->sf_event_type, NULL);

		(void) update_fault_count(inst, FAULT_COUNT_RESET);

		goto out;
	}

	/* Failure.  Retry or go to maintenance. */

	if (r != 0 && r != EAGAIN) {
		retryable = B_FALSE;
	} else {
		switch (exit_code) {
		case SMF_EXIT_ERR_CONFIG:
		case SMF_EXIT_ERR_NOSMF:
		case SMF_EXIT_ERR_PERM:
		case SMF_EXIT_ERR_FATAL:
			retryable = B_FALSE;
			break;

		default:
			retryable = B_TRUE;
		}
	}

	if (retryable && update_fault_count(inst, FAULT_COUNT_INCR) != 1)
		goto retry;

	/* maintenance */
	if (r == ELOOP)
		log_transition(inst, START_FAILED_REPEATEDLY);
	else if (r == ERANGE)
		log_transition(inst, START_FAILED_TIMEOUT_FATAL);
	else if (exit_code == SMF_EXIT_ERR_CONFIG)
		log_transition(inst, START_FAILED_CONFIGURATION);
	else if (exit_code == SMF_EXIT_ERR_FATAL)
		log_transition(inst, START_FAILED_FATAL);
	else
		log_transition(inst, START_FAILED_OTHER);

	if (r == ELOOP)
		aux = "restarting_too_quickly";
	else if (retryable)
		aux = "fault_threshold_reached";
	else
		aux = "method_failed";

	(void) restarter_instance_update_states(local_handle, inst,
	    RESTARTER_STATE_MAINT, RESTARTER_STATE_NONE, RERR_FAULT,
	    (char *)aux);

	if (!method_is_transient(inst, info->sf_method_type) &&
	    inst->ri_i.i_primary_ctid != 0)
		method_remove_contract(inst, B_TRUE, B_TRUE);

out:
	inst->ri_method_thread = 0;
	MUTEX_UNLOCK(&inst->ri_lock);
	(void) pthread_cond_broadcast(&inst->ri_method_cv);

	scf_instance_destroy(s_inst);
	scf_handle_destroy(local_handle);
	startd_free(info, sizeof (fork_info_t));
	return (NULL);
}
Exemple #7
0
/*
 * int method_run()
 *   Execute the type method of instp.  If it requires a fork(), wait for it
 *   to return and return its exit code in *exit_code.  Otherwise set
 *   *exit_code to 0 if the method succeeds & -1 if it fails.  If the
 *   repository connection is broken, it is rebound, but inst may not be
 *   reset.
 *
 *   *exit_code is set to 0 before anything else is done, so it holds a
 *   defined value on every return path.  The caller must hold the lock of
 *   *instp; the lock is dropped while waiting for a forked method and the
 *   instance is looked up again afterwards, so *instp may be rewritten.
 *
 *   Returns
 *     0 - success
 *     EINVAL - A correct method or method context couldn't be retrieved.
 *     EIO - Contract kill failed.
 *     EFAULT - Method couldn't be executed successfully.
 *     ELOOP - Retry threshold exceeded.
 *     ECANCELED - inst was deleted from the repository before method was run
 *     ERANGE - Timeout retry threshold exceeded.
 *     EAGAIN - Failed due to external cause, retry.
 */
int
method_run(restarter_inst_t **instp, int type, int *exit_code)
{
	char *method;
	int ret_status;
	pid_t pid;
	method_restart_t restart_on;
	uint_t cte_mask;
	uint8_t need_session;
	scf_handle_t *h;
	scf_snapshot_t *snap;
	const char *mname;
	const char *errstr;
	struct method_context *mcp;
	int result = 0, timeout_fired = 0;
	int sig, r;
	boolean_t transient;
	uint64_t timeout;
	uint8_t timeout_retry;
	ctid_t ctid;
	int ctfd = -1;
	ct_evthdl_t ctev;
	uint_t evtype;
	restarter_inst_t *inst = *instp;
	int id = inst->ri_id;
	int forkerr;

	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));
	assert(instance_in_transition(inst));

	/*
	 * Give the caller a defined exit code even if we bail out right
	 * away because the instance is gone.
	 */
	*exit_code = 0;

	if (inst->ri_mi_deleted)
		return (ECANCELED);

	assert(0 <= type && type <= 2);
	mname = method_names[type];

	if (type == METHOD_START)
		inst->ri_pre_online_hook();

	h = scf_instance_handle(inst->ri_m_inst);

	snap = scf_snapshot_create(h);
	if (snap == NULL ||
	    scf_instance_get_snapshot(inst->ri_m_inst, "running", snap) != 0) {
		log_framework(LOG_DEBUG,
		    "Could not get running snapshot for %s.  "
		    "Using editing version to run method %s.\n",
		    inst->ri_i.i_fmri, mname);
		scf_snapshot_destroy(snap);
		snap = NULL;
	}

	/*
	 * After this point, we may be logging to the instance log.
	 * Make sure we've noted where that log is as a property of
	 * the instance.
	 */
	r = libscf_note_method_log(inst->ri_m_inst, st->st_log_prefix,
	    inst->ri_logstem);
	if (r != 0) {
		log_framework(LOG_WARNING,
		    "%s: couldn't note log location: %s\n",
		    inst->ri_i.i_fmri, strerror(r));
	}

	if ((method = libscf_get_method(h, type, inst, snap, &restart_on,
	    &cte_mask, &need_session, &timeout, &timeout_retry)) == NULL) {
		if (errno == LIBSCF_PGROUP_ABSENT)  {
			log_framework(LOG_DEBUG,
			    "%s: instance has no method property group '%s'.\n",
			    inst->ri_i.i_fmri, mname);
			if (type == METHOD_REFRESH)
				log_instance(inst, B_TRUE, "No '%s' method "
				    "defined.  Treating as :true.", mname);
			else
				log_instance(inst, B_TRUE, "Method property "
				    "group '%s' is not present.", mname);
			scf_snapshot_destroy(snap);
			return (0);
		} else if (errno == LIBSCF_PROPERTY_ABSENT)  {
			log_framework(LOG_DEBUG,
			    "%s: instance has no '%s/exec' method property.\n",
			    inst->ri_i.i_fmri, mname);
			log_instance(inst, B_TRUE, "Method property '%s/exec "
			    "is not present.", mname);
			scf_snapshot_destroy(snap);
			return (0);
		} else {
			log_error(LOG_WARNING,
			    "%s: instance libscf_get_method failed\n",
			    inst->ri_i.i_fmri);
			scf_snapshot_destroy(snap);
			return (EINVAL);
		}
	}

	/* open service contract if stopping a non-transient service */
	if (type == METHOD_STOP && (!instance_is_transient_style(inst))) {
		if (inst->ri_i.i_primary_ctid == 0) {
			/* service is not running, nothing to stop */
			log_framework(LOG_DEBUG, "%s: instance has no primary "
			    "contract, no service to stop.\n",
			    inst->ri_i.i_fmri);
			/*
			 * Leave through the common exit (result is still 0)
			 * so that the method string is freed along with the
			 * snapshot.
			 */
			goto out;
		}
		if ((ctfd = contract_open(inst->ri_i.i_primary_ctid, "process",
		    "events", O_RDONLY)) < 0) {
			result = EFAULT;
			log_instance(inst, B_TRUE, "Could not open service "
			    "contract %ld.  Stop method not run.\n",
			    inst->ri_i.i_primary_ctid);
			goto out;
		}
	}

	if (restarter_is_null_method(method)) {
		log_framework(LOG_DEBUG, "%s: null method succeeds\n",
		    inst->ri_i.i_fmri);

		log_instance(inst, B_TRUE, "Executing %s method (null)", mname);

		if (type == METHOD_START)
			write_status(inst, mname, 0);
		goto out;
	}

	sig = restarter_is_kill_method(method);
	if (sig >= 0) {

		if (inst->ri_i.i_primary_ctid == 0) {
			log_error(LOG_ERR, "%s: :kill with no contract\n",
			    inst->ri_i.i_fmri);
			result = EINVAL;
			goto out;
		}

		log_framework(LOG_DEBUG,
		    "%s: :killing contract with signal %d\n",
		    inst->ri_i.i_fmri, sig);

		log_instance(inst, B_TRUE, "Executing %s method (:kill)",
		    mname);

		if (contract_kill(inst->ri_i.i_primary_ctid, sig,
		    inst->ri_i.i_fmri) != 0) {
			result = EIO;
			goto out;
		} else
			goto assured_kill;
	}

	log_framework(LOG_DEBUG, "%s: forking to run method %s\n",
	    inst->ri_i.i_fmri, method);

	errstr = restarter_get_method_context(RESTARTER_METHOD_CONTEXT_VERSION,
	    inst->ri_m_inst, snap, mname, method, &mcp);

	if (errstr != NULL) {
		log_error(LOG_WARNING, "%s: %s\n", inst->ri_i.i_fmri, errstr);
		result = EINVAL;
		goto out;
	}

	r = method_ready_contract(inst, type, restart_on, cte_mask);
	if (r != 0) {
		assert(r == ECANCELED);
		assert(inst->ri_mi_deleted);
		restarter_free_method_context(mcp);
		result = ECANCELED;
		goto out;
	}

	/*
	 * Validate safety of method contexts, to save children work.
	 */
	if (!restarter_rm_libs_loadable())
		log_framework(LOG_DEBUG, "%s: method contexts limited "
		    "to root-accessible libraries\n", inst->ri_i.i_fmri);

	/*
	 * If the service is restarting too quickly, send it to
	 * maintenance.
	 */
	if (type == METHOD_START) {
		method_record_start(inst);
		if (method_rate_critical(inst)) {
			log_instance(inst, B_TRUE, "Restarting too quickly, "
			    "changing state to maintenance");
			result = ELOOP;
			/* No child will use the context; don't leak it. */
			restarter_free_method_context(mcp);
			goto out;
		}
	}

	pid = startd_fork1(&forkerr);
	if (pid == 0)
		exec_method(inst, type, method, mcp, need_session);

	if (pid == -1) {
		if (forkerr == EAGAIN)
			result = EAGAIN;
		else
			result = EFAULT;

		log_error(LOG_WARNING,
		    "%s: Couldn't fork to execute method %s: %s\n",
		    inst->ri_i.i_fmri, method, strerror(forkerr));

		/* No child was created, so the context is ours to free. */
		restarter_free_method_context(mcp);
		goto out;
	}

	restarter_free_method_context(mcp);

	/*
	 * Get the contract id, decide whether it is primary or transient, and
	 * stash it in inst & the repository.
	 */
	method_store_contract(inst, type, &ctid);

	/*
	 * Similarly for the start method PID.
	 */
	if (type == METHOD_START && !inst->ri_mi_deleted)
		(void) libscf_write_start_pid(inst->ri_m_inst, pid);

	if (instance_is_wait_style(inst) && type == METHOD_START) {
		/* Wait style instances don't get timeouts on start methods. */
		if (wait_register(pid, inst->ri_i.i_fmri, 1, 0)) {
			log_error(LOG_WARNING,
			    "%s: couldn't register %ld for wait\n",
			    inst->ri_i.i_fmri, pid);
			result = EFAULT;
			goto contract_out;
		}
		write_status(inst, mname, 0);

	} else {
		int wret;
		int werr = 0;
		time_t start_time;
		time_t end_time;

		/*
		 * Because on upgrade/live-upgrade we may have no chance
		 * to override faulty timeout values on the way to
		 * manifest import, all services on the path to manifest
		 * import are treated the same as INFINITE timeout services.
		 */

		start_time = time(NULL);
		if (timeout != METHOD_TIMEOUT_INFINITE && !is_timeout_ovr(inst))
			timeout_insert(inst, ctid, timeout);
		else
			timeout = METHOD_TIMEOUT_INFINITE;

		/* Unlock the instance while waiting for the method. */
		MUTEX_UNLOCK(&inst->ri_lock);

		do
			wret = waitpid(pid, &ret_status, 0);
		while (wret == -1 && errno == EINTR);
		/* Save errno before any later call can overwrite it. */
		if (wret == -1)
			werr = errno;

		/* Re-grab the lock. */
		inst = inst_lookup_by_id(id);

		/*
		 * inst can't be removed, as the removal thread waits
		 * for completion of this one.
		 */
		assert(inst != NULL);
		*instp = inst;

		if (inst->ri_timeout != NULL && inst->ri_timeout->te_fired)
			timeout_fired = 1;

		timeout_remove(inst, ctid);

		/*
		 * ret_status is only meaningful if waitpid() succeeded, so
		 * deal with failure before looking at it.
		 */
		if (wret == -1) {
			log_error(LOG_WARNING,
			    "Couldn't waitpid() for %s method of %s (%s).\n",
			    mname, inst->ri_i.i_fmri, strerror(werr));
			result = EFAULT;
			goto contract_out;
		}

		log_framework(LOG_DEBUG,
		    "%s method for %s exited with status %d.\n", mname,
		    inst->ri_i.i_fmri, WEXITSTATUS(ret_status));

		if (type == METHOD_START)
			write_status(inst, mname, ret_status);

		/* return ERANGE if this service doesn't retry on timeout */
		if (timeout_fired == 1 && timeout_retry == 0) {
			result = ERANGE;
			goto contract_out;
		}

		if (!WIFEXITED(ret_status)) {
			/*
			 * If method didn't exit itself (it was killed by an
			 * external entity, etc.), consider the entire
			 * method_run as failed.
			 */
			if (WIFSIGNALED(ret_status)) {
				char buf[SIG2STR_MAX];
				(void) sig2str(WTERMSIG(ret_status), buf);

				log_error(LOG_WARNING, "%s: Method \"%s\" "
				    "failed due to signal %s.\n",
				    inst->ri_i.i_fmri, method, buf);
				log_instance(inst, B_TRUE, "Method \"%s\" "
				    "failed due to signal %s", mname, buf);
			} else {
				log_error(LOG_WARNING, "%s: Method \"%s\" "
				    "failed with exit status %d.\n",
				    inst->ri_i.i_fmri, method,
				    WEXITSTATUS(ret_status));
				log_instance(inst, B_TRUE, "Method \"%s\" "
				    "failed with exit status %d", mname,
				    WEXITSTATUS(ret_status));
			}
			result = EAGAIN;
			goto contract_out;
		}

		*exit_code = WEXITSTATUS(ret_status);
		if (*exit_code != 0) {
			log_error(LOG_WARNING,
			    "%s: Method \"%s\" failed with exit status %d.\n",
			    inst->ri_i.i_fmri, method, WEXITSTATUS(ret_status));
		}

		log_instance(inst, B_TRUE, "Method \"%s\" exited with status "
		    "%d", mname, *exit_code);

		if (*exit_code != 0)
			goto contract_out;

		end_time = time(NULL);

		/* Give service contract remaining seconds to empty */
		if (timeout != METHOD_TIMEOUT_INFINITE)
			timeout -= (end_time - start_time);
	}

assured_kill:
	/*
	 * For stop methods, assure that the service contract has emptied
	 * before returning.
	 */
	if (type == METHOD_STOP && (!instance_is_transient_style(inst)) &&
	    !(contract_is_empty(inst->ri_i.i_primary_ctid))) {

		if (timeout != METHOD_TIMEOUT_INFINITE)
			timeout_insert(inst, inst->ri_i.i_primary_ctid,
			    timeout);

		/* Read critical events until the contract reports empty. */
		for (;;) {
			do {
				r = ct_event_read_critical(ctfd, &ctev);
			} while (r == EINTR);
			if (r != 0)
				break;

			evtype = ct_event_get_type(ctev);
			ct_event_free(ctev);
			if (evtype == CT_PR_EV_EMPTY)
				break;
		}
		if (r) {
			result = EFAULT;
			log_instance(inst, B_TRUE, "Error reading service "
			    "contract %ld.\n", inst->ri_i.i_primary_ctid);
		}

		if (timeout != METHOD_TIMEOUT_INFINITE)
			if (inst->ri_timeout->te_fired)
				result = EFAULT;

		timeout_remove(inst, inst->ri_i.i_primary_ctid);
	}

contract_out:
	/* Abandon contracts for transient methods & methods that fail. */
	transient = method_is_transient(inst, type);
	if ((transient || *exit_code != 0 || result != 0) &&
	    (restarter_is_kill_method(method) < 0))
		method_remove_contract(inst, !transient, B_TRUE);

out:
	/* Common cleanup: event fd (if opened), snapshot, method string. */
	if (ctfd >= 0)
		(void) close(ctfd);
	scf_snapshot_destroy(snap);
	free(method);
	return (result);
}
Exemple #8
0
/*
 * void method_store_contract()
 *   Fetch the id of the most recently created contract into *cid, remember
 *   it in inst as either the primary or the transient contract (depending
 *   on whether the method of the given type is transient), and write it to
 *   the repository.
 *
 *   A primary contract already recorded in inst is removed first, without
 *   abandoning it, and the new id is entered with contract_hash_store().
 *   The repository is left alone once inst->ri_mi_deleted is set.  If the
 *   repository connection is broken it is rebound and the store is tried
 *   again.  Failure to obtain the contract id, or ENOMEM/EPERM/EACCES/EROFS
 *   from the store, is fatal (uu_die()).
 */
static void
method_store_contract(restarter_inst_t *inst, int type, ctid_t *cid)
{
	boolean_t primary;
	int retry = 1;
	int err;

	/* errno is set explicitly before uu_die() is called. */
	errno = contract_latest(cid);
	if (errno != 0)
		uu_die("%s: Couldn't get new contract's id", inst->ri_i.i_fmri);

	primary = !method_is_transient(inst, type);

	if (primary) {
		if (inst->ri_i.i_primary_ctid != 0) {
			/*
			 * There was an old contract that we transferred.
			 * Remove it.
			 */
			method_remove_contract(inst, B_TRUE, B_FALSE);
		}

		if (inst->ri_i.i_primary_ctid != 0) {
			log_framework(LOG_INFO,
			    "%s: primary ctid expected to be 0 but "
			    "was set to %ld\n", inst->ri_i.i_fmri,
			    inst->ri_i.i_primary_ctid);
		}

		inst->ri_i.i_primary_ctid = *cid;
		inst->ri_i.i_primary_ctid_stopped = 0;

		contract_hash_store(*cid, inst->ri_id);
	} else {
		if (inst->ri_i.i_transient_ctid != 0) {
			log_framework(LOG_INFO,
			    "%s: transient ctid expected to be 0 but "
			    "was set to %ld\n", inst->ri_i.i_fmri,
			    inst->ri_i.i_transient_ctid);
		}

		inst->ri_i.i_transient_ctid = *cid;
	}

	/* Push the id to the repository, retrying after a reconnect. */
	while (retry && !inst->ri_mi_deleted) {
		retry = 0;

		err = restarter_store_contract(inst->ri_m_inst, *cid,
		    primary ? RESTARTER_CONTRACT_PRIMARY :
		    RESTARTER_CONTRACT_TRANSIENT);

		switch (err) {
		case 0:
			break;

		case ECANCELED:
			inst->ri_mi_deleted = B_TRUE;
			break;

		case ECONNABORTED:
			libscf_handle_rebind(
			    scf_instance_handle(inst->ri_m_inst));
			/* FALLTHROUGH */

		case EBADF:
			libscf_reget_instance(inst);
			retry = 1;
			break;

		case ENOMEM:
		case EPERM:
		case EACCES:
		case EROFS:
			uu_die("%s: Couldn't store contract id %ld",
			    inst->ri_i.i_fmri, *cid);
			/* NOTREACHED */

		case EINVAL:
		default:
			bad_error("restarter_store_contract", err);
		}
	}
}
Exemple #9
0
/*
 * void wait_remove(wait_info_t *, int)
 *   Remove the given wait_info structure from our list, performing various
 *   cleanup operations along the way.  If the direct flag is false (meaning
 *   that we are being called from restarter instance list context) and
 *   the instance should not be ignored, then notify the restarter that the
 *   associated instance has exited. If the wi_ignore flag is true then it
 *   means that the stop was initiated from within svc.startd, rather than
 *   from outside it.
 *
 *   Since we may no longer be the startd that started this process, we only are
 *   concerned with a waitpid(3C) failure if the wi_parent field is non-zero.
 *
 *   The structure is freed before returning; wi must not be used afterwards.
 */
static void
wait_remove(wait_info_t *wi, int direct)
{
	/*
	 * Start from a defined value: status is still handed to
	 * utmpx_mark_dead() below when waitpid() fails and stores nothing.
	 */
	int status = 0;
	stop_cause_t cause = RSTOP_EXIT;

	if (waitpid(wi->wi_pid, &status, 0) == -1) {
		if (wi->wi_parent)
			log_framework(LOG_INFO,
			    "instance %s waitpid failure: %s\n", wi->wi_fmri,
			    strerror(errno));
	} else {
		/* A nonzero exit code becomes an error stop cause. */
		if (WEXITSTATUS(status) != 0) {
			log_framework(LOG_NOTICE,
			    "instance %s exited with status %d\n", wi->wi_fmri,
			    WEXITSTATUS(status));
			if (WEXITSTATUS(status) == SMF_EXIT_ERR_CONFIG)
				cause = RSTOP_ERR_CFG;
			else
				cause = RSTOP_ERR_EXIT;
		}
	}

	MUTEX_LOCK(&wait_info_lock);
	if (wi->wi_fd != -1) {
		startd_close(wi->wi_fd);
		wi->wi_fd = -1;
	}
	uu_list_remove(wait_info_list, wi);
	MUTEX_UNLOCK(&wait_info_lock);

	/*
	 * Make an attempt to clear out any utmpx record associated with this
	 * PID.
	 */
	utmpx_mark_dead(wi->wi_pid, status, B_FALSE);

	if (!direct && !wi->wi_ignore) {
		/*
		 * Bind wait_hndl lazily.  Keep trying, two seconds apart,
		 * until a bound handle is obtained.
		 */
		if (wait_hndl == NULL) {
			for (wait_hndl =
			    libscf_handle_create_bound(SCF_VERSION);
			    wait_hndl == NULL;
			    wait_hndl =
			    libscf_handle_create_bound(SCF_VERSION)) {
				log_error(LOG_INFO, "[wait_remove] Unable to "
				    "bind a new repository handle: %s\n",
				    scf_strerror(scf_error()));
				(void) sleep(2);
			}
		}

		log_framework(LOG_DEBUG,
		    "wait_remove requesting stop of %s\n", wi->wi_fmri);
		(void) stop_instance_fmri(wait_hndl, wi->wi_fmri, cause);
	}

	uu_list_node_fini(wi, &wi->wi_link, wait_info_pool);
	startd_free(wi, sizeof (wait_info_t));
}
Exemple #10
0
/*
 * void *wait_thread(void *)
 *   Endless loop servicing events delivered on port_fd.  Each event carries
 *   the psinfo file descriptor and the wait_info_t registered for it.  The
 *   descriptor is either re-associated with the port (the process is still
 *   considered to be running) or the record is handed to wait_remove().
 *   The argument is unused and the function does not return.
 */
/*ARGSUSED*/
void *
wait_thread(void *args)
{
	for (;;) {
		port_event_t pe;
		wait_info_t *wi;
		int fd;
		int still_alive;

		if (port_get(port_fd, &pe, NULL) != 0) {
			if (errno == EINTR)
				continue;

			log_error(LOG_WARNING,
			    "port_get() failed with %s\n",
			    strerror(errno));
			bad_error("port_get", errno);
		}

		fd = pe.portev_object;
		wi = pe.portev_user;
		assert(wi != NULL);
		assert(fd == wi->wi_fd);

		if ((pe.portev_events & POLLHUP) == POLLHUP) {
			psinfo_t psi;

			if (lseek(fd, 0, SEEK_SET) != 0 ||
			    read(fd, &psi, sizeof (psinfo_t)) !=
			    sizeof (psinfo_t)) {
				log_framework(LOG_WARNING,
				    "couldn't get psinfo data for %s (%s); "
				    "assuming failed\n", wi->wi_fmri,
				    strerror(errno));
				still_alive = 0;
			} else {
				/*
				 * In accordance with the definition in
				 * proc(4), a process with any of these
				 * nonzero is not a zombie.
				 */
				still_alive = (psi.pr_nlwp != 0 ||
				    psi.pr_nzomb != 0 ||
				    psi.pr_lwp.pr_lwpid != 0);
			}
		} else {
			/* Without POLLHUP, only POLLERR ends the monitoring. */
			still_alive = ((pe.portev_events & POLLERR) == 0);
		}

		if (still_alive) {
			/* Reassociate and wait for the next event. */
			if (port_associate(port_fd, PORT_SOURCE_FD, fd, 0, wi))
				log_error(LOG_WARNING,
				    "port_association of %d / %s "
				    "failed\n", fd, wi->wi_fmri);
			continue;
		}

		wait_remove(wi, 0);
	}

	/*LINTED E_FUNC_HAS_NO_RETURN_STMT*/
}
Exemple #11
0
/*
 * int utmpx_mark_init(pid_t, char *)
 *   Create an INIT_PROCESS utmpx record for pid, owned by ".startd", whose
 *   ut_id starts with prefix and is padded with SC_WILDC, and append the
 *   record to WTMPX_FILE as well.
 *
 *   While st->st_initial is set, the call first waits (polling every 200ms)
 *   until utmpx_truncated becomes nonzero.  Any existing record for pid is
 *   marked dead beforehand.  makeutx() is retried once a second until it
 *   succeeds.
 *
 *   Returns the number of bytes of prefix that were placed in ut_id.  A
 *   prefix longer than ut_id is truncated to fit rather than written past
 *   the end of the field.
 */
int
utmpx_mark_init(pid_t pid, char *prefix)
{
	struct utmpx ut, *oldu;
	size_t idlen;
	int tmplen;
	int ret;

	while (st->st_initial && !utmpx_truncated)
		(void) usleep(200 * USEC_PER_MSEC);

	/*
	 * Clean out any preexisting records for this PID, as they must be
	 * inaccurate.
	 */
	utmpx_mark_dead(pid, 0, B_TRUE);

	/*
	 * Construct a new record with the appropriate prefix.
	 */
	(void) memset(&ut, 0, sizeof (ut));
	(void) strncpy(ut.ut_user, ".startd", sizeof (ut.ut_user));
	ut.ut_pid = pid;

	ut.ut_id[0] = ut.ut_id[1] = ut.ut_id[2] = ut.ut_id[3] = (char)SC_WILDC;

	/* Never copy more of the prefix than ut_id can hold. */
	idlen = strlen(prefix);
	if (idlen > sizeof (ut.ut_id))
		idlen = sizeof (ut.ut_id);

	for (ret = 0; ret < (int)idlen; ret++)
		ut.ut_id[ret] = prefix[ret];

	ut.ut_type = INIT_PROCESS;
	(void) time(&ut.ut_tv.tv_sec);

	for (;;) {
		MUTEX_LOCK(&utmpx_lock);
		setutxent();

		if ((oldu = getutxid(&ut)) != NULL) {
			/*
			 * Copy in the old "line" and "host" fields.
			 */
			bcopy(oldu->ut_line, ut.ut_line, sizeof (ut.ut_line));
			bcopy(oldu->ut_host, ut.ut_host, sizeof (ut.ut_host));
			ut.ut_syslen = (tmplen = strlen(ut.ut_host)) ?
			    min(tmplen + 1, sizeof (ut.ut_host)) : 0;
		}

		/* Leave the loop with utmpx_lock still held on success. */
		if (makeutx(&ut) != NULL)
			break;

		if (errno != EROFS)
			log_framework(LOG_WARNING,
			    "makeutx failed, retrying: %s\n", strerror(errno));

		MUTEX_UNLOCK(&utmpx_lock);

		(void) sleep(1);
	}

	updwtmpx(WTMPX_FILE, &ut);

	endutxent();
	MUTEX_UNLOCK(&utmpx_lock);

	return (ret);
}