/*
 * mac_soft_ring_bind
 *
 * Bind a soft ring worker thread to the supplied CPU.
 */
cpu_t *
mac_soft_ring_bind(mac_soft_ring_t *ringp, processorid_t cpuid)
{
	cpu_t *cp;
	boolean_t clear = B_FALSE;

	ASSERT(MUTEX_HELD(&cpu_lock));

	if (mac_soft_ring_thread_bind == 0) {
		DTRACE_PROBE1(mac__soft__ring__no__cpu__bound,
		    mac_soft_ring_t *, ringp);
		return (NULL);
	}

	cp = cpu_get(cpuid);
	if (cp == NULL || !cpu_is_online(cp))
		return (NULL);

	mutex_enter(&ringp->s_ring_lock);
	ringp->s_ring_state |= S_RING_BOUND;
	if (ringp->s_ring_cpuid != -1)
		clear = B_TRUE;
	ringp->s_ring_cpuid = cpuid;
	mutex_exit(&ringp->s_ring_lock);

	if (clear)
		thread_affinity_clear(ringp->s_ring_worker);

	DTRACE_PROBE2(mac__soft__ring__cpu__bound, mac_soft_ring_t *,
	    ringp, processorid_t, cpuid);

	thread_affinity_set(ringp->s_ring_worker, cpuid);

	return (cp);
}
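
A minimal usage sketch for mac_soft_ring_bind() follows. The caller, the CPU-selection context, and the example_retarget_soft_ring name are illustrative assumptions rather than anything taken from the source; the one requirement the source does show is that cpu_lock must be held across the call.

/*
 * Hedged sketch (not from the source): retarget a soft ring worker to a
 * CPU chosen elsewhere.  mac_soft_ring_bind() asserts that cpu_lock is
 * held, so the caller takes it around the bind.
 */
static void
example_retarget_soft_ring(mac_soft_ring_t *ringp, processorid_t new_cpu)
{
	cpu_t *cp;

	mutex_enter(&cpu_lock);
	cp = mac_soft_ring_bind(ringp, new_cpu);
	mutex_exit(&cpu_lock);

	if (cp == NULL)
		cmn_err(CE_NOTE, "soft ring not bound to CPU %d", new_cpu);
}

A NULL return means either that binding is administratively disabled (mac_soft_ring_thread_bind == 0) or that the target CPU was missing or offline.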
Example #2
/*
 * Top level routine to direct suspend/resume of a domain.
 */
void
xen_suspend_domain(void)
{
	extern void rtcsync(void);
	extern hrtime_t hres_last_tick;
	mfn_t start_info_mfn;
	ulong_t flags;
	pfn_t pfn;
	int i;

	/*
	 * Check that we are happy to suspend on this hypervisor.
	 */
	if (xen_hypervisor_supports_solaris(XEN_SUSPEND_CHECK) == 0) {
		cpr_err(CE_WARN, "Cannot suspend on this hypervisor "
		    "version: v%lu.%lu%s, need at least version v3.0.4 or "
		    "-xvm based hypervisor", XENVER_CURRENT(xv_major),
		    XENVER_CURRENT(xv_minor), XENVER_CURRENT(xv_ver));
		return;
	}

	/*
	 * XXPV - Are we definitely OK to suspend by the time we've connected
	 * the handler?
	 */

	cpr_err(CE_NOTE, "Domain suspending for save/migrate");

	SUSPEND_DEBUG("xen_suspend_domain\n");

	/*
	 * Suspend interrupts and devices.
	 * XXPV - we use suspend/resume for both save/restore domains (like
	 * sun cpr) and for migration.  It would be nice to know which case
	 * we are in, if possible.  For save/restore, where the down time may
	 * be long, we may want to do more of the things that cpr does (i.e.
	 * notify user processes, shrink the memory footprint for a faster
	 * restore, etc.).
	 */
	xen_suspend_devices();
	SUSPEND_DEBUG("xenbus_suspend\n");
	xenbus_suspend();

	pfn = hat_getpfnum(kas.a_hat, (caddr_t)xen_info);
	start_info_mfn = pfn_to_mfn(pfn);

	/*
	 * XXPV: cpu hotplug can hold this under a xenbus watch. Are we safe
	 * wrt xenbus being suspended here?
	 */
	mutex_enter(&cpu_lock);

	/*
	 * Suspend must be done on vcpu 0, as no context for other CPUs is
	 * saved.
	 *
	 * XXPV - add to taskq API ?
	 */
	thread_affinity_set(curthread, 0);
	kpreempt_disable();

	SUSPEND_DEBUG("xen_start_migrate\n");
	xen_start_migrate();
	if (ncpus > 1)
		suspend_cpus();

	/*
	 * We can grab the ec_lock as it's a spinlock with a high SPL. Hence
	 * any holder would have dropped it to get through suspend_cpus().
	 */
	mutex_enter(&ec_lock);

	/*
	 * From here on in, we can't take locks.
	 */
	SUSPEND_DEBUG("ec_suspend\n");
	ec_suspend();
	SUSPEND_DEBUG("gnttab_suspend\n");
	gnttab_suspend();

	flags = intr_clear();

	xpv_time_suspend();

	/*
	 * Currently, the hypervisor fails to bring back powered-down VCPUs.
	 * We therefore record them here to prevent later attempts to operate
	 * on them.  This has to be done *after* the very first ec_suspend().
	 */
	for (i = 1; i < ncpus; i++) {
		if (cpu[i] == NULL)
			continue;

		if (cpu_get_state(cpu[i]) == P_POWEROFF)
			CPUSET_ATOMIC_ADD(cpu_suspend_lost_set, i);
	}

	/*
	 * The dom0 save/migrate code doesn't automatically translate these
	 * MFNs into PFNs, but expects them to be PFNs, so we do the
	 * translation here.  We don't use mfn_to_pfn() because so many OS
	 * services have been disabled at this point.
	 */
	xen_info->store_mfn = mfn_to_pfn_mapping[xen_info->store_mfn];
	xen_info->console.domU.mfn =
	    mfn_to_pfn_mapping[xen_info->console.domU.mfn];

	if (CPU->cpu_m.mcpu_vcpu_info->evtchn_upcall_mask == 0) {
		prom_printf("xen_suspend_domain(): "
		    "CPU->cpu_m.mcpu_vcpu_info->evtchn_upcall_mask not set\n");
		(void) HYPERVISOR_shutdown(SHUTDOWN_crash);
	}

	if (HYPERVISOR_update_va_mapping((uintptr_t)HYPERVISOR_shared_info,
	    0, UVMF_INVLPG)) {
		prom_printf("xen_suspend_domain(): "
		    "HYPERVISOR_update_va_mapping() failed\n");
		(void) HYPERVISOR_shutdown(SHUTDOWN_crash);
	}

	SUSPEND_DEBUG("HYPERVISOR_suspend\n");

	/*
	 * At this point we suspend and sometime later resume.
	 */
	if (HYPERVISOR_suspend(start_info_mfn)) {
		prom_printf("xen_suspend_domain(): "
		    "HYPERVISOR_suspend() failed\n");
		(void) HYPERVISOR_shutdown(SHUTDOWN_crash);
	}

	/*
	 * Point HYPERVISOR_shared_info to its new value.
	 */
	if (HYPERVISOR_update_va_mapping((uintptr_t)HYPERVISOR_shared_info,
	    xen_info->shared_info | PT_NOCONSIST | PT_VALID | PT_WRITABLE,
	    UVMF_INVLPG))
		(void) HYPERVISOR_shutdown(SHUTDOWN_crash);

	if (xen_info->nr_pages != mfn_count) {
		prom_printf("xen_suspend_domain(): number of pages"
		    " changed, was 0x%lx, now 0x%lx\n", mfn_count,
		    xen_info->nr_pages);
		(void) HYPERVISOR_shutdown(SHUTDOWN_crash);
	}

	xpv_time_resume();

	cached_max_mfn = 0;

	SUSPEND_DEBUG("gnttab_resume\n");
	gnttab_resume();

	/* XXPV: add a note that this must be lockless. */
	SUSPEND_DEBUG("ec_resume\n");
	ec_resume();

	intr_restore(flags);

	if (ncpus > 1)
		resume_cpus();

	mutex_exit(&ec_lock);
	xen_end_migrate();
	mutex_exit(&cpu_lock);

	/*
	 * Now we can take locks again.
	 */

	/*
	 * Force the tick value used for tv_nsec in hres_tick() to be up to
	 * date. rtcsync() will reset the hrestime value appropriately.
	 */
	hres_last_tick = xpv_gethrtime();

	/*
	 * XXPV: we need to have resumed the CPUs, since this takes locks, but
	 * can remote CPUs see bad state?  Presumably yes.  We should probably
	 * nest the taking of todlock inside cpu_lock, or vice versa, and then
	 * provide an unlocked version.  We probably also need to call clkinitf
	 * to reset the cpu frequency and re-calibrate if we migrated to a CPU
	 * of a different speed, and to call (re)init_cpu_info to update the
	 * processor info structs and device tree info.  That code remains to
	 * be written.
	 */
	rtcsync();

	rebuild_mfn_list();

	SUSPEND_DEBUG("xenbus_resume\n");
	xenbus_resume();
	SUSPEND_DEBUG("xenbus_resume_devices\n");
	xen_resume_devices();

	thread_affinity_clear(curthread);
	kpreempt_enable();

	SUSPEND_DEBUG("finished xen_suspend_domain\n");

	/*
	 * We have restarted our suspended domain; update the hypervisor
	 * details.  NB: this must be done at the end of this function,
	 * since the domain must be completely resumed before these
	 * functions will work correctly.
	 */
	xen_set_version(XENVER_CURRENT_IDX);

	/*
	 * We can check and report a warning here, but we don't stop the
	 * resume.
	 */
	if (xen_hypervisor_supports_solaris(XEN_SUSPEND_CHECK) == 0)
		cmn_err(CE_WARN, "Found hypervisor version: v%lu.%lu%s "
		    "but need at least version v3.0.4",
		    XENVER_CURRENT(xv_major), XENVER_CURRENT(xv_minor),
		    XENVER_CURRENT(xv_ver));

	cmn_err(CE_NOTE, "domain restore/migrate completed");
}
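
SUSPEND_DEBUG() is used throughout xen_suspend_domain() but is not defined in this excerpt. Below is a plausible minimal definition; the xen_suspend_debug flag and the use of prom_printf() are assumptions, and the real macro may well differ.

/*
 * Assumed, illustrative definition of SUSPEND_DEBUG (not from this
 * excerpt): emit suspend/resume progress messages only when a debug
 * flag is set, via prom_printf() since normal console services may be
 * unavailable during parts of the suspend path.
 */
extern int xen_suspend_debug;

#define	SUSPEND_DEBUG(...)					\
	do {							\
		if (xen_suspend_debug)				\
			prom_printf(__VA_ARGS__);		\
	} while (0)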
Example #3
kern_return_t
thread_policy_set_internal(
	thread_t				thread,
	thread_policy_flavor_t	flavor,
	thread_policy_t			policy_info,
	mach_msg_type_number_t	count)
{
	kern_return_t			result = KERN_SUCCESS;
	spl_t					s;

	thread_mtx_lock(thread);
	if (!thread->active) {
		thread_mtx_unlock(thread);

		return (KERN_TERMINATED);
	}

	switch (flavor) {

	case THREAD_EXTENDED_POLICY:
	{
		boolean_t				timeshare = TRUE;

		if (count >= THREAD_EXTENDED_POLICY_COUNT) {
			thread_extended_policy_t	info;

			info = (thread_extended_policy_t)policy_info;
			timeshare = info->timeshare;
		}

		sched_mode_t mode = (timeshare == TRUE) ? TH_MODE_TIMESHARE : TH_MODE_FIXED;

		s = splsched();
		thread_lock(thread);

		thread_set_user_sched_mode_and_recompute_pri(thread, mode);

		thread_unlock(thread);
		splx(s);

		sfi_reevaluate(thread);

		break;
	}

	case THREAD_TIME_CONSTRAINT_POLICY:
	{
		thread_time_constraint_policy_t		info;

		if (count < THREAD_TIME_CONSTRAINT_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_time_constraint_policy_t)policy_info;
		if (	info->constraint < info->computation	||
				info->computation > max_rt_quantum		||
				info->computation < min_rt_quantum		) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		s = splsched();
		thread_lock(thread);

		thread->realtime.period = info->period;
		thread->realtime.computation = info->computation;
		thread->realtime.constraint = info->constraint;
		thread->realtime.preemptible = info->preemptible;

		thread_set_user_sched_mode_and_recompute_pri(thread, TH_MODE_REALTIME);

		thread_unlock(thread);
		splx(s);

		sfi_reevaluate(thread);

		break;
	}

	case THREAD_PRECEDENCE_POLICY:
	{
		thread_precedence_policy_t		info;

		if (count < THREAD_PRECEDENCE_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}
		info = (thread_precedence_policy_t)policy_info;

		s = splsched();
		thread_lock(thread);

		thread->importance = info->importance;

		thread_recompute_priority(thread);

		thread_unlock(thread);
		splx(s);

		break;
	}

	case THREAD_AFFINITY_POLICY:
	{
		thread_affinity_policy_t	info;

		if (!thread_affinity_is_supported()) {
			result = KERN_NOT_SUPPORTED;
			break;
		}
		if (count < THREAD_AFFINITY_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_affinity_policy_t) policy_info;
		/*
		 * Unlock the thread mutex here and
		 * return directly after calling thread_affinity_set().
		 * This is necessary for correct lock ordering because
		 * thread_affinity_set() takes the task lock.
		 */
		thread_mtx_unlock(thread);
		return thread_affinity_set(thread, info->affinity_tag);
	}

	case THREAD_THROUGHPUT_QOS_POLICY:
	{
		thread_throughput_qos_policy_t info = (thread_throughput_qos_policy_t) policy_info;
		int tqos;
		
		if (count < THREAD_THROUGHPUT_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if ((result = qos_throughput_policy_validate(info->thread_throughput_qos_tier)) !=
		    KERN_SUCCESS) {
			break;
		}

		tqos = qos_extract(info->thread_throughput_qos_tier);
		thread->effective_policy.t_through_qos = tqos;
	}
		break;

	case THREAD_LATENCY_QOS_POLICY:
	{
		thread_latency_qos_policy_t info = (thread_latency_qos_policy_t) policy_info;
		int lqos;
		
		if (count < THREAD_LATENCY_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if ((result = qos_latency_policy_validate(info->thread_latency_qos_tier)) !=
		    KERN_SUCCESS) {
			break;
		}

		lqos = qos_extract(info->thread_latency_qos_tier);
/* The expected use cases (opt-in) of per-thread latency QoS would seem to
 * preclude any requirement at present to re-evaluate timers on a thread level
 * latency QoS change.
 */
		thread->effective_policy.t_latency_qos = lqos;

	}
		break;

	case THREAD_QOS_POLICY:
	case THREAD_QOS_POLICY_OVERRIDE:
	{
		thread_qos_policy_t info = (thread_qos_policy_t)policy_info;

		if (count < THREAD_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if (info->qos_tier < 0 || info->qos_tier >= THREAD_QOS_LAST) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if (info->tier_importance > 0 || info->tier_importance < THREAD_QOS_MIN_TIER_IMPORTANCE) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if (info->qos_tier == THREAD_QOS_UNSPECIFIED && info->tier_importance != 0) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/*
		 * Going into task policy requires the task mutex,
		 * because of the way synchronization against the IO policy
		 * subsystem works.
		 *
		 * We need to move thread policy to the thread mutex instead.
		 * <rdar://problem/15831652> separate thread policy from task policy
		 */

		if (flavor == THREAD_QOS_POLICY_OVERRIDE) {
			int strongest_override = info->qos_tier;

			if (info->qos_tier != THREAD_QOS_UNSPECIFIED &&
			    thread->requested_policy.thrp_qos_override != THREAD_QOS_UNSPECIFIED)
				strongest_override = MAX(thread->requested_policy.thrp_qos_override, info->qos_tier);

			thread_mtx_unlock(thread);

			/* There is a race here. To be closed in <rdar://problem/15831652> separate thread policy from task policy */

			proc_set_task_policy(thread->task, thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, strongest_override);

			return (result);
		}

		thread_mtx_unlock(thread);

		proc_set_task_policy2(thread->task, thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO, info->qos_tier, -info->tier_importance);

		thread_mtx_lock(thread);
		if (!thread->active) {
			thread_mtx_unlock(thread);
			return (KERN_TERMINATED);
		}
		
		break;
	}

	default:
		result = KERN_INVALID_ARGUMENT;
		break;
	}

	thread_mtx_unlock(thread);
	return (result);
}
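
For context, the THREAD_TIME_CONSTRAINT_POLICY case above is normally reached from user space through the thread_policy_set() Mach call. The sketch below is a hedged illustration of how such a request is typically packaged; the helper name and the idea of passing nanosecond budgets are assumptions, not part of the source. Times are converted from nanoseconds to mach_absolute_time() units before being handed to the kernel, which then applies the computation/constraint validation shown in the case above.

#include <mach/mach.h>
#include <mach/thread_policy.h>
#include <mach/mach_time.h>

/*
 * Hedged user-space sketch: ask the scheduler to treat the calling
 * thread as real-time.  The nanosecond budgets are illustrative only.
 */
static kern_return_t
example_make_self_realtime(uint64_t period_ns, uint64_t computation_ns,
    uint64_t constraint_ns)
{
	mach_timebase_info_data_t tb;
	thread_time_constraint_policy_data_t policy;

	mach_timebase_info(&tb);

	/* Convert nanoseconds to mach_absolute_time() units. */
	policy.period = (uint32_t)(period_ns * tb.denom / tb.numer);
	policy.computation = (uint32_t)(computation_ns * tb.denom / tb.numer);
	policy.constraint = (uint32_t)(constraint_ns * tb.denom / tb.numer);
	policy.preemptible = TRUE;

	return (thread_policy_set(mach_thread_self(),
	    THREAD_TIME_CONSTRAINT_POLICY, (thread_policy_t)&policy,
	    THREAD_TIME_CONSTRAINT_POLICY_COUNT));
}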
Example #4
kern_return_t
thread_policy_set(
	thread_t				thread,
	thread_policy_flavor_t	flavor,
	thread_policy_t			policy_info,
	mach_msg_type_number_t	count)
{
	kern_return_t			result = KERN_SUCCESS;
	spl_t					s;

	if (thread == THREAD_NULL)
		return (KERN_INVALID_ARGUMENT);

	thread_mtx_lock(thread);
	if (!thread->active) {
		thread_mtx_unlock(thread);

		return (KERN_TERMINATED);
	}

	if (thread->static_param) {
		thread_mtx_unlock(thread);

		return (KERN_SUCCESS);
	}

	switch (flavor) {

	case THREAD_EXTENDED_POLICY:
	{
		boolean_t				timeshare = TRUE;

		if (count >= THREAD_EXTENDED_POLICY_COUNT) {
			thread_extended_policy_t	info;

			info = (thread_extended_policy_t)policy_info;
			timeshare = info->timeshare;
		}

		s = splsched();
		thread_lock(thread);

		if (!(thread->sched_mode & TH_MODE_FAILSAFE)) {
			integer_t	oldmode = (thread->sched_mode & TH_MODE_TIMESHARE);

			thread->sched_mode &= ~TH_MODE_REALTIME;

			if (timeshare && !oldmode) {
				thread->sched_mode |= TH_MODE_TIMESHARE;

				if ((thread->state & (TH_RUN|TH_IDLE)) == TH_RUN)
					sched_share_incr();
			}
			else
			if (!timeshare && oldmode) {
				thread->sched_mode &= ~TH_MODE_TIMESHARE;

				if ((thread->state & (TH_RUN|TH_IDLE)) == TH_RUN)
					sched_share_decr();
			}

			thread_recompute_priority(thread);
		}
		else {
			thread->safe_mode &= ~TH_MODE_REALTIME;

			if (timeshare)
				thread->safe_mode |= TH_MODE_TIMESHARE;
			else
				thread->safe_mode &= ~TH_MODE_TIMESHARE;
		}

		thread_unlock(thread);
		splx(s);

		break;
	}

	case THREAD_TIME_CONSTRAINT_POLICY:
	{
		thread_time_constraint_policy_t		info;

		if (count < THREAD_TIME_CONSTRAINT_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_time_constraint_policy_t)policy_info;
		if (	info->constraint < info->computation	||
				info->computation > max_rt_quantum		||
				info->computation < min_rt_quantum		) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		s = splsched();
		thread_lock(thread);

		thread->realtime.period = info->period;
		thread->realtime.computation = info->computation;
		thread->realtime.constraint = info->constraint;
		thread->realtime.preemptible = info->preemptible;

		if (!(thread->sched_mode & TH_MODE_FAILSAFE)) {
			if (thread->sched_mode & TH_MODE_TIMESHARE) {
				thread->sched_mode &= ~TH_MODE_TIMESHARE;

				if ((thread->state & (TH_RUN|TH_IDLE)) == TH_RUN)
					sched_share_decr();
			}
			thread->sched_mode |= TH_MODE_REALTIME;
			thread_recompute_priority(thread);
		}
		else {
			thread->safe_mode &= ~TH_MODE_TIMESHARE;
			thread->safe_mode |= TH_MODE_REALTIME;
		}

		thread_unlock(thread);
		splx(s);

		break;
	}

	case THREAD_PRECEDENCE_POLICY:
	{
		thread_precedence_policy_t		info;

		if (count < THREAD_PRECEDENCE_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_precedence_policy_t)policy_info;

		s = splsched();
		thread_lock(thread);

		thread->importance = info->importance;

		thread_recompute_priority(thread);

		thread_unlock(thread);
		splx(s);

		break;
	}

	case THREAD_AFFINITY_POLICY:
	{
		thread_affinity_policy_t	info;

		if (!thread_affinity_is_supported()) {
			result = KERN_NOT_SUPPORTED;
			break;
		}
		if (count < THREAD_AFFINITY_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_affinity_policy_t) policy_info;
		/*
		 * Unlock the thread mutex here and
		 * return directly after calling thread_affinity_set().
		 * This is necessary for correct lock ordering because
		 * thread_affinity_set() takes the task lock.
		 */
		thread_mtx_unlock(thread);
		return thread_affinity_set(thread, info->affinity_tag);
	}
	default:
		result = KERN_INVALID_ARGUMENT;
		break;
	}

	thread_mtx_unlock(thread);

	return (result);
}
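
The THREAD_AFFINITY_POLICY case in both versions above is reached from user space the same way; a minimal hedged sketch follows (the helper name is an assumption). Threads that share the same non-zero affinity tag are hinted to be scheduled near each other, e.g. on cores sharing a cache, where the platform supports it.

#include <mach/mach.h>
#include <mach/thread_policy.h>

/*
 * Hedged user-space sketch: apply an affinity tag to a thread.  Threads
 * sharing a non-zero tag are hinted to run near each other (e.g. on a
 * shared cache), subject to thread_affinity_is_supported() in-kernel.
 */
static kern_return_t
example_tag_thread(mach_port_t thread, integer_t tag)
{
	thread_affinity_policy_data_t policy = { .affinity_tag = tag };

	return (thread_policy_set(thread, THREAD_AFFINITY_POLICY,
	    (thread_policy_t)&policy, THREAD_AFFINITY_POLICY_COUNT));
}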
Example #5
/*
 * Top level routine to direct suspend/resume of a domain.
 */
void
xen_suspend_domain(void)
{
	extern void rtcsync(void);
	extern void ec_resume(void);
	extern kmutex_t ec_lock;
	struct xen_add_to_physmap xatp;
	ulong_t flags;
	int err;

	cmn_err(CE_NOTE, "Domain suspending for save/migrate");

	SUSPEND_DEBUG("xen_suspend_domain\n");

	/*
	 * We only want to suspend the PV devices, since the emulated devices
	 * are suspended by saving the emulated device state.  The PV devices
	 * are all children of the xpvd nexus device.  So we search the
	 * device tree for the xpvd node to use as the root of the tree to
	 * be suspended.
	 */
	if (xpvd_dip == NULL)
		ddi_walk_devs(ddi_root_node(), check_xpvd, NULL);

	/*
	 * suspend interrupts and devices
	 */
	if (xpvd_dip != NULL)
		(void) xen_suspend_devices(ddi_get_child(xpvd_dip));
	else
		cmn_err(CE_WARN, "No PV devices found to suspend");
	SUSPEND_DEBUG("xenbus_suspend\n");
	xenbus_suspend();

	mutex_enter(&cpu_lock);

	/*
	 * Suspend on vcpu 0
	 */
	thread_affinity_set(curthread, 0);
	kpreempt_disable();

	if (ncpus > 1)
		pause_cpus(NULL, NULL);
	/*
	 * We can grab the ec_lock as it's a spinlock with a high SPL. Hence
	 * any holder would have dropped it to get through pause_cpus().
	 */
	mutex_enter(&ec_lock);

	/*
	 * From here on in, we can't take locks.
	 */

	flags = intr_clear();

	SUSPEND_DEBUG("HYPERVISOR_suspend\n");
	/*
	 * At this point we suspend and sometime later resume.
	 * Note that this call may return with an indication of a cancelled
	 * suspend; for now, no matter what it returns, we do a full resume
	 * of all suspended drivers, etc.
	 */
	(void) HYPERVISOR_shutdown(SHUTDOWN_suspend);

	/*
	 * Point HYPERVISOR_shared_info to the proper place.
	 */
	xatp.domid = DOMID_SELF;
	xatp.idx = 0;
	xatp.space = XENMAPSPACE_shared_info;
	xatp.gpfn = xen_shared_info_frame;
	if ((err = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) != 0)
		panic("Could not set shared_info page. error: %d", err);

	SUSPEND_DEBUG("gnttab_resume\n");
	gnttab_resume();

	SUSPEND_DEBUG("ec_resume\n");
	ec_resume();

	intr_restore(flags);

	if (ncpus > 1)
		start_cpus();

	mutex_exit(&ec_lock);
	mutex_exit(&cpu_lock);

	/*
	 * Now we can take locks again.
	 */

	rtcsync();

	SUSPEND_DEBUG("xenbus_resume\n");
	xenbus_resume();
	SUSPEND_DEBUG("xen_resume_devices\n");
	if (xpvd_dip != NULL)
		(void) xen_resume_devices(ddi_get_child(xpvd_dip), 0);

	thread_affinity_clear(curthread);
	kpreempt_enable();

	SUSPEND_DEBUG("finished xen_suspend_domain\n");

	cmn_err(CE_NOTE, "domain restore/migrate completed");
}
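
The check_xpvd() walker handed to ddi_walk_devs() above is not part of this excerpt. A hedged sketch of what such a walker might look like follows; the exact node-matching logic is an assumption, and xpvd_dip is taken to be the file-scope dev_info_t pointer used in the function above.

/*
 * Assumed, illustrative ddi_walk_devs() callback: remember the xpvd
 * nexus node so that its children (the PV devices) can be suspended
 * and resumed.  The real check_xpvd() may match the node differently.
 */
/* ARGSUSED */
static int
check_xpvd(dev_info_t *dip, void *arg)
{
	if (strcmp(ddi_node_name(dip), "xpvd") == 0) {
		xpvd_dip = dip;
		return (DDI_WALK_TERMINATE);
	}

	return (DDI_WALK_CONTINUE);
}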