Example 1
/* Flush inflight message buffers. */
int
xc_flush_cpu(struct cpu *cpup)
{
	int i;

	ASSERT((cpup->cpu_flags & CPU_READY) == 0);

	/*
	 * Pause all working CPUs, which ensures that there's no CPU in
	 * function xc_common().
	 * This works around a race window in xc_common() between checking
	 * the CPU_READY flag and increasing the working item count.
	 */
	pause_cpus(cpup);
	start_cpus();

	for (i = 0; i < XC_FLUSH_MAX_WAITS; i++) {
		if (cpup->cpu_m.xc_work_cnt == 0) {
			break;
		}
		DELAY(1);
	}
	for (; i < XC_FLUSH_MAX_WAITS; i++) {
		if (!BT_TEST(xc_priority_set, cpup->cpu_id)) {
			break;
		}
		DELAY(1);
	}

	return (i >= XC_FLUSH_MAX_WAITS ? ETIME : 0);
}
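
The pause_cpus()/start_cpus() pair above is used purely as a barrier: once every other CPU has gone through the pause handshake, no CPU can still be inside the racy window in xc_common(), and the remaining work can simply be polled with a bounded wait. Below is a minimal sketch of that idiom; it assumes the caller holds cpu_lock, and MY_FLUSH_MAX_WAITS and the counter argument are hypothetical stand-ins, not part of the original code. (The one-argument pause_cpus() used in most of these examples became a two-argument call in later code, as in the last example.)

#define	MY_FLUSH_MAX_WAITS	50000		/* hypothetical bound */

static int
my_flush(volatile uint_t *work_cnt)
{
	int i;

	ASSERT(MUTEX_HELD(&cpu_lock));

	/* Barrier: nobody is left inside the racy window after this. */
	pause_cpus(NULL);
	start_cpus();

	/* Bounded poll for the in-flight work to drain. */
	for (i = 0; i < MY_FLUSH_MAX_WAITS; i++) {
		if (*work_cnt == 0)
			return (0);
		DELAY(1);
	}
	return (ETIME);
}
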
Example 2
static void
cpu_idle_stop(cpu_t *cp)
{
	cpupm_mach_state_t *mach_state =
	    (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state);
	cpu_acpi_handle_t handle = mach_state->ms_acpi_handle;
	cpu_acpi_cstate_t *cstate;
	uint_t cpu_max_cstates, i;

	/*
	 * Park the CPUs in a safe place so that we can disable
	 * deep C-states on them.
	 */
	pause_cpus(NULL);
	cp->cpu_m.mcpu_idle_cpu = non_deep_idle_cpu;
	start_cpus();

	cstate = (cpu_acpi_cstate_t *)CPU_ACPI_CSTATES(handle);
	if (cstate) {
		cpu_max_cstates = cpu_acpi_get_max_cstates(handle);

		for (i = CPU_ACPI_C1; i <= cpu_max_cstates; i++) {
			if (cstate->cs_ksp != NULL)
				kstat_delete(cstate->cs_ksp);
			cstate++;
		}
	}
	cpupm_free_ms_cstate(cp);
	cpupm_remove_domains(cp, CPUPM_C_STATES, &cpupm_cstate_domains);
	cpu_acpi_free_cstate_data(handle);
}
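
Here the pause window is only as wide as a single pointer store: pausing guarantees that no CPU is currently executing the old idle routine when mcpu_idle_cpu is redirected to non_deep_idle_cpu, while everything that may block (kstat_delete() and the ACPI teardown) happens after start_cpus(). A distilled sketch of that shape follows; it assumes the idle hook has the void (*)(void) prototype suggested by the example and that the caller holds cpu_lock.

static void
my_set_idle_hook(cpu_t *cp, void (*new_idle)(void))
{
	ASSERT(MUTEX_HELD(&cpu_lock));

	pause_cpus(NULL);			/* nobody is inside the old hook */
	cp->cpu_m.mcpu_idle_cpu = new_idle;	/* single word store */
	start_cpus();

	/* blocking teardown of the old hook's state goes here */
}
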
Example 3
/*
 * restart paused slave cpus
 */
void
i_cpr_machdep_setup(void)
{
	if (ncpus > 1) {
		CPR_DEBUG(CPR_DEBUG1, "MP restarted...\n");
		mutex_enter(&cpu_lock);
		start_cpus();
		mutex_exit(&cpu_lock);
	}
}
Example 4
/*
 * This routine is used to start a previously powered off processor.
 * Note that restarted cpus are initialized into the offline state.
 */
void
restart_other_cpu(int cpuid)
{
	struct cpu *cp;
	kthread_id_t tp;
	caddr_t	sp;
	extern void idle();

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(cpuid < NCPU && cpu[cpuid] != NULL);

	/*
	 * Obtain pointer to the appropriate cpu structure.
	 */
	cp = cpu[cpuid];

	common_startup_init(cp, cpuid);

	/*
	 * The idle thread's t_lock is held while the idle thread is
	 * suspended.  Manually clear t_lock so that the suspended idle
	 * thread can be resumed, and adjust its PC so that it retries
	 * the idle loop.
	 */
	cp->cpu_intr_actv = 0;	/* clear the value from previous life */
	cp->cpu_m.mutex_ready = 0; /* we are not ready yet */
	lock_clear(&cp->cpu_idle_thread->t_lock);
	tp = cp->cpu_idle_thread;

	sp = tp->t_stk;
	tp->t_sp = (uintptr_t)((struct rwindow *)sp - 1) - STACK_BIAS;
	tp->t_pc = (uintptr_t)idle - 8;

	/*
	 * restart the cpu now
	 */
	promsafe_pause_cpus();
	start_cpu(cpuid, warm_flag_set);
	start_cpus();

	/* call cmn_err outside pause_cpus/start_cpus to avoid deadlock */
	cmn_err(CE_CONT, "!cpu%d initialization complete - restarted\n",
	    cpuid);
}
Example 5
/*
 * This routine is a special form of pause_cpus().  It ensures that
 * prom functions are callable while the cpus are paused.
 */
void
promsafe_pause_cpus(void)
{
	pause_cpus(NULL);

	/* If some other cpu is entering or is in the prom, spin */
	while (prom_cpu || mutex_owner(&prom_mutex)) {

		start_cpus();
		mutex_enter(&prom_mutex);

		/* Wait for other cpu to exit prom */
		while (prom_cpu)
			cv_wait(&prom_cv, &prom_mutex);

		mutex_exit(&prom_mutex);
		pause_cpus(NULL);
	}

	/* At this point all cpus are paused and none are in the prom */
}
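
The loop above is the general shape of a conditional pause: after pause_cpus() succeeds, a condition that must not hold while CPUs are parked is re-checked, and if it does hold, the pause is backed out, the code waits for the condition to clear, and it tries again. The sketch below restates that shape with hypothetical names (my_busy, my_lock and my_cv stand in for prom_cpu, prom_mutex and prom_cv); it assumes the caller holds cpu_lock.

static volatile int my_busy;		/* hypothetical "unsafe" condition */
static kmutex_t my_lock;
static kcondvar_t my_cv;

static void
my_safe_pause(void)
{
	pause_cpus(NULL);

	while (my_busy) {
		/* Unsafe to stay paused; back out and wait. */
		start_cpus();

		mutex_enter(&my_lock);
		while (my_busy)
			cv_wait(&my_cv, &my_lock);
		mutex_exit(&my_lock);

		pause_cpus(NULL);
	}

	/* All CPUs are paused and the condition is known not to hold. */
}
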
Example 6
void
mp_leave_barrier(void)
{
	int i;

	ASSERT(MUTEX_HELD(&cpu_lock));

	for (i = 0; i < NCPU; i++) {
		cpu_t *cp = cpu_get(i);

		if (cp == NULL || cp == CPU)
			continue;

		switch (cpu_phase[i]) {
		/*
		 * If we see a CPU in one of these phases, something has
		 * gone badly wrong with the guarantees
		 * mp_enter_barrier() is supposed to provide.  Rather
		 * than attempt to stumble along (and since we can't
		 * panic properly in this context), we tell the
		 * hypervisor we've crashed.
		 */
		case CPU_PHASE_NONE:
		case CPU_PHASE_WAIT_SAFE:
			(void) HYPERVISOR_shutdown(SHUTDOWN_crash);
			break;

		case CPU_PHASE_POWERED_OFF:
			break;

		case CPU_PHASE_SAFE:
			cpu_phase[i] = CPU_PHASE_NONE;
		}
	}

	start_cpus();
}
Example 7
/*
 * Promote PG above its current parent.
 * This is only legal if PG has an equal or greater number of CPUs than its
 * parent.
 *
 * This routine operates on the CPU specific processor group data (for the CPUs
 * in the PG being promoted), and may be invoked from a context where one CPU's
 * PG data is under construction. In this case the argument "pgdata", if not
 * NULL, is a reference to the CPU's under-construction PG data.
 */
static void
cmt_hier_promote(pg_cmt_t *pg, cpu_pg_t *pgdata)
{
	pg_cmt_t	*parent;
	group_t		*children;
	cpu_t		*cpu;
	group_iter_t	iter;
	pg_cpu_itr_t	cpu_iter;
	int		r;
	int		err;

	ASSERT(MUTEX_HELD(&cpu_lock));

	parent = pg->cmt_parent;
	if (parent == NULL) {
		/*
		 * Nothing to do
		 */
		return;
	}

	ASSERT(PG_NUM_CPUS((pg_t *)pg) >= PG_NUM_CPUS((pg_t *)parent));

	/*
	 * We're changing around the hierarchy, which is actively traversed
	 * by the dispatcher. Pause CPUs to ensure exclusivity.
	 */
	pause_cpus(NULL);

	/*
	 * If necessary, update the parent's sibling set, replacing parent
	 * with PG.
	 */
	if (parent->cmt_siblings) {
		if (group_remove(parent->cmt_siblings, parent, GRP_NORESIZE)
		    != -1) {
			r = group_add(parent->cmt_siblings, pg, GRP_NORESIZE);
			ASSERT(r != -1);
		}
	}

	/*
	 * If the parent is at the top of the hierarchy, replace its entry
	 * in the root lgroup's group of top level PGs.
	 */
	if (parent->cmt_parent == NULL &&
	    parent->cmt_siblings != &cmt_root->cl_pgs) {
		if (group_remove(&cmt_root->cl_pgs, parent, GRP_NORESIZE)
		    != -1) {
			r = group_add(&cmt_root->cl_pgs, pg, GRP_NORESIZE);
			ASSERT(r != -1);
		}
	}

	/*
	 * We assume (and therefore assert) that the PG being promoted is an
	 * only child of its parent. Update the parent's children set
	 * replacing PG's entry with the parent (since the parent is becoming
	 * the child). Then have PG and the parent swap children sets.
	 */
	ASSERT(GROUP_SIZE(parent->cmt_children) <= 1);
	if (group_remove(parent->cmt_children, pg, GRP_NORESIZE) != -1) {
		r = group_add(parent->cmt_children, parent, GRP_NORESIZE);
		ASSERT(r != -1);
	}

	children = pg->cmt_children;
	pg->cmt_children = parent->cmt_children;
	parent->cmt_children = children;

	/*
	 * Update the sibling references for PG and its parent
	 */
	pg->cmt_siblings = parent->cmt_siblings;
	parent->cmt_siblings = pg->cmt_children;

	/*
	 * Update any cached lineages in the per CPU pg data.
	 */
	PG_CPU_ITR_INIT(pg, cpu_iter);
	while ((cpu = pg_cpu_next(&cpu_iter)) != NULL) {
		int		idx;
		pg_cmt_t	*cpu_pg;
		cpu_pg_t	*pgd;	/* CPU's PG data */

		/*
		 * A CPU whose lineage is under construction still
		 * references the bootstrap CPU PG data structure.
		 */
		if (pg_cpu_is_bootstrapped(cpu))
			pgd = pgdata;
		else
			pgd = cpu->cpu_pg;

		/*
		 * Iterate over the CPU's PGs updating the children
		 * of the PG being promoted, since they have a new parent.
		 */
		group_iter_init(&iter);
		while ((cpu_pg = group_iterate(&pgd->cmt_pgs, &iter)) != NULL) {
			if (cpu_pg->cmt_parent == pg) {
				cpu_pg->cmt_parent = parent;
			}
		}

		/*
		 * Update the CMT load balancing lineage
		 */
		if ((idx = group_find(&pgd->cmt_pgs, (void *)pg)) == -1) {
			/*
			 * Unless this is the CPU whose lineage is being
			 * constructed, the PG being promoted should be
			 * in the lineage.
			 */
			ASSERT(pg_cpu_is_bootstrapped(cpu));
			continue;
		}

		ASSERT(idx > 0);
		ASSERT(GROUP_ACCESS(&pgd->cmt_pgs, idx - 1) == parent);

		/*
		 * Have the child and the parent swap places in the CPU's
		 * lineage
		 */
		group_remove_at(&pgd->cmt_pgs, idx);
		group_remove_at(&pgd->cmt_pgs, idx - 1);
		err = group_add_at(&pgd->cmt_pgs, parent, idx);
		ASSERT(err == 0);
		err = group_add_at(&pgd->cmt_pgs, pg, idx - 1);
		ASSERT(err == 0);
	}

	/*
	 * Update the parent references for PG and its parent
	 */
	pg->cmt_parent = parent->cmt_parent;
	parent->cmt_parent = pg;

	start_cpus();
}
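
The entire rewiring in cmt_hier_promote() happens inside one pause window because the dispatcher traverses the PG hierarchy without taking cpu_lock. The fragment below reduces that to its core: a parent/child swap in a linked hierarchy that is only legal while every other CPU is parked. my_node_t and my_promote() are hypothetical; the real code additionally fixes up sibling groups and the per-CPU cached lineages, as shown above.

typedef struct my_node {
	struct my_node	*parent;
	struct my_node	*child;		/* single child, as asserted above */
} my_node_t;

static void
my_promote(my_node_t *node)
{
	my_node_t *parent = node->parent;

	if (parent == NULL)
		return;			/* already at the top */

	ASSERT(MUTEX_HELD(&cpu_lock));

	pause_cpus(NULL);		/* dispatcher walks this lock-free */

	/* node and its parent trade places */
	node->parent = parent->parent;
	parent->parent = node;
	parent->child = node->child;
	node->child = parent;

	start_cpus();
}
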
Example 8
void
dr_resume(dr_sr_handle_t *srh)
{
	dr_handle_t	*handle;

	handle = srh->sr_dr_handlep;

	if (srh->sr_suspend_state < DR_SRSTATE_FULL) {
		/*
		 * Update the signature block.
		 * If cpus are not paused, this can be done now.
		 * See comments below.
		 */
		CPU_SIGNATURE(OS_SIG, SIGST_RESUME_INPROGRESS, SIGSUBST_NULL,
		    CPU->cpu_id);
	}

	switch (srh->sr_suspend_state) {
	case DR_SRSTATE_FULL:

		ASSERT(MUTEX_HELD(&cpu_lock));

		/*
		 * Prevent false alarm in tod_validate() due to tod
		 * value change between suspend and resume
		 */
		mutex_enter(&tod_lock);
		tod_fault_reset();
		mutex_exit(&tod_lock);

		dr_enable_intr(); 	/* enable intr & clock */

		start_cpus();
		mutex_exit(&cpu_lock);

		/*
		 * Update the signature block.
		 * This must not be done while cpus are paused, since on
		 * Starcat the cpu signature update acquires an adaptive
		 * mutex in the iosram driver. Blocking with cpus paused
		 * can lead to deadlock.
		 */
		CPU_SIGNATURE(OS_SIG, SIGST_RESUME_INPROGRESS, SIGSUBST_NULL,
		    CPU->cpu_id);

		/*
		 * If we suspended hw watchdog at suspend,
		 * re-enable it now.
		 */
		if (srh->sr_flags & (SR_FLAG_WATCHDOG)) {
			mutex_enter(&tod_lock);
			tod_ops.tod_set_watchdog_timer(
				watchdog_timeout_seconds);
			mutex_exit(&tod_lock);
		}

		/*
		 * This should only be called if drmach_suspend_last()
		 * was called and state transitioned to DR_SRSTATE_FULL
		 * to prevent resume attempts on device instances that
		 * were not previously suspended.
		 */
		drmach_resume_first();

		/* FALLTHROUGH */

	case DR_SRSTATE_DRIVER:
		/*
		 * resume drivers
		 */
		srh->sr_err_idx = 0;

		/* no parent dip to hold busy */
		dr_resume_devices(ddi_root_node(), srh);

		if (srh->sr_err_idx && srh->sr_dr_handlep) {
			(srh->sr_dr_handlep)->h_err = drerr_int(ESBD_RESUME,
				srh->sr_err_ints, srh->sr_err_idx, 1);
		}

		/*
		 * resume the lock manager
		 */
		lm_cprresume();

		/* FALLTHROUGH */

	case DR_SRSTATE_USER:
		/*
		 * finally, resume user threads
		 */
		if (!dr_skip_user_threads) {
			prom_printf("DR: resuming user threads...\n");
			dr_start_user_threads();
		}
		/* FALLTHROUGH */

	case DR_SRSTATE_BEGIN:
	default:
		/*
		 * let those who care know that we've just resumed
		 */
		PR_QR("sending SIGTHAW...\n");
		dr_signal_user(SIGTHAW);
		break;
	}

	i_ndi_allow_device_tree_changes(handle->h_ndi);

	/*
	 * update the signature block
	 */
	CPU_SIGNATURE(OS_SIG, SIGST_RUN, SIGSUBST_NULL, CPU->cpu_id);

	prom_printf("DR: resume COMPLETED\n");
}
Example 9
void
sbdp_resume(sbdp_sr_handle_t *srh)
{
	/*
	 * update the signature block
	 */
	CPU_SIGNATURE(OS_SIG, SIGST_RESUME_INPROGRESS, SIGSUBST_NULL,
	    CPU->cpu_id);

	switch (SR_STATE(srh)) {
	case SBDP_SRSTATE_FULL:

		ASSERT(MUTEX_HELD(&cpu_lock));

		/*
		 * Prevent false alarm in tod_validate() due to tod
		 * value change between suspend and resume
		 */
		mutex_enter(&tod_lock);
		tod_fault_reset();
		mutex_exit(&tod_lock);

		sbdp_enable_intr(); 	/* enable intr & clock */

		/*
		 * release all the other cpus
		 * using start_cpus() vice sbdp_release_cpus()
		 */
		start_cpus();
		mutex_exit(&cpu_lock);

		/*
		 * If we suspended hw watchdog at suspend,
		 * re-enable it now.
		 */
		if (SR_CHECK_FLAG(srh, SR_FLAG_WATCHDOG)) {
			mutex_enter(&tod_lock);
			tod_ops.tod_set_watchdog_timer(
				saved_watchdog_seconds);
			mutex_exit(&tod_lock);
		}

		/* FALLTHROUGH */

	case SBDP_SRSTATE_DRIVER:
		/*
		 * resume devices: root node doesn't have to
		 * be held in any way.
		 */
		sbdp_resume_devices(ddi_root_node(), srh);

		/*
		 * resume the lock manager
		 */
		lm_cprresume();

		/* FALLTHROUGH */

	case SBDP_SRSTATE_USER:
		/*
		 * finally, resume user threads
		 */
		if (!sbdp_skip_user_threads) {
			SBDP_DBG_QR("DR: resuming user threads...\n");
			sbdp_start_user_threads();
		}
		/* FALLTHROUGH */

	case SBDP_SRSTATE_BEGIN:
	default:
		/*
		 * let those who care know that we've just resumed
		 */
		SBDP_DBG_QR("sending SIGTHAW...\n");
		sbdp_signal_user(SIGTHAW);
		break;
	}

	/*
	 * update the signature block
	 */
	CPU_SIGNATURE(OS_SIG, SIGST_RUN, SIGSUBST_NULL, CPU->cpu_id);

	SBDP_DBG_QR("DR: resume COMPLETED\n");
}
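
Both resume paths above obey the rule spelled out in the dr_resume() comment: while CPUs are paused only non-blocking updates are allowed, and anything that may sleep on an adaptive mutex (the signature update, cmn_err() in restart_other_cpu(), the watchdog reprogramming) is deferred until after start_cpus(), since a paused CPU can never release the lock being waited for. A minimal sketch of that ordering, with my_blocking_update() as a hypothetical placeholder:

extern void my_blocking_update(void);	/* hypothetical; may sleep */

static void
my_reconfigure(void)
{
	ASSERT(MUTEX_HELD(&cpu_lock));

	pause_cpus(NULL);
	/* only non-blocking pointer/flag updates in here */
	start_cpus();

	/*
	 * Anything that can block (adaptive mutexes, cmn_err(), ...)
	 * runs only once the other CPUs can make progress again.
	 */
	my_blocking_update();
}
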
Example 10
/*ARGSUSED*/
int
suspend_start(char *error_reason, size_t max_reason_len)
{
	uint64_t	source_tick;
	uint64_t	source_stick;
	uint64_t	rv;
	timestruc_t	source_tod;
	int		spl;

	ASSERT(suspend_supported());
	DBG("suspend: %s", __func__);

	sfmmu_ctxdoms_lock();

	mutex_enter(&cpu_lock);

	/* Suspend the watchdog */
	watchdog_suspend();

	/* Record the TOD */
	mutex_enter(&tod_lock);
	source_tod = tod_get();
	mutex_exit(&tod_lock);

	/* Pause all other CPUs */
	pause_cpus(NULL);
	DBG_PROM("suspend: CPUs paused\n");

	/* Suspend cyclics */
	cyclic_suspend();
	DBG_PROM("suspend: cyclics suspended\n");

	/* Disable interrupts */
	spl = spl8();
	DBG_PROM("suspend: spl8()\n");

	source_tick = gettick_counter();
	source_stick = gettick();
	DBG_PROM("suspend: source_tick: 0x%lx\n", source_tick);
	DBG_PROM("suspend: source_stick: 0x%lx\n", source_stick);

	/*
	 * Call into the HV to initiate the suspend. hv_guest_suspend()
	 * returns after the guest has been resumed or if the suspend
	 * operation failed or was cancelled. After a successful suspend,
	 * the %tick and %stick registers may have changed by an amount
	 * that is not proportional to the amount of time that has passed.
	 * They may have jumped forwards or backwards. Some variation is
	 * allowed and accounted for using suspend_tick_stick_max_delta,
	 * but otherwise this jump must be uniform across all CPUs and we
	 * operate under the assumption that it is (maintaining two global
	 * offset variables--one for %tick and one for %stick.)
	 */
	DBG_PROM("suspend: suspending... \n");
	rv = hv_guest_suspend();
	if (rv != 0) {
		splx(spl);
		cyclic_resume();
		start_cpus();
		watchdog_resume();
		mutex_exit(&cpu_lock);
		sfmmu_ctxdoms_unlock();
		DBG("suspend: failed, rv: %ld\n", rv);
		return (rv);
	}

	suspend_count++;

	/* Update the global tick and stick offsets and the preserved TOD */
	set_tick_offsets(source_tick, source_stick, &source_tod);

	/* Ensure new offsets are globally visible before resuming CPUs */
	membar_sync();

	/* Enable interrupts */
	splx(spl);

	/* Set the {%tick,%stick}.NPT bits on all CPUs */
	if (enable_user_tick_stick_emulation) {
		xc_all((xcfunc_t *)enable_tick_stick_npt, NULL, NULL);
		xt_sync(cpu_ready_set);
		ASSERT(gettick_npt() != 0);
		ASSERT(getstick_npt() != 0);
	}

	/* If emulation is enabled, but not currently active, enable it */
	if (enable_user_tick_stick_emulation && !tick_stick_emulation_active) {
		tick_stick_emulation_active = B_TRUE;
	}

	sfmmu_ctxdoms_remove();

	/* Resume cyclics, unpause CPUs */
	cyclic_resume();
	start_cpus();

	/* Set the TOD */
	mutex_enter(&tod_lock);
	tod_set(source_tod);
	mutex_exit(&tod_lock);

	/* Re-enable the watchdog */
	watchdog_resume();

	mutex_exit(&cpu_lock);

	/* Download the latest MD */
	if ((rv = mach_descrip_update()) != 0)
		cmn_err(CE_PANIC, "suspend: mach_descrip_update failed: %ld",
		    rv);

	sfmmu_ctxdoms_update();
	sfmmu_ctxdoms_unlock();

	/* Get new MD, update CPU mappings/relationships */
	if (suspend_update_cpu_mappings)
		update_cpu_mappings();

	DBG("suspend: target tick: 0x%lx", gettick_counter());
	DBG("suspend: target stick: 0x%llx", gettick());
	DBG("suspend: user %%tick/%%stick emulation is %d",
	    tick_stick_emulation_active);
	DBG("suspend: finished");

	return (0);
}
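
The failure path after hv_guest_suspend() is worth noting: the teardown mirrors the setup exactly, releasing in reverse order of acquisition (splx(), cyclic_resume(), start_cpus(), watchdog_resume(), cpu_lock, then the context-domain lock). A condensed sketch of that shape, where my_commit() is a hypothetical stand-in for the operation that may fail:

extern int my_commit(void);		/* hypothetical; returns 0 on success */

static int
my_quiesced_commit(void)
{
	int spl, rv;

	mutex_enter(&cpu_lock);
	pause_cpus(NULL);
	spl = spl8();			/* interrupts blocked last */

	rv = my_commit();
	if (rv != 0) {
		/* Unwind in strict reverse order of the setup. */
		splx(spl);
		start_cpus();
		mutex_exit(&cpu_lock);
		return (rv);
	}

	splx(spl);
	start_cpus();
	mutex_exit(&cpu_lock);
	return (0);
}
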
Example 11
/*
 * Obtain an updated MD from the hypervisor and update cpunodes, CPU HW
 * sharing data structures, and processor groups.
 */
static void
update_cpu_mappings(void)
{
	md_t		*mdp;
	processorid_t	id;
	cpu_t		*cp;
	cpu_pg_t	*pgps[NCPU];

	if ((mdp = md_get_handle()) == NULL) {
		DBG("suspend: md_get_handle failed");
		return;
	}

	DBG("suspend: updating CPU mappings");

	mutex_enter(&cpu_lock);

	setup_chip_mappings(mdp);
	setup_exec_unit_mappings(mdp);
	for (id = 0; id < NCPU; id++) {
		if ((cp = cpu_get(id)) == NULL)
			continue;
		cpu_map_exec_units(cp);
	}

	/*
	 * Re-calculate processor groups.
	 *
	 * First tear down all PG information before adding any new PG
	 * information derived from the MD we just downloaded. We must
	 * call pg_cpu_inactive and pg_cpu_active with CPUs paused and
	 * we want to minimize the number of times pause_cpus is called.
	 * Inactivating all CPUs would leave PGs without any active CPUs,
	 * so while CPUs are paused, call pg_cpu_inactive and swap in the
	 * bootstrap PG structure saving the original PG structure to be
	 * fini'd afterwards. This prevents the dispatcher from encountering
	 * PGs in which all CPUs are inactive. Offline CPUs are already
	 * inactive in their PGs and shouldn't be reactivated, so we must
	 * not call pg_cpu_inactive or pg_cpu_active for those CPUs.
	 */
	pause_cpus(NULL);
	for (id = 0; id < NCPU; id++) {
		if ((cp = cpu_get(id)) == NULL)
			continue;
		if ((cp->cpu_flags & CPU_OFFLINE) == 0)
			pg_cpu_inactive(cp);
		pgps[id] = cp->cpu_pg;
		pg_cpu_bootstrap(cp);
	}
	start_cpus();

	/*
	 * pg_cpu_fini* and pg_cpu_init* must be called while CPUs are
	 * not paused. Use two separate loops here so that we do not
	 * initialize PG data for CPUs until all the old PG data structures
	 * are torn down.
	 */
	for (id = 0; id < NCPU; id++) {
		if ((cp = cpu_get(id)) == NULL)
			continue;
		pg_cpu_fini(cp, pgps[id]);
		mpo_cpu_remove(id);
	}

	/*
	 * Initialize PG data for each CPU, but leave the bootstrapped
	 * PG structure in place to avoid running with any PGs containing
	 * nothing but inactive CPUs.
	 */
	for (id = 0; id < NCPU; id++) {
		if ((cp = cpu_get(id)) == NULL)
			continue;
		mpo_cpu_add(mdp, id);
		pgps[id] = pg_cpu_init(cp, B_TRUE);
	}

	/*
	 * Now that PG data has been initialized for all CPUs in the
	 * system, replace the bootstrapped PG structure with the
	 * initialized PG structure and call pg_cpu_active for each CPU.
	 */
	pause_cpus(NULL);
	for (id = 0; id < NCPU; id++) {
		if ((cp = cpu_get(id)) == NULL)
			continue;
		cp->cpu_pg = pgps[id];
		if ((cp->cpu_flags & CPU_OFFLINE) == 0)
			pg_cpu_active(cp);
	}
	start_cpus();

	mutex_exit(&cpu_lock);

	(void) md_fini_handle(mdp);
}
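
update_cpu_mappings() splits the work into two short pause windows because pg_cpu_fini() and pg_cpu_init() must run with CPUs unpaused: the first window swaps a bootstrap placeholder in, the teardown and rebuild happen with CPUs running, and the second window installs the new data. The skeleton below restates that scheme; my_data_t and the my_* helpers are hypothetical stand-ins for the cpu_pg_t handling above.

typedef struct my_data my_data_t;		/* hypothetical per-CPU data */
extern my_data_t *my_get(int id);		/* hypothetical accessors */
extern void my_set(int id, my_data_t *d);
extern my_data_t *my_bootstrap(int id);		/* placeholder, non-blocking */
extern void my_fini(my_data_t *d);		/* may block */
extern my_data_t *my_init(int id);		/* may block */

static void
my_rebuild_all(void)
{
	my_data_t *saved[NCPU];
	int id;

	mutex_enter(&cpu_lock);

	/* Pass 1: swap in placeholders so nothing stale is ever visible. */
	pause_cpus(NULL);
	for (id = 0; id < NCPU; id++) {
		saved[id] = my_get(id);
		my_set(id, my_bootstrap(id));
	}
	start_cpus();

	/* Blocking teardown and rebuild run with CPUs unpaused. */
	for (id = 0; id < NCPU; id++)
		my_fini(saved[id]);
	for (id = 0; id < NCPU; id++)
		saved[id] = my_init(id);

	/* Pass 2: publish the new data under a second pause window. */
	pause_cpus(NULL);
	for (id = 0; id < NCPU; id++)
		my_set(id, saved[id]);
	start_cpus();

	mutex_exit(&cpu_lock);
}
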
Example 12
/*
 * Destroy a partition.
 */
int
cpupart_destroy(psetid_t psid)
{
	cpu_t	*cp, *first_cp;
	cpupart_t *pp, *newpp;
	int	err = 0;

	ASSERT(pool_lock_held());
	mutex_enter(&cpu_lock);

	pp = cpupart_find(psid);
	if (pp == NULL || pp == &cp_default) {
		mutex_exit(&cpu_lock);
		return (EINVAL);
	}

	/*
	 * Unbind all the threads currently bound to the partition.
	 */
	err = cpupart_unbind_threads(pp, B_TRUE);
	if (err) {
		mutex_exit(&cpu_lock);
		return (err);
	}

	newpp = &cp_default;
	while ((cp = pp->cp_cpulist) != NULL) {
		if (err = cpupart_move_cpu(cp, newpp, 0)) {
			mutex_exit(&cpu_lock);
			return (err);
		}
	}

	ASSERT(bitset_is_null(&pp->cp_cmt_pgs));
	ASSERT(bitset_is_null(&pp->cp_haltset));

	/*
	 * Teardown the partition's group of active CMT PGs and halted
	 * CPUs now that they have all left.
	 */
	bitset_fini(&pp->cp_cmt_pgs);
	bitset_fini(&pp->cp_haltset);

	/*
	 * Reset the pointers in any offline processors so they won't
	 * try to rejoin the destroyed partition when they're turned
	 * online.
	 */
	first_cp = cp = CPU;
	do {
		if (cp->cpu_part == pp) {
			ASSERT(cp->cpu_flags & CPU_OFFLINE);
			cp->cpu_part = newpp;
		}
		cp = cp->cpu_next;
	} while (cp != first_cp);

	/*
	 * Pause all CPUs while changing the partition list, to make sure
	 * the clock thread (which traverses the list without holding
	 * cpu_lock) isn't running.
	 */
	pause_cpus(NULL);
	pp->cp_prev->cp_next = pp->cp_next;
	pp->cp_next->cp_prev = pp->cp_prev;
	if (cp_list_head == pp)
		cp_list_head = pp->cp_next;
	start_cpus();

	if (cp_id_next > pp->cp_id)
		cp_id_next = pp->cp_id;

	if (pp->cp_kstat)
		kstat_delete(pp->cp_kstat);

	cp_numparts--;

	disp_kp_free(&pp->cp_kp_queue);

	cpupart_lpl_teardown(pp);

	kmem_free(pp, sizeof (cpupart_t));
	mutex_exit(&cpu_lock);

	return (err);
}
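
The unlink of the partition from cp_list_head is the textbook case for pause_cpus(): the clock thread walks this circular, doubly linked list without holding cpu_lock, so the splice may only happen while every other CPU is parked. In isolation, with hypothetical my_elem_t/my_list_head names:

typedef struct my_elem {
	struct my_elem	*next;
	struct my_elem	*prev;
} my_elem_t;

static my_elem_t *my_list_head;		/* walked lock-free by other CPUs */

static void
my_list_remove(my_elem_t *e)
{
	ASSERT(MUTEX_HELD(&cpu_lock));

	pause_cpus(NULL);
	e->prev->next = e->next;
	e->next->prev = e->prev;
	if (my_list_head == e)
		my_list_head = e->next;
	start_cpus();
}
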
Example 13
/*
 * Create a new partition.  On MP systems, this also allocates a
 * kpreempt disp queue for that partition.
 */
int
cpupart_create(psetid_t *psid)
{
	cpupart_t	*pp;

	ASSERT(pool_lock_held());

	pp = kmem_zalloc(sizeof (cpupart_t), KM_SLEEP);
	pp->cp_nlgrploads = lgrp_plat_max_lgrps();
	pp->cp_lgrploads = kmem_zalloc(sizeof (lpl_t) * pp->cp_nlgrploads,
	    KM_SLEEP);

	mutex_enter(&cpu_lock);
	if (cp_numparts == cp_max_numparts) {
		mutex_exit(&cpu_lock);
		kmem_free(pp->cp_lgrploads, sizeof (lpl_t) * pp->cp_nlgrploads);
		pp->cp_lgrploads = NULL;
		kmem_free(pp, sizeof (cpupart_t));
		return (ENOMEM);
	}
	cp_numparts++;
	/* find the next free partition ID */
	while (cpupart_find(CPTOPS(cp_id_next)) != NULL)
		cp_id_next++;
	pp->cp_id = cp_id_next++;
	pp->cp_ncpus = 0;
	pp->cp_cpulist = NULL;
	pp->cp_attr = 0;
	klgrpset_clear(pp->cp_lgrpset);
	pp->cp_kp_queue.disp_maxrunpri = -1;
	pp->cp_kp_queue.disp_max_unbound_pri = -1;
	pp->cp_kp_queue.disp_cpu = NULL;
	pp->cp_gen = 0;
	DISP_LOCK_INIT(&pp->cp_kp_queue.disp_lock);
	*psid = CPTOPS(pp->cp_id);
	disp_kp_alloc(&pp->cp_kp_queue, v.v_nglobpris);
	cpupart_kstat_create(pp);
	cpupart_lpl_initialize(pp);

	bitset_init(&pp->cp_cmt_pgs);

	/*
	 * Initialize and size the partition's bitset of halted CPUs.
	 */
	bitset_init_fanout(&pp->cp_haltset, cp_haltset_fanout);
	bitset_resize(&pp->cp_haltset, max_ncpus);

	/*
	 * Pause all CPUs while changing the partition list, to make sure
	 * the clock thread (which traverses the list without holding
	 * cpu_lock) isn't running.
	 */
	pause_cpus(NULL);
	pp->cp_next = cp_list_head;
	pp->cp_prev = cp_list_head->cp_prev;
	cp_list_head->cp_prev->cp_next = pp;
	cp_list_head->cp_prev = pp;
	start_cpus();
	mutex_exit(&cpu_lock);

	return (0);
}
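
Insertion into the same list follows the identical discipline as the removal sketched after the previous example: everything that can block (allocation, kstats, the lpl setup) is done first, and the pause window is kept down to the four pointer stores. A matching sketch, reusing the hypothetical my_elem_t and my_list_head from above:

static void
my_list_insert(my_elem_t *e)
{
	ASSERT(MUTEX_HELD(&cpu_lock));

	pause_cpus(NULL);
	e->next = my_list_head;
	e->prev = my_list_head->prev;
	my_list_head->prev->next = e;
	my_list_head->prev = e;
	start_cpus();
}
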
Example 14
static int
cpupart_move_cpu(cpu_t *cp, cpupart_t *newpp, int forced)
{
	cpupart_t *oldpp;
	cpu_t	*ncp, *newlist;
	kthread_t *t;
	int	move_threads = 1;
	lgrp_id_t lgrpid;
	proc_t 	*p;
	int lgrp_diff_lpl;
	lpl_t	*cpu_lpl;
	int	ret;
	boolean_t unbind_all_threads = (forced != 0);

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(newpp != NULL);

	oldpp = cp->cpu_part;
	ASSERT(oldpp != NULL);
	ASSERT(oldpp->cp_ncpus > 0);

	if (newpp == oldpp) {
		/*
		 * Don't need to do anything.
		 */
		return (0);
	}

	cpu_state_change_notify(cp->cpu_id, CPU_CPUPART_OUT);

	if (!disp_bound_partition(cp, 0)) {
		/*
		 * Don't need to move threads if there are no threads in
		 * the partition.  Note that threads can't enter the
		 * partition while we're holding cpu_lock.
		 */
		move_threads = 0;
	} else if (oldpp->cp_ncpus == 1) {
		/*
		 * The last CPU is removed from a partition which has threads
		 * running in it. Some of these threads may be bound to this
		 * CPU.
		 *
		 * Attempt to unbind threads from the CPU and from the processor
		 * set. Note that no threads should be bound to this CPU since
		 * cpupart_move_threads will refuse to move bound threads to
		 * other CPUs.
		 */
		(void) cpu_unbind(oldpp->cp_cpulist->cpu_id, B_FALSE);
		(void) cpupart_unbind_threads(oldpp, B_FALSE);

		if (!disp_bound_partition(cp, 0)) {
			/*
			 * No bound threads in this partition any more
			 */
			move_threads = 0;
		} else {
			/*
			 * There are still threads bound to the partition
			 */
			cpu_state_change_notify(cp->cpu_id, CPU_CPUPART_IN);
			return (EBUSY);
		}
	}

	/*
	 * If forced flag is set unbind any threads from this CPU.
	 * Otherwise unbind soft-bound threads only.
	 */
	if ((ret = cpu_unbind(cp->cpu_id, unbind_all_threads)) != 0) {
		cpu_state_change_notify(cp->cpu_id, CPU_CPUPART_IN);
		return (ret);
	}

	/*
	 * Stop further threads weak binding to this cpu.
	 */
	cpu_inmotion = cp;
	membar_enter();

	/*
	 * Notify the Processor Groups subsystem that the CPU
	 * will be moving cpu partitions. This is done before
	 * CPUs are paused to provide an opportunity for any
	 * needed memory allocations.
	 */
	pg_cpupart_out(cp, oldpp);
	pg_cpupart_in(cp, newpp);

again:
	if (move_threads) {
		int loop_count;
		/*
		 * Check for threads strong or weak bound to this CPU.
		 */
		for (loop_count = 0; disp_bound_threads(cp, 0); loop_count++) {
			if (loop_count >= 5) {
				cpu_state_change_notify(cp->cpu_id,
				    CPU_CPUPART_IN);
				pg_cpupart_out(cp, newpp);
				pg_cpupart_in(cp, oldpp);
				cpu_inmotion = NULL;
				return (EBUSY);	/* some threads still bound */
			}
			delay(1);
		}
	}

	/*
	 * Before we actually start changing data structures, notify
	 * the cyclic subsystem that we want to move this CPU out of its
	 * partition.
	 */
	if (!cyclic_move_out(cp)) {
		/*
		 * This CPU must be the last CPU in a processor set with
		 * a bound cyclic.
		 */
		cpu_state_change_notify(cp->cpu_id, CPU_CPUPART_IN);
		pg_cpupart_out(cp, newpp);
		pg_cpupart_in(cp, oldpp);
		cpu_inmotion = NULL;
		return (EBUSY);
	}

	pause_cpus(cp);

	if (move_threads) {
		/*
		 * The thread on cpu before the pause thread may have read
		 * cpu_inmotion before we raised the barrier above.  Check
		 * again.
		 */
		if (disp_bound_threads(cp, 1)) {
			start_cpus();
			goto again;
		}

	}

	/*
	 * Now that CPUs are paused, let the PG subsystem perform
	 * any necessary data structure updates.
	 */
	pg_cpupart_move(cp, oldpp, newpp);

	/* save this cpu's lgroup -- it'll be the same in the new partition */
	lgrpid = cp->cpu_lpl->lpl_lgrpid;

	cpu_lpl = cp->cpu_lpl;
	/*
	 * let the lgroup framework know cp has left the partition
	 */
	lgrp_config(LGRP_CONFIG_CPUPART_DEL, (uintptr_t)cp, lgrpid);

	/* move out of old partition */
	oldpp->cp_ncpus--;
	if (oldpp->cp_ncpus > 0) {

		ncp = cp->cpu_prev_part->cpu_next_part = cp->cpu_next_part;
		cp->cpu_next_part->cpu_prev_part = cp->cpu_prev_part;
		if (oldpp->cp_cpulist == cp) {
			oldpp->cp_cpulist = ncp;
		}
	} else {
		ncp = oldpp->cp_cpulist = NULL;
		cp_numparts_nonempty--;
		ASSERT(cp_numparts_nonempty != 0);
	}
	oldpp->cp_gen++;

	/* move into new partition */
	newlist = newpp->cp_cpulist;
	if (newlist == NULL) {
		newpp->cp_cpulist = cp->cpu_next_part = cp->cpu_prev_part = cp;
		cp_numparts_nonempty++;
		ASSERT(cp_numparts_nonempty != 0);
	} else {
		cp->cpu_next_part = newlist;
		cp->cpu_prev_part = newlist->cpu_prev_part;
		newlist->cpu_prev_part->cpu_next_part = cp;
		newlist->cpu_prev_part = cp;
	}
	cp->cpu_part = newpp;
	newpp->cp_ncpus++;
	newpp->cp_gen++;

	ASSERT(bitset_is_null(&newpp->cp_haltset));
	ASSERT(bitset_is_null(&oldpp->cp_haltset));

	/*
	 * let the lgroup framework know cp has entered the partition
	 */
	lgrp_config(LGRP_CONFIG_CPUPART_ADD, (uintptr_t)cp, lgrpid);

	/*
	 * If necessary, move threads off processor.
	 */
	if (move_threads) {
		ASSERT(ncp != NULL);

		/*
		 * Walk thru the active process list to look for
		 * threads that need to have a new home lgroup,
		 * or the last CPU they run on is the same CPU
		 * being moved out of the partition.
		 */

		for (p = practive; p != NULL; p = p->p_next) {

			t = p->p_tlist;

			if (t == NULL)
				continue;

			lgrp_diff_lpl = 0;

			do {

				ASSERT(t->t_lpl != NULL);

				/*
				 * Update the count of how many threads are
				 * in this CPU's lgroup but have a different lpl
				 */

				if (t->t_lpl != cpu_lpl &&
				    t->t_lpl->lpl_lgrpid == lgrpid)
					lgrp_diff_lpl++;
				/*
				 * If the lgroup that t is assigned to no
				 * longer has any CPUs in t's partition,
				 * we'll have to choose a new lgroup for t.
				 */

				if (!LGRP_CPUS_IN_PART(t->t_lpl->lpl_lgrpid,
				    t->t_cpupart)) {
					lgrp_move_thread(t,
					    lgrp_choose(t, t->t_cpupart), 0);
				}

				/*
				 * make sure lpl points to our own partition
				 */
				ASSERT(t->t_lpl >= t->t_cpupart->cp_lgrploads &&
				    (t->t_lpl < t->t_cpupart->cp_lgrploads +
				    t->t_cpupart->cp_nlgrploads));

				ASSERT(t->t_lpl->lpl_ncpu > 0);

				/* Update CPU last ran on if it was this CPU */
				if (t->t_cpu == cp && t->t_cpupart == oldpp &&
				    t->t_bound_cpu != cp) {
					t->t_cpu = disp_lowpri_cpu(ncp,
					    t->t_lpl, t->t_pri, NULL);
				}
				t = t->t_forw;
			} while (t != p->p_tlist);

			/*
			 * Didn't find any threads in the same lgroup as this
			 * CPU with a different lpl, so remove the lgroup from
			 * the process lgroup bitmask.
			 */

			if (lgrp_diff_lpl == 0)
				klgrpset_del(p->p_lgrpset, lgrpid);
		}

		/*
		 * Walk thread list looking for threads that need to be
		 * rehomed, since there are some threads that are not in
		 * their process's p_tlist.
		 */

		t = curthread;

		do {
			ASSERT(t != NULL && t->t_lpl != NULL);

			/*
			 * If the lgroup that t is assigned to no
			 * longer has any CPUs in t's partition,
			 * we'll have to choose a new lgroup for t.
			 * Also, choose best lgroup for home when
			 * thread has specified lgroup affinities,
			 * since there may be an lgroup with more
			 * affinity available after moving CPUs
			 * around.
			 */
			if (!LGRP_CPUS_IN_PART(t->t_lpl->lpl_lgrpid,
			    t->t_cpupart) || t->t_lgrp_affinity) {
				lgrp_move_thread(t,
				    lgrp_choose(t, t->t_cpupart), 1);
			}

			/* make sure lpl points to our own partition */
			ASSERT((t->t_lpl >= t->t_cpupart->cp_lgrploads) &&
			    (t->t_lpl < t->t_cpupart->cp_lgrploads +
			    t->t_cpupart->cp_nlgrploads));

			ASSERT(t->t_lpl->lpl_ncpu > 0);

			/* Update CPU last ran on if it was this CPU */
			if (t->t_cpu == cp && t->t_cpupart == oldpp &&
			    t->t_bound_cpu != cp) {
				t->t_cpu = disp_lowpri_cpu(ncp, t->t_lpl,
				    t->t_pri, NULL);
			}

			t = t->t_next;
		} while (t != curthread);

		/*
		 * Clear off the CPU's run queue, and the kp queue if the
		 * partition is now empty.
		 */
		disp_cpu_inactive(cp);

		/*
		 * Make cp switch to a thread from the new partition.
		 */
		cp->cpu_runrun = 1;
		cp->cpu_kprunrun = 1;
	}

	cpu_inmotion = NULL;
	start_cpus();

	/*
	 * Let anyone interested know that cpu has been added to the set.
	 */
	cpu_state_change_notify(cp->cpu_id, CPU_CPUPART_IN);

	/*
	 * Now let the cyclic subsystem know that it can reshuffle cyclics
	 * bound to the new processor set.
	 */
	cyclic_move_in(cp);

	return (0);
}
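
cpupart_move_cpu() shows the pause-and-recheck pattern: cpu_inmotion is published with a memory barrier to stop new weak bindings, the code waits for existing bindings to drain, and then, because a thread could have slipped in right before the pause took hold, the bound-thread check is repeated once CPUs are actually paused, backing out with start_cpus() and retrying if it fails. A stripped-down sketch, where my_in_motion and my_still_bound() are hypothetical stand-ins for cpu_inmotion and disp_bound_threads():

static void *volatile my_in_motion;	/* hypothetical, like cpu_inmotion */
extern int my_still_bound(void);	/* hypothetical re-check */

static void
my_move(void *target)
{
	ASSERT(MUTEX_HELD(&cpu_lock));

	my_in_motion = target;		/* stop new weak bindings */
	membar_enter();

again:
	/* ... unpaused waiting/draining happens here ... */

	pause_cpus(NULL);
	if (my_still_bound()) {
		/* someone raced in just before the pause; try again */
		start_cpus();
		goto again;
	}

	/* exclusive access: move the data structures here */

	my_in_motion = NULL;
	start_cpus();
}
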
Example 15
/*
 * Top level routine to direct suspend/resume of a domain.
 */
void
xen_suspend_domain(void)
{
	extern void rtcsync(void);
	extern void ec_resume(void);
	extern kmutex_t ec_lock;
	struct xen_add_to_physmap xatp;
	ulong_t flags;
	int err;

	cmn_err(CE_NOTE, "Domain suspending for save/migrate");

	SUSPEND_DEBUG("xen_suspend_domain\n");

	/*
	 * We only want to suspend the PV devices, since the emulated devices
	 * are suspended by saving the emulated device state.  The PV devices
	 * are all children of the xpvd nexus device.  So we search the
	 * device tree for the xpvd node to use as the root of the tree to
	 * be suspended.
	 */
	if (xpvd_dip == NULL)
		ddi_walk_devs(ddi_root_node(), check_xpvd, NULL);

	/*
	 * suspend interrupts and devices
	 */
	if (xpvd_dip != NULL)
		(void) xen_suspend_devices(ddi_get_child(xpvd_dip));
	else
		cmn_err(CE_WARN, "No PV devices found to suspend");
	SUSPEND_DEBUG("xenbus_suspend\n");
	xenbus_suspend();

	mutex_enter(&cpu_lock);

	/*
	 * Suspend on vcpu 0
	 */
	thread_affinity_set(curthread, 0);
	kpreempt_disable();

	if (ncpus > 1)
		pause_cpus(NULL, NULL);
	/*
	 * We can grab the ec_lock as it's a spinlock with a high SPL. Hence
	 * any holder would have dropped it to get through pause_cpus().
	 */
	mutex_enter(&ec_lock);

	/*
	 * From here on in, we can't take locks.
	 */

	flags = intr_clear();

	SUSPEND_DEBUG("HYPERVISOR_suspend\n");
	/*
	 * At this point we suspend and sometime later resume.
	 * Note that this call may return with an indication of a cancelled
	 * suspend; for now, no matter what the return value is, we do a
	 * full resume of all suspended drivers, etc.
	 */
	(void) HYPERVISOR_shutdown(SHUTDOWN_suspend);

	/*
	 * Point HYPERVISOR_shared_info to the proper place.
	 */
	xatp.domid = DOMID_SELF;
	xatp.idx = 0;
	xatp.space = XENMAPSPACE_shared_info;
	xatp.gpfn = xen_shared_info_frame;
	if ((err = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) != 0)
		panic("Could not set shared_info page. error: %d", err);

	SUSPEND_DEBUG("gnttab_resume\n");
	gnttab_resume();

	SUSPEND_DEBUG("ec_resume\n");
	ec_resume();

	intr_restore(flags);

	if (ncpus > 1)
		start_cpus();

	mutex_exit(&ec_lock);
	mutex_exit(&cpu_lock);

	/*
	 * Now we can take locks again.
	 */

	rtcsync();

	SUSPEND_DEBUG("xenbus_resume\n");
	xenbus_resume();
	SUSPEND_DEBUG("xen_resume_devices\n");
	if (xpvd_dip != NULL)
		(void) xen_resume_devices(ddi_get_child(xpvd_dip), 0);

	thread_affinity_clear(curthread);
	kpreempt_enable();

	SUSPEND_DEBUG("finished xen_suspend_domain\n");

	cmn_err(CE_NOTE, "domain restore/migrate completed");
}
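
Two details in xen_suspend_domain() are easy to miss: the pause is skipped entirely on a uniprocessor (ncpus > 1 guards both pause_cpus() and start_cpus()), and ec_lock, a high-SPL spin lock, is only taken after the pause, which guarantees that no paused CPU can still be holding it. This snippet also uses the newer two-argument pause_cpus(NULL, NULL). A condensed sketch of that bracket, with my_spin_lock and my_critical_op() as hypothetical stand-ins:

static kmutex_t my_spin_lock;		/* high-SPL spin lock, like ec_lock */
extern void my_critical_op(void);	/* hypothetical; must not take locks */

static void
my_quiesce_and_run(void)
{
	ulong_t flags;

	mutex_enter(&cpu_lock);
	if (ncpus > 1)
		pause_cpus(NULL, NULL);	/* two-argument form, as above */

	mutex_enter(&my_spin_lock);	/* safe: no paused CPU holds it */
	flags = intr_clear();

	my_critical_op();

	intr_restore(flags);
	if (ncpus > 1)
		start_cpus();

	mutex_exit(&my_spin_lock);
	mutex_exit(&cpu_lock);
}
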