Example #1
static void
stop_other_cpus(void)
{
	ulong_t s = clear_int_flag(); /* fast way to keep CPU from changing */
	cpuset_t xcset;

	CPUSET_ALL_BUT(xcset, CPU->cpu_id);
	xc_priority(0, 0, 0, CPUSET2BV(xcset), (xc_func_t)mach_cpu_halt);
	restore_int_flag(s);
}
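Every example below brackets a critical region with the same pair: clear_int_flag() saves the current flags register and masks interrupts, and restore_int_flag() puts the saved state back, so interrupts are re-enabled only if they were enabled on entry. A minimal sketch of what such primitives could look like on x86-64 (GCC inline assembly; an illustration of the idiom, not the original implementation):

static inline unsigned long
sketch_clear_int_flag(void)
{
	unsigned long flags;

	/* Save RFLAGS, then clear IF to mask maskable interrupts. */
	__asm__ __volatile__("pushfq; popq %0; cli"
	    : "=r" (flags) : : "memory");
	return (flags);
}

static inline void
sketch_restore_int_flag(unsigned long flags)
{
	/* Restore the saved RFLAGS image; IF reverts to its prior state. */
	__asm__ __volatile__("pushq %0; popfq"
	    : : "r" (flags) : "memory", "cc");
}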
Example #2
int
sysp_ischar()
{
	int i;
	ulong_t s;

	if (cons_polledio == NULL ||
	    cons_polledio->cons_polledio_ischar == NULL)
		return (0);

	s = clear_int_flag();
	i = cons_polledio->cons_polledio_ischar(
	    cons_polledio->cons_polledio_argument);
	restore_int_flag(s);
	return (i);
}
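sysp_ischar() and the routines in the next examples dispatch through a polled-I/O ops vector; the field names can be read off the calls. A simplified sketch of the assumed shape (the real illumos structure carries more members and stricter typedefs):

typedef struct cons_polledio {
	void	*cons_polledio_argument;		/* driver handle */
	void	(*cons_polledio_putchar)(void *, int);	/* emit one char */
	int	(*cons_polledio_getchar)(void *);	/* blocking read */
	int	(*cons_polledio_ischar)(void *);	/* input pending? */
} cons_polledio_t;

extern cons_polledio_t *cons_polledio;	/* NULL until a driver registers */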
Example #3
void
sysp_putchar(int c)
{
	ulong_t s;

	/*
	 * We have no alternative but to drop the output on the floor.
	 */
	if (cons_polledio == NULL ||
	    cons_polledio->cons_polledio_putchar == NULL)
		return;

	s = clear_int_flag();
	cons_polledio->cons_polledio_putchar(
	    cons_polledio->cons_polledio_argument, c);
	restore_int_flag(s);
}
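A hypothetical convenience wrapper over sysp_putchar() (not part of the original source) shows the CR/LF expansion a polled console typically wants:

static void
sysp_putstring(const char *s)
{
	while (*s != '\0') {
		if (*s == '\n')
			sysp_putchar('\r');	/* expand NL to CR-NL */
		sysp_putchar(*s++);
	}
}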
Example #4
int
sysp_getchar()
{
	int i;
	ulong_t s;

	if (cons_polledio == NULL) {
		/* Uh oh */
		prom_printf("getchar called with no console\n");
		for (;;)
			/* LOOP FOREVER */;
	}

	s = clear_int_flag();
	i = cons_polledio->cons_polledio_getchar(
	    cons_polledio->cons_polledio_argument);
	restore_int_flag(s);
	return (i);
}
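Taken together, the three routines support non-blocking console polling. A hypothetical caller (illustrative only) drains and echoes whatever input is pending:

static void
sysp_poll_echo(void)
{
	/* sysp_ischar() is non-blocking, so the loop exits when idle. */
	while (sysp_ischar())
		sysp_putchar(sysp_getchar());
}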
/*
 * Called by a CPU which has just been onlined.  It is expected that the CPU
 * performing the online operation will call tsc_sync_master().
 *
 * TSC sync is disabled in the context of virtualization. See comments
 * above tsc_sync_master.
 */
void
tsc_sync_slave(void)
{
	ulong_t flags;
	hrtime_t s1;
	tsc_sync_t *tsc = tscp;
	int cnt;
	int hwtype;

	hwtype = get_hwenv();
	if (!tsc_master_slave_sync_needed || hwtype == HW_XEN_HVM ||
	    hwtype == HW_VMWARE)
		return;

	flags = clear_int_flag();

	for (cnt = 0; cnt < SYNC_ITERATIONS; cnt++) {
		/* Re-fill the cache line */
		s1 = tsc->master_tsc;
		membar_enter();
		tsc_sync_go = TSC_SYNC_GO;
		do {
			/*
			 * Do not put an SMT_PAUSE here. For instance,
			 * if the master and slave are really the same
			 * hyper-threaded CPU, then you want the master
			 * to yield to the slave as quickly as possible here,
			 * but not the other way.
			 */
			s1 = tsc_read();
		} while (tsc->master_tsc == 0);
		tsc->slave_tsc = s1;
		membar_enter();
		tsc_sync_go = TSC_SYNC_DONE;

		while (tsc_sync_go != TSC_SYNC_STOP)
			SMT_PAUSE();
	}

	restore_int_flag(flags);
}
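The handshake in tsc_sync_slave() (and in tsc_sync_master() below) runs over a small piece of shared state. A sketch of the assumed declarations, with illustrative values for the phase constants; ordering is enforced by the explicit membar_enter() calls in the functions themselves:

#define	TSC_SYNC_STOP	0	/* phase values are illustrative */
#define	TSC_SYNC_GO	1
#define	TSC_SYNC_DONE	2

typedef struct tsc_sync {
	volatile hrtime_t master_tsc;	/* written by the master */
	volatile hrtime_t slave_tsc;	/* written by the slave */
} tsc_sync_t;

static tsc_sync_t *tscp;		/* page allocated in tsc_hrtimeinit() */
static volatile int tsc_sync_go = TSC_SYNC_STOP;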
hrtime_t
tsc_gethrtimeunscaled_delta(void)
{
	hrtime_t hrt;
	ulong_t flags;

	/*
	 * As in tsc_gethrtime_delta, we must not migrate between the call
	 * to tsc_gethrtimeunscaled and adding the CPU's hrtime delta.
	 * Disabling and reenabling preemption is forbidden here because we
	 * may be in the middle of a fast trap; in the amd64 kernel we
	 * cannot tolerate preemption during a fast trap (see
	 * _update_sregs()), so interrupts are disabled instead.
	 */

	flags = clear_int_flag();
	hrt = tsc_gethrtimeunscaled() + tsc_sync_tick_delta[CPU->cpu_id];
	restore_int_flag(flags);

	return (hrt);
}
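A hypothetical usage sketch (the caller is invented; the hooks appear in tsc_hrtimeinit() below): sample cheap unscaled ticks in a hot path, then convert to nanoseconds afterwards through the registered scalehrtimef hook:

static hrtime_t
sample_then_scale(void)
{
	hrtime_t t = tsc_gethrtimeunscaled_delta();

	scalehrtimef(&t);	/* in place: TSC ticks -> nanoseconds */
	return (t);
}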
hrtime_t
tsc_gethrtime_delta(void)
{
	uint32_t old_hres_lock;
	hrtime_t tsc, hrt;
	ulong_t flags;

	do {
		old_hres_lock = hres_lock;

		/*
		 * We need to disable interrupts here to ensure that we
		 * don't migrate between the call to tsc_read() and
		 * adding the CPU's TSC tick delta. Disabling and
		 * reenabling preemption is forbidden here because we
		 * may be in the middle of a fast trap; in the amd64
		 * kernel we cannot tolerate preemption during a fast
		 * trap. See _update_sregs().
		 */

		flags = clear_int_flag();
		tsc = tsc_read() + tsc_sync_tick_delta[CPU->cpu_id];
		restore_int_flag(flags);

		/* See comments in tsc_gethrtime() above */

		if (tsc >= tsc_last) {
			tsc -= tsc_last;
		} else if (tsc >= tsc_last - 2 * tsc_max_delta) {
			tsc = 0;
		}

		hrt = tsc_hrtime_base;

		TSC_CONVERT_AND_ADD(tsc, hrt, nsec_scale);
	} while ((old_hres_lock & ~1) != hres_lock);

	return (hrt);
}
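The do/while retry around hres_lock is a sequence-lock read: the writer keeps the low bit of the lock word set while it updates, so a reader's snapshot is consistent only if the lock word, rounded down to even, still equals the value read going in. A generic sketch of the pattern (names hypothetical; memory barriers elided for brevity):

static uint64_t
seq_read(volatile uint32_t *seq, volatile uint64_t *datum)
{
	uint32_t before;
	uint64_t v;

	do {
		before = *seq;	/* odd means an update is in flight */
		v = *datum;
	} while ((before & ~1U) != *seq);

	return (v);
}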
void
tsc_hrtimeinit(uint64_t cpu_freq_hz)
{
	extern int gethrtime_hires;
	longlong_t tsc;
	ulong_t flags;

	/*
	 * cpu_freq_hz is the measured cpu frequency in hertz
	 */

	/*
	 * We can't accommodate CPUs slower than 31.25 MHz.
	 */
	ASSERT(cpu_freq_hz > NANOSEC / (1 << NSEC_SHIFT));
	nsec_scale =
	    (uint_t)(((uint64_t)NANOSEC << (32 - NSEC_SHIFT)) / cpu_freq_hz);
	nsec_unscale =
	    (uint_t)(((uint64_t)cpu_freq_hz << (32 - NSEC_SHIFT)) / NANOSEC);

	flags = clear_int_flag();
	tsc = tsc_read();
	(void) tsc_gethrtime();
	tsc_max_delta = tsc_read() - tsc;
	restore_int_flag(flags);
	gethrtimef = tsc_gethrtime;
	gethrtimeunscaledf = tsc_gethrtimeunscaled;
	scalehrtimef = tsc_scalehrtime;
	unscalehrtimef = tsc_unscalehrtime;
	hrtime_tick = tsc_tick;
	gethrtime_hires = 1;
	/*
	 * Allocate memory for the structure used in the tsc sync logic.
	 * This structure should be aligned on a multiple of cache line size.
	 */
	tscp = kmem_zalloc(PAGESIZE, KM_SLEEP);
}
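With NSEC_SHIFT equal to 5 (its value in the illumos source), the ASSERT above reduces to cpu_freq_hz > NANOSEC / 32 = 31,250,000, i.e. the 31.25 MHz floor the comment mentions. nsec_scale is constructed so that ns = ticks * nsec_scale >> (32 - NSEC_SHIFT); TSC_CONVERT_AND_ADD evaluates this in 32-bit halves, but a 128-bit rendering makes the arithmetic plain (a sketch assuming NSEC_SHIFT == 5):

static uint64_t
ticks_to_ns_sketch(uint64_t ticks, uint32_t scale)
{
	/*
	 * scale = NANOSEC << 27 / cpu_freq_hz, so
	 * ticks * scale >> 27 = ticks * NANOSEC / cpu_freq_hz.
	 */
	return ((uint64_t)(((unsigned __int128)ticks * scale) >> 27));
}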
void
microfind(void)
{
	uint64_t max, count = MICROCOUNT;

	/*
	 * The algorithm tries to guess a loop count for tenmicrosec such
	 * that found will be 0xf000 PIT counts, but because it is only a
	 * rough guess there is no guarantee that tenmicrosec will take
	 * exactly 0xf000 PIT counts. min is set initially to 0xe000 and
	 * represents the number of PIT counts that must elapse in
	 * tenmicrosec for microfind to calculate the correct loop count
	 * for tenmicrosec. The algorithm successively sets count to better
	 * approximations until the number of PIT counts elapsed is greater
	 * than min. Ideally the first guess should be correct, but as CPUs
	 * become faster MICROCOUNT may have to be increased to ensure
	 * that the first guess for count is correct. There is no harm in
	 * leaving MICROCOUNT at 0x2000: the results will still be correct,
	 * it just may take longer to calculate the loop count used by
	 * tenmicrosec. In some cases min may be reset as the algorithm
	 * progresses in order to accommodate faster CPUs.
	 */
	unsigned long found, min = 0xe000;
	ulong_t s;
	unsigned char status;

	s = clear_int_flag();		/* disable interrupts */

	/*CONSTCOND*/
	while (1) {

		/*
		 * microdata is the loop count used in tenmicrosec. The first
		 * time around microdata is set to 1 to make tenmicrosec
		 * return quickly. The purpose of this while loop is to
		 * warm the cache for the next time around when the number
		 * of PIT counts are measured.
		 */
		microdata = 1;

		/*CONSTCOND*/
		while (1) {
			/* Put counter 0 in mode 0 */
			outb(PITCTL_PORT, PIT_LOADMODE);
			/* output a count of -1 to counter 0 */
			outb(PITCTR0_PORT, 0xff);
			outb(PITCTR0_PORT, 0xff);
			tenmicrosec();

			/* READ BACK counter 0 to latch status and count */
			outb(PITCTL_PORT, PIT_READBACK|PIT_READBACKC0);

			/* Read status of counter 0 */
			status = inb(PITCTR0_PORT);

			/* Read the value left in the counter */
			found = inb(PITCTR0_PORT) | (inb(PITCTR0_PORT) << 8);

			if (microdata != 1)
				break;

			microdata = count;
		}

		/* verify that the counter began the count-down */
		if (status & (1 << PITSTAT_NULLCNT)) {
			/* microdata is too small */
			count = count << 1;

			/*
			 * If the cpu is so fast that it cannot load the
			 * counting element of the PIT with a very large
			 * value for the loop used in tenmicrosec, then
			 * the algorithm will not work for this cpu.
			 * It is very unlikely there will ever be such
			 * an x86.
			 */
			if (count > 0x100000000)
				panic("microfind: cpu is too fast");

			continue;
		}

		/* verify that the counter did not wrap around */
		if (status & (1 << PITSTAT_OUTPUT)) {
			/*
			 * microdata is too large. Even on a lowly AT there
			 * are loop counts small enough that the PIT would
			 * not wrap, so count will never decrease all the
			 * way to 1.
			 */
			count = count >> 1;
			continue;
		}

		/* mode 0 is an n + 1 counter */
		found = 0x10000 - found;
		if (found > min)
			break;

		/* verify that the cpu is slow enough to count to 0xf000 */
		count *= 0xf000;
		max = 0x100000001 * found;

		/*
		 * It is possible that at some point CPUs will become
		 * sufficiently fast such that the PIT will not be able to
		 * count to 0xf000 within the maximum loop count used in
		 * tenmicrosec. In that case the loop count in tenmicrosec
		 * may be set to the maximum value because it is unlikely
		 * that the cpu will be so fast that tenmicrosec with the
		 * maximum loop count will take more than ten microseconds.
		 * If the cpu is indeed too fast for the current
		 * implementation of tenmicrosec, then there is code below
		 * intended to catch that situation.
		 */
		if (count >= max) {
			/* cpu is fast, just make it count as high as it can */
			count = 0x100000000;
			min = 0;
			continue;
		}

		/*
		 * Count in the neighborhood of 0xf000 next time around.
		 * There is no risk of dividing by zero, since at this point
		 * found is in the range of 0x1 to 0xe000 (it is at most min).
		 */
		count = count / found;
	}

	/*
	 * The excerpt is truncated here; at minimum the interrupt state
	 * saved at entry must be restored before returning.
	 */
	restore_int_flag(s);		/* restore interrupt state */
}
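One pass through the count refinement, with invented numbers: if a guess of count = 0x2000 loops yields found = 0x3c00 PIT ticks, the next guess is 0x2000 * 0xf000 / 0x3c00 = 0x8000 loops, which should land close to the 0xf000-tick target on the following iteration.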
/*
 * Called by the master in the TSC sync operation (usually the boot CPU).
 * If the slave is discovered to have a skew, gethrtimef will be changed to
 * point to tsc_gethrtime_delta(). Calculating skews is precise only when
 * the master and slave TSCs are read simultaneously; however, no algorithm
 * can read both CPUs in perfect simultaneity. The algorithm used here is
 * an approximate method based on the behaviour of cache management. The
 * slave CPU continuously reads its TSC and then reads a global variable
 * which the master CPU updates. The moment the master's update becomes
 * visible to the slave (being forced out by an mfence operation), we use
 * the TSC reading taken on the slave. A corresponding TSC read is taken
 * on the master as soon as possible after finishing the mfence operation.
 * But the delay between causing the slave to notice the invalid cache line
 * and the completion of mfence is not repeatable. This error is
 * heuristically assumed to be 1/4th of the total write time, as measured
 * by the two TSC reads on the master sandwiching the mfence. Furthermore,
 * due to the nature of bus arbitration, contention on the memory bus, etc.,
 * the time taken for the write to become globally visible can vary a lot.
 * So instead of taking a single reading, a set of readings is taken and
 * the one with the least write time is chosen to calculate the final skew.
 *
 * TSC sync is disabled in the context of virtualization because the CPUs
 * assigned to the guest are virtual CPUs, which means the real CPUs on
 * which the guest runs keep changing during the lifetime of the guest OS.
 * We would otherwise end up calculating TSC skews for one set of CPUs
 * during boot, whereas the guest might migrate to a different set of
 * physical CPUs at a later point in time.
 */
void
tsc_sync_master(processorid_t slave)
{
	ulong_t flags, source, min_write_time = ~0UL;
	hrtime_t write_time, x, mtsc_after, tdelta;
	tsc_sync_t *tsc = tscp;
	int cnt;
	int hwtype;

	hwtype = get_hwenv();
	if (!tsc_master_slave_sync_needed || hwtype == HW_XEN_HVM ||
	    hwtype == HW_VMWARE)
		return;

	flags = clear_int_flag();
	source = CPU->cpu_id;

	for (cnt = 0; cnt < SYNC_ITERATIONS; cnt++) {
		while (tsc_sync_go != TSC_SYNC_GO)
			SMT_PAUSE();

		tsc->master_tsc = tsc_read();
		membar_enter();
		mtsc_after = tsc_read();
		while (tsc_sync_go != TSC_SYNC_DONE)
			SMT_PAUSE();
		write_time = mtsc_after - tsc->master_tsc;
		if (write_time <= min_write_time) {
			min_write_time = write_time;
			/*
			 * Apply heuristic adjustment only if the calculated
			 * delta is > 1/4th of the write time.
			 */
			x = tsc->slave_tsc - mtsc_after;
			if (x < 0)
				x = -x;
			if (x > (min_write_time/4))
				/*
				 * Subtract 1/4th of the measured write time
				 * from the master's TSC value, as an estimate
				 * of how late the mfence completion came
				 * after the slave noticed the cache line
				 * change.
				 */
				tdelta = tsc->slave_tsc -
				    (mtsc_after - (min_write_time/4));
			else
				tdelta = tsc->slave_tsc - mtsc_after;
			tsc_sync_tick_delta[slave] =
			    tsc_sync_tick_delta[source] - tdelta;
		}

		tsc->master_tsc = tsc->slave_tsc = write_time = 0;
		membar_enter();
		tsc_sync_go = TSC_SYNC_STOP;
	}
	if (tdelta < 0)
		tdelta = -tdelta;
	if (tdelta > largest_tsc_delta)
		largest_tsc_delta = tdelta;
	if (min_write_time < shortest_write_time)
		shortest_write_time = min_write_time;
	/*
	 * Enable the delta variants of the tsc functions if the largest of
	 * all chosen deltas is greater than the shortest write time.
	 */
	if (largest_tsc_delta > shortest_write_time) {
		gethrtimef = tsc_gethrtime_delta;
		gethrtimeunscaledf = tsc_gethrtimeunscaled_delta;
	}
	restore_int_flag(flags);
}
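A worked pass through the skew heuristic, with invented numbers: suppose the master reads master_tsc = 1000 and mtsc_after = 1040, so write_time = 40, and the slave recorded slave_tsc = 1100. Then |slave_tsc - mtsc_after| = 60 exceeds write_time / 4 = 10, so tdelta = 1100 - (1040 - 10) = 70: the slave's TSC is estimated to lead the master's by 70 ticks, and tsc_sync_tick_delta[slave] is set 70 ticks below the master's own delta.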
/*
 * This is similar to the above, but it cannot actually spin on hres_lock.
 * As a result, it caches all of the variables it needs; if the variables
 * don't change, it's done.
 */
hrtime_t
dtrace_gethrtime(void)
{
	uint32_t old_hres_lock;
	hrtime_t tsc, hrt;
	ulong_t flags;

	do {
		old_hres_lock = hres_lock;

		/*
		 * Interrupts are disabled to ensure that the thread isn't
		 * migrated between the tsc_read() and adding the CPU's
		 * TSC tick delta.
		 */
		flags = clear_int_flag();

		tsc = tsc_read();

		if (gethrtimef == tsc_gethrtime_delta)
			tsc += tsc_sync_tick_delta[CPU->cpu_id];

		restore_int_flag(flags);

		/*
		 * See the comments in tsc_gethrtime(), above.
		 */
		if (tsc >= tsc_last)
			tsc -= tsc_last;
		else if (tsc >= tsc_last - 2 * tsc_max_delta)
			tsc = 0;

		hrt = tsc_hrtime_base;

		TSC_CONVERT_AND_ADD(tsc, hrt, nsec_scale);

		if ((old_hres_lock & ~1) == hres_lock)
			break;

		/*
		 * If we're here, the clock lock is locked -- or it has been
		 * unlocked and locked since we looked.  This may be due to
		 * tsc_tick() running on another CPU -- or it may be because
		 * some code path has ended up in dtrace_probe() with
		 * CLOCK_LOCK held.  We'll try to determine that we're in
		 * the former case by taking another lap if the lock has
		 * changed since when we first looked at it.
		 */
		if (old_hres_lock != hres_lock)
			continue;

		/*
		 * So the lock was and is locked.  We'll use the old data
		 * instead.
		 */
		old_hres_lock = shadow_hres_lock;

		/*
		 * Again, disable interrupts to ensure that the thread
		 * isn't migrated between the tsc_read() and adding
		 * the CPU's TSC tick delta.
		 */
		flags = clear_int_flag();

		tsc = tsc_read();

		if (gethrtimef == tsc_gethrtime_delta)
			tsc += tsc_sync_tick_delta[CPU->cpu_id];

		restore_int_flag(flags);

		/*
		 * See the comments in tsc_gethrtime(), above.
		 */
		if (tsc >= shadow_tsc_last)
			tsc -= shadow_tsc_last;
		else if (tsc >= shadow_tsc_last - 2 * tsc_max_delta)
			tsc = 0;

		hrt = shadow_tsc_hrtime_base;

		TSC_CONVERT_AND_ADD(tsc, hrt, shadow_nsec_scale);
	} while ((old_hres_lock & ~1) != shadow_hres_lock);

	return (hrt);
}