Example #1
/*
 * update the percpu scd from the raw @now value
 *
 *  - filter out backward motion
 *  - use the GTOD tick value to create a window to filter crazy TSC values
 */
static u64 sched_clock_local(struct sched_clock_data *scd)
{
	u64 now, clock, old_clock, min_clock, max_clock;
	s64 delta;

again:
	now = sched_clock();
	delta = now - scd->tick_raw;
	if (unlikely(delta < 0))
		delta = 0;

	old_clock = scd->clock;

	/*
	 * scd->clock = clamp(scd->tick_gtod + delta,
	 *		      max(scd->tick_gtod, scd->clock),
	 *		      scd->tick_gtod + TICK_NSEC);
	 */

	clock = scd->tick_gtod + delta;
	min_clock = wrap_max(scd->tick_gtod, old_clock);
	max_clock = wrap_max(old_clock, scd->tick_gtod + TICK_NSEC);

	clock = wrap_max(clock, min_clock);
	clock = wrap_min(clock, max_clock);

	if (cmpxchg64(&scd->clock, old_clock, clock) != old_clock)
		goto again;

	return clock;
}
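The wrap_max()/wrap_min() helpers used above are not part of this excerpt. A minimal sketch consistent with how they are used here (min/max computed via a signed difference, so the comparison stays correct even if the u64 values wrap) would be:

static inline u64 wrap_min(u64 x, u64 y)
{
	return (s64)(x - y) < 0 ? x : y;
}

static inline u64 wrap_max(u64 x, u64 y)
{
	return (s64)(x - y) > 0 ? x : y;
}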
Example #2
static u64 sched_clock_remote(struct sched_clock_data *scd)
{
	struct sched_clock_data *my_scd = this_scd();
	u64 this_clock, remote_clock;
	u64 *ptr, old_val, val;

	sched_clock_local(my_scd);
again:
	this_clock = my_scd->clock;
	remote_clock = scd->clock;

	/*
	 * Use the opportunity that we have both locks
	 * taken to couple the two clocks: we take the
	 * larger time as the latest time for both
	 * runqueues. (this creates monotonic movement)
	 */
	if (likely((s64)(remote_clock - this_clock) < 0)) {
		ptr = &scd->clock;
		old_val = remote_clock;
		val = this_clock;
	} else {
		/*
		 * Should be rare, but possible:
		 */
		ptr = &my_scd->clock;
		old_val = this_clock;
		val = remote_clock;
	}

	if (cmpxchg64(ptr, old_val, val) != old_val)
		goto again;

	return val;
}
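The body above is the classic lock-free read-modify-write loop: read both clocks, pick the larger one, publish it with cmpxchg64, and retry if another CPU raced with the update. The same idiom, reduced to a stand-alone sketch (the helper name and the "advance to a maximum" semantics are illustrative, not taken from the kernel):

/* Advance *clock to at least @candidate without ever moving it backwards. */
static void clock_advance_to(u64 *clock, u64 candidate)
{
	u64 old;

	do {
		old = READ_ONCE(*clock);
		/* nothing to publish if the clock is already at least as large */
		if ((s64)(candidate - old) <= 0)
			return;
	} while (cmpxchg64(clock, old, candidate) != old);
}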
#ifdef CONFIG_COUNT_GPU_BLOCKING_TIME
static unsigned int cpufreq_get_load(struct cpufreq_policy *policy,
			unsigned int cpu, unsigned int *gpu_block_load)
{
	u64 delta_gpu_block_time;
	u64 tmp_block_start;
#else
static unsigned int cpufreq_get_load(struct cpufreq_policy *policy,
				     unsigned int cpu)
{
#endif
	u64 tsc;
	u64 total_active_tsc = 0;
	u64 delta_tsc, delta_active_tsc;
	u64 load;
	u64 tmp;
	unsigned int j;
	struct per_cpu_t *this_cpu;
	struct per_cpu_t *pcpu;
	struct per_physical_core_t *pphycore = NULL;
	int phycore_id;
	u64 *phycore_start;

	phycore_id = phy_core_id(cpu);
	pphycore = &per_cpu(pphycore_counts, phycore_id);
	phycore_start = &(pphycore->active_start_tsc);
	this_cpu = &per_cpu(pcpu_counts, cpu);
	rdtscll(tsc);
	delta_tsc = tsc - this_cpu->tsc;

	/*
	 * If this sampling happens at the same time that all logical cores
	 * enter idle, they may race to update the active tsc and the shared
	 * active_start_tsc. To resolve this, we use cmpxchg so that only one
	 * of them performs the update.
	 */
	tmp = *phycore_start;
	if (!phy_core_idle(pphycore->busy_mask)) {
		if (tmp == *phycore_start && tmp ==
		    cmpxchg64(phycore_start, tmp, tsc)) {
			if (tsc > tmp)
				this_cpu->active_tsc += tsc - tmp;
			pphycore->accum_flag = 1;
		}
	}

	/*
	 * To compute the load of the physical core, sum the active time
	 * accumulated by all siblings in that physical core.
	 */
	for_each_cpu(j, cpu_sibling_mask(cpu)) {
		pcpu = &per_cpu(pcpu_counts, j);
		total_active_tsc += pcpu->active_tsc;
	}
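The cpufreq_get_load() excerpt is truncated here, before the summed active time is turned into a load value. Purely as a self-contained illustration of the usual final step (the helper below is ours, not the original driver code), the load is the active TSC delta expressed as a percentage of the wall-clock TSC delta for the sampling window:

/* Illustrative helper (assumption, not from the original driver). */
static unsigned int tsc_deltas_to_load(u64 delta_active_tsc, u64 delta_tsc)
{
	if (!delta_tsc)
		return 0;
	/* clamp in case the active delta overshoots the sampling window */
	if (delta_active_tsc > delta_tsc)
		delta_active_tsc = delta_tsc;
	return (unsigned int)div64_u64(delta_active_tsc * 100, delta_tsc);
}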
Example #5
static u64 sched_clock_remote(struct sched_clock_data *scd)
{
	struct sched_clock_data *my_scd = this_scd();
	u64 this_clock, remote_clock;
	u64 *ptr, old_val, val;

#if BITS_PER_LONG != 64
again:
	/*
	 * Careful here: the local and the remote clock values need to
	 * be read out atomically as we need to compare the values and
	 * then update either the local or the remote side. So the
	 * cmpxchg64 below only protects one readout.
	 *
	 * We must reread via sched_clock_local() in the retry case on
	 * 32bit as an NMI could use sched_clock_local() via the
	 * tracer and hit between the readout of the low 32bit and the
	 * high 32bit portion.
	 */
	this_clock = sched_clock_local(my_scd);
	/*
	 * We must enforce an atomic readout on 32bit, otherwise the
	 * update on the remote CPU can hit in between the readout of
	 * the low 32bit and the high 32bit portion.
	 */
	remote_clock = cmpxchg64(&scd->clock, 0, 0);
#else
	/*
	 * On 64bit the read of [my]scd->clock is atomic versus the
	 * update, so we can avoid the above 32bit dance.
	 */
	sched_clock_local(my_scd);
again:
	this_clock = my_scd->clock;
	remote_clock = scd->clock;
#endif

	/*
	 * Use the opportunity that we have both locks
	 * taken to couple the two clocks: we take the
	 * larger time as the latest time for both
	 * runqueues. (this creates monotonic movement)
	 */
	if (likely((s64)(remote_clock - this_clock) < 0)) {
		ptr = &scd->clock;
		old_val = remote_clock;
		val = this_clock;
	} else {
		/*
		 * Should be rare, but possible:
		 */
		ptr = &my_scd->clock;
		old_val = this_clock;
		val = remote_clock;
	}

	if (cmpxchg64(ptr, old_val, val) != old_val)
		goto again;

	return val;
}
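The cmpxchg64(&scd->clock, 0, 0) readout above deserves a note: on 32bit a plain u64 load is two separate 32bit loads, so a concurrent writer can be observed half-updated, while a compare-and-exchange always accesses the full 64 bits atomically. When the comparison fails it stores nothing and simply returns the current value; when it "succeeds" (the clock happened to be 0) it re-stores the 0 that was already there. Expressed as a wrapper (the helper name is ours, not the kernel's):

/* Illustrative wrapper: atomic 64bit read on 32bit via compare-and-exchange. */
static inline u64 read_u64_atomic(u64 *ptr)
{
	/* Either stores nothing or re-stores the 0 that is already there. */
	return cmpxchg64(ptr, 0, 0);
}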
/*
 * Update the active time when a CPU enters or exits idle. A physical core is
 * active when at least one of its logical cores is active, and idle when all
 * of its logical cores are idle. The physical core's active tsc is therefore
 * the period between the first logical core exiting idle and the moment all
 * the logical cores have entered idle, and it is accounted when the last
 * logical core enters idle. The physical core active time is used to compute
 * the load of the physical core.
 */
void update_cpu_active_tsc(int cpu, int enter_idle)
{
	u64 tsc;
	u64 old;
	struct per_cpu_t *pcpu = NULL;
	struct per_physical_core_t *pphycore = NULL;
	int phycore_id;
	u64 *phycore_start;

	phycore_id = phy_core_id(cpu);
	pphycore = &per_cpu(pphycore_counts, phycore_id);
	phycore_start = &(pphycore->active_start_tsc);

	if (enter_idle) {
		/* read the current TSC */
		rdtscll(tsc);
		old = *phycore_start;
		/* mark this cpu as idle in its physical core's busy mask */
		set_cpu_idle(cpu, &(pphycore->busy_mask));
		/*
		 * Update the active time only when all the siblings in the
		 * physical core are idle. To avoid simultaneous access by
		 * siblings, we use cmpxchg here.
		 */
		if (phy_core_idle(pphycore->busy_mask)) {
			if (old == *phycore_start &&
			    old == cmpxchg64(phycore_start, old, tsc)) {
				pcpu = &per_cpu(pcpu_counts, cpu);
				if (tsc > old)
					pcpu->active_tsc += tsc - old;
#ifdef CONFIG_COUNT_GPU_BLOCKING_TIME
				/*
				 * If the current physical core is blocked on
				 * the GPU, record the idle-entry tsc as the
				 * start of a GPU-blocked time slice.
				 */
				if (atomic_read(&pphycore->wait_for_gpu_count)
				    > 0)
					pphycore->gpu_block_start_tsc = tsc;
#endif
				pphycore->accum_flag = 1;
			}
		}
	} else { /* exit idle */
		/*
		 * A new active period starts when the first core exits idle.
		 * We use cmpxchg to avoid conflicts between siblings.
		 */
		rdtscll(tsc);
		if (phy_core_idle(pphycore->busy_mask)) {
			if (1 == cmpxchg64(&(pphycore->accum_flag), 1, 0)) {
				*phycore_start = tsc;
#ifdef CONFIG_COUNT_GPU_BLOCKING_TIME
				if (atomic_read(&pphycore->wait_for_gpu_count)
				    > 0 && pphycore->gpu_block_start_tsc > 0)
					pphycore->gpu_block_time += tsc -
						pphycore->gpu_block_start_tsc;
				pphycore->gpu_block_start_tsc = 0;
#endif
			}
		}
		/* the cpu is exiting idle, so mark it as active in the busy mask */
		set_cpu_busy(cpu, &(pphycore->busy_mask));
	}
}
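For reference, the per-CPU bookkeeping that cpufreq_get_load() and update_cpu_active_tsc() operate on is not defined anywhere in this excerpt. The sketch below is reconstructed only from the fields the code touches; the field types (in particular busy_mask) and the per-CPU declarations are assumptions, not the original definitions:

struct per_cpu_t {
	u64 tsc;		/* TSC at the previous load sample */
	u64 active_tsc;		/* active time accumulated for the physical core */
};

struct per_physical_core_t {
	u64 active_start_tsc;	/* TSC when the physical core last became active */
	u64 busy_mask;		/* per-sibling busy/idle state; all clear == core idle */
	u64 accum_flag;		/* 1 once the current active period has been accounted */
#ifdef CONFIG_COUNT_GPU_BLOCKING_TIME
	atomic_t wait_for_gpu_count;	/* tasks on this core currently blocked on the GPU */
	u64 gpu_block_start_tsc;	/* TSC when the current GPU-blocked slice began */
	u64 gpu_block_time;		/* accumulated time spent blocked on the GPU */
#endif
};

static DEFINE_PER_CPU(struct per_cpu_t, pcpu_counts);
static DEFINE_PER_CPU(struct per_physical_core_t, pphycore_counts);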