Example #1
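Most of the examples on this page rely on the Linux fls64() semantics: the function returns the 1-based position of the most significant set bit of a 64-bit value, and 0 when no bit is set, so the recurring idiom fls64(mask) - 1 yields the index of the highest pending bit. (Examples #13 and #26 exercise a different fls64() variant that reports the zero-based index through an out-parameter and returns whether any bit was set.) For reference, a minimal portable sketch of the kernel semantics; the real implementations are optimized and architecture-specific:

/* Sketch of fls64(): 1-based position of the most significant set bit,
 * or 0 if the value is 0.  For illustration only; the kernel provides
 * per-architecture optimized versions. */
static inline int fls64_sketch(unsigned long long x)
{
	int r = 0;

	while (x) {
		x >>= 1;
		r++;
	}
	return r;
}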
asmlinkage void plat_irq_dispatch(struct pt_regs *regs)
{
	unsigned int pending;

#ifdef CONFIG_SIBYTE_BCM1480_PROF
	/* Set compare to count to silence count/compare timer interrupts */
	write_c0_compare(read_c0_count());
#endif

	pending = read_c0_cause() & read_c0_status();

#ifdef CONFIG_SIBYTE_BCM1480_PROF
	if (pending & CAUSEF_IP7)	/* Cpu performance counter interrupt */
		sbprof_cpu_intr(exception_epc(regs));
	else
#endif

	if (pending & CAUSEF_IP4)
		bcm1480_timer_interrupt(regs);

#ifdef CONFIG_SMP
	else if (pending & CAUSEF_IP3)
		bcm1480_mailbox_interrupt(regs);
#endif

#ifdef CONFIG_KGDB
	else if (pending & CAUSEF_IP6)
		bcm1480_kgdb_interrupt(regs);		/* KGDB (uart 1) */
#endif

	else if (pending & CAUSEF_IP2) {
		unsigned long long mask_h, mask_l;
		unsigned long base;

		/*
		 * Default...we've hit an IP[2] interrupt, which means we've
		 * got to check the 1480 interrupt registers to figure out what
		 * to do.  Need to detect which CPU we're on, now that
		 * smp_affinity is supported.
		 */
		base = A_BCM1480_IMR_MAPPER(smp_processor_id());
		mask_h = __raw_readq(
			IOADDR(base + R_BCM1480_IMR_INTERRUPT_STATUS_BASE_H));
		mask_l = __raw_readq(
			IOADDR(base + R_BCM1480_IMR_INTERRUPT_STATUS_BASE_L));

		if (mask_h) {
			if (mask_h ^ 1)
				do_IRQ(fls64(mask_h) - 1, regs);
			else
				do_IRQ(63 + fls64(mask_l), regs);
		}
	}
}
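The mask_h ^ 1 test is a compact mask_h != 1: when bit 0 is the only bit set in the high status word, the handler dispatches from the low word instead, and the 63 + fls64(mask_l) offset maps low-word bits onto IRQ numbers 64 and above.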
Example #2
void kbase_js_affinity_release_slot_cores(kbase_device *kbdev, int js, u64 affinity)
{
	kbasep_js_device_data *js_devdata;
	u64 cores;

	KBASE_DEBUG_ASSERT(kbdev != NULL);
	KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS);
	js_devdata = &kbdev->js_data;

	cores = affinity;
	while (cores) {
		int bitnum = fls64(cores) - 1;
		u64 bit = 1ULL << bitnum;
		s8 cnt;

		KBASE_DEBUG_ASSERT(js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum] > 0);

		cnt = --(js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum]);

		if (0 == cnt)
			js_devdata->runpool_irq.slot_affinities[js] &= ~bit;

		cores &= ~bit;
	}

}
Example #3
/*
 * Given the xsave area and a state inside, this function returns the
 * address of the state.
 *
 * This is the API that is called to get xstate address in either
 * standard format or compacted format of xsave area.
 *
 * Note that if there is no data for the field in the xsave buffer
 * this will return NULL.
 *
 * Inputs:
 *	xstate: the thread's storage area for all FPU data
 *	xstate_feature: state which is defined in xsave.h (e.g.
 *	XSTATE_FP, XSTATE_SSE, etc...)
 * Output:
 *	address of the state in the xsave area, or NULL if the
 *	field is not present in the xsave buffer.
 */
void *get_xsave_addr(struct xregs_state *xsave, int xstate_feature)
{
	int feature_nr = fls64(xstate_feature) - 1;
	/*
	 * Do we even *have* xsave state?
	 */
	if (!boot_cpu_has(X86_FEATURE_XSAVE))
		return NULL;

	xsave = &current->thread.fpu.state.xsave;
	/*
	 * We should not ever be requesting features that we
	 * have not enabled.  Remember that pcntxt_mask is
	 * what we write to the XCR0 register.
	 */
	WARN_ONCE(!(xfeatures_mask & xstate_feature),
		  "get of unsupported state");
	/*
	 * This assumes the last 'xsave*' instruction to
	 * have requested that 'xstate_feature' be saved.
	 * If it did not, we might be seeing an old value
	 * of the field in the buffer.
	 *
	 * This can happen because the last 'xsave' did not
	 * request that this feature be saved (unlikely)
	 * or because the "init optimization" caused it
	 * to not be saved.
	 */
	if (!(xsave->header.xfeatures & xstate_feature))
		return NULL;

	return (void *)xsave + xstate_comp_offsets[feature_nr];
}
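Since xstate_feature is a single-bit mask, fls64(xstate_feature) - 1 recovers the feature's bit number, which is then used to index xstate_comp_offsets.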
Example #4
/*
 * Return whether the system supports a given xfeature.
 *
 * Also return the name of the (most advanced) feature that the caller requested:
 */
int cpu_has_xfeatures(u64 xfeatures_needed, const char **feature_name)
{
	u64 xfeatures_missing = xfeatures_needed & ~xfeatures_mask;

	if (unlikely(feature_name)) {
		long xfeature_idx, max_idx;
		u64 xfeatures_print;
		/*
		 * So we use FLS here to be able to print the most advanced
		 * feature that was requested but is missing. So if a driver
		 * asks about "XSTATE_SSE | XSTATE_YMM" we'll print the
		 * missing AVX feature - this is the most informative message
		 * to users:
		 */
		if (xfeatures_missing)
			xfeatures_print = xfeatures_missing;
		else
			xfeatures_print = xfeatures_needed;

		xfeature_idx = fls64(xfeatures_print)-1;
		max_idx = ARRAY_SIZE(xfeature_names)-1;
		xfeature_idx = min(xfeature_idx, max_idx);

		*feature_name = xfeature_names[xfeature_idx];
	}

	if (xfeatures_missing)
		return 0;

	return 1;
}
Example #5
void opal_handle_events(void)
{
	__be64 events = 0;
	u64 e;

	e = READ_ONCE(last_outstanding_events) & opal_event_irqchip.mask;
again:
	while (e) {
		int virq, hwirq;

		hwirq = fls64(e) - 1;
		e &= ~BIT_ULL(hwirq);

		local_irq_disable();
		virq = irq_find_mapping(opal_event_irqchip.domain, hwirq);
		if (virq) {
			irq_enter();
			generic_handle_irq(virq);
			irq_exit();
		}
		local_irq_enable();

		cond_resched();
	}
	last_outstanding_events = 0;
	if (opal_poll_events(&events) != OPAL_SUCCESS)
		return;
	e = be64_to_cpu(events) & opal_event_irqchip.mask;
	if (e)
		goto again;
}
Example #6
void kbase_js_affinity_retain_slot_cores(kbase_device *kbdev, int js, u64 affinity)
{
	kbasep_js_device_data *js_devdata;
	u64 cores;

	KBASE_DEBUG_ASSERT(kbdev != NULL);
	KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS);
	js_devdata = &kbdev->js_data;

	KBASE_DEBUG_ASSERT(kbase_js_affinity_would_violate(kbdev, js, affinity) == MALI_FALSE);

	cores = affinity;
	while (cores) {
		int bitnum = fls64(cores) - 1;
		u64 bit = 1ULL << bitnum;
		s8 cnt;

		KBASE_DEBUG_ASSERT(js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum] < BASE_JM_SUBMIT_SLOTS);

		cnt = ++(js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum]);

		if (cnt == 1)
			js_devdata->runpool_irq.slot_affinities[js] |= bit;

		cores &= ~bit;
	}

}
Example #7
void kbase_pm_request_cores(kbase_device *kbdev, mali_bool tiler_required, u64 shader_cores)
{
	unsigned long flags;
	u64 cores;

	kbase_pm_change_state change_gpu_state = 0u;

	KBASE_DEBUG_ASSERT(kbdev != NULL);

	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);

	cores = shader_cores;
	while (cores) {
		int bitnum = fls64(cores) - 1;
		u64 bit = 1ULL << bitnum;

		/* It should be almost impossible for this to overflow. It would require 2^32 atoms
		 * to request a particular core, which would require 2^24 contexts to submit. This 
		 * would require an amount of memory that is impossible on a 32-bit system and 
		 * extremely unlikely on a 64-bit system. */
		int cnt = ++kbdev->shader_needed_cnt[bitnum];

		if (1 == cnt) {
			kbdev->shader_needed_bitmap |= bit;
			change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER;
		}

		cores &= ~bit;
	}

	if (tiler_required != MALI_FALSE) {
		++kbdev->tiler_needed_cnt;

		KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt != 0);

		/* For tiler jobs, we must make sure that core 0 is not turned off if it's already on.
		 * However, it's safe for core 0 to be left off and turned on later whilst a tiler job
		 * is running. Hence, we don't need to update the cores state immediately. Also,
		 * attempts to turn off cores will always check the tiler_needed/inuse state first anyway.
		 *
		 * Finally, kbase_js_choose_affinity() ensures core 0 is always requested for tiler jobs
		 * anyway. Hence when there's only a tiler job in the system, this will still cause
		 * kbase_pm_update_cores_state_nolock() to be called.
		 *
		 * Note that we still need to keep track of tiler_needed/inuse_cnt, to ensure that
		 * kbase_pm_update_cores_state_nolock() can override the core availability policy and
		 * force core 0 to be powered when a tiler job is in the system. */
	}

	if (change_gpu_state) {
		KBASE_TRACE_ADD(kbdev, PM_REQUEST_CHANGE_SHADER_NEEDED, NULL, NULL, 0u, (u32) kbdev->shader_needed_bitmap);

		kbase_timeline_pm_cores_func(kbdev, KBASE_PM_FUNC_ID_REQUEST_CORES_START, change_gpu_state);
		kbase_pm_update_cores_state_nolock(kbdev);
		kbase_timeline_pm_cores_func(kbdev, KBASE_PM_FUNC_ID_REQUEST_CORES_END, change_gpu_state);
	}

	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
}
Example #8
asmlinkage void plat_irq_dispatch(void)
{
	unsigned int pending;

#ifdef CONFIG_SIBYTE_SB1250_PROF
	/* Set compare to count to silence count/compare timer interrupts */
	write_c0_compare(read_c0_count());
#endif

	/*
	 * What a pain. We have to be really careful saving the upper 32 bits
	 * of any register across function calls if we don't want them
	 * trashed--since we're running in -o32, the calling routine never saves
	 * the full 64 bits of a register across a function call.  Being the
	 * interrupt handler, we're guaranteed that interrupts are disabled
	 * during this code so we don't have to worry about random interrupts
	 * blasting the high 32 bits.
	 */

	pending = read_c0_cause() & read_c0_status() & ST0_IM;

#ifdef CONFIG_SIBYTE_SB1250_PROF
	if (pending & CAUSEF_IP7) /* Cpu performance counter interrupt */
		sbprof_cpu_intr();
	else
#endif

	if (pending & CAUSEF_IP4)
		sb1250_timer_interrupt();

#ifdef CONFIG_SMP
	else if (pending & CAUSEF_IP3)
		sb1250_mailbox_interrupt();
#endif

#ifdef CONFIG_KGDB
	else if (pending & CAUSEF_IP6)			/* KGDB (uart 1) */
		sb1250_kgdb_interrupt();
#endif

	else if (pending & CAUSEF_IP2) {
		unsigned long long mask;

		/*
		 * Default...we've hit an IP[2] interrupt, which means we've
		 * got to check the 1250 interrupt registers to figure out what
		 * to do.  Need to detect which CPU we're on, now that
		 * smp_affinity is supported.
		 */
		mask = __raw_readq(IOADDR(A_IMR_REGISTER(smp_processor_id(),
		                              R_IMR_INTERRUPT_STATUS_BASE)));
		if (mask)
			do_IRQ(fls64(mask) - 1);
		else
			spurious_interrupt();
	} else
		spurious_interrupt();
}
Example #9
File: direct.c  Project: avagin/linux
u64 dma_direct_get_required_mask(struct device *dev)
{
	u64 max_dma = phys_to_dma_direct(dev, (max_pfn - 1) << PAGE_SHIFT);

	if (dev->bus_dma_mask && dev->bus_dma_mask < max_dma)
		max_dma = dev->bus_dma_mask;

	return (1ULL << (fls64(max_dma) - 1)) * 2 - 1;
}
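The expression (1ULL << (fls64(max_dma) - 1)) * 2 - 1 widens the highest reachable DMA address into an all-ones mask that covers its top bit. A small standalone check of the arithmetic, using __builtin_clzll in place of the kernel's fls64() and an arbitrary example address:

/* Standalone check of the mask arithmetic above.  0x23456789 is an
 * arbitrary example address; __builtin_clzll stands in for fls64(). */
#include <stdio.h>

int main(void)
{
	unsigned long long max_dma = 0x23456789ULL;
	int top = 63 - __builtin_clzll(max_dma);	/* == fls64(max_dma) - 1 */
	unsigned long long mask = (1ULL << top) * 2 - 1;

	printf("required mask: %#llx\n", mask);		/* prints 0x3fffffff */
	return 0;
}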
Example #10
static void recover_bitmaps(struct md_thread *thread)
{
	struct mddev *mddev = thread->mddev;
	struct md_cluster_info *cinfo = mddev->cluster_info;
	struct dlm_lock_resource *bm_lockres;
	char str[64];
	int slot, ret;
	struct suspend_info *s, *tmp;
	sector_t lo, hi;

	while (cinfo->recovery_map) {
		slot = fls64((u64)cinfo->recovery_map) - 1;

		/* Clear suspend_area associated with the bitmap */
		spin_lock_irq(&cinfo->suspend_lock);
		list_for_each_entry_safe(s, tmp, &cinfo->suspend_list, list)
			if (slot == s->slot) {
				list_del(&s->list);
				kfree(s);
			}
		spin_unlock_irq(&cinfo->suspend_lock);

		snprintf(str, 64, "bitmap%04d", slot);
		bm_lockres = lockres_init(mddev, str, NULL, 1);
		if (!bm_lockres) {
			pr_err("md-cluster: Cannot initialize bitmaps\n");
			goto clear_bit;
		}

		ret = dlm_lock_sync(bm_lockres, DLM_LOCK_PW);
		if (ret) {
			pr_err("md-cluster: Could not DLM lock %s: %d\n",
					str, ret);
			goto clear_bit;
		}
		ret = bitmap_copy_from_slot(mddev, slot, &lo, &hi, true);
		if (ret) {
			pr_err("md-cluster: Could not copy data from bitmap %d\n", slot);
			goto dlm_unlock;
		}
		if (hi > 0) {
			if (lo < mddev->recovery_cp)
				mddev->recovery_cp = lo;
			/* wake up thread to continue resync in case resync
			 * is not finished */
			if (mddev->recovery_cp != MaxSector) {
			    set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
			    md_wakeup_thread(mddev->thread);
			}
		}
dlm_unlock:
		dlm_unlock_sync(bm_lockres);
clear_bit:
		lockres_free(bm_lockres);
		clear_bit(slot, &cinfo->recovery_map);
	}
}
Example #11
int main(int argc, char *argv[])
{
	unsigned int i, j;

	plan_tests(64 * 64 + 2);

	ok1(fls64(0) == 0);
	ok1(dumb_fls(0) == 0);

	for (i = 0; i < 64; i++) {
		for (j = 0; j < 64; j++) {
			uint64_t val = (1ULL << i) | (1ULL << j);
			ok(fls64(val) == dumb_fls(val),
			   "%llu -> %u should be %u", (long long)val,
			   fls64(val), dumb_fls(val));
		}
	}
	return exit_status();
}
Example #12
File: fork.c  Project: 19Dan01/linux
/*
 * set_max_threads
 */
static void set_max_threads(unsigned int max_threads_suggested)
{
	u64 threads;

	/*
	 * The number of threads shall be limited such that the thread
	 * structures may only consume a small part of the available memory.
	 */
	if (fls64(totalram_pages) + fls64(PAGE_SIZE) > 64)
		threads = MAX_THREADS;
	else
		threads = div64_u64((u64) totalram_pages * (u64) PAGE_SIZE,
				    (u64) THREAD_SIZE * 8UL);

	if (threads > max_threads_suggested)
		threads = max_threads_suggested;

	max_threads = clamp_t(u64, threads, MIN_THREADS, MAX_THREADS);
}
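The fls64() test is a cheap overflow guard: when the bit widths of totalram_pages and PAGE_SIZE sum to at most 64, their product cannot overflow a u64, so the division is only attempted when the product is known to fit.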
Example #13
TEST(BitopsTest, FlsNonzero) {
  int64_t out;
  int64_t one = 1;
  bool res;
  for (int i = 0; i < 63; i++) {
    res = fls64(one << i, out);
    EXPECT_TRUE(res);
    EXPECT_EQ(i, out);
  }
}
Example #14
asmlinkage void plat_irq_dispatch(void)
{
	const unsigned long core_id = cvmx_get_core_num();
	const uint64_t ciu_sum0_address = CVMX_CIU_INTX_SUM0(core_id * 2);
	const uint64_t ciu_en0_address = CVMX_CIU_INTX_EN0(core_id * 2);
	const uint64_t ciu_sum1_address = CVMX_CIU_INT_SUM1;
	const uint64_t ciu_en1_address = CVMX_CIU_INTX_EN1(core_id * 2 + 1);
	unsigned long cop0_cause;
	unsigned long cop0_status;
	uint64_t ciu_en;
	uint64_t ciu_sum;

	while (1) {
		cop0_cause = read_c0_cause();
		cop0_status = read_c0_status();
		cop0_cause &= cop0_status;
		cop0_cause &= ST0_IM;

		if (unlikely(cop0_cause & STATUSF_IP2)) {
			ciu_sum = cvmx_read_csr(ciu_sum0_address);
			ciu_en = cvmx_read_csr(ciu_en0_address);
			ciu_sum &= ciu_en;
			if (likely(ciu_sum))
				do_IRQ(fls64(ciu_sum) + OCTEON_IRQ_WORKQ0 - 1);
			else
				spurious_interrupt();
		} else if (unlikely(cop0_cause & STATUSF_IP3)) {
			ciu_sum = cvmx_read_csr(ciu_sum1_address);
			ciu_en = cvmx_read_csr(ciu_en1_address);
			ciu_sum &= ciu_en;
			if (likely(ciu_sum))
				do_IRQ(fls64(ciu_sum) + OCTEON_IRQ_WDOG0 - 1);
			else
				spurious_interrupt();
		} else if (likely(cop0_cause)) {
			do_IRQ(fls(cop0_cause) - 9 + MIPS_CPU_IRQ_BASE);
		} else {
			break;
		}
	}
}
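In the final branch, fls(cop0_cause) - 9 maps the CAUSE IP bits, which start at bit 8, onto consecutive CPU IRQ numbers from MIPS_CPU_IRQ_BASE upward.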
Example #15
void kbase_pm_unrequest_cores(kbase_device *kbdev, mali_bool tiler_required, u64 shader_cores)
{
	unsigned long flags;

	kbase_pm_change_state change_gpu_state = 0u;

	KBASE_DEBUG_ASSERT(kbdev != NULL);

	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);

	while (shader_cores) {
		int bitnum = fls64(shader_cores) - 1;
		u64 bit = 1ULL << bitnum;
		int cnt;

		KBASE_DEBUG_ASSERT(kbdev->shader_needed_cnt[bitnum] > 0);

		cnt = --kbdev->shader_needed_cnt[bitnum];

		if (0 == cnt) {
			kbdev->shader_needed_bitmap &= ~bit;

			change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER;
		}

		shader_cores &= ~bit;
	}

	if (tiler_required != MALI_FALSE) {
		KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt > 0);

		--kbdev->tiler_needed_cnt;

		/* Whilst tiler jobs must not allow core 0 to be turned off, we don't need to make an
		 * extra call to kbase_pm_update_cores_state_nolock() to ensure core 0 is turned off
		 * when the last tiler job unrequests cores: kbase_js_choose_affinity() ensures core 0
		 * was originally requested for tiler jobs. Hence when there's only a tiler job in the
		 * system, this will still cause kbase_pm_update_cores_state_nolock() to be called. */
	}

	if (change_gpu_state) {
		KBASE_TRACE_ADD(kbdev, PM_UNREQUEST_CHANGE_SHADER_NEEDED, NULL, NULL, 0u, (u32) kbdev->shader_needed_bitmap);

		kbase_pm_update_cores_state_nolock(kbdev);

		/* Trace that any state change effectively completes immediately -
		 * no-one will wait on the state change */
		kbase_pm_trace_check_and_finish_state_change(kbdev);
	}

	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
}
Example #16
void kbase_pm_release_cores(kbase_device *kbdev, mali_bool tiler_required, u64 shader_cores)
{
	unsigned long flags;
	kbase_pm_change_state change_gpu_state = 0u;

	KBASE_DEBUG_ASSERT(kbdev != NULL);

	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);

	while (shader_cores) {
		int bitnum = fls64(shader_cores) - 1;
		u64 bit = 1ULL << bitnum;
		int cnt;

		KBASE_DEBUG_ASSERT(kbdev->shader_inuse_cnt[bitnum] > 0);

		cnt = --kbdev->shader_inuse_cnt[bitnum];

		if (0 == cnt) {
			kbdev->shader_inuse_bitmap &= ~bit;
			change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER;
		}

		shader_cores &= ~bit;
	}

	if (tiler_required != MALI_FALSE) {
		KBASE_DEBUG_ASSERT(kbdev->tiler_inuse_cnt > 0);

		--kbdev->tiler_inuse_cnt;

		/* Whilst tiler jobs must not allow core 0 to be turned off, we don't need to make an
		 * extra call to kbase_pm_update_cores_state_nolock() to ensure core 0 is turned off
		 * when the last tiler job finishes: kbase_js_choose_affinity() ensures core 0 was
		 * originally requested for tiler jobs. Hence when there's only a tiler job in the
		 * system, this will still cause kbase_pm_update_cores_state_nolock() to be called */
	}

	if (change_gpu_state) {
		KBASE_TRACE_ADD(kbdev, PM_RELEASE_CHANGE_SHADER_INUSE, NULL, NULL, 0u, (u32) kbdev->shader_inuse_bitmap);

		kbase_timeline_pm_cores_func(kbdev, KBASE_PM_FUNC_ID_RELEASE_CORES_START, change_gpu_state);
		kbase_pm_update_cores_state_nolock(kbdev);
		kbase_timeline_pm_cores_func(kbdev, KBASE_PM_FUNC_ID_RELEASE_CORES_END, change_gpu_state);

		/* Trace that any state change completed immediately */
		kbase_pm_trace_check_and_finish_state_change(kbdev);
	}

	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
}
Example #17
static u64 swiotlb_powerpc_get_required(struct device *dev)
{
	u64 end, mask, max_direct_dma_addr = dev->archdata.max_direct_dma_addr;

	end = memblock_end_of_DRAM();
	if (max_direct_dma_addr && end > max_direct_dma_addr)
		end = max_direct_dma_addr;
	end += get_dma_offset(dev);

	mask = 1ULL << (fls64(end) - 1);
	mask += mask - 1;

	return mask;
}
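This is the same rounding idiom as in Example #9: 1ULL << (fls64(end) - 1) isolates the top set bit of the highest DMA address, and mask += mask - 1 extends it into an all-ones mask covering every lower bit as well.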
Example #18
File: irq.c  Project: CSCLOG/beaglebone
static inline void dispatch_ip2(void)
{
	unsigned int cpu = smp_processor_id();
	unsigned long long mask;

	/*
	 * Default...we've hit an IP[2] interrupt, which means we've got to
	 * check the 1250 interrupt registers to figure out what to do.  Need
	 * to detect which CPU we're on, now that smp_affinity is supported.
	 */
	mask = __raw_readq(IOADDR(A_IMR_REGISTER(cpu,
				  R_IMR_INTERRUPT_STATUS_BASE)));
	if (mask)
		do_IRQ(fls64(mask) - 1);
}
Example #19
File: tnum.c  Project: AlexShiLucky/linux
struct tnum tnum_range(u64 min, u64 max)
{
	u64 chi = min ^ max, delta;
	u8 bits = fls64(chi);

	/* special case, needed because 1ULL << 64 is undefined */
	if (bits > 63)
		return tnum_unknown;
	/* e.g. if chi = 4, bits = 3, delta = (1<<3) - 1 = 7.
	 * if chi = 0, bits = 0, delta = (1<<0) - 1 = 0, so we return
	 *  constant min (since min == max).
	 */
	delta = (1ULL << bits) - 1;
	return TNUM(min & ~delta, delta);
}
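For example, tnum_range(4, 7) computes chi = 4 ^ 7 = 3, bits = 2 and delta = 3, and returns TNUM(4, 3): the top bit is known to be 1 while the low two bits are unknown (0b1xx).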
Example #20
/*
 * Record the offsets and sizes of various xstates contained
 * in the XSAVE state memory layout.
 *
 * ( Note that certain features might be non-present, for them
 *   we'll have 0 offset and 0 size. )
 */
static void __init setup_xstate_features(void)
{
	u32 eax, ebx, ecx, edx, leaf;

	xfeatures_nr = fls64(xfeatures_mask);

	for (leaf = 2; leaf < xfeatures_nr; leaf++) {
		cpuid_count(XSTATE_CPUID, leaf, &eax, &ebx, &ecx, &edx);

		xstate_offsets[leaf] = ebx;
		xstate_sizes[leaf] = eax;

		printk(KERN_INFO "x86/fpu: xstate_offset[%d]: %04x, xstate_sizes[%d]: %04x\n", leaf, ebx, leaf, eax);
	}
}
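Here fls64(xfeatures_mask) is one past the bit number of the highest enabled xstate feature, so the loop over leaves 2 .. xfeatures_nr - 1 enumerates every non-legacy state component.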
Example #21
static inline void dispatch_ip2(void)
{
	unsigned long long mask_h, mask_l;
	unsigned int cpu = smp_processor_id();
	unsigned long base;

	/*
	 * Default...we've hit an IP[2] interrupt, which means we've got to
	 * check the 1480 interrupt registers to figure out what to do.  Need
	 * to detect which CPU we're on, now that smp_affinity is supported.
	 */
	base = A_BCM1480_IMR_MAPPER(cpu);
	mask_h = __raw_readq(
		IOADDR(base + R_BCM1480_IMR_INTERRUPT_STATUS_BASE_H));
	mask_l = __raw_readq(
		IOADDR(base + R_BCM1480_IMR_INTERRUPT_STATUS_BASE_L));

	if (mask_h) {
		if (mask_h ^ 1)
			do_IRQ(fls64(mask_h) - 1);
		else if (mask_l)
			do_IRQ(63 + fls64(mask_l));
	}
}
Example #22
u_int64_t
polymod (u_int64_t nh, u_int64_t nl, u_int64_t d)
{
  //  assert (d);
  int k = fls64 (d) - 1;
  d <<= 63 - k;

  if (nh) {
    if (nh & MSB64)
      nh ^= d;
    for (int i = 62; i >= 0; i--)
      if (nh & ((u_int64_t) 1) << i) {
	nh ^= d >> (63 - i);
	nl ^= d << (i + 1);
      }
  }

  /* Reduce the low 64 bits against the shifted divisor and return the
   * remainder.  (Assumed completion of the truncated snippet, following
   * the canonical rabinpoly implementation.) */
  for (int i = 63; i >= k; i--)
    if (nl & ((u_int64_t) 1) << i)
      nl ^= d >> (63 - i);
  return nl;
}
Example #23
/*
 * Record the offsets and sizes of different state managed by the xsave
 * memory layout.
 */
static void __init setup_xstate_features(void)
{
	int eax, ebx, ecx, edx, leaf = 0x2;

	xstate_features = fls64(pcntxt_mask);
	xstate_offsets = alloc_bootmem(xstate_features * sizeof(int));
	xstate_sizes = alloc_bootmem(xstate_features * sizeof(int));

	do {
		cpuid_count(XSTATE_CPUID, leaf, &eax, &ebx, &ecx, &edx);

		if (eax == 0)
			break;

		xstate_offsets[leaf] = ebx;
		xstate_sizes[leaf] = eax;

		leaf++;
	} while (1);
}
Example #24
File: util.c  Project: Web-Engine/mdbfs
u64_t parse_size(char *s)
{
	char c;
	char *endptr;
	u64_t mult = 1;
	u64_t ret;

	if (!s) {
		fprintf(stderr, "ERROR: Size value is empty\n");
		exit(1);
	}
	if (s[0] == '-') {
		fprintf(stderr,
			"ERROR: Size value '%s' is less equal than 0\n", s);
		exit(1);
	}
	ret = strtoull(s, &endptr, 10);
	if (endptr == s) {
		fprintf(stderr, "ERROR: Size value '%s' is invalid\n", s);
		exit(1);
	}
	if (endptr[0] && endptr[1]) {
		fprintf(stderr, "ERROR: Illegal suffix contains character '%c' in wrong position\n",
			endptr[1]);
		exit(1);
	}

	if (errno == ERANGE && ret == ULLONG_MAX) {
		fprintf(stderr,
			"ERROR: Size value '%s' is too large for u64\n", s);
		exit(1);
	}
	if (endptr[0]) {
		c = tolower(endptr[0]);
		/* intentional fall-through: each suffix adds another factor of 1024 */
		switch (c) {
		case 'e':
			mult *= 1024;
		case 'p':
			mult *= 1024;
		case 't':
			mult *= 1024;
		case 'g':
			mult *= 1024;
		case 'm':
			mult *= 1024;
		case 'k':
			mult *= 1024;
		case 'b':
			break;
		default:
			fprintf(stderr, "ERROR: Unknown size descriptor '%c'\n",
				c);
			exit(1);
		}
	}

	if (fls64(ret) + fls64(mult) - 1 > 64) {
		fprintf(stderr,
			"ERROR: Size value '%s' is too large for u64\n", s);
		exit(1);
	}
	ret *= mult;
	return ret;
}
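The final fls64() check is the bit-width overflow guard also seen in Example #12: because mult is a power of two, fls64(ret) + fls64(mult) - 1 is exactly the bit width of the product, and anything above 64 cannot fit in a u64.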
Example #25
File: xstate.c  Project: 020gzh/linux
/*
 * Given an xstate feature mask, calculate where in the xsave
 * buffer the state is.  Callers should ensure that the buffer
 * is valid.
 *
 * Note: does not work for compacted buffers.
 */
void *__raw_xsave_addr(struct xregs_state *xsave, int xstate_feature_mask)
{
	int feature_nr = fls64(xstate_feature_mask) - 1;

	return (void *)xsave + xstate_comp_offsets[feature_nr];
}
Example #26
TEST(BitopsTest, FlsZero) {
  int64_t zero = 0;
  int64_t out;
  EXPECT_FALSE(fls64(zero, out));
}
Example #27
kbase_pm_cores_ready kbase_pm_register_inuse_cores(kbase_device *kbdev, mali_bool tiler_required, u64 shader_cores)
{
	unsigned long flags;
	u64 prev_shader_needed;	/* Just for tracing */
	u64 prev_shader_inuse;	/* Just for tracing */

	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);

	prev_shader_needed = kbdev->shader_needed_bitmap;
	prev_shader_inuse = kbdev->shader_inuse_bitmap;

	/* If desired_shader_state does not contain the requested cores, then power
	 * management is not attempting to power those cores (most likely
	 * due to core availability policy) and a new job affinity must be
	 * chosen */
	if ((kbdev->pm.desired_shader_state & shader_cores) != shader_cores) {
		spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);

		return KBASE_NEW_AFFINITY;
	}

	if ((kbdev->shader_available_bitmap & shader_cores) != shader_cores ||
	    (tiler_required != MALI_FALSE && !kbdev->tiler_available_bitmap)) {
		/* Trace ongoing core transition */
		kbase_timeline_pm_l2_transition_start(kbdev);
		spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
		return KBASE_CORES_NOT_READY;
	}

	/* If we started to trace a state change, then trace it has being finished
	 * by now, at the very latest */
	kbase_pm_trace_check_and_finish_state_change(kbdev);
	/* Trace core transition done */
	kbase_timeline_pm_l2_transition_done(kbdev);

	while (shader_cores) {
		int bitnum = fls64(shader_cores) - 1;
		u64 bit = 1ULL << bitnum;
		int cnt;

		KBASE_DEBUG_ASSERT(kbdev->shader_needed_cnt[bitnum] > 0);

		cnt = --kbdev->shader_needed_cnt[bitnum];

		if (0 == cnt)
			kbdev->shader_needed_bitmap &= ~bit;

		/* shader_inuse_cnt should not overflow because there can only be a
		 * very limited number of jobs on the h/w at one time */

		kbdev->shader_inuse_cnt[bitnum]++;
		kbdev->shader_inuse_bitmap |= bit;

		shader_cores &= ~bit;
	}

	if (tiler_required != MALI_FALSE) {
		KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt > 0);

		--kbdev->tiler_needed_cnt;

		kbdev->tiler_inuse_cnt++;

		KBASE_DEBUG_ASSERT(kbdev->tiler_inuse_cnt != 0);
	}

	if (prev_shader_needed != kbdev->shader_needed_bitmap)
		KBASE_TRACE_ADD(kbdev, PM_REGISTER_CHANGE_SHADER_NEEDED, NULL, NULL, 0u, (u32) kbdev->shader_needed_bitmap);

	if (prev_shader_inuse != kbdev->shader_inuse_bitmap)
		KBASE_TRACE_ADD(kbdev, PM_REGISTER_CHANGE_SHADER_INUSE, NULL, NULL, 0u, (u32) kbdev->shader_inuse_bitmap);

	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);

	return KBASE_CORES_READY;
}
Example #28
static int cdns_pcie_ep_set_bar(struct pci_epc *epc, u8 fn,
				struct pci_epf_bar *epf_bar)
{
	struct cdns_pcie_ep *ep = epc_get_drvdata(epc);
	struct cdns_pcie *pcie = &ep->pcie;
	dma_addr_t bar_phys = epf_bar->phys_addr;
	enum pci_barno bar = epf_bar->barno;
	int flags = epf_bar->flags;
	u32 addr0, addr1, reg, cfg, b, aperture, ctrl;
	u64 sz;

	/* BAR size is 2^(aperture + 7) */
	sz = max_t(size_t, epf_bar->size, CDNS_PCIE_EP_MIN_APERTURE);
	/*
	 * roundup_pow_of_two() returns an unsigned long, which is not suited
	 * for 64bit values.
	 */
	sz = 1ULL << fls64(sz - 1);
	aperture = ilog2(sz) - 7; /* 128B -> 0, 256B -> 1, 512B -> 2, ... */

	if ((flags & PCI_BASE_ADDRESS_SPACE) == PCI_BASE_ADDRESS_SPACE_IO) {
		ctrl = CDNS_PCIE_LM_BAR_CFG_CTRL_IO_32BITS;
	} else {
		bool is_prefetch = !!(flags & PCI_BASE_ADDRESS_MEM_PREFETCH);
		bool is_64bits = sz > SZ_2G;

		if (is_64bits && (bar & 1))
			return -EINVAL;

		if (is_64bits && !(flags & PCI_BASE_ADDRESS_MEM_TYPE_64))
			epf_bar->flags |= PCI_BASE_ADDRESS_MEM_TYPE_64;

		if (is_64bits && is_prefetch)
			ctrl = CDNS_PCIE_LM_BAR_CFG_CTRL_PREFETCH_MEM_64BITS;
		else if (is_prefetch)
			ctrl = CDNS_PCIE_LM_BAR_CFG_CTRL_PREFETCH_MEM_32BITS;
		else if (is_64bits)
			ctrl = CDNS_PCIE_LM_BAR_CFG_CTRL_MEM_64BITS;
		else
			ctrl = CDNS_PCIE_LM_BAR_CFG_CTRL_MEM_32BITS;
	}

	addr0 = lower_32_bits(bar_phys);
	addr1 = upper_32_bits(bar_phys);
	cdns_pcie_writel(pcie, CDNS_PCIE_AT_IB_EP_FUNC_BAR_ADDR0(fn, bar),
			 addr0);
	cdns_pcie_writel(pcie, CDNS_PCIE_AT_IB_EP_FUNC_BAR_ADDR1(fn, bar),
			 addr1);

	if (bar < BAR_4) {
		reg = CDNS_PCIE_LM_EP_FUNC_BAR_CFG0(fn);
		b = bar;
	} else {
		reg = CDNS_PCIE_LM_EP_FUNC_BAR_CFG1(fn);
		b = bar - BAR_4;
	}

	cfg = cdns_pcie_readl(pcie, reg);
	cfg &= ~(CDNS_PCIE_LM_EP_FUNC_BAR_CFG_BAR_APERTURE_MASK(b) |
		 CDNS_PCIE_LM_EP_FUNC_BAR_CFG_BAR_CTRL_MASK(b));
	cfg |= (CDNS_PCIE_LM_EP_FUNC_BAR_CFG_BAR_APERTURE(b, aperture) |
		CDNS_PCIE_LM_EP_FUNC_BAR_CFG_BAR_CTRL(b, ctrl));
	cdns_pcie_writel(pcie, reg, cfg);

	return 0;
}
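The sizing math rounds the requested BAR size up to a power of two without the truncation risk of roundup_pow_of_two() on 64-bit values: for a 600-byte request, fls64(599) = 10, so sz becomes 1 << 10 = 1024 and the encoded aperture is ilog2(1024) - 7 = 3.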
Example #29
struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcnt_info *in_out_info)
{
	struct kbase_gator_hwcnt_handles *hand;
	struct kbase_uk_hwcnt_setup setup;
	int err;
	uint32_t dump_size = 0, i = 0;
	struct kbase_va_region *reg;
	u64 flags;
	u64 nr_pages;
	u16 va_alignment = 0;

	if (!in_out_info)
		return NULL;

	hand = kzalloc(sizeof(*hand), GFP_KERNEL);
	if (!hand)
		return NULL;

	/* Get the first device */
	hand->kbdev = kbase_find_device(-1);
	if (!hand->kbdev)
		goto free_hand;

	/* Create a kbase_context */
	hand->kctx = kbase_create_context(hand->kbdev, true);
	if (!hand->kctx)
		goto release_device;

	in_out_info->nr_cores = hand->kbdev->gpu_props.num_cores;
	in_out_info->nr_core_groups = hand->kbdev->gpu_props.num_core_groups;
	in_out_info->gpu_id = hand->kbdev->gpu_props.props.core_props.product_id;

	/* If we are using a v4 device (Mali-T6xx or Mali-T72x) */
	if (kbase_hw_has_feature(hand->kbdev, BASE_HW_FEATURE_V4)) {
		uint32_t cg, j;
		uint64_t core_mask;

		/* There are 8 hardware counters blocks per core group */
		in_out_info->hwc_layout = kmalloc(sizeof(enum hwc_type) *
			MALI_MAX_NUM_BLOCKS_PER_GROUP *
			in_out_info->nr_core_groups, GFP_KERNEL);

		if (!in_out_info->hwc_layout)
			goto destroy_context;

		dump_size = in_out_info->nr_core_groups *
			MALI_MAX_NUM_BLOCKS_PER_GROUP *
			MALI_COUNTERS_PER_BLOCK *
			MALI_BYTES_PER_COUNTER;

		for (cg = 0; cg < in_out_info->nr_core_groups; cg++) {
			core_mask = hand->kbdev->gpu_props.props.coherency_info.group[cg].core_mask;

			for (j = 0; j < MALI_MAX_CORES_PER_GROUP; j++) {
				if (core_mask & (1u << j))
					in_out_info->hwc_layout[i++] = SHADER_BLOCK;
				else
					in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
			}

			in_out_info->hwc_layout[i++] = TILER_BLOCK;
			in_out_info->hwc_layout[i++] = MMU_L2_BLOCK;

			in_out_info->hwc_layout[i++] = RESERVED_BLOCK;

			if (0 == cg)
				in_out_info->hwc_layout[i++] = JM_BLOCK;
			else
				in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
		}
	/* If we are using any other device */
	} else {
		uint32_t nr_l2, nr_sc_bits, j;
		uint64_t core_mask;

		nr_l2 = hand->kbdev->gpu_props.props.l2_props.num_l2_slices;

		core_mask = hand->kbdev->gpu_props.props.coherency_info.group[0].core_mask;

		nr_sc_bits = fls64(core_mask);

		/* The job manager and tiler sets of counters
		 * are always present */
		in_out_info->hwc_layout = kmalloc(sizeof(enum hwc_type) * (2 + nr_sc_bits + nr_l2), GFP_KERNEL);

		if (!in_out_info->hwc_layout)
			goto destroy_context;

		dump_size = (2 + nr_sc_bits + nr_l2) * MALI_COUNTERS_PER_BLOCK * MALI_BYTES_PER_COUNTER;

		in_out_info->hwc_layout[i++] = JM_BLOCK;
		in_out_info->hwc_layout[i++] = TILER_BLOCK;

		for (j = 0; j < nr_l2; j++)
			in_out_info->hwc_layout[i++] = MMU_L2_BLOCK;

		while (core_mask != 0ull) {
			if ((core_mask & 1ull) != 0ull)
				in_out_info->hwc_layout[i++] = SHADER_BLOCK;
			else
				in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
			core_mask >>= 1;
		}
	}

	in_out_info->nr_hwc_blocks = i;

	in_out_info->size = dump_size;

	flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_CPU_WR | BASE_MEM_PROT_GPU_WR;
	nr_pages = PFN_UP(dump_size);
	reg = kbase_mem_alloc(hand->kctx, nr_pages, nr_pages, 0,
			&flags, &hand->hwcnt_gpu_va, &va_alignment);
	if (!reg)
		goto free_layout;

	hand->hwcnt_cpu_va = kbase_vmap(hand->kctx, hand->hwcnt_gpu_va,
			dump_size, &hand->hwcnt_map);

	if (!hand->hwcnt_cpu_va)
		goto free_buffer;

	in_out_info->kernel_dump_buffer = hand->hwcnt_cpu_va;
	memset(in_out_info->kernel_dump_buffer, 0, nr_pages * PAGE_SIZE);

	/*setup.dump_buffer = (uintptr_t)in_out_info->kernel_dump_buffer;*/
	setup.dump_buffer = hand->hwcnt_gpu_va;
	setup.jm_bm = in_out_info->bitmask[0];
	setup.tiler_bm = in_out_info->bitmask[1];
	setup.shader_bm = in_out_info->bitmask[2];
	setup.mmu_l2_bm = in_out_info->bitmask[3];

	err = kbase_instr_hwcnt_enable(hand->kctx, &setup);
	if (err)
		goto free_unmap;

	kbase_instr_hwcnt_clear(hand->kctx);

	return hand;

free_unmap:
	kbase_vunmap(hand->kctx, &hand->hwcnt_map);

free_buffer:
	kbase_mem_free(hand->kctx, hand->hwcnt_gpu_va);

free_layout:
	kfree(in_out_info->hwc_layout);

destroy_context:
	kbase_destroy_context(hand->kctx);

release_device:
	kbase_release_device(hand->kbdev);

free_hand:
	kfree(hand);

	return NULL;
}
Example #30
int __weak __clzdi2(long val)
{
	return 64 - fls64((u64)val);
}