asmlinkage void plat_irq_dispatch(struct pt_regs *regs)
{
	unsigned int pending;

#ifdef CONFIG_SIBYTE_BCM1480_PROF
	/* Set compare to count to silence count/compare timer interrupts */
	write_c0_compare(read_c0_count());
#endif

	pending = read_c0_cause() & read_c0_status();

#ifdef CONFIG_SIBYTE_BCM1480_PROF
	if (pending & CAUSEF_IP7)	/* Cpu performance counter interrupt */
		sbprof_cpu_intr(exception_epc(regs));
	else
#endif
	if (pending & CAUSEF_IP4)
		bcm1480_timer_interrupt(regs);
#ifdef CONFIG_SMP
	else if (pending & CAUSEF_IP3)
		bcm1480_mailbox_interrupt(regs);
#endif
#ifdef CONFIG_KGDB
	else if (pending & CAUSEF_IP6)
		bcm1480_kgdb_interrupt(regs);	/* KGDB (uart 1) */
#endif
	else if (pending & CAUSEF_IP2) {
		unsigned long long mask_h, mask_l;
		unsigned long base;

		/*
		 * Default...we've hit an IP[2] interrupt, which means we've
		 * got to check the 1480 interrupt registers to figure out what
		 * to do.  Need to detect which CPU we're on, now that
		 * smp_affinity is supported.
		 */
		base = A_BCM1480_IMR_MAPPER(smp_processor_id());
		mask_h = __raw_readq(
			IOADDR(base + R_BCM1480_IMR_INTERRUPT_STATUS_BASE_H));
		mask_l = __raw_readq(
			IOADDR(base + R_BCM1480_IMR_INTERRUPT_STATUS_BASE_L));

		if (mask_h) {
			if (mask_h ^ 1)
				do_IRQ(fls64(mask_h) - 1, regs);
			else
				do_IRQ(63 + fls64(mask_l), regs);
		}
	}
}
void kbase_js_affinity_release_slot_cores(kbase_device *kbdev, int js, u64 affinity)
{
	kbasep_js_device_data *js_devdata;
	u64 cores;

	KBASE_DEBUG_ASSERT(kbdev != NULL);
	KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS);
	js_devdata = &kbdev->js_data;

	cores = affinity;
	while (cores) {
		int bitnum = fls64(cores) - 1;
		u64 bit = 1ULL << bitnum;
		s8 cnt;

		KBASE_DEBUG_ASSERT(js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum] > 0);

		cnt = --(js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum]);

		if (0 == cnt)
			js_devdata->runpool_irq.slot_affinities[js] &= ~bit;

		cores &= ~bit;
	}
}
/*
 * Given the xsave area and a state inside, this function returns the
 * address of the state.
 *
 * This is the API that is called to get xstate address in either
 * standard format or compacted format of xsave area.
 *
 * Note that if there is no data for the field in the xsave buffer
 * this will return NULL.
 *
 * Inputs:
 *	xstate: the thread's storage area for all FPU data
 *	xstate_feature: state which is defined in xsave.h (e.g.
 *	XSTATE_FP, XSTATE_SSE, etc...)
 * Output:
 *	address of the state in the xsave area, or NULL if the
 *	field is not present in the xsave buffer.
 */
void *get_xsave_addr(struct xregs_state *xsave, int xstate_feature)
{
	int feature_nr = fls64(xstate_feature) - 1;

	/*
	 * Do we even *have* xsave state?
	 */
	if (!boot_cpu_has(X86_FEATURE_XSAVE))
		return NULL;

	xsave = &current->thread.fpu.state.xsave;

	/*
	 * We should not ever be requesting features that we
	 * have not enabled.  Remember that pcntxt_mask is
	 * what we write to the XCR0 register.
	 */
	WARN_ONCE(!(xfeatures_mask & xstate_feature),
		  "get of unsupported state");

	/*
	 * This assumes the last 'xsave*' instruction to
	 * have requested that 'xstate_feature' be saved.
	 * If it did not, we might be seeing an old value
	 * of the field in the buffer.
	 *
	 * This can happen because the last 'xsave' did not
	 * request that this feature be saved (unlikely)
	 * or because the "init optimization" caused it
	 * to not be saved.
	 */
	if (!(xsave->header.xfeatures & xstate_feature))
		return NULL;

	return (void *)xsave + xstate_comp_offsets[feature_nr];
}
/*
 * Return whether the system supports a given xfeature.
 *
 * Also return the name of the (most advanced) feature that the caller requested:
 */
int cpu_has_xfeatures(u64 xfeatures_needed, const char **feature_name)
{
	u64 xfeatures_missing = xfeatures_needed & ~xfeatures_mask;

	if (unlikely(feature_name)) {
		long xfeature_idx, max_idx;
		u64 xfeatures_print;
		/*
		 * So we use FLS here to be able to print the most advanced
		 * feature that was requested but is missing. So if a driver
		 * asks about "XSTATE_SSE | XSTATE_YMM" we'll print the
		 * missing AVX feature - this is the most informative message
		 * to users:
		 */
		if (xfeatures_missing)
			xfeatures_print = xfeatures_missing;
		else
			xfeatures_print = xfeatures_needed;

		xfeature_idx = fls64(xfeatures_print) - 1;
		max_idx = ARRAY_SIZE(xfeature_names) - 1;
		xfeature_idx = min(xfeature_idx, max_idx);

		*feature_name = xfeature_names[xfeature_idx];
	}

	if (xfeatures_missing)
		return 0;

	return 1;
}
void opal_handle_events(void)
{
	__be64 events = 0;
	u64 e;

	e = READ_ONCE(last_outstanding_events) & opal_event_irqchip.mask;
again:
	while (e) {
		int virq, hwirq;

		hwirq = fls64(e) - 1;
		e &= ~BIT_ULL(hwirq);

		local_irq_disable();
		virq = irq_find_mapping(opal_event_irqchip.domain, hwirq);
		if (virq) {
			irq_enter();
			generic_handle_irq(virq);
			irq_exit();
		}
		local_irq_enable();

		cond_resched();
	}
	last_outstanding_events = 0;
	if (opal_poll_events(&events) != OPAL_SUCCESS)
		return;
	e = be64_to_cpu(events) & opal_event_irqchip.mask;
	if (e)
		goto again;
}
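Many of the snippets in this file (the OPAL loop above, the Mali kbase refcounting loops, recover_bitmaps(), and the SiByte dispatchers) share one idiom: repeatedly take the highest set bit of a 64-bit mask with fls64() and clear it. A minimal hedged sketch of that pattern, assuming only the Linux fls64() contract; handle_bit() is a hypothetical callback standing in for do_IRQ(), generic_handle_irq(), and friends:

/*
 * Hedged sketch, not taken from any driver above: visit every set bit
 * of @mask from most significant to least significant. handle_bit() is
 * a hypothetical callback.
 */
static void for_each_set_bit_high_to_low(u64 mask, void (*handle_bit)(int))
{
	while (mask) {
		int bitnum = fls64(mask) - 1;	/* 0-based index of the MSB */

		mask &= ~(1ULL << bitnum);	/* clear it first... */
		handle_bit(bitnum);		/* ...then handle it */
	}
}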
void kbase_js_affinity_retain_slot_cores(kbase_device *kbdev, int js, u64 affinity)
{
	kbasep_js_device_data *js_devdata;
	u64 cores;

	KBASE_DEBUG_ASSERT(kbdev != NULL);
	KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS);
	js_devdata = &kbdev->js_data;

	KBASE_DEBUG_ASSERT(kbase_js_affinity_would_violate(kbdev, js, affinity) == MALI_FALSE);

	cores = affinity;
	while (cores) {
		int bitnum = fls64(cores) - 1;
		u64 bit = 1ULL << bitnum;
		s8 cnt;

		KBASE_DEBUG_ASSERT(js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum] < BASE_JM_SUBMIT_SLOTS);

		cnt = ++(js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum]);

		if (cnt == 1)
			js_devdata->runpool_irq.slot_affinities[js] |= bit;

		cores &= ~bit;
	}
}
void kbase_pm_request_cores(kbase_device *kbdev, mali_bool tiler_required, u64 shader_cores)
{
	unsigned long flags;
	u64 cores;

	kbase_pm_change_state change_gpu_state = 0u;

	KBASE_DEBUG_ASSERT(kbdev != NULL);

	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);

	cores = shader_cores;
	while (cores) {
		int bitnum = fls64(cores) - 1;
		u64 bit = 1ULL << bitnum;

		/* It should be almost impossible for this to overflow. It would require 2^32 atoms
		 * to request a particular core, which would require 2^24 contexts to submit. This
		 * would require an amount of memory that is impossible on a 32-bit system and
		 * extremely unlikely on a 64-bit system. */
		int cnt = ++kbdev->shader_needed_cnt[bitnum];

		if (1 == cnt) {
			kbdev->shader_needed_bitmap |= bit;
			change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER;
		}

		cores &= ~bit;
	}

	if (tiler_required != MALI_FALSE) {
		++kbdev->tiler_needed_cnt;

		KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt != 0);

		/* For tiler jobs, we must make sure that core 0 is not turned off if it's already on.
		 * However, it's safe for core 0 to be left off and turned on later whilst a tiler job
		 * is running. Hence, we don't need to update the cores state immediately. Also,
		 * attempts to turn off cores will always check the tiler_needed/inuse state first anyway.
		 *
		 * Finally, kbase_js_choose_affinity() ensures core 0 is always requested for tiler jobs
		 * anyway. Hence when there's only a tiler job in the system, this will still cause
		 * kbase_pm_update_cores_state_nolock() to be called.
		 *
		 * Note that we still need to keep track of tiler_needed/inuse_cnt, to ensure that
		 * kbase_pm_update_cores_state_nolock() can override the core availability policy and
		 * force core 0 to be powered when a tiler job is in the system. */
	}

	if (change_gpu_state) {
		KBASE_TRACE_ADD(kbdev, PM_REQUEST_CHANGE_SHADER_NEEDED, NULL, NULL, 0u, (u32) kbdev->shader_needed_bitmap);

		kbase_timeline_pm_cores_func(kbdev, KBASE_PM_FUNC_ID_REQUEST_CORES_START, change_gpu_state);
		kbase_pm_update_cores_state_nolock(kbdev);
		kbase_timeline_pm_cores_func(kbdev, KBASE_PM_FUNC_ID_REQUEST_CORES_END, change_gpu_state);
	}

	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
}
asmlinkage void plat_irq_dispatch(void)
{
	unsigned int pending;

#ifdef CONFIG_SIBYTE_SB1250_PROF
	/* Set compare to count to silence count/compare timer interrupts */
	write_c0_compare(read_c0_count());
#endif

	/*
	 * What a pain. We have to be really careful saving the upper 32 bits
	 * of any register across function calls if we don't want them
	 * trashed--since we're running in -o32, the calling routine never
	 * saves the full 64 bits of a register across a function call.
	 * Being the interrupt handler, we're guaranteed that interrupts are
	 * disabled during this code so we don't have to worry about random
	 * interrupts blasting the high 32 bits.
	 */
	pending = read_c0_cause() & read_c0_status() & ST0_IM;

#ifdef CONFIG_SIBYTE_SB1250_PROF
	if (pending & CAUSEF_IP7)	/* Cpu performance counter interrupt */
		sbprof_cpu_intr();
	else
#endif
	if (pending & CAUSEF_IP4)
		sb1250_timer_interrupt();
#ifdef CONFIG_SMP
	else if (pending & CAUSEF_IP3)
		sb1250_mailbox_interrupt();
#endif
#ifdef CONFIG_KGDB
	else if (pending & CAUSEF_IP6)	/* KGDB (uart 1) */
		sb1250_kgdb_interrupt();
#endif
	else if (pending & CAUSEF_IP2) {
		unsigned long long mask;

		/*
		 * Default...we've hit an IP[2] interrupt, which means we've
		 * got to check the 1250 interrupt registers to figure out what
		 * to do.  Need to detect which CPU we're on, now that
		 * smp_affinity is supported.
		 */
		mask = __raw_readq(IOADDR(A_IMR_REGISTER(smp_processor_id(),
					  R_IMR_INTERRUPT_STATUS_BASE)));
		if (mask)
			do_IRQ(fls64(mask) - 1);
		else
			spurious_interrupt();
	} else
		spurious_interrupt();
}
u64 dma_direct_get_required_mask(struct device *dev)
{
	u64 max_dma = phys_to_dma_direct(dev, (max_pfn - 1) << PAGE_SHIFT);

	if (dev->bus_dma_mask && dev->bus_dma_mask < max_dma)
		max_dma = dev->bus_dma_mask;

	return (1ULL << (fls64(max_dma) - 1)) * 2 - 1;
}
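The return expression is a common way to turn a highest reachable address into a DMA mask: fls64(max_dma) is the bit width of the address, and writing the mask as (1 << (width - 1)) * 2 - 1 instead of (1 << width) - 1 avoids an out-of-range shift when the width is 64. A hedged restatement with a worked value:

/* Hedged sketch of the mask math used above; not a kernel API. */
static u64 addr_to_required_mask(u64 max_addr)
{
	return (1ULL << (fls64(max_addr) - 1)) * 2 - 1;
}

/*
 * Example: max_addr = 0x23ffffff has its MSB at bit 29, so fls64()
 * returns 30 and the mask is (1 << 29) * 2 - 1 = 0x3fffffff.
 */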
static void recover_bitmaps(struct md_thread *thread)
{
	struct mddev *mddev = thread->mddev;
	struct md_cluster_info *cinfo = mddev->cluster_info;
	struct dlm_lock_resource *bm_lockres;
	char str[64];
	int slot, ret;
	struct suspend_info *s, *tmp;
	sector_t lo, hi;

	while (cinfo->recovery_map) {
		slot = fls64((u64)cinfo->recovery_map) - 1;

		/* Clear suspend_area associated with the bitmap */
		spin_lock_irq(&cinfo->suspend_lock);
		list_for_each_entry_safe(s, tmp, &cinfo->suspend_list, list)
			if (slot == s->slot) {
				list_del(&s->list);
				kfree(s);
			}
		spin_unlock_irq(&cinfo->suspend_lock);

		snprintf(str, 64, "bitmap%04d", slot);
		bm_lockres = lockres_init(mddev, str, NULL, 1);
		if (!bm_lockres) {
			pr_err("md-cluster: Cannot initialize bitmaps\n");
			goto clear_bit;
		}

		ret = dlm_lock_sync(bm_lockres, DLM_LOCK_PW);
		if (ret) {
			pr_err("md-cluster: Could not DLM lock %s: %d\n",
					str, ret);
			goto clear_bit;
		}
		ret = bitmap_copy_from_slot(mddev, slot, &lo, &hi, true);
		if (ret) {
			pr_err("md-cluster: Could not copy data from bitmap %d\n", slot);
			goto dlm_unlock;
		}
		if (hi > 0) {
			if (lo < mddev->recovery_cp)
				mddev->recovery_cp = lo;
			/* wake up thread to continue resync in case resync
			 * is not finished */
			if (mddev->recovery_cp != MaxSector) {
				set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
				md_wakeup_thread(mddev->thread);
			}
		}
dlm_unlock:
		dlm_unlock_sync(bm_lockres);
clear_bit:
		lockres_free(bm_lockres);
		clear_bit(slot, &cinfo->recovery_map);
	}
}
int main(int argc, char *argv[])
{
	unsigned int i, j;

	plan_tests(64 * 64 + 2);

	ok1(fls64(0) == 0);
	ok1(dumb_fls(0) == 0);

	for (i = 0; i < 64; i++) {
		for (j = 0; j < 64; j++) {
			uint64_t val = (1ULL << i) | (1ULL << j);
			ok(fls64(val) == dumb_fls(val),
			   "%llu -> %u should be %u",
			   (long long)val, fls64(val), dumb_fls(val));
		}
	}

	return exit_status();
}
/*
 * set_max_threads
 */
static void set_max_threads(unsigned int max_threads_suggested)
{
	u64 threads;

	/*
	 * The number of threads shall be limited such that the thread
	 * structures may only consume a small part of the available memory.
	 */
	if (fls64(totalram_pages) + fls64(PAGE_SIZE) > 64)
		threads = MAX_THREADS;
	else
		threads = div64_u64((u64) totalram_pages * (u64) PAGE_SIZE,
				    (u64) THREAD_SIZE * 8UL);

	if (threads > max_threads_suggested)
		threads = max_threads_suggested;

	max_threads = clamp_t(u64, threads, MIN_THREADS, MAX_THREADS);
}
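The fls64(totalram_pages) + fls64(PAGE_SIZE) > 64 test is a multiplication-overflow guard: a product of an m-bit and an n-bit value needs either m + n or m + n - 1 bits, so m + n > 64 means totalram_pages * PAGE_SIZE may not fit in a u64. parse_size() further down uses the tighter form with a trailing - 1, which only trips when the product is guaranteed not to fit. A hedged restatement of both checks:

/* Hedged sketches of the fls64()-based overflow guards; not kernel APIs. */
static bool mul_may_overflow_u64(u64 a, u64 b)
{
	/* The product may need more than 64 bits (conservative check). */
	return fls64(a) + fls64(b) > 64;
}

static bool mul_must_overflow_u64(u64 a, u64 b)
{
	/* The product needs at least fls64(a) + fls64(b) - 1 bits. */
	return fls64(a) + fls64(b) - 1 > 64;
}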
TEST(BitopsTest, FlsNonzero) {
  int64_t out;
  int64_t one = 1;
  bool res;

  for (int i = 0; i < 63; i++) {
    res = fls64(one << i, out);
    EXPECT_TRUE(res);
    EXPECT_EQ(i, out);
  }
}
asmlinkage void plat_irq_dispatch(void)
{
	const unsigned long core_id = cvmx_get_core_num();
	const uint64_t ciu_sum0_address = CVMX_CIU_INTX_SUM0(core_id * 2);
	const uint64_t ciu_en0_address = CVMX_CIU_INTX_EN0(core_id * 2);
	const uint64_t ciu_sum1_address = CVMX_CIU_INT_SUM1;
	const uint64_t ciu_en1_address = CVMX_CIU_INTX_EN1(core_id * 2 + 1);
	unsigned long cop0_cause;
	unsigned long cop0_status;
	uint64_t ciu_en;
	uint64_t ciu_sum;

	while (1) {
		cop0_cause = read_c0_cause();
		cop0_status = read_c0_status();
		cop0_cause &= cop0_status;
		cop0_cause &= ST0_IM;

		if (unlikely(cop0_cause & STATUSF_IP2)) {
			ciu_sum = cvmx_read_csr(ciu_sum0_address);
			ciu_en = cvmx_read_csr(ciu_en0_address);
			ciu_sum &= ciu_en;
			if (likely(ciu_sum))
				do_IRQ(fls64(ciu_sum) + OCTEON_IRQ_WORKQ0 - 1);
			else
				spurious_interrupt();
		} else if (unlikely(cop0_cause & STATUSF_IP3)) {
			ciu_sum = cvmx_read_csr(ciu_sum1_address);
			ciu_en = cvmx_read_csr(ciu_en1_address);
			ciu_sum &= ciu_en;
			if (likely(ciu_sum))
				do_IRQ(fls64(ciu_sum) + OCTEON_IRQ_WDOG0 - 1);
			else
				spurious_interrupt();
		} else if (likely(cop0_cause)) {
			do_IRQ(fls(cop0_cause) - 9 + MIPS_CPU_IRQ_BASE);
		} else {
			break;
		}
	}
}
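A note on the arithmetic in the final branch above: cop0_cause has already been masked with ST0_IM, so its highest set bit lies in the CAUSE IP0..IP7 range (bits 8..15). fls(cop0_cause) therefore returns 9..16, and subtracting 9 maps IP0 to MIPS_CPU_IRQ_BASE + 0 through IP7 to MIPS_CPU_IRQ_BASE + 7.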
void kbase_pm_unrequest_cores(kbase_device *kbdev, mali_bool tiler_required, u64 shader_cores)
{
	unsigned long flags;

	kbase_pm_change_state change_gpu_state = 0u;

	KBASE_DEBUG_ASSERT(kbdev != NULL);

	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);

	while (shader_cores) {
		int bitnum = fls64(shader_cores) - 1;
		u64 bit = 1ULL << bitnum;
		int cnt;

		KBASE_DEBUG_ASSERT(kbdev->shader_needed_cnt[bitnum] > 0);

		cnt = --kbdev->shader_needed_cnt[bitnum];

		if (0 == cnt) {
			kbdev->shader_needed_bitmap &= ~bit;

			change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER;
		}

		shader_cores &= ~bit;
	}

	if (tiler_required != MALI_FALSE) {
		KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt > 0);

		--kbdev->tiler_needed_cnt;

		/* Whilst tiler jobs must not allow core 0 to be turned off, we don't need to make an
		 * extra call to kbase_pm_update_cores_state_nolock() to ensure core 0 is turned off
		 * when the last tiler job unrequests cores: kbase_js_choose_affinity() ensures core 0
		 * was originally requested for tiler jobs. Hence when there's only a tiler job in the
		 * system, this will still cause kbase_pm_update_cores_state_nolock() to be called. */
	}

	if (change_gpu_state) {
		KBASE_TRACE_ADD(kbdev, PM_UNREQUEST_CHANGE_SHADER_NEEDED, NULL, NULL, 0u, (u32) kbdev->shader_needed_bitmap);

		kbase_pm_update_cores_state_nolock(kbdev);

		/* Trace that any state change effectively completes immediately -
		 * no-one will wait on the state change */
		kbase_pm_trace_check_and_finish_state_change(kbdev);
	}

	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
}
void kbase_pm_release_cores(kbase_device *kbdev, mali_bool tiler_required, u64 shader_cores)
{
	unsigned long flags;

	kbase_pm_change_state change_gpu_state = 0u;

	KBASE_DEBUG_ASSERT(kbdev != NULL);

	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);

	while (shader_cores) {
		int bitnum = fls64(shader_cores) - 1;
		u64 bit = 1ULL << bitnum;
		int cnt;

		KBASE_DEBUG_ASSERT(kbdev->shader_inuse_cnt[bitnum] > 0);

		cnt = --kbdev->shader_inuse_cnt[bitnum];

		if (0 == cnt) {
			kbdev->shader_inuse_bitmap &= ~bit;
			change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER;
		}

		shader_cores &= ~bit;
	}

	if (tiler_required != MALI_FALSE) {
		KBASE_DEBUG_ASSERT(kbdev->tiler_inuse_cnt > 0);

		--kbdev->tiler_inuse_cnt;

		/* Whilst tiler jobs must not allow core 0 to be turned off, we don't need to make an
		 * extra call to kbase_pm_update_cores_state_nolock() to ensure core 0 is turned off
		 * when the last tiler job finishes: kbase_js_choose_affinity() ensures core 0 was
		 * originally requested for tiler jobs. Hence when there's only a tiler job in the
		 * system, this will still cause kbase_pm_update_cores_state_nolock() to be called */
	}

	if (change_gpu_state) {
		KBASE_TRACE_ADD(kbdev, PM_RELEASE_CHANGE_SHADER_INUSE, NULL, NULL, 0u, (u32) kbdev->shader_inuse_bitmap);

		kbase_timeline_pm_cores_func(kbdev, KBASE_PM_FUNC_ID_RELEASE_CORES_START, change_gpu_state);
		kbase_pm_update_cores_state_nolock(kbdev);
		kbase_timeline_pm_cores_func(kbdev, KBASE_PM_FUNC_ID_RELEASE_CORES_END, change_gpu_state);

		/* Trace that any state change completed immediately */
		kbase_pm_trace_check_and_finish_state_change(kbdev);
	}

	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
}
static u64 swiotlb_powerpc_get_required(struct device *dev)
{
	u64 end, mask, max_direct_dma_addr = dev->archdata.max_direct_dma_addr;

	end = memblock_end_of_DRAM();
	if (max_direct_dma_addr && end > max_direct_dma_addr)
		end = max_direct_dma_addr;
	end += get_dma_offset(dev);

	mask = 1ULL << (fls64(end) - 1);
	mask += mask - 1;

	return mask;
}
static inline void dispatch_ip2(void)
{
	unsigned int cpu = smp_processor_id();
	unsigned long long mask;

	/*
	 * Default...we've hit an IP[2] interrupt, which means we've got to
	 * check the 1250 interrupt registers to figure out what to do.  Need
	 * to detect which CPU we're on, now that smp_affinity is supported.
	 */
	mask = __raw_readq(IOADDR(A_IMR_REGISTER(cpu,
				  R_IMR_INTERRUPT_STATUS_BASE)));
	if (mask)
		do_IRQ(fls64(mask) - 1);
}
struct tnum tnum_range(u64 min, u64 max)
{
	u64 chi = min ^ max, delta;
	u8 bits = fls64(chi);

	/* special case, needed because 1ULL << 64 is undefined */
	if (bits > 63)
		return tnum_unknown;
	/* e.g. if chi = 4, bits = 3, delta = (1<<3) - 1 = 7.
	 * if chi = 0, bits = 0, delta = (1<<0) - 1 = 0, so we return
	 * constant min (since min == max).
	 */
	delta = (1ULL << bits) - 1;
	return TNUM(min & ~delta, delta);
}
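A worked instance of the comment's math: for min = 4 and max = 7, chi = 4 ^ 7 = 3, bits = fls64(3) = 2, and delta = (1 << 2) - 1 = 3, so the result is TNUM(4 & ~3, 3) = TNUM(4, 3). The known bits say the value matches the pattern 1xx, which is exactly the set {4, 5, 6, 7}.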
/*
 * Record the offsets and sizes of various xstates contained
 * in the XSAVE state memory layout.
 *
 * ( Note that certain features might be non-present, for them
 *   we'll have 0 offset and 0 size. )
 */
static void __init setup_xstate_features(void)
{
	u32 eax, ebx, ecx, edx, leaf;

	xfeatures_nr = fls64(xfeatures_mask);

	for (leaf = 2; leaf < xfeatures_nr; leaf++) {
		cpuid_count(XSTATE_CPUID, leaf, &eax, &ebx, &ecx, &edx);

		xstate_offsets[leaf] = ebx;
		xstate_sizes[leaf] = eax;

		printk(KERN_INFO "x86/fpu: xstate_offset[%d]: %04x, xstate_sizes[%d]: %04x\n",
		       leaf, ebx, leaf, eax);
	}
}
static inline void dispatch_ip2(void)
{
	unsigned long long mask_h, mask_l;
	unsigned int cpu = smp_processor_id();
	unsigned long base;

	/*
	 * Default...we've hit an IP[2] interrupt, which means we've got to
	 * check the 1480 interrupt registers to figure out what to do.  Need
	 * to detect which CPU we're on, now that smp_affinity is supported.
	 */
	base = A_BCM1480_IMR_MAPPER(cpu);
	mask_h = __raw_readq(
		IOADDR(base + R_BCM1480_IMR_INTERRUPT_STATUS_BASE_H));
	mask_l = __raw_readq(
		IOADDR(base + R_BCM1480_IMR_INTERRUPT_STATUS_BASE_L));

	if (mask_h) {
		if (mask_h ^ 1)
			do_IRQ(fls64(mask_h) - 1);
		else if (mask_l)
			do_IRQ(63 + fls64(mask_l));
	}
}
/*
 * Compute the remainder of the 128-bit polynomial (nh:nl) modulo the
 * polynomial d, with coefficients in GF(2) (Rabin fingerprinting).
 * The original snippet was truncated after the high-word reduction;
 * the low-word reduction loop and return are restored below.
 */
u_int64_t polymod (u_int64_t nh, u_int64_t nl, u_int64_t d)
{
	// assert (d);
	int k = fls64 (d) - 1;

	/* Left-align the divisor so its leading coefficient sits at bit 63. */
	d <<= 63 - k;

	if (nh) {
		if (nh & MSB64)
			nh ^= d;
		for (int i = 62; i >= 0; i--)
			if (nh & ((u_int64_t) 1) << i) {
				nh ^= d >> (63 - i);
				nl ^= d << (i + 1);
			}
	}
	/* Reduce the low word; terms of degree below k need no reduction. */
	for (int i = 63; i >= k; i--)
		if (nl & ((u_int64_t) 1) << i)
			nl ^= d >> (63 - i);
	return nl;
}
/*
 * Record the offsets and sizes of different state managed by the xsave
 * memory layout.
 */
static void __init setup_xstate_features(void)
{
	int eax, ebx, ecx, edx, leaf = 0x2;

	xstate_features = fls64(pcntxt_mask);
	xstate_offsets = alloc_bootmem(xstate_features * sizeof(int));
	xstate_sizes = alloc_bootmem(xstate_features * sizeof(int));

	do {
		cpuid_count(XSTATE_CPUID, leaf, &eax, &ebx, &ecx, &edx);

		if (eax == 0)
			break;

		xstate_offsets[leaf] = ebx;
		xstate_sizes[leaf] = eax;

		leaf++;
	} while (1);
}
u64_t parse_size(char *s)
{
	char c;
	char *endptr;
	u64_t mult = 1;
	u64_t ret;

	if (!s) {
		fprintf(stderr, "ERROR: Size value is empty\n");
		exit(1);
	}
	if (s[0] == '-') {
		fprintf(stderr, "ERROR: Size value '%s' is less than or equal to 0\n", s);
		exit(1);
	}
	ret = strtoull(s, &endptr, 10);
	if (endptr == s) {
		fprintf(stderr, "ERROR: Size value '%s' is invalid\n", s);
		exit(1);
	}
	if (endptr[0] && endptr[1]) {
		fprintf(stderr, "ERROR: Illegal suffix contains character '%c' in wrong position\n",
			endptr[1]);
		exit(1);
	}
	if (errno == ERANGE && ret == ULLONG_MAX) {
		fprintf(stderr, "ERROR: Size value '%s' is too large for u64\n", s);
		exit(1);
	}
	if (endptr[0]) {
		c = tolower(endptr[0]);
		switch (c) {
		case 'e':
			mult *= 1024;
			/* fallthrough */
		case 'p':
			mult *= 1024;
			/* fallthrough */
		case 't':
			mult *= 1024;
			/* fallthrough */
		case 'g':
			mult *= 1024;
			/* fallthrough */
		case 'm':
			mult *= 1024;
			/* fallthrough */
		case 'k':
			mult *= 1024;
			/* fallthrough */
		case 'b':
			break;
		default:
			fprintf(stderr, "ERROR: Unknown size descriptor '%c'\n", c);
			exit(1);
		}
	}
	/* Reject values whose product ret * mult cannot fit in 64 bits. */
	if (fls64(ret) + fls64(mult) - 1 > 64) {
		fprintf(stderr, "ERROR: Size value '%s' is too large for u64\n", s);
		exit(1);
	}
	ret *= mult;
	return ret;
}
/*
 * Given an xstate feature mask, calculate where in the xsave
 * buffer the state is.  Callers should ensure that the buffer
 * is valid.
 *
 * Note: does not work for compacted buffers.
 */
void *__raw_xsave_addr(struct xregs_state *xsave, int xstate_feature_mask)
{
	int feature_nr = fls64(xstate_feature_mask) - 1;

	return (void *)xsave + xstate_comp_offsets[feature_nr];
}
TEST(BitopsTest, FlsZero) {
  int64_t zero = 0;
  int64_t out;

  EXPECT_FALSE(fls64(zero, out));
}
kbase_pm_cores_ready kbase_pm_register_inuse_cores(kbase_device *kbdev, mali_bool tiler_required, u64 shader_cores)
{
	unsigned long flags;
	u64 prev_shader_needed;	/* Just for tracing */
	u64 prev_shader_inuse;	/* Just for tracing */

	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);

	prev_shader_needed = kbdev->shader_needed_bitmap;
	prev_shader_inuse = kbdev->shader_inuse_bitmap;

	/* If desired_shader_state does not contain the requested cores, then
	 * power management is not attempting to power those cores (most
	 * likely due to core availability policy) and a new job affinity must
	 * be chosen */
	if ((kbdev->pm.desired_shader_state & shader_cores) != shader_cores) {
		spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
		return KBASE_NEW_AFFINITY;
	}

	if ((kbdev->shader_available_bitmap & shader_cores) != shader_cores ||
	    (tiler_required != MALI_FALSE && !kbdev->tiler_available_bitmap)) {
		/* Trace ongoing core transition */
		kbase_timeline_pm_l2_transition_start(kbdev);
		spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
		return KBASE_CORES_NOT_READY;
	}

	/* If we started to trace a state change, then trace it as being
	 * finished by now, at the very latest */
	kbase_pm_trace_check_and_finish_state_change(kbdev);
	/* Trace core transition done */
	kbase_timeline_pm_l2_transition_done(kbdev);

	while (shader_cores) {
		int bitnum = fls64(shader_cores) - 1;
		u64 bit = 1ULL << bitnum;
		int cnt;

		KBASE_DEBUG_ASSERT(kbdev->shader_needed_cnt[bitnum] > 0);

		cnt = --kbdev->shader_needed_cnt[bitnum];

		if (0 == cnt)
			kbdev->shader_needed_bitmap &= ~bit;

		/* shader_inuse_cnt should not overflow because there can only
		 * be a very limited number of jobs on the h/w at one time */
		kbdev->shader_inuse_cnt[bitnum]++;
		kbdev->shader_inuse_bitmap |= bit;

		shader_cores &= ~bit;
	}

	if (tiler_required != MALI_FALSE) {
		KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt > 0);

		--kbdev->tiler_needed_cnt;

		kbdev->tiler_inuse_cnt++;

		KBASE_DEBUG_ASSERT(kbdev->tiler_inuse_cnt != 0);
	}

	if (prev_shader_needed != kbdev->shader_needed_bitmap)
		KBASE_TRACE_ADD(kbdev, PM_REGISTER_CHANGE_SHADER_NEEDED, NULL, NULL, 0u, (u32) kbdev->shader_needed_bitmap);

	if (prev_shader_inuse != kbdev->shader_inuse_bitmap)
		KBASE_TRACE_ADD(kbdev, PM_REGISTER_CHANGE_SHADER_INUSE, NULL, NULL, 0u, (u32) kbdev->shader_inuse_bitmap);

	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);

	return KBASE_CORES_READY;
}
static int cdns_pcie_ep_set_bar(struct pci_epc *epc, u8 fn,
				struct pci_epf_bar *epf_bar)
{
	struct cdns_pcie_ep *ep = epc_get_drvdata(epc);
	struct cdns_pcie *pcie = &ep->pcie;
	dma_addr_t bar_phys = epf_bar->phys_addr;
	enum pci_barno bar = epf_bar->barno;
	int flags = epf_bar->flags;
	u32 addr0, addr1, reg, cfg, b, aperture, ctrl;
	u64 sz;

	/* BAR size is 2^(aperture + 7) */
	sz = max_t(size_t, epf_bar->size, CDNS_PCIE_EP_MIN_APERTURE);
	/*
	 * roundup_pow_of_two() returns an unsigned long, which is not suited
	 * for 64bit values.
	 */
	sz = 1ULL << fls64(sz - 1);
	aperture = ilog2(sz) - 7; /* 128B -> 0, 256B -> 1, 512B -> 2, ... */

	if ((flags & PCI_BASE_ADDRESS_SPACE) == PCI_BASE_ADDRESS_SPACE_IO) {
		ctrl = CDNS_PCIE_LM_BAR_CFG_CTRL_IO_32BITS;
	} else {
		bool is_prefetch = !!(flags & PCI_BASE_ADDRESS_MEM_PREFETCH);
		bool is_64bits = sz > SZ_2G;

		if (is_64bits && (bar & 1))
			return -EINVAL;

		if (is_64bits && !(flags & PCI_BASE_ADDRESS_MEM_TYPE_64))
			epf_bar->flags |= PCI_BASE_ADDRESS_MEM_TYPE_64;

		if (is_64bits && is_prefetch)
			ctrl = CDNS_PCIE_LM_BAR_CFG_CTRL_PREFETCH_MEM_64BITS;
		else if (is_prefetch)
			ctrl = CDNS_PCIE_LM_BAR_CFG_CTRL_PREFETCH_MEM_32BITS;
		else if (is_64bits)
			ctrl = CDNS_PCIE_LM_BAR_CFG_CTRL_MEM_64BITS;
		else
			ctrl = CDNS_PCIE_LM_BAR_CFG_CTRL_MEM_32BITS;
	}

	addr0 = lower_32_bits(bar_phys);
	addr1 = upper_32_bits(bar_phys);
	cdns_pcie_writel(pcie, CDNS_PCIE_AT_IB_EP_FUNC_BAR_ADDR0(fn, bar),
			 addr0);
	cdns_pcie_writel(pcie, CDNS_PCIE_AT_IB_EP_FUNC_BAR_ADDR1(fn, bar),
			 addr1);

	if (bar < BAR_4) {
		reg = CDNS_PCIE_LM_EP_FUNC_BAR_CFG0(fn);
		b = bar;
	} else {
		reg = CDNS_PCIE_LM_EP_FUNC_BAR_CFG1(fn);
		b = bar - BAR_4;
	}

	cfg = cdns_pcie_readl(pcie, reg);
	cfg &= ~(CDNS_PCIE_LM_EP_FUNC_BAR_CFG_BAR_APERTURE_MASK(b) |
		 CDNS_PCIE_LM_EP_FUNC_BAR_CFG_BAR_CTRL_MASK(b));
	cfg |= (CDNS_PCIE_LM_EP_FUNC_BAR_CFG_BAR_APERTURE(b, aperture) |
		CDNS_PCIE_LM_EP_FUNC_BAR_CFG_BAR_CTRL(b, ctrl));
	cdns_pcie_writel(pcie, reg, cfg);

	return 0;
}
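The sz = 1ULL << fls64(sz - 1) line above is the 64-bit-safe round-up to a power of two that the adjacent comment motivates: fls64(sz - 1) is the bit width of sz - 1, so shifting 1 by it yields the smallest power of two greater than or equal to sz. A hedged standalone sketch (valid for 1 <= n <= 1ULL << 63):

/* Hedged sketch of the 64-bit round-up idiom used above. */
static u64 roundup_pow_of_two_u64(u64 n)
{
	return 1ULL << fls64(n - 1);
}

/* e.g. roundup_pow_of_two_u64(129) == 256, roundup_pow_of_two_u64(256) == 256 */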
struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcnt_info *in_out_info)
{
	struct kbase_gator_hwcnt_handles *hand;
	struct kbase_uk_hwcnt_setup setup;
	int err;
	uint32_t dump_size = 0, i = 0;
	struct kbase_va_region *reg;
	u64 flags;
	u64 nr_pages;
	u16 va_alignment = 0;

	if (!in_out_info)
		return NULL;

	hand = kzalloc(sizeof(*hand), GFP_KERNEL);
	if (!hand)
		return NULL;

	/* Get the first device */
	hand->kbdev = kbase_find_device(-1);
	if (!hand->kbdev)
		goto free_hand;

	/* Create a kbase_context */
	hand->kctx = kbase_create_context(hand->kbdev, true);
	if (!hand->kctx)
		goto release_device;

	in_out_info->nr_cores = hand->kbdev->gpu_props.num_cores;
	in_out_info->nr_core_groups = hand->kbdev->gpu_props.num_core_groups;
	in_out_info->gpu_id = hand->kbdev->gpu_props.props.core_props.product_id;

	/* If we are using a v4 device (Mali-T6xx or Mali-T72x) */
	if (kbase_hw_has_feature(hand->kbdev, BASE_HW_FEATURE_V4)) {
		uint32_t cg, j;
		uint64_t core_mask;

		/* There are 8 hardware counters blocks per core group */
		in_out_info->hwc_layout = kmalloc(sizeof(enum hwc_type) *
			MALI_MAX_NUM_BLOCKS_PER_GROUP *
			in_out_info->nr_core_groups, GFP_KERNEL);

		if (!in_out_info->hwc_layout)
			goto destroy_context;

		dump_size = in_out_info->nr_core_groups *
			MALI_MAX_NUM_BLOCKS_PER_GROUP *
			MALI_COUNTERS_PER_BLOCK *
			MALI_BYTES_PER_COUNTER;

		for (cg = 0; cg < in_out_info->nr_core_groups; cg++) {
			core_mask = hand->kbdev->gpu_props.props.coherency_info.group[cg].core_mask;

			for (j = 0; j < MALI_MAX_CORES_PER_GROUP; j++) {
				if (core_mask & (1u << j))
					in_out_info->hwc_layout[i++] = SHADER_BLOCK;
				else
					in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
			}

			in_out_info->hwc_layout[i++] = TILER_BLOCK;
			in_out_info->hwc_layout[i++] = MMU_L2_BLOCK;
			in_out_info->hwc_layout[i++] = RESERVED_BLOCK;

			if (0 == cg)
				in_out_info->hwc_layout[i++] = JM_BLOCK;
			else
				in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
		}
	/* If we are using any other device */
	} else {
		uint32_t nr_l2, nr_sc_bits, j;
		uint64_t core_mask;

		nr_l2 = hand->kbdev->gpu_props.props.l2_props.num_l2_slices;

		core_mask = hand->kbdev->gpu_props.props.coherency_info.group[0].core_mask;

		nr_sc_bits = fls64(core_mask);

		/* The job manager and tiler sets of counters
		 * are always present */
		in_out_info->hwc_layout = kmalloc(sizeof(enum hwc_type) * (2 + nr_sc_bits + nr_l2), GFP_KERNEL);

		if (!in_out_info->hwc_layout)
			goto destroy_context;

		dump_size = (2 + nr_sc_bits + nr_l2) * MALI_COUNTERS_PER_BLOCK * MALI_BYTES_PER_COUNTER;

		in_out_info->hwc_layout[i++] = JM_BLOCK;
		in_out_info->hwc_layout[i++] = TILER_BLOCK;

		for (j = 0; j < nr_l2; j++)
			in_out_info->hwc_layout[i++] = MMU_L2_BLOCK;

		while (core_mask != 0ull) {
			if ((core_mask & 1ull) != 0ull)
				in_out_info->hwc_layout[i++] = SHADER_BLOCK;
			else
				in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
			core_mask >>= 1;
		}
	}

	in_out_info->nr_hwc_blocks = i;

	in_out_info->size = dump_size;

	flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_CPU_WR | BASE_MEM_PROT_GPU_WR;
	nr_pages = PFN_UP(dump_size);
	reg = kbase_mem_alloc(hand->kctx, nr_pages, nr_pages, 0,
			&flags, &hand->hwcnt_gpu_va, &va_alignment);
	if (!reg)
		goto free_layout;

	hand->hwcnt_cpu_va = kbase_vmap(hand->kctx, hand->hwcnt_gpu_va,
			dump_size, &hand->hwcnt_map);

	if (!hand->hwcnt_cpu_va)
		goto free_buffer;

	in_out_info->kernel_dump_buffer = hand->hwcnt_cpu_va;
	memset(in_out_info->kernel_dump_buffer, 0, nr_pages * PAGE_SIZE);

	/*setup.dump_buffer = (uintptr_t)in_out_info->kernel_dump_buffer;*/
	setup.dump_buffer = hand->hwcnt_gpu_va;
	setup.jm_bm = in_out_info->bitmask[0];
	setup.tiler_bm = in_out_info->bitmask[1];
	setup.shader_bm = in_out_info->bitmask[2];
	setup.mmu_l2_bm = in_out_info->bitmask[3];

	err = kbase_instr_hwcnt_enable(hand->kctx, &setup);
	if (err)
		goto free_unmap;

	kbase_instr_hwcnt_clear(hand->kctx);

	return hand;

free_unmap:
	kbase_vunmap(hand->kctx, &hand->hwcnt_map);

free_buffer:
	kbase_mem_free(hand->kctx, hand->hwcnt_gpu_va);

free_layout:
	kfree(in_out_info->hwc_layout);

destroy_context:
	kbase_destroy_context(hand->kctx);

release_device:
	kbase_release_device(hand->kbdev);

free_hand:
	kfree(hand);

	return NULL;
}
int __weak __clzdi2(long val)
{
	return 64 - fls64((u64)val);
}
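Nearly everything in this file relies on the Linux fls64() contract: fls64(0) == 0, and otherwise fls64(x) is the 1-based index of the most significant set bit, so fls64(1) == 1 and fls64(1ULL << 63) == 64. (The two gtest snippets use a different fls64(value, out) variant that returns a success flag and a 0-based index.) A minimal portable sketch of the Linux contract; the kernel's real implementations use __fls() or a hardware bit-scan instruction instead of this loop:

/*
 * Hedged reference implementation of the fls64() contract assumed
 * throughout this file; not the kernel's optimized version.
 */
static int fls64_ref(u64 x)
{
	int r = 0;

	while (x) {
		x >>= 1;	/* each shift accounts for one bit of width */
		r++;
	}
	return r;	/* fls64_ref(0) == 0, fls64_ref(1ULL << 63) == 64 */
}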