uint32_t
rte_member_lookup_multi_vbf(const struct rte_member_setsum *ss,
        const void *key, uint32_t match_per_key,
        member_set_t *set_id)
{
    uint32_t num_matches = 0;
    uint32_t j;
    uint32_t h1 = MEMBER_HASH_FUNC(key, ss->key_len, ss->prim_hash_seed);
    uint32_t h2 = MEMBER_HASH_FUNC(&h1, sizeof(uint32_t), ss->sec_hash_seed);
    uint32_t mask = ~0;
    uint32_t bit_loc;

    for (j = 0; j < ss->num_hashes; j++) {
        bit_loc = (h1 + j * h2) & ss->bit_mask;
        mask &= test_bit(bit_loc, ss);
    }

    while (mask) {
        uint32_t loc = __builtin_ctzl(mask);
        set_id[num_matches] = loc + 1;
        num_matches++;
        if (num_matches >= match_per_key)
            return num_matches;
        mask &= ~(1UL << loc);
    }

    return num_matches;
}
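In these vBF (vector Bloom filter) routines, test_bit() evidently returns a word with one bit per set, so ANDing it across all hash probes leaves a mask of candidate sets that the ctzl loop then enumerates. A minimal standalone model of that trick; the names here are illustrative, not taken from DPDK:

#include <assert.h>
#include <stdint.h>

/* Toy model: 32 Bloom filters of 8 bits each, packed so that word
 * filters[b] holds, at bit position s, bit b of set s's filter. */
static uint32_t filters[8];

static void toy_add(unsigned set, unsigned bit) {
    filters[bit] |= 1u << set;
}

int main(void) {
    toy_add(3, 1); toy_add(3, 6);   /* set 3 hits probe bits 1 and 6 */
    toy_add(5, 1);                  /* set 5 hits only probe bit 1 */

    uint32_t mask = ~0u;
    mask &= filters[1];             /* first hash probe */
    mask &= filters[6];             /* second hash probe */
    /* Only set 3 survives both probes. */
    assert(mask == 1u << 3);
    assert(__builtin_ctzl(mask) == 3);
    return 0;
}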
uint32_t
rte_member_lookup_bulk_vbf(const struct rte_member_setsum *ss,
        const void **keys, uint32_t num_keys, member_set_t *set_ids)
{
    uint32_t i, k;
    uint32_t num_matches = 0;
    uint32_t mask[RTE_MEMBER_LOOKUP_BULK_MAX];
    uint32_t h1[RTE_MEMBER_LOOKUP_BULK_MAX], h2[RTE_MEMBER_LOOKUP_BULK_MAX];
    uint32_t bit_loc;

    for (i = 0; i < num_keys; i++)
        h1[i] = MEMBER_HASH_FUNC(keys[i], ss->key_len, ss->prim_hash_seed);
    for (i = 0; i < num_keys; i++)
        h2[i] = MEMBER_HASH_FUNC(&h1[i], sizeof(uint32_t), ss->sec_hash_seed);

    for (i = 0; i < num_keys; i++) {
        mask[i] = ~0;
        for (k = 0; k < ss->num_hashes; k++) {
            bit_loc = (h1[i] + k * h2[i]) & ss->bit_mask;
            mask[i] &= test_bit(bit_loc, ss);
        }
    }

    for (i = 0; i < num_keys; i++) {
        if (mask[i]) {
            set_ids[i] = __builtin_ctzl(mask[i]) + 1;
            num_matches++;
        } else
            set_ids[i] = RTE_MEMBER_NO_MATCH;
    }
    return num_matches;
}
StgWord
hs_ctz64(StgWord64 x)
{
#if defined(__GNUC__) && (defined(i386_HOST_ARCH) || defined(powerpc_HOST_ARCH))
  /* On Linux/i386, the 64bit `__builtin_ctzll()` intrinsic doesn't get
     inlined by GCC but rather a short `__ctzdi2` runtime function is
     inserted when needed into compiled object files.

     This workaround forces GCC on 32bit x86 to express `hs_ctz64` in
     terms of the 32bit `__builtin_ctz()` (this is no loss, as there's
     no 64bit BSF instruction on i686 anyway) and thus avoid the
     problematic out-of-line runtime function. */
  if (!x) return 64;
  return ((uint32_t)x ? __builtin_ctz((uint32_t)x)
                      : (__builtin_ctz(x >> 32) + 32));
#elif SIZEOF_UNSIGNED_LONG == 8
  return x ? __builtin_ctzl(x) : 64;
#elif SIZEOF_UNSIGNED_LONG_LONG == 8
  return x ? __builtin_ctzll(x) : 64;
#else
# error no suitable __builtin_ctz() found
#endif
}
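A minimal standalone check (not from the GHC sources) that the 32-bit split used above agrees with the expected trailing-zero count at every bit position:

#include <assert.h>
#include <stdint.h>

/* Same split as hs_ctz64's i386 branch: use the low word if it is
 * nonzero, otherwise count in the high word and add 32. */
static unsigned ctz64_split(uint64_t x)
{
    if (!x) return 64;
    return (uint32_t)x ? __builtin_ctz((uint32_t)x)
                       : __builtin_ctz((uint32_t)(x >> 32)) + 32;
}

int main(void)
{
    for (unsigned i = 0; i < 64; i++)
        assert(ctz64_split((uint64_t)1 << i) == i);
    assert(ctz64_split(0) == 64);
    return 0;
}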
uint32_t sl_siglines_process_signals(struct sl_siglines* sglns)
{
    BUG_ON(!is_direction_receiver(sglns->dir));

    uint32_t nprocessed = 0;
    uint32_t i, bit_n, start_bit, end_bit,
             max_i = (sglns->num_lines / NBITS_PER_UINT64);
    uint64_t* bits_p;
    uint64_t bits_value;

    for (i = 0; i < max_i; i++) {
        bits_p = &sglns->event_lines[i];
        bits_value = *bits_p;
        if (bits_value != 0) {
            start_bit = (uint32_t)__builtin_ctzl(bits_value);
            end_bit = NBITS_PER_UINT64 - (uint32_t)__builtin_clzl(bits_value);
            for (bit_n = start_bit; bit_n < end_bit; bit_n++) {
                if (unlikely(test_and_clear_bit(bits_p, bit_n) == 0))
                    continue;

                sl_sigline_t line = NBITS_PER_UINT64 * i + bit_n;
                sglns->handler(sglns, line);
                nprocessed++;
            }
        }
    }
    return nprocessed;
}
void core_init_mmu_regs(void)
{
    uint64_t mair;
    uint64_t tcr;

    mair  = MAIR_ATTR_SET(ATTR_DEVICE, ATTR_DEVICE_INDEX);
    mair |= MAIR_ATTR_SET(ATTR_IWBWA_OWBWA_NTR, ATTR_IWBWA_OWBWA_NTR_INDEX);
    write_mair_el1(mair);

    tcr  = TCR_XRGNX_WBWA << TCR_IRGN0_SHIFT;
    tcr |= TCR_XRGNX_WBWA << TCR_ORGN0_SHIFT;
    tcr |= TCR_SHX_ISH << TCR_SH0_SHIFT;
    tcr |= tcr_ps_bits << TCR_EL1_IPS_SHIFT;
    tcr |= 64 - __builtin_ctzl(ADDR_SPACE_SIZE);

    /* Disable the use of TTBR1 */
    tcr |= TCR_EPD1;

    /*
     * TCR.A1 = 0 => ASID is stored in TTBR0
     * TCR.AS = 0 => Same ASID size as in Aarch32/ARMv7
     */
    write_tcr_el1(tcr);

    write_ttbr0_el1((paddr_t)l1_xlation_table[get_core_pos()]);
    write_ttbr1_el1(0);
}
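The `64 - __builtin_ctzl(ADDR_SPACE_SIZE)` term appears to fill TCR_EL1.T0SZ, which AArch64 defines as 64 minus the virtual address width; for a power-of-two size, ctzl is exactly log2. A hypothetical worked example, not taken from the source above:

#include <assert.h>

int main(void)
{
    /* Assumes 64-bit unsigned long (LP64). */
    unsigned long addr_space_size = 1UL << 32;   /* 4 GiB VA space */
    unsigned t0sz = 64 - __builtin_ctzl(addr_space_size);
    assert(t0sz == 32);                          /* 64 - log2(2^32) */
    return 0;
}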
/* 0: inserted
   1: not enough space in cache */
int tcam_cache_insert(tcam_cache_t *cache, uint8_t *key, void *data)
{
    pthread_mutex_lock(&cache->lock);
    if (cache->nb_entries == cache->size) {
        pthread_mutex_unlock(&cache->lock);
        return 1;
    }

    int i;
    cache_entry_t *entry = NULL;
    int trailing_ones;
    /* Slot indices count bits, not bytes, of a bitmap word. */
    const int bits_per_long = 8 * sizeof(unsigned long);

    for (i = 0; i < cache->bitmap_size; i++) {
        /* ctzl of the inverted word = index of the first free slot;
         * skip fully used words, since __builtin_ctzl(0) is undefined. */
        if (cache->bitmap_used[i] == ~0UL)
            continue;
        trailing_ones = __builtin_ctzl(~cache->bitmap_used[i]);
        if (trailing_ones != bits_per_long) {
            entry = &cache->entries[bits_per_long * i + trailing_ones];
            cache->bitmap_used[i] |= (1UL << trailing_ones);
            break;
        }
    }

    /* nb_entries < size guarantees a free slot was found above. */
    entry->last_access = time(NULL);
    entry->key = key;
    entry->key_size = cache->key_size;
    entry->data = data;

    uint32_t hash = hashlittle(key, cache->key_size, 0);
    tommy_hashtable_insert(&cache->hashtable, &entry->node, entry, hash);
    cache->nb_entries++;
    pthread_mutex_unlock(&cache->lock);
    return 0;
}
uint32_t LUT_offset(size_t N, size_t leafN)
{
    int i;
    size_t p_lut_size = 0;
    size_t lut_size = 0;
    int hardcoded = 0;
    size_t n_luts = __builtin_ctzl(N/leafN);
    int n = leafN*2;
    //if(N <= 32) { n_luts = __builtin_ctzl(N/4); hardcoded = 1; }

    for (i = 0; i < n_luts - 1; i++) {
        p_lut_size = lut_size;
        if (!i || hardcoded) {
#ifdef __arm__
            if (N <= 32)
                lut_size += n/4 * 2 * sizeof(cdata_t);
            else
                lut_size += n/4 * sizeof(cdata_t);
#else
            lut_size += n/4 * 2 * sizeof(cdata_t);
#endif
            // n *= 2;
        } else {
#ifdef __arm__
            lut_size += n/8 * 3 * sizeof(cdata_t);
#else
            lut_size += n/8 * 3 * 2 * sizeof(cdata_t);
#endif
        }
        n *= 2;
    }
    return lut_size;
}
/*
 * exchanges_lookup_multi_by_uid
 */
gint exchanges_lookup_multi_by_uid ( const gchar *uids, exchange **exch )
{
    unsigned long int nuids, nuid;
    gint nb_exch = 0;

    /* uids encodes a decimal bitmask of exchange indices; strtoul
     * returns an unsigned value, so no sign check is possible. */
    nuids = strtoul ( uids, NULL, 10 );

    while ( ( nuids != 0 ) && ( nb_exch < EXCH_NB_MAX_MULTI ) )
    {
        nuid = __builtin_ctzl ( nuids );
        nuids = nuids ^ (((unsigned long int)1) << nuid);
        if ( nuid < nb_exchanges )
        {
            exch[nb_exch] = EXCHANGES[nuid];
            ++nb_exch;
        }
    }
    return nb_exch;
}
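Both exchange lookup functions treat the parsed decimal value as a bitmask of indices. A self-contained sketch of the same decoding idiom:

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
    /* "6" is binary 110: bits 1 and 2 are set. */
    unsigned long mask = strtoul("6", NULL, 10);
    while (mask != 0) {
        unsigned long idx = __builtin_ctzl(mask); /* lowest set bit */
        mask ^= 1UL << idx;                       /* clear it */
        printf("index %lu\n", idx);               /* prints 1, then 2 */
    }
    return 0;
}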
int tcam_cache_purge(tcam_cache_t *cache)
{
    int removed = 0;
    pthread_mutex_lock(&cache->lock);

    int i, j;
    cache_entry_t *entry;
    time_t now = time(NULL);
    unsigned long used;
    int trailing_zeros;
    /* Slot indices count bits, not bytes, of a bitmap word. */
    const int bits_per_long = 8 * sizeof(unsigned long);

    for (i = 0; i < cache->bitmap_size; i++) {
        used = cache->bitmap_used[i];
        if (used == 0)          /* __builtin_ctzl(0) is undefined */
            continue;
        trailing_zeros = __builtin_ctzl(used);
        for (j = trailing_zeros; j < bits_per_long; j++) {
            if (used & (1UL << j)) {
                entry = &cache->entries[bits_per_long * i + j];
                if ((now - entry->last_access) > cache->expiration_secs) {
                    cache->bitmap_used[i] &= ~(1UL << j);
                    tommy_hashtable_remove_existing(&cache->hashtable,
                                                    &entry->node);
                    removed++;
                }
            }
        }
    }

    cache->nb_entries -= removed;
    pthread_mutex_unlock(&cache->lock);
    return removed;
}
Size findFirstBit(Size a)
{
#ifdef __GNUC__
# ifdef __X64__
    return __builtin_ctzl(a);
# else
    return __builtin_ctz(a);
# endif
#elif defined(_MSC_VER)
    unsigned long pos;
# ifdef __X64__
    _BitScanForward64(&pos, a);
# else
    _BitScanForward(&pos, a);
# endif
    return pos;
#else
    // Very naive implementation.
    Size c = 0;
    while (!(a & 1)) {
        a >>= 1;
        c++;
    }
    return c;
#endif
}
static inline unsigned char _BitScanForward(unsigned long *_Index,
                                            unsigned long _Mask)
{
    if (!_Mask)
        return 0;
    *_Index = __builtin_ctzl(_Mask);
    return 1;
}
int wcscmp(const wchar_t *s1, const wchar_t *s2){
    // If arp1 and arp2 are word-aligned, out-of-bounds reads are not a
    // concern: memory is allocated by page, so it is naturally
    // page-aligned and hence word-aligned, and every byte within a word
    // necessarily has the same access permissions.
    const wchar_t *arp1 = (const wchar_t *)((uintptr_t)s1 & (uintptr_t)-64);
    const wchar_t *arp2 = (const wchar_t *)((uintptr_t)s2 & (uintptr_t)-64);
    const unsigned align = (unsigned)(32 - ((const wchar_t *)s1 - arp1) + ((const wchar_t *)s2 - arp2));
    __m128i xz[1];
    __MCFCRT_xmmsetz(xz);
    __m128i s2v[12];
    bool s2z;
    __m128i xw[4], xc[4];
    uint32_t mask;
    ptrdiff_t dist;
//=============================================================================
#define BEGIN \
    arp1 = __MCFCRT_xmmload_4(xw, arp1, _mm_load_si128); \
    for(unsigned i = 0; i < 8; i += 4){ \
        __MCFCRT_xmmload_4(s2v + i, s2v + i + 4, _mm_load_si128); \
    } \
    if(_MCFCRT_EXPECT(!s2z)){ \
        arp2 = __MCFCRT_xmmload_4(s2v + 8, arp2, _mm_load_si128); \
        mask = __MCFCRT_xmmcmp_41w(s2v + 8, xz); \
        s2z = mask != 0; \
    } \
    __MCFCRT_xmmalign_4(xc, s2v, align); \
    mask = ~__MCFCRT_xmmcmpandn_441w(xw, xc, xz);
#define END \
    if(_MCFCRT_EXPECT_NOT(mask != 0)){ \
        goto end; \
    }
//=============================================================================
    __MCFCRT_xmmsetz_4(s2v + 4);
    arp2 = __MCFCRT_xmmload_4(s2v + 8, arp2, _mm_load_si128);
    mask = __MCFCRT_xmmcmp_41w(s2v + 8, xz);
    dist = (const wchar_t *)s2 - (arp2 - 32);
    mask &= (uint32_t)-1 << dist;
    s2z = mask != 0;
    BEGIN
    dist = (const wchar_t *)s1 - (arp1 - 32);
    mask &= (uint32_t)-1 << dist;
    for(;;){
        END
        BEGIN
    }
end:
    arp1 = arp1 - 32 + (unsigned)__builtin_ctzl(mask);
    arp2 = arp1 - (const wchar_t *)s1 + (const wchar_t *)s2;
    if(*arp1 == *arp2){
        goto end_equal;
    }
    return (*arp1 < *arp2) ? -1 : 1;
end_equal:
    return 0;
}
/*
 * Allocation order for @size.  Assumes @size is a power-of-two multiple
 * of PAGE_SIZE, so the trailing-zero count of the page count is exactly
 * log2 of the number of pages.  (Decrementing @size first, as a
 * round-up idiom would, breaks the ctzl computation and hits
 * __builtin_ctzl(0) for size == PAGE_SIZE.)
 */
int get_order(unsigned long size)
{
    return __builtin_ctzl(size >> PAGE_SHIFT);
}
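A quick standalone check of the power-of-two case, with an illustrative PAGE_SHIFT of 12 (4 KiB pages):

#include <assert.h>

#define PAGE_SHIFT 12   /* illustrative: 4 KiB pages */
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

static int get_order(unsigned long size)
{
    return __builtin_ctzl(size >> PAGE_SHIFT);
}

int main(void)
{
    assert(get_order(PAGE_SIZE) == 0);      /* one page   */
    assert(get_order(2 * PAGE_SIZE) == 1);  /* two pages  */
    assert(get_order(8 * PAGE_SIZE) == 3);  /* eight pages */
    return 0;
}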
/* See en.wikipedia.org/wiki/Binary_GCD_algorithm.
   Taken from Daniel Lemire's blog (with improvements by Ralph Corderoy):
   http://lemire.me/blog/archives/2013/12/26/fastest-way-to-compute-the-greatest-common-divisor/
*/
value core_extended_extended_int_gcd(value vu, value vv)
{
    int shift;
    unsigned long u = labs(Long_val(vu)), v = labs(Long_val(vv));
    unsigned long m;
    if ((u == 0) || (u == v)) return Val_long(v);
    if (v == 0) return Val_long(u);
    shift = __builtin_ctzl(u | v);
    u >>= __builtin_ctzl(u);
    do {
        v >>= __builtin_ctzl(v);
        m = (v ^ u) & -(v < u);
        u ^= m;
        v ^= m;
        v -= u;
    } while (v != 0);
    return Val_long(u << shift);
}
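Stripped of the OCaml value boxing, the loop is plain C. A self-contained sketch of the same binary GCD with the branchless swap spelled out:

#include <assert.h>

/* Binary GCD as in the function above, without the OCaml FFI wrappers. */
static unsigned long binary_gcd(unsigned long u, unsigned long v)
{
    if (u == 0 || u == v) return v;
    if (v == 0) return u;

    /* Common factors of two, restored at the end. */
    int shift = __builtin_ctzl(u | v);
    u >>= __builtin_ctzl(u);            /* make u odd */
    do {
        v >>= __builtin_ctzl(v);        /* make v odd */
        /* Branchless swap so that u <= v: m is all-ones iff v < u. */
        unsigned long m = (v ^ u) & -(unsigned long)(v < u);
        u ^= m;
        v ^= m;
        v -= u;                         /* even result; ctzl strips it */
    } while (v != 0);
    return u << shift;
}

int main(void)
{
    assert(binary_gcd(48, 18) == 6);
    assert(binary_gcd(0, 7) == 7);
    return 0;
}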
void check_ctz(int n) {
  // ABORT: builtins.cpp:[[@LINE+2]]:17: runtime error: passing zero to ctz(), which is not a valid argument
  // RECOVER: builtins.cpp:[[@LINE+1]]:17: runtime error: passing zero to ctz(), which is not a valid argument
  __builtin_ctz(n);

  // RECOVER: builtins.cpp:[[@LINE+1]]:18: runtime error: passing zero to ctz(), which is not a valid argument
  __builtin_ctzl(n);

  // RECOVER: builtins.cpp:[[@LINE+1]]:19: runtime error: passing zero to ctz(), which is not a valid argument
  __builtin_ctzll(n);
}
/*
 * exchanges_lookup_by_uid
 */
exchange * exchanges_lookup_by_uid ( const gchar *uid )
{
    unsigned long int mask;
    long int nuid;

    /* uid encodes a decimal one-bit mask selecting the exchange index. */
    mask = strtoul ( uid, NULL, 10 );
    if ( mask == 0 )    /* __builtin_ctzl(0) is undefined */
        return NULL;

    nuid = __builtin_ctzl ( mask );
    if ( nuid >= nb_exchanges )
        return NULL;

    return EXCHANGES[nuid];
}
std::string LogMsgFlagString(LOGT logflags) {
    std::string out;
    auto bitint = flag_unwrap<LOGT>(logflags & LOGT::GROUP_STR);
    while (bitint) {
        int offset = __builtin_ctzl(bitint);
        bitint &= ~(static_cast<decltype(bitint)>(1) << offset);
        if (out.size()) {
            out += ",";
        }
        out += logflagsstrings[offset];
    }
    return out;
}
inline int trailing_zero_count_64(std::uint64_t word) {
#ifdef QUICKSTEP_HAVE_BUILTIN_CTZ
  // Note: assumes unsigned long is 64 bits wide (LP64).
  return __builtin_ctzl(word);
#else
  if (word) {
    int count = 0;
    while (!(word & 0x1U)) {
      ++count;
      word >>= 1;
    }
    return count;
  } else {
    return 64;
  }
#endif
}
static inline int count_trailing_zeros(word_t word) {
#if defined(__GNUC__)
  return __builtin_ctzl(word);
#elif defined(_MSC_VER)
  unsigned long index;
# if defined(_M_AMD64)
  // Call the intrinsic outside assert(): wrapped in assert(), the call
  // itself would be compiled out in NDEBUG builds, leaving index unset.
  unsigned char found = _BitScanForward64(&index, word);
# else
  unsigned char found = _BitScanForward(&index, word);
# endif
  (void)found;  // unused when assert() compiles to nothing
  assert(found != 0);
  return static_cast<int>(index);
#else
# error "no count_trailing_zeros() implementation for this compiler"
#endif
}
template <class T>
constexpr T _tzcnt(T src)
{
    static_assert(binary_digits<T>::value, "");
    constexpr T digits = binary_digits<T>::value;
    T dest = 0;
    if (digits <= std::numeric_limits<unsigned int>::digits) {
        dest = src ? __builtin_ctz(src) : digits;
    } else if (digits <= std::numeric_limits<unsigned long int>::digits) {
        dest = src ? __builtin_ctzl(src) : digits;
    } else if (digits <= std::numeric_limits<unsigned long long int>::digits) {
        dest = src ? __builtin_ctzll(src) : digits;
    } else {
        dest = _tzcnt(src, std::ignore);
    }
    return dest;
}
static __init int __setup_hugepagesz(unsigned long ps)
{
    int log_ps = __builtin_ctzl(ps);
    int level, base_shift;

    if ((1UL << log_ps) != ps || (log_ps & 1) != 0) {
        pr_warn("Not enabling %ld byte huge pages; must be a power of four\n",
                ps);
        return -EINVAL;
    }

    if (ps > 64*1024*1024*1024UL) {
        pr_warn("Not enabling %ld MB huge pages; largest legal value is 64 GB\n",
                ps >> 20);
        return -EINVAL;
    } else if (ps >= PUD_SIZE) {
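The two ctzl-based checks above validate that ps is a power of two ((1UL << log_ps) != ps) with an even exponent ((log_ps & 1) != 0), i.e. a power of four. A quick standalone illustration of that test:

#include <assert.h>
#include <stdbool.h>

static bool is_power_of_four(unsigned long ps)
{
    if (ps == 0)
        return false;
    int log_ps = __builtin_ctzl(ps);
    /* a power of two whose exponent is even */
    return (1UL << log_ps) == ps && (log_ps & 1) == 0;
}

int main(void)
{
    assert(is_power_of_four(4096));     /* 2^12 */
    assert(!is_power_of_four(8192));    /* 2^13: odd exponent */
    assert(!is_power_of_four(12288));   /* not a power of two */
    return 0;
}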
/**
 * Free all memory used by an instance of matras.
 */
void matras_destroy(struct matras *m)
{
    /* Version 0 (bit 0) is the primary view; ctzl of the mask with
     * bit 0 masked off finds the lowest remaining read view. */
    while (m->ver_occ_mask != 1) {
        matras_id_t ver = __builtin_ctzl(m->ver_occ_mask ^ 1);
        matras_destroy_read_view(m, ver);
    }
    if (m->block_counts[0]) {
        uintptr_t *extent1 = matras_ptr(m->roots[0]);
        matras_id_t id = m->block_counts[0];
        matras_id_t i, j;
        matras_id_t n1 = id >> m->shift1;
        id &= m->mask1;
        /* free not fully loaded extents */
        if (id) {
            matras_id_t n2 = id >> m->shift2;
            id &= m->mask2;
            if (id)
                n2++;
            uintptr_t *extent2 = matras_ptr(extent1[n1]);
            for (j = 0; j < n2; j++) {
                uintptr_t *extent3 = matras_ptr(extent2[j]);
                m->free_func(extent3);
            }
            m->free_func(extent2);
        }
        /* free fully loaded extents */
        matras_id_t n2 = m->extent_size / sizeof(void *);
        for (i = 0; i < n1; i++) {
            uintptr_t *extent2 = matras_ptr(extent1[i]);
            for (j = 0; j < n2; j++) {
                uintptr_t *extent3 = matras_ptr(extent2[j]);
                m->free_func(extent3);
            }
            m->free_func(extent2);
        }
        m->free_func(extent1);
        m->block_counts[0] = 0;
    }
}
void hash_table_print_stats_brief(const HashTable *const ht)
{
    size_t nbytes, nkeybits;
    double occupancy = (100.0 * ht->num_kmers) / ht->capacity;
    nbytes = ht->capacity * sizeof(BinaryKmer) +
             ht->num_of_buckets * sizeof(uint8_t[2]);
    /* num_of_buckets is a power of two, so ctzl gives log2 */
    nkeybits = (size_t)__builtin_ctzl(ht->num_of_buckets);

    char mem_str[50], num_buckets_str[100], num_entries_str[100],
         capacity_str[100];
    ulong_to_str(ht->num_of_buckets, num_buckets_str);
    bytes_to_str(nbytes, 1, mem_str);
    ulong_to_str(ht->capacity, capacity_str);
    ulong_to_str(ht->num_kmers, num_entries_str);

    status("[hash] buckets: %s [2^%zu]; bucket size: %zu; "
           "memory: %s; occupancy: %s / %s (%.2f%%)\n",
           num_buckets_str, nkeybits, (size_t)ht->bucket_size, mem_str,
           num_entries_str, capacity_str, occupancy);
}
uint32_t
rte_member_lookup_multi_bulk_vbf(const struct rte_member_setsum *ss,
        const void **keys, uint32_t num_keys, uint32_t match_per_key,
        uint32_t *match_count, member_set_t *set_ids)
{
    uint32_t i, k;
    uint32_t num_matches = 0;
    uint32_t match_cnt_t;
    uint32_t mask[RTE_MEMBER_LOOKUP_BULK_MAX];
    uint32_t h1[RTE_MEMBER_LOOKUP_BULK_MAX], h2[RTE_MEMBER_LOOKUP_BULK_MAX];
    uint32_t bit_loc;

    for (i = 0; i < num_keys; i++)
        h1[i] = MEMBER_HASH_FUNC(keys[i], ss->key_len, ss->prim_hash_seed);
    for (i = 0; i < num_keys; i++)
        h2[i] = MEMBER_HASH_FUNC(&h1[i], sizeof(uint32_t), ss->sec_hash_seed);

    for (i = 0; i < num_keys; i++) {
        mask[i] = ~0;
        for (k = 0; k < ss->num_hashes; k++) {
            bit_loc = (h1[i] + k * h2[i]) & ss->bit_mask;
            mask[i] &= test_bit(bit_loc, ss);
        }
    }

    for (i = 0; i < num_keys; i++) {
        match_cnt_t = 0;
        while (mask[i]) {
            uint32_t loc = __builtin_ctzl(mask[i]);
            set_ids[i * match_per_key + match_cnt_t] = loc + 1;
            match_cnt_t++;
            if (match_cnt_t >= match_per_key)
                break;
            mask[i] &= ~(1UL << loc);
        }
        match_count[i] = match_cnt_t;
        if (match_cnt_t != 0)
            num_matches++;
    }
    return num_matches;
}
void ffts_static_rec_i(ffts_plan_t *p, float *data, size_t N)
{
    if (N > 16) {
        size_t N1 = N >> 1;
        size_t N2 = N >> 2;
        size_t N3 = N >> 3;
        /* N is a power of two, so ctzl(N) = log2(N) indexes the
         * per-stage twiddle-factor offsets. */
        float *ws = ((float *)(p->ws)) + (p->ws_is[__builtin_ctzl(N) - 4] << 1);

        ffts_static_rec_i(p, data, N2);
        ffts_static_rec_i(p, data + N1, N3);
        ffts_static_rec_i(p, data + N1 + N2, N3);
        ffts_static_rec_i(p, data + N, N2);
        ffts_static_rec_i(p, data + N + N1, N2);

        if (N == p->N) {
            neon_static_x8_t_i(data, N, ws);
        } else {
            neon_static_x8_i(data, N, ws);
        }
    } else if (N == 16) {
int main(int argc, char **argv)
{
    int a;

    a = __builtin_bswap32(a);
    a = __builtin_bswap64(a);
    a = __builtin_constant_p(1);
    a = __builtin_constant_p("string");
    char *b = __builtin_strchr("string", 's');
    a = __builtin_expect(1, a);
    a = __builtin_strlen("string");
    a = __builtin_strcmp("string1", "string2");
    a = __builtin_offsetof(struct point, y);
    char c[100];
    b = __builtin_strcpy(c, "a");
    b = __builtin_strncpy(c, "a", 1);
    a = __builtin_ctzl(a);
    varargsfn(0);
    __builtin_prefetch(b);
    __builtin_prefetch(b, 1);
    __builtin_prefetch(b, 1, 1);
    return a;
}
/* Returns the number of trailing zero bits of *x and shifts them out of
 * *x in place.  The caller must ensure *x != 0. */
static int
count_zeroes(size_t *x)
{
    int result;
#if defined(HAVE_BUILTIN_CTZL)
    result = __builtin_ctzl(*x);
    *x >>= result;
#elif defined(HAVE_BITSCANFORWARD64)
    unsigned long index;
    _BitScanForward64(&index, *x);
    result = (int)index;
    *x >>= result;
#elif defined(HAVE_BITSCANFORWARD)
    unsigned long index;
    _BitScanForward(&index, *x);
    result = (int)index;
    *x >>= result;
#else
    result = 0;
    while ((*x & 1) == 0) {
        ++result;
        *x >>= 1;
    }
#endif
    return result;
}
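A small usage sketch, assuming count_zeroes() above is in scope: the function both returns the number of trailing zeros and strips them from *x in place.

#include <assert.h>
#include <stddef.h>

int main(void)
{
    size_t x = 48;                  /* 48 = 2^4 * 3 */
    int zeroes = count_zeroes(&x);  /* count and strip the factor of 2^4 */
    assert(zeroes == 4);
    assert(x == 3);                 /* x has been shifted in place */
    return 0;
}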
int rte_member_lookup_vbf(const struct rte_member_setsum *ss, const void *key,
        member_set_t *set_id)
{
    uint32_t j;
    uint32_t h1 = MEMBER_HASH_FUNC(key, ss->key_len, ss->prim_hash_seed);
    uint32_t h2 = MEMBER_HASH_FUNC(&h1, sizeof(uint32_t), ss->sec_hash_seed);
    uint32_t mask = ~0;
    uint32_t bit_loc;

    for (j = 0; j < ss->num_hashes; j++) {
        bit_loc = (h1 + j * h2) & ss->bit_mask;
        mask &= test_bit(bit_loc, ss);
    }

    if (mask) {
        *set_id = __builtin_ctzl(mask) + 1;
        return 1;
    }

    *set_id = RTE_MEMBER_NO_MATCH;
    return 0;
}
static int __attribute__((noinline)) ctzl(unsigned long x)
{
    return __builtin_ctzl(x);
}
// Returns the index of the first set bit in block.  Note that the return
// value is 0 both when bit 0 is set and when block == 0 ("failure"), so
// the caller must check block != 0 to tell the two cases apart.
inline size_t first_bit_in_block(const size_t& block) const {
    if (block == 0)
        return 0;
    else
        return (size_t)__builtin_ctzl(block);
}
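A caller-side sketch of the disambiguation the comment calls for, with the helper copied out as a free function purely for illustration:

#include <cassert>
#include <cstddef>

// Standalone copy of the helper above, for illustration only.
static size_t first_bit_in_block(size_t block) {
    return block == 0 ? 0 : (size_t)__builtin_ctzl(block);
}

int main() {
    size_t block = 0x8;                        // only bit 3 set
    if (block != 0)                            // guard: 0 is ambiguous
        assert(first_bit_in_block(block) == 3);
    assert(first_bit_in_block(0) == 0);        // same value as "bit 0 set"
    assert(first_bit_in_block(0x1) == 0);
    return 0;
}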