// Counts the leading zero bits of src, scaled to the logical width of T as
// reported by binary_digits<T>::value (which may be narrower than the width
// of the builtin actually used).  Returns `digits` when src == 0, because
// the __builtin_clz* family is undefined for a zero argument.
constexpr T _lzcnt(T src) {
    static_assert(binary_digits<T>::value, "");
    constexpr T digits = binary_digits<T>::value;
    T dest = 0;
    if (digits < std::numeric_limits<unsigned int>::digits) {
        // Narrower than unsigned int: the builtin sees extra high zero bits,
        // so subtract the width difference from its result.
        dest = src ? __builtin_clz(src)
                         - (std::numeric_limits<unsigned int>::digits - digits)
                   : digits;
    } else if (digits == std::numeric_limits<unsigned int>::digits) {
        dest = src ? __builtin_clz(src) : digits;
    } else if (digits < std::numeric_limits<unsigned long int>::digits) {
        // Same width adjustment, but for the unsigned long builtin.
        dest = src ? __builtin_clzl(src)
                         - (std::numeric_limits<unsigned long int>::digits - digits)
                   : digits;
    } else if (digits == std::numeric_limits<unsigned long int>::digits) {
        dest = src ? __builtin_clzl(src) : digits;
    } else if (digits < std::numeric_limits<unsigned long long int>::digits) {
        dest = src ? __builtin_clzll(src)
                         - (std::numeric_limits<unsigned long long int>::digits - digits)
                   : digits;
    } else if (digits == std::numeric_limits<unsigned long long int>::digits) {
        dest = src ? __builtin_clzll(src) : digits;
    } else {
        // Wider than every builtin operand: defer to the overload selected
        // by the std::ignore tag (defined elsewhere in this file).
        dest = _lzcnt(src, std::ignore);
    }
    return dest;
}
/* Test driver exercising PowerPC-specific GCC builtins (mulhw, fsel, eieio,
   byte-reversed loads/stores, ...).  check_relative_error() is defined
   elsewhere in this file. */
int main(int argc, char ** argv)
{
    unsigned int x = 0x12345678;
    unsigned int y = 0xDEADBEEF;
    unsigned long long xx = 0x1234567812345678ULL;
    unsigned z;
    double a = 3.14159;
    double b = 2.718;
    double c = 1.414;
    unsigned short s = 0x1234;

    /* High halves of 32x32 signed/unsigned multiplies. */
    printf("mulhw(%x, %x) = %x\n", x, y, __builtin_mulhw(x, y));
    printf("mulhwu(%x, %x) = %x\n", x, y, __builtin_mulhwu(x, y));

    /* Count-leading-zeros variants, including a 32-bit value widened to 64
       bits at the call. */
    printf("clz(%x) = %d\n", x, __builtin_clz(x));
    printf("clzll(%llx) = %d\n", (unsigned long long) x, __builtin_clzll(x));
    printf("clzll(%llx) = %d\n", xx, __builtin_clzll(xx));
    z = __builtin_bswap(x);
    printf("clzll(%lx) = %d\n", z, __builtin_clzll(z));

    /* Byte swaps. */
    printf("bswap(%x) = %x\n", x, __builtin_bswap(x));
    printf("bswap16(%x) = %x\n", s, __builtin_bswap16(s));

    /* Floating-point builtins: fused multiply-add/sub, abs, sqrt. */
    printf("fmadd(%f, %f, %f) = %f\n", a, b, c, __builtin_fmadd(a, b, c));
    printf("fmsub(%f, %f, %f) = %f\n", a, b, c, __builtin_fmsub(a, b, c));
    printf("fabs(%f) = %f\n", a, __builtin_fabs(a));
    printf("fabs(%f) = %f\n", -a, __builtin_fabs(-a));
    printf("fsqrt(%f) = %f\n", a, __builtin_fsqrt(a));
    /* Reciprocal estimates are only accurate to a few bits, so they are
       compared against the exact value with a tolerance. */
    printf("frsqrte(%f) = %s\n", a, check_relative_error(1.0 / sqrt(a), __builtin_frsqrte(a), 1./32.));
    printf("fres(%f) = %s\n", a, check_relative_error(1.0 / a, __builtin_fres(a), 1./256.));
    printf("fsel(%f, %f, %f) = %f\n", a, b, c, __builtin_fsel(a, b, c));
    printf("fsel(%f, %f, %f) = %f\n", -a, b, c, __builtin_fsel(-a, b, c));
    printf("fcti(%f) = %d\n", a, __builtin_fcti(a));
    printf("fcti(%f) = %d\n", b, __builtin_fcti(b));
    printf("fcti(%f) = %d\n", c, __builtin_fcti(c));

    /* Memory-ordering barriers: only checking that they compile and run. */
    __builtin_eieio();
    __builtin_sync();
    __builtin_isync();

    printf("isel(%d, %d, %d) = %d\n", 0, x, y, __builtin_isel(0, x, y));
    printf("isel(%d, %d, %d) = %d\n", 42, x, y, __builtin_isel(42, x, y));

    /* Byte-reversed loads and stores. */
    printf ("read_16_rev = %x\n", __builtin_read16_reversed(&s));
    printf ("read_32_rev = %x\n", __builtin_read32_reversed(&y));
    __builtin_write16_reversed(&s, 0x789A);
    printf ("after write_16_rev: %x\n", s);
    __builtin_write32_reversed(&y, 0x12345678);
    printf ("after write_32_rev: %x\n", y);
    y = 0;
    /* Repeat the store after clobbering y: CSE must not reuse the first
       store's result. */
    __builtin_write32_reversed(&y, 0x12345678);
    printf ("CSE write_32_rev: %s\n", y == 0x78563412 ? "ok" : "ERROR");

    /* Make sure that ignoring the result of a builtin doesn't cause an
       internal error */
    (void) __builtin_bswap(x);
    (void) __builtin_fsqrt(a);

    return 0;
}
/* Convert a signed 64-bit integer to the floating type behind fp_t/rep_t
   (defined elsewhere in this file).  */
COMPILER_RT_ABI fp_t __floatditf(di_int a) {

  const int aWidth = sizeof a * CHAR_BIT;

  // Handle zero as a special case to protect clz
  if (a == 0)
    return fromRep(0);

  // All other cases begin by extracting the sign and absolute value of a
  rep_t sign = 0;
  du_int aAbs = (du_int)a;
  if (a < 0) {
    sign = signBit;
    // Two's-complement negation done in the unsigned type, so the most
    // negative value does not trigger signed-overflow UB.
    aAbs = ~(du_int)a + 1U;
  }

  // Exponent of (fp_t)a is the width of abs(a).
  const int exponent = (aWidth - 1) - __builtin_clzll(aAbs);
  rep_t result;

  // Shift a into the significand field, rounding if it is a right-shift
  // NOTE(review): if significandBits >= 63 (quad precision) this is always a
  // left shift and no rounding ever occurs — confirm against the
  // significandBits definition.
  const int shift = significandBits - exponent;
  result = (rep_t)aAbs << shift ^ implicitBit; // xor clears the implicit leading 1
  // Insert the exponent
  result += (rep_t)(exponent + exponentBias) << significandBits;
  // Insert the sign bit and return
  return fromRep(result | sign);
}
// Compiler CodeGen test: P is a float, so these calls exercise the implicit
// float -> integer conversion of builtin arguments.  `leading`/`trailing`
// are globals declared elsewhere in this file; the CHECK lines are
// FileCheck patterns and must not be altered.
void test_i64(float P) {
  leading = __builtin_clzll(P);
  trailing = __builtin_ctzll(P);

// CHECK: @test_i64
// CHECK: call i64 @llvm.ctlz.i64(i64 {{.*}}, i1 false)
// CHECK: call i64 @llvm.cttz.i64(i64 {{.*}}, i1 false)
}
/* Ensure the string buffer PTR can accept LEN more characters, growing the
   backing store to a power of two (minus malloc bookkeeping overhead) when
   needed.  */
static void
sb_check (sb *ptr, size_t len)
{
  size_t want = ptr->len + len;

  if (want > ptr->max)
    {
      size_t max;

      want += MALLOC_OVERHEAD + 1;
      /* A value that is negative when viewed as ssize_t means the request
	 overflowed the sane size range.  */
      if ((ssize_t) want < 0)
	as_fatal ("string buffer overflow");
#if GCC_VERSION >= 3004
      /* Round up to 2^(bit-width of want), picking the clz variant that
	 matches the width of size_t.  */
      max = (size_t) 1 << (CHAR_BIT * sizeof (want)
			   - (sizeof (want) <= sizeof (long)
			      ? __builtin_clzl ((long) want)
			      : __builtin_clzll ((long long) want)));
#else
      /* Portable fallback: double until large enough.  */
      max = 128;
      while (want > max)
	max <<= 1;
#endif
      /* Leave room for malloc's header so the underlying allocation stays a
	 power of two.  */
      max -= MALLOC_OVERHEAD + 1;
      ptr->max = max;
      ptr->ptr = xrealloc (ptr->ptr, max + 1);
    }
}
/* Round `number` up to a power of two.
 * Note: the clz path returns the next power strictly greater than an exact
 * power-of-two input (2n), while the portable fallback returns n itself —
 * preserved here because callers may rely on either build's behavior. */
size_t lwan_nextpow2(size_t number)
{
#if defined(HAVE_BUILTIN_CLZLL)
    static const int size_bits = (int)sizeof(number) * CHAR_BIT;

    if (sizeof(size_t) == sizeof(unsigned int)) {
        return (size_t)1 << (size_bits - __builtin_clz((unsigned int)number));
    } else if (sizeof(size_t) == sizeof(unsigned long)) {
        return (size_t)1 << (size_bits - __builtin_clzl((unsigned long)number));
    } else if (sizeof(size_t) == sizeof(unsigned long long)) {
        return (size_t)1 << (size_bits - __builtin_clzll((unsigned long long)number));
    } else {
        (void)size_bits;
    }
#endif

    /* Portable bit-smearing round-up. */
    number--;
    number |= number >> 1;
    number |= number >> 2;
    number |= number >> 4;
    number |= number >> 8;
    number |= number >> 16;
    /* Fold the upper word as well when size_t is 64 bits wide.  The double
     * shift is a no-op on 32-bit targets and, unlike a single ">> 32",
     * avoids undefined behavior there.  Without this fold, inputs above
     * 2^32 produced a non-power-of-two result. */
    number |= (number >> 16) >> 16;

    return number + 1;
}
/* Round `number` up to a power of two.  The clz build returns 2n for an
 * exact power-of-two input n, while the fallback returns n itself — both
 * behaviors preserved from the original. */
static size_t find_next_power_of_two(size_t number)
{
#if HAVE_BUILTIN_CLZLL
    static const int size_bits = (int)sizeof(number) * CHAR_BIT;

    if (sizeof(size_t) == sizeof(unsigned int)) {
        return 1U << (size_bits - __builtin_clz((unsigned int)number));
    } else if (sizeof(size_t) == sizeof(unsigned long)) {
        return 1UL << (size_bits - __builtin_clzl((unsigned long)number));
    } else if (sizeof(size_t) == sizeof(unsigned long long)) {
        return 1ULL << (size_bits - __builtin_clzll((unsigned long long)number));
    } else {
        __builtin_unreachable();
    }
#else
    number--;
    number |= number >> 1;
    number |= number >> 2;
    number |= number >> 4;
    number |= number >> 8;
    number |= number >> 16;
    /* 64-bit size_t needs one more fold; the double shift stays
     * well-defined (and is a no-op) when size_t is 32 bits wide, unlike a
     * single ">> 32".  Without it, inputs above 2^32 were rounded to a
     * non-power-of-two value. */
    number |= (number >> 16) >> 16;

    return number + 1;
#endif
}
// log2 - returns -1 if x==0, otherwise log2(x)
// (i.e. the zero-based position of the highest set bit).
inline int log2(size_t x)
{
    if (x == 0)
        return -1;
#if defined(__GNUC__)
#ifdef REALM_PTR_64
    return 63 - __builtin_clzll(x); // returns int
#else
    return 31 - __builtin_clz(x); // returns int
#endif
#elif defined(_WIN32)
    unsigned long index = 0;
#ifdef REALM_PTR_64
    unsigned char c = _BitScanReverse64(&index, x); // outputs unsigned long
#else
    unsigned char c = _BitScanReverse(&index, x); // outputs unsigned long
#endif
    return static_cast<int>(index);
#else // not __GNUC__ and not _WIN32
    // Portable fallback: count how many shifts move the highest bit to bit 0.
    int highest_bit = 0;
    for (size_t v = x >> 1; v != 0; v >>= 1)
        ++highest_bit;
    return highest_bit;
#endif
}
/*
 * Convert signed quad to double.
 */
double
__floatdidf(quad_t x)
{
	union ieee_double_u ux = { .dblu_d = 0.0 };

	if (x == 0)
		return 0.0;
	if (x == 1)
		return 1.0;

	if (x < 0) {
		/* -x would overflow for QUAD_MIN; its value is exactly -2^63. */
		if (x == QUAD_MIN)
			return -0x1.0p63;
		ux.dblu_sign = 1;
		x = -x;
	}

	/* l = number of leading zeros of |x| (x != 0 here, so clz is defined). */
	u_int l = __builtin_clzll(x);
	x <<= (l + 1); /* clear implicit bit */
	/* Right-align the fraction into the double's mantissa field. */
	x >>= 64 - (DBL_FRACHBITS + DBL_FRACLBITS);

	union uu u = { .uq = x };
	ux.dblu_frach = u.ul[H];
	ux.dblu_fracl = u.ul[L];
	/* 63 - l is floor(log2(|x|)), the unbiased exponent. */
	ux.dblu_exp = DBL_EXP_BIAS + 63 - l;
	return ux.dblu_d;
}
/* Look up a burst of packets in the ACL table.
 * NOTE(review): this chunk is truncated — the function body continues past
 * the visible text. */
static int
rte_table_acl_lookup(
	void *table,
	struct rte_mbuf **pkts,
	uint64_t pkts_mask,
	uint64_t *lookup_hit_mask,
	void **entries)
{
	struct rte_table_acl *acl = (struct rte_table_acl *) table;
	const uint8_t *pkts_data[RTE_PORT_IN_BURST_SIZE_MAX];
	uint32_t results[RTE_PORT_IN_BURST_SIZE_MAX];
	uint64_t pkts_out_mask;
	uint32_t n_pkts, i, j;
	__rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask);

	RTE_TABLE_ACL_STATS_PKTS_IN_ADD(acl, n_pkts_in);

	/* Input conversion: gather the packet-data pointer of every packet
	 * selected by pkts_mask, packed into pkts_data[0..j).  The loop bound
	 * assumes RTE_PORT_IN_BURST_SIZE_MAX == 64 so that it equals the
	 * bit-width of pkts_mask — TODO confirm.
	 * NOTE(review): __builtin_clzll is undefined for pkts_mask == 0;
	 * presumably callers never pass an empty mask — verify. */
	for (i = 0, j = 0; i < (uint32_t)(RTE_PORT_IN_BURST_SIZE_MAX -
		__builtin_clzll(pkts_mask)); i++) {
		uint64_t pkt_mask = 1LLU << i;

		if (pkt_mask & pkts_mask) {
			pkts_data[j] = rte_pktmbuf_mtod(pkts[i], uint8_t *);
			j++;
		}
	}
/* Return the bit position of the best match in the node bitmap for the
 * given stride, or -1 if no bit is set.  Two implementations: a per-level
 * scan and a table-assisted clz version (FAST_TREE_FUNCTION).
 * STRIDE, count_inl_bitmap(), test_bitmap() and fct[] are defined elsewhere
 * in this file. */
int tree_function(BITMAP_TYPE bitmap, uint8_t stride)
{
#ifndef FAST_TREE_FUNCTION
	int i;
	int pos;
	if (bitmap == 0ULL)
		return -1;
	/* Walk the levels from longest prefix down, halving the stride each
	 * step and testing the corresponding bitmap position. */
	for (i = STRIDE - 1; i >= 0; i--) {
		stride >>= 1;
		pos = count_inl_bitmap(stride, i);
		if (test_bitmap(bitmap, pos)) {
			return pos;
		}
	}
	return -1;
#else
	BITMAP_TYPE ret;
	int pos;
	/* fct[] masks the bitmap down to positions relevant for this stride;
	 * the most significant surviving bit wins. */
	ret = fct[(stride >> 1)] & bitmap;
	if (ret) {
		/* ret != 0, so clzll is well-defined; convert the leading-zero
		 * count to a bit index counted from bit 0. */
		pos = __builtin_clzll(ret);
		return 63 - pos;
	} else
		return -1;
#endif
}
/* Pop the next node from the scheduler queue.  anchor257 holds entries
 * served before any weighted slot.  The weighted anchors are tracked by the
 * `bits` bitmap; after each left shift its most significant bit corresponds
 * to the slot at `offset`. */
static h2o_http2_scheduler_queue_node_t *queue_pop(h2o_http2_scheduler_queue_t *queue)
{
    if (!h2o_linklist_is_empty(&queue->anchor257)) {
        h2o_http2_scheduler_queue_node_t *node =
            H2O_STRUCT_FROM_MEMBER(h2o_http2_scheduler_queue_node_t, _link, queue->anchor257.next);
        h2o_linklist_unlink(&node->_link);
        return node;
    }
    while (queue->bits != 0) {
        /* Rotate forward to the next slot whose bit is set (bits != 0 here,
         * so clzll is well-defined). */
        int zeroes = __builtin_clzll(queue->bits);
        queue->bits <<= zeroes;
        queue->offset = (queue->offset + zeroes) % (sizeof(queue->anchors) / sizeof(queue->anchors[0]));
        if (!h2o_linklist_is_empty(queue->anchors + queue->offset)) {
            h2o_http2_scheduler_queue_node_t *node =
                H2O_STRUCT_FROM_MEMBER(h2o_http2_scheduler_queue_node_t, _link, queue->anchors[queue->offset].next);
            h2o_linklist_unlink(&node->_link);
            if (h2o_linklist_is_empty(queue->anchors + queue->offset))
                /* Slot drained by this pop: clear its (top) bit. */
                queue->bits &= (1ULL << (sizeof(queue->bits) * 8 - 1)) - 1;
            return node;
        }
        /* Stale bit for an already-empty slot: clear it and keep scanning. */
        queue->bits &= (1ULL << (sizeof(queue->bits) * 8 - 1)) - 1;
    }
    return NULL;
}
/* Allocate `count` contiguous chunks from the page's availability bitmap,
 * honoring the `align` mask of permitted starting positions.  Returns the
 * chunk's address, or 0 if no suitable run is free. */
static uintptr_t
iopa_allocinpage(io_pagealloc_t * pa, uint32_t count, uint64_t align)
{
    uint32_t n, s;
    uint64_t avail = pa->avail;

    assert(avail);

    // find strings of count 1 bits in avail
    /* Repeated AND-with-shift: after this loop a bit survives only where a
     * run of `count` consecutive 1 bits begins. */
    for (n = count; n > 1; n -= s)
    {
        s = n >> 1;
        avail = avail & (avail << s);
    }
    // and aligned
    avail &= align;

    if (avail)
    {
        /* Take the leftmost eligible run; avail != 0, so clzll is defined
         * and n is the run's chunk index from the top of the bitmap. */
        n = __builtin_clzll(avail);
        /* Clear the `count` bits of the run starting at index n. */
        pa->avail &= ~((-1ULL << (64 - count)) >> n);
        if (!pa->avail && pa->link.next)
        {
            /* Page is now full: take it off the free-page queue. */
            remque(&pa->link);
            pa->link.next = 0;
        }
        return (n * kIOPageAllocChunkBytes + trunc_page((uintptr_t) pa));
    }

    return (0);
}
/* Final conversion to double. */
static void strscan_double(uint64_t x, tl_value *o, int32_t ex2, int32_t neg)
{
  double n;

  /* Avoid double rounding for denormals. */
  if (LJ_UNLIKELY(ex2 <= -1075 && x != 0)) {
    /* NYI: all of this generates way too much code on 32 bit CPUs. */
#if defined(__GNUC__) && LJ_64
    /* clz^63 == index of the highest set bit (x != 0 is guaranteed here). */
    int32_t b = (int32_t)(__builtin_clzll(x)^63);
#else
    int32_t b = tl_fls64(x);
#endif
    if ((int32_t)b + ex2 <= -1023 && (int32_t)b + ex2 >= -1075) {
      /* rb is the unit in the last place of the target denormal.  Round to
	 nearest-even on that bit, then truncate the bits below it. */
      uint64_t rb = (uint64_t)1 << (-1075-ex2);
      if ((x & rb) && ((x & (rb+rb+rb-1)))) x += rb+rb;
      x = (x & ~(rb+rb-1));
    }
  }

  /* Convert to double using a signed int64_t conversion, then rescale. */
  lua_assert((int64_t)x >= 0);
  n = (double)(int64_t)x;
  if (neg) n = -n;
  if (ex2) n = ldexp(n, ex2);
  o->n = n;
}
/* Compute p = a * b for an unsigned 64-bit multiplier b, choosing the
 * cheapest path: zero/one shortcuts, a pure shift when b is a power of two,
 * a single-digit multiply when b fits in one mp_digit, and a general
 * multi-digit multiply otherwise.  p may alias a. */
void mpi_mul_u64(const mpi *a, uint64_t b, mpi *p)
{
	if (mpi_is_zero(a) || b == 0) {
		mpi_zero(p);
		return;
	} else if (b == 1) {
		if (a != p)
			mpi_set_mpi(p, a);
		return;
	} else if ((b & (b-1)) == 0) { /* B is a power of 2 */
		/* ctzll is defined here: b != 0 was handled above. */
		mpi_lshift(a, __builtin_ctzll(b), p);
		return;
	} else if (b == (mp_digit)b) { /* B fits in an mp_digit */
		if (a == p) {
			/* In place: multiply the digits and append any carry. */
			mp_digit cy = mp_dmuli(p->digits, p->size, (mp_digit)b);
			if (cy) {
				MPI_MIN_ALLOC(p, p->size + 1);
				p->digits[p->size++] = cy;
			}
		} else {
			MPI_MIN_ALLOC(p, a->size);
			mp_digit cy = mp_dmul(a->digits, a->size, (mp_digit)b, p->digits);
			if (cy) {
				MPI_MIN_ALLOC(p, a->size + 1);
				p->digits[a->size] = cy;
				p->size = a->size + 1;
			} else {
				p->size = a->size;
			}
		}
	} else {
		/* General case: spread b over just enough mp_digits (bits is the
		 * bit-width of b; b != 0 here, so clzll is defined), then do a
		 * full multi-precision multiply. */
		unsigned bits = CHAR_BIT * sizeof(uint64_t) - __builtin_clzll(b);
		mp_size size = (bits + MP_DIGIT_BITS - 1) / MP_DIGIT_BITS;
		mp_digit *bp = MP_TMP_ALLOC(size);
#if MP_DIGIT_BITS >= 64
		bp[0] = b;
#else
		for (mp_size j=0; j<size; j++) {
			bp[j] = (mp_digit)b;
			b >>= MP_DIGIT_BITS;
		}
#endif
		if (a == p) {
			/* In place: multiply into a scratch buffer first. */
			mp_digit *tmp = MP_TMP_ALLOC(p->size + size);
			mp_mul(p->digits, p->size, bp, size, tmp);
			MPI_MIN_ALLOC(p, p->size + size);
			mp_copy(tmp, p->size + size, p->digits);
			p->size = mp_rsize(p->digits, p->size + size);
			MP_TMP_FREE(tmp);
		} else {
			MPI_MIN_ALLOC(p, a->size + size);
			mp_mul(a->digits, a->size, bp, size, p->digits);
			p->size = mp_rsize(p->digits, a->size + size);
		}
		MP_TMP_FREE(bp);
	}
}
/* Print-free scratch driver: exit status is the leading-zero count of the
 * value given on the command line. */
int main(int argc, char **argv)
{
    /* Guard against a missing argument: argv[1] would be NULL. */
    if (argc < 2) {
        fprintf(stderr, "usage: %s <value>\n", argv[0]);
        return 2;
    }
    /* strtoull covers the full unsigned 64-bit range (and accepts 0x
     * prefixes); atol() returns signed long and mangles large inputs. */
    uint64_t value = strtoull(argv[1], NULL, 0);
    /* __builtin_clzll(0) is undefined behavior; define the zero answer as
     * the full bit width instead. */
    if (value == 0)
        return 64;
    //return __builtin_cpu_supports_popcount();
    return (int)__builtin_clzll(value);
    /* Earlier experiments, kept for reference:
     * __asm__ volatile ("lzcnt %1, %0" : "=r"(res) : "r"(value));
     * __asm__ volatile ("popcnt %1, %0" : "=r"(b) : "r"(a));
     * return res; */
}
/* Return the number of leading zero bits in the 128-bit value a.
 * Precondition (as for __builtin_clz*): a != 0. */
si_int __clzti2(ti_int a) {
  twords x;
  x.all = a;
  /* f is all-ones when the high 64-bit word is zero, all-zeros otherwise. */
  const di_int f = -(x.s.high == 0);
  /* Branchless select: count zeros of the high word when it is non-zero,
   * otherwise of the low word — adding 64 (the width of di_int) in the
   * latter case. */
  return __builtin_clzll((x.s.high & ~f) | (x.s.low & f)) +
         ((si_int)f & ((si_int)(sizeof(di_int) * CHAR_BIT)));
}
// Compiler CodeGen test: P is a float, so these calls exercise the implicit
// float -> integer conversion of builtin arguments.  `leading`, `trailing`
// and `pop` are globals declared elsewhere in this file; the CHECK lines
// are FileCheck patterns and must not be altered.
void test_i64(float P) {
  leading = __builtin_clzll(P);
  trailing = __builtin_ctzll(P);
  pop = __builtin_popcountll(P);

// CHECK: @test_i64
// CHECK: call i64 @llvm.ctlz.i64
// CHECK: call i64 @llvm.cttz.i64
// CHECK: call i64 @llvm.ctpop.i64
}
/* 64-bit unsigned division with remainder via a Newton-Raphson reciprocal:
   an 8-bit table seed is refined to a full-width estimate of 1/y, the
   quotient is estimated with a high multiply, then corrected by at most a
   few subtractions.  lsl64/lsr64/mul64/umulh64 and the UDWtype/UDItype
   typedefs are provided elsewhere in this file. */
static UDWtype
__udivmoddi4 (UDWtype x, UDWtype y, UDWtype *res)
{
  /* Reciprocal seed table, indexed by the 8 bits that follow the leading 1
     of the normalized divisor.  */
  static unsigned char unrt[256] = {-2, -4, -6, -8, -10, -12, -14, -16, -18, -20, -22, -23, -25, -27, -29, -31, -32, -34, -36, -38, -39, -41, -43, -44, -46, -48, -49, -51, -53, -54, -56, -57, -59, -61, -62, -64, -65, -67, -68, -70, -71, -73, -74, -76, -77, -78, -80, -81, -83, -84, -86, -87, -88, -90, -91, -92, -94, -95, -96, -98, -99, -100, -102, -103, -104, -105, -107, -108, -109, -110, -112, -113, -114, -115, -117, -118, -119, -120, -121, -122, -124, -125, -126, -127, -128, 127, 126, 125, 123, 122, 121, 120, 119, 118, 117, 116, 115, 114, 113, 112, 111, 110, 109, 108, 107, 106, 105, 104, 103, 102, 101, 100, 99, 98, 97, 96, 95, 94, 93, 92, 91, 90, 89, 88, 88, 87, 86, 85, 84, 83, 82, 81, 80, 80, 79, 78, 77, 76, 75, 74, 74, 73, 72, 71, 70, 70, 69, 68, 67, 66, 66, 65, 64, 63, 62, 62, 61, 60, 59, 59, 58, 57, 56, 56, 55, 54, 53, 53, 52, 51, 50, 50, 49, 48, 48, 47, 46, 46, 45, 44, 43, 43, 42, 41, 41, 40, 39, 39, 38, 37, 37, 36, 35, 35, 34, 33, 33, 32, 32, 31, 30, 30, 29, 28, 28, 27, 27, 26, 25, 25, 24, 24, 23, 22, 22, 21, 21, 20, 19, 19, 18, 18, 17, 17, 16, 15, 15, 14, 14, 13, 13, 12, 12, 11, 10, 10, 9, 9, 8, 8, 7, 7, 6, 6, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0, 0};
  // table lookup start
#define W 64
  /* Normalize y so that its leading 1 sits at bit 63.
     NOTE(review): y == 0 makes __builtin_clzll undefined — presumably
     callers never divide by zero; verify.  */
  unsigned char k = __builtin_clzll(y);
  UDItype lshifted_y = lsl64(y,k);
  /// remove the leading 1
  lshifted_y = lshifted_y << 1;
  UDItype ty = lsr64(lshifted_y, W-8 ); // prescaling: keep the top 8 fraction bits
  UDItype t = unrt[ ty ] | 256; // table lookup: 9-bit estimate with implicit leading 1
  UDItype z = lsr64(lsl64(t,W-9), W-k-1 ); // postscaling to match y's magnitude
  // z recurrence: Newton-Raphson z' = z + z*hi(-y*z), doubling accurate bits
  UDItype my = 0-y;
//#define NR_UNROLLED
#ifdef NR_UNROLLED
  z = z + umulh64(z,mul64(my,z));
  z = z + umulh64(z,mul64(my,z));
  z = z + umulh64(z,mul64(my,z));
#else
  unsigned int index;
  /* Three iterations grow the ~8 accurate seed bits past 64.  */
  for (index = 0; index < 3; ++index)
    {
      UDItype zd = umulh64(z,mul64(my,z));
      //if (zd == 0) break;
      z = z + zd;
    }
#endif
  // q estimate
  UDItype q = umulh64(x,z);
  UDItype r = x - mul64(y,q);
  // q refinement: the estimate can be low by a few units of y
  if (r >= y)
    {
      r = r - y;
      q = q + 1;
      if (r >= y)
	{
	  r = r - y;
	  q = q + 1;
	  if (r >= y)
	    { //add this in case of three iterations
	      r = r - y;
	      q = q + 1;
	    }
	}
    }
  if (res != 0)
    *res = r;
  return q;
}
// UBSan test body: each call feeds n to a clz builtin (presumably invoked
// with n == 0 by the test harness — the RECOVER lines are FileCheck
// patterns for the resulting diagnostics and must not be altered).  Note
// the int argument is implicitly widened for the long/long long variants.
void check_clz(int n) {
  // RECOVER: builtins.cpp:[[@LINE+1]]:17: runtime error: passing zero to clz(), which is not a valid argument
  __builtin_clz(n);

  // RECOVER: builtins.cpp:[[@LINE+1]]:18: runtime error: passing zero to clz(), which is not a valid argument
  __builtin_clzl(n);

  // RECOVER: builtins.cpp:[[@LINE+1]]:19: runtime error: passing zero to clz(), which is not a valid argument
  __builtin_clzll(n);
}
// Retorna a * b mod m llu mm( llu a, llu b, llu m ) { unsigned step = __builtin_clzll(m); llu mask = (1llu << step) - 1; a %= m; b %= m; llu res = 0; do { res = ( res + a * (b & mask) ) % m; a = (a << step) % m; } while( (b >>= step) != 0 ); return res; }
/* Allocate an empty circuit with the given pin storage size (rounded up to
 * a power of two) and input/output pin counts.  Returns NULL if the node
 * allocation fails. */
node_t* empty_circuit(uint64_t pin_size, uint64_t input_pins, uint64_t output_pins)
{
    node_t* circuit = calloc(1, sizeof(node_t));
    if (!circuit)
        return NULL;
    if (pin_size == 0)
        pin_size++;
    /* ceil(log2(pin_size)).  pin_size == 1 needs its own branch: the
     * original code evaluated __builtin_clzll(0) here, which is undefined
     * behavior. */
    circuit->lg2_pin_size = (pin_size > 1) ? 64 - __builtin_clzll(pin_size - 1) : 0;
    /* 1ULL, not 1: a plain int shift overflows once lg2_pin_size >= 31. */
    pin_size = 1ULL << (circuit->lg2_pin_size);
    circuit->input0 = 0;
    circuit->inputM = input_pins;
    circuit->output0 = input_pins;
    circuit->outputN = input_pins + output_pins;
    circuit->pins = calloc(pin_size, input_pins + output_pins);
    return circuit;
}
/* Recursive accumulator over the binary representation of x, reduced mod
 * MOD.  MOD and range() are defined elsewhere in this file.
 * NOTE(review): the exact quantity being summed is not derivable from this
 * chunk alone; structurally, the code special-cases powers of two and
 * otherwise recurses with the lowest set bit cleared. */
ll s(ll x){
	if (x==0) return 0;
	/* Indices of the highest and lowest set bits of x (x != 0 here, so
	 * clzll is well-defined). */
	int leftmostone=63-__builtin_clzll(x);
	int rightmostone=__builtin_ffsll(x)-1;
	if (leftmostone==rightmostone){
		/* x is a power of two. */
		return (rightmostone+s(x-1)) % MOD;
	}
	else{
		/* Clear the lowest set bit and recurse on the remainder. */
		ll y = x^(1ll<<rightmostone);
		/* Number of zero bits strictly inside [rightmost, leftmost]. */
		int middlezeros=leftmostone-rightmostone+1-__builtin_popcountll(x);
		return (s(y)+range(rightmostone,leftmostone+middlezeros+2)) % MOD;
	}
}
/* Count leading redundant sign bits: the number of bits following the most
   significant bit that are identical to it (the contract of GCC's
   __builtin_clrsbll).  */
int
__clrsbdi2 (long long x)
{
  /* Fold negative values onto non-negative ones: clrsb(x) == clrsb(~x).  */
  if (x < 0LL)
    x = ~x;
  /* Every bit matches the sign bit: all of them but the sign are redundant.  */
  if (x == 0LL)
    return 8 * sizeof (x) - 1;
  /* The leading-zero count includes the (zero) sign bit itself, so exclude
     it from the answer.  */
  return __builtin_clzll ((unsigned long long) x) - 1;
}
/**
 * Adds a new hash to the HLL
 * @arg h The hll to add to
 * @arg hash The hash to add
 */
void hll_add_hash(hll_t *h, uint64_t hash) {
    // The top `precision` bits of the hash select the register
    int idx = hash >> (64 - h->precision);

    // Drop the index bits and plant a guard bit so the leading-zero count
    // below stays bounded even when the remaining bits are all zero
    hash = hash << h->precision | (1 << (h->precision -1));
    int rank = __builtin_clzll(hash) + 1;

    // Each register keeps the maximum rank ever observed
    if (rank > get_register(h, idx)) {
        set_register(h, idx, rank);
    }
}
/* Bit-scan-reverse over 64 bits: stores the index of the highest set bit of
 * mask into *index and returns true, or returns false when mask == 0. */
__INTRIN_INLINE bool bsr64(unsigned long* const index, const uint64_t mask)
{
#if defined(__GNUC__) || defined(__clang__)
    if (!mask)
        return false;
    /* clz counts down from bit 63; convert it to the bit's index. */
    *index = (unsigned long)(63 - __builtin_clzll(mask));
    return true;
#elif defined(_MSC_VER)
    return _BitScanReverse64(index, mask) != 0;
#else
# error Unsupported platform
#endif
}
/* Convert a signed 64-bit integer to an x87 80-bit long double via its bit
 * representation (long_double_bits is defined elsewhere in this file). */
long double
__floatdixf(di_int a)
{
    if (a == 0)
        return 0.0;
    const unsigned N = sizeof(di_int) * CHAR_BIT;
    /* s is all-ones when a < 0, else zero; (a ^ s) - s computes |a| without
     * a branch. */
    const di_int s = a >> (N-1);
    a = (a ^ s) - s;
    /* a != 0 here, so clzll is well-defined. */
    int clz = __builtin_clzll(a);
    int e = (N - 1) - clz ; /* exponent */
    long_double_bits fb;
    fb.u.high.s.low = ((su_int)s & 0x00008000) | /* sign */
                      (e + 16383);               /* exponent */
    /* Left-justify the magnitude; bit 63 becomes the explicit integer bit
     * of the x87 format. */
    fb.u.low.all = a << clz;                     /* mantissa */
    return fb.f;
}
/*----------------------------------------------------------------------------*/
/* Scan every configured netmap device and raise the global num_queues to
 * cover the highest CPU bit set in any device's cpu_mask. */
void
netmap_load_module_upper_half(void)
{
	int i;
	int num_dev;
	uint64_t cpu_mask;
	int queue_range;

	num_dev = g_config.mos->netdev_table->num;
	for (i = 0; i < num_dev; i++) {
		cpu_mask = g_config.mos->netdev_table->ent[i]->cpu_mask;
		/* __builtin_clzll(0) is undefined behavior: a device with no
		 * CPUs assigned contributes no queues, so skip it. */
		if (cpu_mask == 0)
			continue;
		/* Position of the highest set bit + 1 = queues this device
		 * spans. */
		queue_range = sizeof(cpu_mask) * NBBY - __builtin_clzll(cpu_mask);
		num_queues = (num_queues < queue_range) ? queue_range : num_queues;
	}
}
/* Hash the key and fold it into the HyperLogLog's registers.  No-op on a
 * NULL hll/key or an empty key. */
void tw_hyperloglog_add(struct tw_hyperloglog *hll, const void *key, size_t key_size)
{
  if (!hll || !key || !key_size) {
    return;
  }

  const uint64_t hash = tw_metrohash_64(TW_HLL_DEFAULT_SEED, key, key_size);
  const uint8_t precision = hll->precision;
  /* The top `precision` bits of the hash select the register. */
  const uint32_t bucket_idx = hash >> (64 - precision);
  /* Rank = leading zeros of the remaining bits + 1; the OR plants a guard
   * bit so the clz stays bounded (and defined) even when the remainder is
   * all zero. */
  const uint8_t leading_zeros =
      (__builtin_clzll(hash << precision | (1 << (precision - 1))) + 1),
      old_val = hll->registers[bucket_idx];
  /* Each register keeps the maximum rank observed. */
  hll->registers[bucket_idx] = (leading_zeros > old_val) ? leading_zeros : old_val;
}
/* Sum, over all exponents k, of k times the count of perfect k-th powers in
 * [a, b], with inclusion-exclusion so a value counted for k is removed from
 * its divisors' counts.  kth_root2/ipow/MAX and the U typedef come from
 * elsewhere in this file.
 * NOTE(review): assumes b != 0 — __builtin_clzll(0) below is UB; verify
 * against callers. */
U solve(U a, U b){
	U min_k, max_k, sum, i, k, low, high;
	U counts[64];

	memset(counts, 0, sizeof (counts));
	sum = 0;
	min_k = 1;
	/* Largest exponent worth trying: the bit-width of b. */
	max_k = 64 - __builtin_clzll(b);
	for (k = max_k;k >= min_k; --k){
		if (k > 1){
			low = floor(kth_root2(a, k));
			high = ceil(kth_root2(b, k));
		} else {
			low = a;
			high = b;
		}
		/* Nudge the (approximate) root bounds until low^k and high^k
		 * actually bracket [a, b]. */
		while (ipow(low, k) < a){
			++low;
			double x = pow(low, k);
			if (x > MAX) goto next;;
		}
		if (ipow(low, k) > b) continue;
		while (pow(high, k) > MAX) --high;
		while (ipow(high, k) > b) --high;
		if (ipow(high, k) < a) continue;
		counts[k] = high - low + 1;
		/* A k-th power is also a (2k, 3k, ...)-th power: subtract the
		 * already-computed higher-exponent counts. */
		for (i = k + k;i <= max_k; i += k){
			counts[k] -= counts[i];
		}
		sum += counts[k] * k;
		next:;
	}
	return sum;
}