示例#1
0
/*
 * Returns the zero-based index of the least significant set bit of `a`.
 *
 * When `a` is zero there is no set bit. In that case the bit width of
 * Size (sizeof(Size) * 8, assuming 8-bit bytes) is returned as a
 * "not found" value, instead of invoking the undefined result of the
 * compiler intrinsics or spinning forever in the portable loop.
 */
Size findFirstBit(Size a) {
#ifdef __GNUC__
    if (a == 0) {
        return (Size)(sizeof(Size) * 8);
    }
#ifdef __X64__
    /*
     * Use the `long long` variant: `long` is only 32 bits wide on 64-bit
     * Windows targets, so __builtin_ctzl would ignore the upper half there.
     */
    return (Size)__builtin_ctzll(a);
#else
    return (Size)__builtin_ctz(a);
#endif
#elif defined(_MSC_VER)
    unsigned long pos = 0;
    if (a == 0) {
        return (Size)(sizeof(Size) * 8);
    }
#ifdef __X64__
    _BitScanForward64(&pos, a);
#else
    _BitScanForward(&pos, a);
#endif
    return pos;
#else
    /* Portable fallback: shift right until the lowest bit is set. */
    Size c = 0;
    if (a == 0) {
        return (Size)(sizeof(Size) * 8);
    }
    while (!(a & 1)) {
        a >>= 1;
        c++;
    }
    return c;
#endif
}
示例#2
0
文件: lz4.c 项目: BobWay/rippled
/********************************
   Common functions
********************************/
/*
 * Little-endian part of LZ4_NbCommonBytes (the remainder of the function is
 * not visible in this excerpt).
 *
 * For a non-zero `val`, every variant below yields the position of the lowest
 * set bit divided by 8, i.e. the index of the lowest non-zero byte.
 * Presumably `val` is the XOR of two machine words, so the result is the
 * number of equal leading bytes in memory order -- TODO confirm with callers.
 *
 * Three implementations are selected at compile time: the MSVC bit-scan
 * intrinsic, the GCC count-trailing-zeros builtin, or a table lookup that is
 * also used when LZ4_FORCE_SW_BITCOUNT is defined.
 */
static unsigned LZ4_NbCommonBytes (register size_t val)
{
    if (LZ4_isLittleEndian())
    {
        if (LZ4_64bits())
        {
#       if defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT)
            unsigned long r = 0;
            _BitScanForward64( &r, (U64)val );
            /* bit index -> byte index */
            return (int)(r>>3);
#       elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
            return (__builtin_ctzll((U64)val) >> 3);
#       else
            /* Software path: `val & -val` isolates the lowest set bit; the
             * multiplication followed by `>> 58` turns it into a 6-bit index
             * into a table that already stores byte positions. */
            static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 };
            return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
#       endif
        }
        else /* 32 bits */
        {
#       if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
            unsigned long r;
            _BitScanForward( &r, (U32)val );
            return (int)(r>>3);
#       elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
            return (__builtin_ctz((U32)val) >> 3);
#       else
            /* Same lookup scheme as above with a 32-entry table and a 5-bit index. */
            static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };
            return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
#       endif
        }
    }
示例#3
0
文件: zdict.c 项目: TrianglesPCT/zstd
/*-********************************************************
*  Dictionary training functions
**********************************************************/
/*
 * Little-endian part of ZDICT_NbCommonBytes (the big-endian branch is cut off
 * in this excerpt).
 *
 * For a non-zero `val`, each variant returns the index of the lowest set bit
 * divided by 8, i.e. the index of the lowest non-zero byte. Presumably `val`
 * is the XOR of two words being compared -- TODO confirm with callers.
 */
static unsigned ZDICT_NbCommonBytes (register size_t val)
{
    if (MEM_isLittleEndian()) {
        if (MEM_64bits()) {
#       if defined(_MSC_VER) && defined(_WIN64)
            unsigned long r = 0;
            _BitScanForward64( &r, (U64)val );
            /* bit index -> byte index */
            return (unsigned)(r>>3);
#       elif defined(__GNUC__) && (__GNUC__ >= 3)
            return (__builtin_ctzll((U64)val) >> 3);
#       else
            /* Software path: isolate the lowest set bit with `val & -val`,
             * multiply, and use the top 6 bits as an index into a table that
             * stores byte positions. */
            static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 };
            return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
#       endif
        } else { /* 32 bits */
#       if defined(_MSC_VER)
            unsigned long r=0;
            _BitScanForward( &r, (U32)val );
            return (unsigned)(r>>3);
#       elif defined(__GNUC__) && (__GNUC__ >= 3)
            return (__builtin_ctz((U32)val) >> 3);
#       else
            /* Same lookup scheme with a 32-entry table and a 5-bit index. */
            static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };
            return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
#       endif
        }
    } else {  /* Big Endian CPU */
示例#4
0
 /*
  * Counts the zero bits below the least significant set bit of `value`.
  * Returns 64 when the bit scan reports that no bit is set.
  */
 inline uint countTrailingZeros(uint64_t value)
 {
     unsigned long lowestSetBit;

     // The intrinsic returns zero when it finds no set bit.
     if (!_BitScanForward64(&lowestSetBit, value))
         return 64;

     return lowestSetBit;
 }
示例#5
0
文件: strings.c 项目: Strongc/WinObjC
/*
 * Find-first-set for a long long.
 *
 * Returns the 1-based position of the least significant set bit of `value`,
 * or 0 when the bit scan reports that no bit is set.
 */
int ffsll(long long value)
{
	unsigned long bitPos = 0;

	/* The intrinsic returns zero when no bit is set. */
	if (!_BitScanForward64(&bitPos, value)) {
		return 0;
	}

	/* Convert the zero-based index into the 1-based ffs convention. */
	return bitPos + 1;
}
示例#6
0
/*
 * Runs a forward bit scan on `x` and returns the index it reports.
 * The 64-bit scan is used only when WORDSIZE is defined as 64; otherwise the
 * 32-bit variant is used.
 *
 * NOTE(review): the function is named clz, but a forward scan locates the
 * least significant set bit -- confirm which one callers expect.
 * NOTE(review): the scan result is not checked, so the returned index is
 * presumably only meaningful for x != 0 -- verify against callers.
 */
static unsigned __inline clz (unsigned long x)
{
	unsigned long	bit_index;

#if !defined (WORDSIZE) || (WORDSIZE != 64)
	_BitScanForward32 (&bit_index, x);
#else
	_BitScanForward64 (&bit_index, x);
#endif
	return (bit_index);
}
示例#7
0
// Index of the least significant set bit of a 64-bit value.
// Targets matched by the macro check below use the 64-bit scan intrinsic;
// every other target splits the value into two 32-bit halves and delegates
// to the 32-bit trailingzeros overload.
inline bitcount_t trailingzeros(uint64_t v)
{
#if !(defined(_M_X64) || defined(_M_ARM) || defined(_M_ARM64))
    // 32-bit x86: look at the low half first, then fall back to the high half.
    const uint32_t lowHalf = uint32_t(v);
    if (lowHalf)
        return trailingzeros(lowHalf);

    const uint32_t highHalf = v >> 32;
    return trailingzeros(highHalf) + 32;
#else
    unsigned long bitIndex;
    _BitScanForward64(&bitIndex, v);
    return bitIndex;
#endif
}
示例#8
0
  // Returns the number of zero bits below the least significant set bit of
  // `word`. `word` is expected to be non-zero; debug builds on MSVC assert it.
  //
  // The intrinsic calls are made outside assert(): with NDEBUG defined the
  // assert expression is removed entirely, which previously dropped the bit
  // scan itself and returned an uninitialized index.
  static inline int count_trailing_zeros(word_t word) {
#if defined(__GNUC__)
    return __builtin_ctzl(word);
#elif defined(_MSC_VER)
    unsigned long index = 0;
#  if defined(_M_AMD64)
    const unsigned char found = _BitScanForward64(&index, word);
#  else
    const unsigned char found = _BitScanForward(&index, word);
#  endif
    assert(found != 0);
    (void)found;  // silence "unused variable" when assert() compiles away
    return static_cast<int>(index);
#else
    // Portable fallback so that unknown compilers do not fall off the end of
    // a non-void function. Returns 0 for a zero word.
    int count = 0;
    while (word != 0 && (word & 1) == 0) {
      word >>= 1;
      ++count;
    }
    return count;
#endif
  }
  /// Pops the least significant set bit from *bits and returns
  /// base + bitValuesRaw_[index of that bit].
  /// The bit index comes from the hardware bit scan; an earlier variant
  /// derived it from a custom De Bruijn multiplication instead.
  /// The scan result is not checked, so *bits is presumably non-zero on
  /// entry -- verify against callers.
  static FORCEINLINE uint64_t getNextPrime(uint64_t* bits, uint64_t base)
  {
	  unsigned long lowestBit;
	  _BitScanForward64(&lowestBit, *bits);

	  const uint64_t nextPrime = base + bitValuesRaw_[lowestBit];

	  // x & (x - 1) clears the lowest set bit.
	  *bits &= *bits - 1;
	  return nextPrime;
  }
示例#10
0
/*-************************************
*  Common functions
**************************************/
/*
 * Start of LZ4_NbCommonBytes: little-endian, 8-byte register case only (the
 * rest of the function is cut off in this excerpt).
 *
 * For a non-zero `val`, each variant returns the index of the lowest set bit
 * divided by 8, i.e. the index of the lowest non-zero byte. Presumably `val`
 * is the XOR of two words being compared -- TODO confirm with callers.
 */
static inline unsigned LZ4_NbCommonBytes (register reg_t val)
{
	if (LZ4_isLittleEndian()) {
	    if (sizeof(val)==8) {
#       if defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT)
	        unsigned long r = 0;
	        _BitScanForward64( &r, (U64)val );
	        /* bit index -> byte index */
	        return (int)(r>>3);
#       elif (defined(__clang__) || (defined(__GNUC__) && (__GNUC__>=3))) && !defined(LZ4_FORCE_SW_BITCOUNT)
	        return (__builtin_ctzll((U64)val) >> 3);
#       else
	        /* Software path: `val & -val` isolates the lowest set bit; the
	         * multiplication and `>> 58` produce a 6-bit index into a table
	         * that stores byte positions. */
	        static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 };
	        return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
#       endif
	    } else /* 32 bits */ {
示例#11
0
    /*
     * Bit-scan-forward over a 64-bit mask.
     *
     * Stores the index of the least significant set bit in *index and returns
     * true. Returns false when `mask` is zero; on the GCC/Clang path *index
     * is left untouched in that case.
     */
    __INTRIN_INLINE bool bsf64(unsigned long* const index, const uint64_t mask)
    {
#if defined(__GNUC__) || defined(__clang__)
        /* The builtin is undefined for 0, so reject an empty mask first. */
        if (!mask) {
            return false;
        }
        *index = (unsigned long)__builtin_ctzll(mask);
        return true;
#elif defined(_MSC_VER)
        return _BitScanForward64(index, mask) != 0;
#else
#     error Unsupported platform
#endif
    }
/*
 * Strips the trailing zero bits from *x and returns how many were removed.
 *
 * On return *x has been shifted right by the returned count, so its lowest
 * bit is set. A zero input has no set bit: *x is left at 0 and 0 is returned
 * (the intrinsics are undefined for 0 and the portable loop would never
 * terminate).
 */
int
count_zeroes(size_t *x)
{
  int result = 0;
#if !defined(HAVE_BUILTIN_CTZL) && \
    (defined(HAVE_BITSCANFORWARD64) || defined(HAVE_BITSCANFORWARD))
  /* The MSVC intrinsics write their result through an unsigned long *. */
  unsigned long bit_index = 0;
#endif

  if (*x == 0) {
    return 0;
  }

#if defined(HAVE_BUILTIN_CTZL)
  result = __builtin_ctzl(*x);
  *x >>= result;
#elif defined(HAVE_BITSCANFORWARD64)
  _BitScanForward64(&bit_index, *x);
  result = (int)bit_index;
  *x >>= result;
#elif defined(HAVE_BITSCANFORWARD)
  _BitScanForward(&bit_index, *x);
  result = (int)bit_index;
  *x >>= result;
#else
  /* Portable fallback: shift one bit at a time until the lowest bit is set. */
  while ((*x & 1) == 0) {
    ++result;
    *x >>= 1;
  }
#endif
  return result;
}
示例#13
0
文件: shader.cpp 项目: Lamorna/engine
/*
==================
Process_Fragments

Walks the fragment lists stored in raster_output and forwards each fragment
to the matching handler:
  - TRIVIAL_ACCEPT_64x64 / 16x16 / 4x4 lists -> Process_Fragment_64x64 /
    Process_Fragment_16x16 / Process_Fragment_4x4
  - the "complete" list -> pixel_shader, followed by depth-tile bookkeeping
The PARTIAL_ACCEPT_4x4 pass is currently disabled (commented out).

Afterwards, for every tile bit that the "complete" pass set in
shader_input.tile_mask_16x16 / tile_mask_64x64, the 16 depth values of the
next finer level are reduced with min_vec and written to depth_tiles_16x16 /
depth_tiles_64x64. Both masks are cleared on entry and consumed bit by bit in
the reduction loops.
==================
*/
void Process_Fragments(

	raster_output_& raster_output,
	shader_input_& shader_input
) {

	// NOTE(review): `zero` is not referenced anywhere else in this function.
	const __m128 zero = set_all(0.0f);

	// Start with no tiles flagged; bits are set in the "complete" pass below.
	shader_input.tile_mask_16x16 = 0x0;
	shader_input.tile_mask_64x64 = 0x0;

	//===============================================================================================

	// Trivially accepted 64x64 fragments.
	{
		const __int32 n_fragments = raster_output.n_fragments[raster_output_::TRIVIAL_ACCEPT_64x64];
		for (__int32 i_fragment = 0; i_fragment < n_fragments; i_fragment++) {

			raster_fragment_& raster_fragment = raster_output.raster_fragment[raster_output_::TRIVIAL_ACCEPT_64x64][i_fragment];

			// buffer_mask_packed: bits above 16 hold the buffer index, the low 16 bits the coverage mask.
			const __int32 i_buffer = raster_fragment.buffer_mask_packed >> 16;
			const unsigned __int32 coverage_mask = raster_fragment.buffer_mask_packed & 0xffff;

			Process_Fragment_64x64(

				raster_fragment.w,
				i_buffer,
				coverage_mask,
				raster_output,
				shader_input
			);
		}
	}
	//===============================================================================================
	// Trivially accepted 16x16 fragments (same unpacking as above).
	{
		const __int32 n_fragments = raster_output.n_fragments[raster_output_::TRIVIAL_ACCEPT_16x16];
		for (__int32 i_fragment = 0; i_fragment < n_fragments; i_fragment++) {

			raster_fragment_& raster_fragment = raster_output.raster_fragment[raster_output_::TRIVIAL_ACCEPT_16x16][i_fragment];

			const __int32 i_buffer = raster_fragment.buffer_mask_packed >> 16;
			const unsigned __int32 coverage_mask = raster_fragment.buffer_mask_packed & 0xffff;

			Process_Fragment_16x16(

				raster_fragment.w,
				0,
				i_buffer,
				coverage_mask,
				raster_output,
				shader_input
			);
		}
	}
	//===============================================================================================
	// Trivially accepted 4x4 fragments (same unpacking as above).
	{

		const __int32 n_fragments = raster_output.n_fragments[raster_output_::TRIVIAL_ACCEPT_4x4];
		for (__int32 i_fragment = 0; i_fragment < n_fragments; i_fragment++) {

			raster_fragment_& raster_fragment = raster_output.raster_fragment[raster_output_::TRIVIAL_ACCEPT_4x4][i_fragment];
			const __int32 i_buffer = raster_fragment.buffer_mask_packed >> 16;
			const unsigned __int32 coverage_mask = raster_fragment.buffer_mask_packed & 0xffff;
			Process_Fragment_4x4(raster_fragment.w, 0, i_buffer, coverage_mask, raster_output, shader_input);
		}
	}
	//===============================================================================================
	// Disabled pass over the PARTIAL_ACCEPT_4x4 list, kept for reference.
	{
		//const __int32 start = raster_output_::MAX_FRAGMENTS - 1;
		//const __int32 end = raster_output.n_fragments[raster_output_::PARTIAL_ACCEPT_4x4];
		//for (__int32 i_fragment = start; i_fragment > end; i_fragment--) {


		//	raster_fragment_& raster_fragment = raster_output.raster_fragment[raster_output_::PARTIAL_ACCEPT_4x4][i_fragment];
		//	const __int32 i_buffer = raster_fragment.buffer_mask_packed >> 16;
		//	const unsigned __int32 coverage_mask = raster_fragment.buffer_mask_packed & 0xffff;
		//	Process_Fragment_4x4(raster_fragment.w, 0, i_buffer, coverage_mask, raster_output, shader_input);
		//}
	}
	//===============================================================================================
	// "Complete" fragments: shade them, then record depth and flag the enclosing tiles.
	{
		const __int32 n_fragments = raster_output.n_fragments_COMPLETE;
		// NOTE(review): n_depth_fragments is never read or modified below.
		__int32 n_depth_fragments = 0;
		for (__int32 i_fragment = 0; i_fragment < n_fragments; i_fragment++) {

			raster_fragment_complete_& raster_fragment = raster_output.raster_fragment_complete[i_fragment];
			const __int32 i_buffer = raster_fragment.buffer_mask_packed >> 16;
			const unsigned __int32 coverage_mask = raster_fragment.buffer_mask_packed & 0xffff;

			pixel_shader(i_buffer, coverage_mask, raster_fragment.bazza, shader_input);

			// Tile indices at each level are the buffer index divided by the tile's pixel count.
			const __int32 i_buffer_depth_4x4 = i_buffer / (4 * 4);
			const __int32 i_buffer_depth_16x16 = i_buffer / (16 * 16);
			const __int32 i_buffer_depth_64x64 = i_buffer / (64 * 64);
			// Presumably shader_input.z_max was just updated by pixel_shader -- TODO confirm.
			shader_input.depth_tiles_4x4[i_buffer_depth_4x4] = shader_input.z_max;
			// Flag the enclosing 16x16 and 64x64 tiles for the reduction passes below.
			shader_input.tile_mask_16x16 |= one_bit_64 << i_buffer_depth_16x16;
			shader_input.tile_mask_64x64 |= one_bit_64 << i_buffer_depth_64x64;
		}
	}
	//===============================================================================================
	// Rebuild depth_tiles_16x16 for every flagged 16x16 tile from its 16 entries in depth_tiles_4x4.
	{
		//printf_s(" %llu ", shader_input.tile_mask_16x16);

		// One iteration per set bit in the mask.
		__int64 n_tiles = _mm_popcnt_u64(shader_input.tile_mask_16x16);

		for (__int32 i_bit = 0; i_bit < n_tiles; i_bit++) {

			// Take the lowest flagged tile and clear its bit (the XOR toggles a bit known to be set).
			unsigned long i_tile_16x16;
			_BitScanForward64(&i_tile_16x16, shader_input.tile_mask_16x16);
			shader_input.tile_mask_16x16 ^= one_bit_64 << i_tile_16x16;

			// Index of the first of the 16 child entries in depth_tiles_4x4.
			const __int32 i_tile_4x4 = i_tile_16x16 * (4 * 4);

			__m128 depth_4x4[4];
			depth_4x4[0] = load_u(shader_input.depth_tiles_4x4 + i_tile_4x4 + (0 * 4));
			depth_4x4[1] = load_u(shader_input.depth_tiles_4x4 + i_tile_4x4 + (1 * 4));
			depth_4x4[2] = load_u(shader_input.depth_tiles_4x4 + i_tile_4x4 + (2 * 4));
			depth_4x4[3] = load_u(shader_input.depth_tiles_4x4 + i_tile_4x4 + (3 * 4));

			// Combine the four vectors with min_vec.
			// NOTE(review): the variable is called z_max but the reduction uses min_vec -- confirm the depth convention.
			__m128 z_max;
			z_max = depth_4x4[0];
			z_max = min_vec(depth_4x4[1], z_max);
			z_max = min_vec(depth_4x4[2], z_max);
			z_max = min_vec(depth_4x4[3], z_max);

			// Three rotate + min steps; presumably rotate_left shifts lanes by one so this
			// folds all four lanes into every lane -- TODO confirm rotate_left semantics.
			__m128 z_out = z_max;
			z_max = rotate_left(z_max);
			z_out = min_vec(z_max, z_out);
			z_max = rotate_left(z_max);
			z_out = min_vec(z_max, z_out);
			z_max = rotate_left(z_max);
			z_out = min_vec(z_max, z_out);

			shader_input.depth_tiles_16x16[i_tile_16x16] = store_s(z_out);
		}
	}
	// Same reduction one level up: depth_tiles_64x64 from 16 entries of depth_tiles_16x16.
	{
		__int64 n_tiles = _mm_popcnt_u64(shader_input.tile_mask_64x64);

		//printf_s(" %llu ", n_tiles);

		for (__int32 i_bit = 0; i_bit < n_tiles; i_bit++) {

			// Take the lowest flagged tile and clear its bit.
			unsigned long i_tile_64x64;
			_BitScanForward64(&i_tile_64x64, shader_input.tile_mask_64x64);
			shader_input.tile_mask_64x64 ^= one_bit_64 << i_tile_64x64;

			// Index of the first of the 16 child entries in depth_tiles_16x16.
			const __int32 i_tile_16x16 = i_tile_64x64 * (4 * 4);

			__m128 depth_16x16[4];
			depth_16x16[0] = load_u(shader_input.depth_tiles_16x16 + i_tile_16x16 + (0 * 4));
			depth_16x16[1] = load_u(shader_input.depth_tiles_16x16 + i_tile_16x16 + (1 * 4));
			depth_16x16[2] = load_u(shader_input.depth_tiles_16x16 + i_tile_16x16 + (2 * 4));
			depth_16x16[3] = load_u(shader_input.depth_tiles_16x16 + i_tile_16x16 + (3 * 4));

			__m128 z_max;
			z_max = depth_16x16[0];
			z_max = min_vec(depth_16x16[1], z_max);
			z_max = min_vec(depth_16x16[2], z_max);
			z_max = min_vec(depth_16x16[3], z_max);

			// Horizontal min across lanes, as in the 16x16 pass above.
			__m128 z_out = z_max;
			z_max = rotate_left(z_max);
			z_out = min_vec(z_max, z_out);
			z_max = rotate_left(z_max);
			z_out = min_vec(z_max, z_out);
			z_max = rotate_left(z_max);
			z_out = min_vec(z_max, z_out);

			shader_input.depth_tiles_64x64[i_tile_64x64] = store_s(z_out);
		}
	}
}
示例#14
0
// Returns the index of the least significant set bit of a 64-bit mask, using
// the 64-bit forward bit-scan intrinsic. The unnamed mpl::int_<2> parameter is
// not used in the body; it only takes part in overload selection.
// The scan result is not checked, so the value returned for mask == 0 is
// whatever the intrinsic leaves in the local.
BOOST_FORCEINLINE unsigned find_lsb(unsigned __int64 mask, const mpl::int_<2>&)
{
   unsigned long bit_position;
   _BitScanForward64(&bit_position, mask);
   return static_cast<unsigned>(bit_position);
}
示例#15
0
// Forward bit scan: hands `mask` to the 64-bit bit-scan intrinsic, which
// writes its result through `index`. The intrinsic's found/not-found return
// value is discarded.
inline void BSF( unsigned long* index, size_t& mask )
{
	(void)_BitScanForward64( index, mask );
}
示例#16
0
DWORD WINAPI find_nonce(void* data) {
#else
void* find_nonce(void* data) {
#endif
  /*
   * Worker thread entry point. `data` is a PDThread*.
   *
   * Copies the shared mid state, offsets it by the thread index, then
   * repeatedly increments and transforms it while the context status is
   * PD_SEARCHING. When is_found_fast() reports a non-zero probe, the lowest
   * set bit of the probe selects which bit position of the state words is
   * decoded into my_thread->trits (under the context lock, and only if no
   * other thread has set PD_FOUND yet). Always returns 0.
   */
  bc_trit_t midStateCopyLow[STATE_LENGTH], midStateCopyHigh[STATE_LENGTH];
  int i, shift;
  bc_trit_t nonce_probe, nonce_output;
  PDThread* my_thread = (PDThread*)data;
  char* trits = my_thread->trits;

  memset(midStateCopyLow, 0, STATE_LENGTH * sizeof(bc_trit_t));
  memset(midStateCopyHigh, 0, STATE_LENGTH * sizeof(bc_trit_t));
  PearlDiver* ctx = my_thread->ctx;
  memcpy(midStateCopyLow, my_thread->states->mid_low,
         STATE_LENGTH * sizeof(bc_trit_t));
  memcpy(midStateCopyHigh, my_thread->states->mid_high,
         STATE_LENGTH * sizeof(bc_trit_t));

  /* Advance the state once per thread index. */
  for (i = my_thread->threadIndex; i-- > 0;) {
    pd_increment(midStateCopyLow, midStateCopyHigh, NONCE_INIT_START,
        NONCE_INCREMENT_START);
  }

  bc_trit_t scratchpadLow[STATE_LENGTH], scratchpadHigh[STATE_LENGTH],
      stateLow[STATE_LENGTH], stateHigh[STATE_LENGTH];
  memset(stateLow, 0, STATE_LENGTH * sizeof(bc_trit_t));
  memset(stateHigh, 0, STATE_LENGTH * sizeof(bc_trit_t));
  memset(scratchpadLow, 0, STATE_LENGTH * sizeof(bc_trit_t));
  memset(scratchpadHigh, 0, STATE_LENGTH * sizeof(bc_trit_t));

  while (ctxStatusEq(my_thread, ctx, PD_SEARCHING)) {
    pd_increment(midStateCopyLow, midStateCopyHigh, NONCE_INCREMENT_START,
                 HASH_LENGTH);
    memcpy(stateLow, midStateCopyLow, STATE_LENGTH * sizeof(bc_trit_t));
    memcpy(stateHigh, midStateCopyHigh, STATE_LENGTH * sizeof(bc_trit_t));
    pd_transform(stateLow, stateHigh, scratchpadLow, scratchpadHigh);

    if ((nonce_probe = is_found_fast(stateLow, stateHigh,
                                     my_thread->min_weight_magnitude)) == 0)
      continue;

    /* nonce_probe is non-zero here, so a lowest set bit exists. */
#if defined(_WIN32) && !defined(__MINGW32__)
    {
      /* The MSVC intrinsics write through an unsigned long *, not an int *. */
      unsigned long bit_index = 0;
#ifdef _WIN64
      _BitScanForward64(&bit_index, nonce_probe);
#else
      /* Only a 32-bit scan is available: check the low half, then the high
       * half, instead of silently ignoring the upper 32 bits of the probe. */
      if (!_BitScanForward(&bit_index, (unsigned long)nonce_probe)) {
        _BitScanForward(&bit_index,
                        (unsigned long)((unsigned long long)nonce_probe >> 32));
        bit_index += 32;
      }
#endif
      shift = (int)bit_index;
    }
    /* Shift a 64-bit one: `1 << shift` is an int shift and is undefined for
     * shift >= 31. */
    nonce_output = (bc_trit_t)(1ULL << shift);
    EnterCriticalSection(&my_thread->ctx->new_thread_search);
#else
    shift = __builtin_ctzll(nonce_probe);
    /* Shift a 64-bit one: `1 << shift` is an int shift and is undefined for
     * shift >= 31. */
    nonce_output = (bc_trit_t)(1ULL << shift);
    pthread_mutex_lock(&my_thread->ctx->new_thread_search);
#endif

    /* Only the first thread to get here publishes its result. */
    if (ctx->status != PD_FOUND) {
      ctx->status = PD_FOUND;
      /* Decode the selected bit position: low bit clear -> 1, otherwise
       * high bit clear -> -1, otherwise 0. */
      for (i = 0; i < HASH_LENGTH; i++) {
        trits[i] =
            (((bc_trit_t)(midStateCopyLow[i]) & nonce_output) == 0)
                ? 1
                : ((((bc_trit_t)(midStateCopyHigh[i]) & nonce_output) == 0) ? -1
                                                                         : 0);
      }
    }

#if defined(_WIN32) && !defined(__MINGW32__)
    LeaveCriticalSection(&my_thread->ctx->new_thread_search);
#else
    pthread_mutex_unlock(&my_thread->ctx->new_thread_search);
#endif

    return 0;
  }
  return 0;
}
示例#17
0
/*
 * Thin wrapper that passes Index and Mask straight to the _BitScanForward64
 * intrinsic and returns the intrinsic's result unchanged.
 * NOTE(review): LONG is not defined in this excerpt -- presumably a macro
 * supplied by the surrounding test setup; confirm before reusing this code.
 */
unsigned char test_BitScanForward64(unsigned LONG *Index, unsigned __int64 Mask) {
  return _BitScanForward64(Index, Mask);
}
示例#18
0
文件: bitscan.hpp 项目: blooto/blooto
 //! Find least significant one bit in 64-bit number
 //! @param data 64-bit number to scan; expected to be non-zero, because the
 //!        result of the bit scan is not checked
 //! @return index (0..63) of least significant one bit
 //! This algorithm uses _BitScanForward64 intrinsic to find LS1B.
 constexpr static unsigned ls1b(std::uint64_t data) {
     // Initialized so that the local never holds an indeterminate value.
     unsigned long result = 0;
     // Scan the parameter itself; no identifier named `mask` is declared here.
     _BitScanForward64(&result, data);
     return result;
 }