// Count the trailing zero bits of `value`.
// The result is deliberately undefined when value == 0.
inline unsigned countTrailingUnsetBits(unsigned value)
{
    dbgassertex(value != 0);
#if defined(__GNUC__)
    return __builtin_ctz(value);
#elif defined (_WIN32)
    unsigned long index;
    _BitScanForward(&index, value);
    return (unsigned)index;
#else
    // Portable fallback: probe each bit from the least-significant end.
    unsigned pos = 0;
    while (pos < sizeof(unsigned)*8)
    {
        if (value & (1U << pos))
            return pos;
        ++pos;
    }
    return pos;   // value == 0: all bits scanned
#endif
}
/* Return a pointer to the storage slot for the item selected by the
 * single-bit `mask` within type object `t`.  Dies via fatal_trace()
 * when t's kind does not carry that item. */
static item_t *lookup_item(type_t t, imask_t mask)
{
   assert(t != NULL);
   /* mask must have exactly one bit set */
   assert((mask & (mask - 1)) == 0);

   const imask_t has = has_map[t->kind];
   /* bit index of the requested item */
   const int tzc = __builtin_ctz(mask);
   /* per-kind mapping from item bit index to slot index */
   const int n = item_lookup[t->kind][tzc];

   if (unlikely((has & mask) == 0)) {
      /* Error path only: recover the item index for the diagnostic
       * message.  NOTE(review): this loop recomputes what tzc already
       * holds — presumably kept so the hot path stays branch-free. */
      int item;
      for (item = 0; (mask & (1 << item)) == 0; item++)
         ;
      assert(item < ARRAY_LEN(item_text_map));
      fatal_trace("type kind %s does not have item %s", kind_text_map[t->kind], item_text_map[item]);
   }

   return &(t->items[n]);
}
/* Initialize eDMA equeue state backed by `mem` (mem_size bytes) on the
 * given eDMA ring and channel.  Returns 0 on success, or the negative
 * error code from gxio_mpipe_init_edma_ring() on failure. */
int gxio_mpipe_equeue_init(gxio_mpipe_equeue_t *equeue, gxio_mpipe_context_t *context, unsigned int ering, unsigned int channel, void *mem, unsigned int mem_size, unsigned int mem_flags)
{
	/* The init call below will verify that "mem_size" is legal. */
	unsigned int num_entries = mem_size / sizeof(gxio_mpipe_edesc_t);

	/* Offset used to read number of completed commands. */
	MPIPE_EDMA_POST_REGION_ADDR_t offset;

	int result = gxio_mpipe_init_edma_ring(context, ering, channel, mem, mem_size, mem_flags);
	if (result < 0)
		return result;

	memset(equeue, 0, sizeof(*equeue));

	offset.word = 0;
	/* mmio_fast_base points at the IDMA region; step forward to EDMA. */
	offset.region = MPIPE_MMIO_ADDR__REGION_VAL_EDMA - MPIPE_MMIO_ADDR__REGION_VAL_IDMA;
	offset.ring = ering;

	__gxio_dma_queue_init(&equeue->dma_queue, context->mmio_fast_base + offset.word, num_entries);

	equeue->edescs = mem;
	/* mask/log2 pair enables cheap modular index arithmetic —
	 * presumably num_entries is a power of two (checked by the ring
	 * init above); confirm. */
	equeue->mask_num_entries = num_entries - 1;
	equeue->log2_num_entries = __builtin_ctz(num_entries);
	equeue->context = context;
	equeue->ering = ering;
	equeue->channel = channel;

	return 0;
}
/* Decide the action for player `taku->teban` after drawing tile `pai`:
 * declare tsumo (win) if the hand is complete (shanten == 0), else try
 * to discard some other tile without worsening the shanten count
 * (tegiri), falling back to discarding the drawn tile (tsumogiri). */
unsigned tsumo_select_action(Taku *taku, unsigned pai)
{
	unsigned shanten_after_tsumo;
	unsigned shanten_after_sute;
	unsigned idx = 0;
	unsigned pos = 0;
	unsigned sutehai;   /* candidate discard tile */
	unsigned info;      /* NOTE(review): unused */
	Te *te = &taku->te[taku->teban];

	te_add(te, pai);
	shanten_after_tsumo = te_shanten(te);
	if (shanten_after_tsumo == 0)
		return make_info(ACTION_TSUMO, taku->teban, pai);

	/* Scan the 4 octal-packed tile-count maps for a discard whose
	 * resulting shanten matches the post-draw shanten. */
	while (idx < 4) {
		unsigned v = te->tehai.octmap[idx];
		while (pos < 9 && (v &= ~0U << 3 * pos)) {
			/* jump to the next occupied 3-bit slot */
			pos = div3_restrict(__builtin_ctz(v));
			{
				sutehai = (idx << 4) + pos;
				te_del(te, sutehai);
				shanten_after_sute = te_shanten(te);
				if (shanten_after_sute == shanten_after_tsumo)
					goto tegiri;
				te_add(te, sutehai);  /* restore hand, keep searching */
			}
			++pos;
		}
		++idx;
		pos = 0;
	}
tsumogiri:
	/* no suitable alternative discard: discard the drawn tile itself */
	te_del(te, pai);
	return make_info(ACTION_SUTE, taku->teban, pai);
tegiri:
	return make_info(ACTION_SUTE, taku->teban, sutehai);
}
/* SSE2-accelerated naive substring search: return the offset of the
 * first occurrence of needle[0..k) within s[0..n), or
 * std::string::npos when absent.  Each outer iteration tests 16
 * candidate start positions at once by comparing one needle byte
 * across a 16-byte window and AND-ing the per-byte match masks.
 * NOTE(review): _mm_loadu_si128 reads 16 bytes at s + i + j, which can
 * extend past s + n — callers must guarantee readable padding after
 * the buffer; confirm at call sites. */
size_t FORCE_INLINE sse_naive_strstr_anysize(const char* s, size_t n, const char* needle, size_t k)
{
    assert(k > 0);
    assert(n > 0);

    if (n == k) {
        return (memcmp(s, needle, k) == 0) ? 0 : std::string::npos;
    }

    for (size_t i = 0; i < n - k + 1; i += 16) {
        /* one bit per candidate position still alive in this window */
        uint16_t found = 0xffff;
        for (size_t j = 0; (j < k) && (found != 0) ; ++j) {
            const __m128i textvector = _mm_loadu_si128((const __m128i *)(s + i + j));
            const __m128i needlevector = _mm_set1_epi8(needle[j]);
            uint16_t bitmask = _mm_movemask_epi8(_mm_cmpeq_epi8(textvector, needlevector));
            found = found & bitmask;
        }

        if (found != 0) {
            /* lowest surviving bit = earliest match in this window */
            return i + __builtin_ctz(found);
        }
    }

    return std::string::npos;
}
/* Trailing-zero count restricted to the low 8 bits of x; yields 8 when
 * that byte is zero (where the raw builtin would be undefined). */
StgWord hs_ctz8(StgWord x)
{
    if ((uint8_t)x == 0)
        return 8;
    return __builtin_ctz(x);
}
/* Compute the Sprague-Grundy nim-value of `gr` by enumerating every
 * legal move (removal of a fully-covered rectangle), recursively
 * evaluating the resulting positions, and taking the minimum excludant
 * (mex) over the reachable nim-values. */
static NV group_nvalue_smart(Group *gr)
{
	unsigned nvals; /* nim-values of reachable states */
	NV nval;        /* group nim-value (result) */
	Rect m;         /* move */

	/* Consider all possible moves */
	nvals = 0;
	for (m.p.r = 0; m.p.r < gr->height; ++m.p.r) {
		for (m.p.c = 0; m.p.c < gr->width; ++m.p.c) {
			if (!GR_GET(gr, m.p.r, m.p.c)) continue;
			for (m.q.r = m.p.r; m.q.r < gr->height; ++m.q.r) {
				if (!GR_GET(gr, m.q.r, m.p.c)) break;
				for (m.q.c = m.p.c; m.q.c < gr->width; ++m.q.c) {
					Group ngr, nngr;
					int r, c;
					NV nnval; /* nim-value of new group */

					/* Check if rectangle is covered completely by fields */
					for (r = m.p.r; r <= m.q.r; ++r) {
						for (c = m.p.c; c <= m.q.c; ++c) {
							if (!GR_GET(gr, r, c)) goto invalid;
						}
					}

					/* Construct new group, with selected rectangle removed
					 * (XOR with a run of (q.c - p.c + 1) one-bits at p.c) */
					ngr.height = gr->height;
					ngr.width = gr->width;
					for (r = 0; r < m.p.r; ++r) ngr.rows[r] = gr->rows[r];
					for (; r <= m.q.r; ++r) {
						ngr.rows[r] = gr->rows[r] ^ (((1<<(m.q.c - m.p.c + 1)) - 1) << m.p.c);
					}
					for (; r < gr->height; ++r) ngr.rows[r] = gr->rows[r];

					/* Split up into groups: the remainder may be
					 * disconnected; XOR the component nim-values
					 * (group_isolate consumes bits from ngr). */
					nnval = 0;
					for (r = 0; r < gr->height; ++r) {
						while (ngr.rows[r] != 0) {
							c = __builtin_ctz(ngr.rows[r]);
							nnval ^= group_isolate(&ngr, r, c, &nngr) > 1 ? group_nvalue(&nngr) : 1;
						}
					}
					/* record reachable nim-value as a bit in the set */
					nvals |= (1u << nnval);
					continue;
invalid:
					break;
				}
			}
		}
	}

	/* Compute nvalue: mex = smallest value not in the reachable set */
	nval = 0;
	while (nvals & (1u << nval)) ++nval;
	return nval;
}
/* Pick a random link height for a new skiplist node: a geometric-style
 * distribution derived from the trailing zeros of dict_rand(), capped
 * below the list's maximum.
 * NOTE(review): __builtin_ctz is undefined if dict_rand() returns 0,
 * and the cap yields max_link - 1 rather than max_link — confirm both
 * are intended. */
static unsigned rand_link_count(skiplist* list)
{
    unsigned count = (unsigned) __builtin_ctz(dict_rand()) / 2 + 1;
    return (count >= list->max_link) ? list->max_link - 1 : count;
}
/* Trailing-zero count of the low 32 bits of x; yields 32 when they are
 * all zero (where the raw builtin would be undefined). */
StgWord hs_ctz32(StgWord x)
{
    if ((uint32_t)x == 0)
        return 32;
    return __builtin_ctz(x);
}
/* Initialise the on-screen-display video pipeline: the mask and level
 * SPI slaves, their Tx DMA streams, the draw/display double buffers,
 * and the hsync/vsync line interrupts. */
void PIOS_Video_Init(const struct pios_video_cfg *cfg)
{
	dev_cfg = cfg; // store config before enabling interrupt

	configure_hsync_timers();

	/* needed for HW hack */
	const GPIO_InitTypeDef initStruct = {
		.GPIO_Pin = GPIO_Pin_12,
		.GPIO_Speed = GPIO_Speed_100MHz,
		.GPIO_Mode = GPIO_Mode_IN,
		.GPIO_OType = GPIO_OType_PP,
		.GPIO_PuPd = GPIO_PuPd_NOPULL
	};
	GPIO_Init(GPIOC, &initStruct);

	/* SPI3 - MASKBUFFER */
	GPIO_Init(cfg->mask.sclk.gpio, (GPIO_InitTypeDef *)&(cfg->mask.sclk.init));
	GPIO_Init(cfg->mask.miso.gpio, (GPIO_InitTypeDef *)&(cfg->mask.miso.init));

	/* SPI1 SLAVE FRAMEBUFFER */
	GPIO_Init(cfg->level.sclk.gpio, (GPIO_InitTypeDef *)&(cfg->level.sclk.init));
	GPIO_Init(cfg->level.miso.gpio, (GPIO_InitTypeDef *)&(cfg->level.miso.init));

	/* __builtin_ctz() converts the one-hot GPIO_PinX mask into the
	 * GPIO_PinSourceX index expected by GPIO_PinAFConfig() */
	if (cfg->mask.remap) {
		GPIO_PinAFConfig(cfg->mask.sclk.gpio, __builtin_ctz(cfg->mask.sclk.init.GPIO_Pin), cfg->mask.remap);
		GPIO_PinAFConfig(cfg->mask.miso.gpio, __builtin_ctz(cfg->mask.miso.init.GPIO_Pin), cfg->mask.remap);
	}
	if (cfg->level.remap) {
		GPIO_PinAFConfig(cfg->level.sclk.gpio, __builtin_ctz(cfg->level.sclk.init.GPIO_Pin), cfg->level.remap);
		GPIO_PinAFConfig(cfg->level.miso.gpio, __builtin_ctz(cfg->level.miso.init.GPIO_Pin), cfg->level.remap);
	}

	/* Initialize the SPI block */
	SPI_Init(cfg->level.regs, (SPI_InitTypeDef *)&(cfg->level.init));
	SPI_Init(cfg->mask.regs, (SPI_InitTypeDef *)&(cfg->mask.init));

	/* Enable SPI */
	SPI_Cmd(cfg->level.regs, ENABLE);
	SPI_Cmd(cfg->mask.regs, ENABLE);

	/* Configure DMA for SPI Tx SLAVE Maskbuffer */
	DMA_Cmd(cfg->mask.dma.tx.channel, DISABLE);
	DMA_Init(cfg->mask.dma.tx.channel, (DMA_InitTypeDef *)&(cfg->mask.dma.tx.init));

	/* Configure DMA for SPI Tx SLAVE Framebuffer */
	DMA_Cmd(cfg->level.dma.tx.channel, DISABLE);
	DMA_Init(cfg->level.dma.tx.channel, (DMA_InitTypeDef *)&(cfg->level.dma.tx.init));

	/* Trigger interrupt when for half conversions too to indicate double buffer */
	DMA_ITConfig(cfg->level.dma.tx.channel, DMA_IT_TC, ENABLE);

	/* Configure and clear buffers */
	draw_buffer_level = buffer0_level;
	draw_buffer_mask = buffer0_mask;
	disp_buffer_level = buffer1_level;
	disp_buffer_mask = buffer1_mask;
	memset(disp_buffer_mask, 0, GRAPHICS_WIDTH * GRAPHICS_HEIGHT);
	memset(disp_buffer_level, 0, GRAPHICS_WIDTH * GRAPHICS_HEIGHT);
	memset(draw_buffer_mask, 0, GRAPHICS_WIDTH * GRAPHICS_HEIGHT);
	memset(draw_buffer_level, 0, GRAPHICS_WIDTH * GRAPHICS_HEIGHT);

	/* Configure DMA interrupt */
	NVIC_Init(&cfg->level.dma.irq.init);

	/* Enable SPI interrupts to DMA */
	SPI_I2S_DMACmd(cfg->mask.regs, SPI_I2S_DMAReq_Tx, ENABLE);
	SPI_I2S_DMACmd(cfg->level.regs, SPI_I2S_DMAReq_Tx, ENABLE);

	mask_dma = DMA1;
	main_dma = DMA2;
	main_stream = cfg->level.dma.tx.channel;
	mask_stream = cfg->mask.dma.tx.channel;

	/* Configure the Video Line interrupt */
	PIOS_EXTI_Init(cfg->hsync);
	PIOS_EXTI_Init(cfg->vsync);

	// set levels to zero
	PIOS_Servo_Set(0, 0);
	PIOS_Servo_Set(1, 0);
}
int snoob2(int x) //g++ { int t=x|(x-1); return (t+1)|(((~t&-~t)-1)>>(__builtin_ctz(x)+1)); }
/* Sum of the trailing-zero and leading-zero counts of n.
 * Undefined for n == 0 (both builtins are undefined there). */
int a(int n)
{
    return __builtin_ctz(n) + __builtin_clz(n);
}
/**
 * Initialise a single Overo device
 *
 * Sets up the SPI slave link to the Overo with double-buffered DMA in
 * both directions (PACKET_SIZE bytes per transfer).
 *
 * @param[out] overo_id receives the opaque device handle on success
 * @param[in]  cfg      board configuration (must have slave_count == 1)
 * @return 0 on success, -1 if device allocation failed
 */
int32_t PIOS_OVERO_Init(uint32_t *overo_id, const struct pios_overo_cfg *cfg)
{
	PIOS_DEBUG_Assert(overo_id);
	PIOS_DEBUG_Assert(cfg);

	struct pios_overo_dev *overo_dev;

	overo_dev = (struct pios_overo_dev *)PIOS_OVERO_alloc();
	if (!overo_dev) {
		goto out_fail;
	}

	/* Bind the configuration to the device instance */
	overo_dev->cfg = cfg;
	overo_dev->writing_buffer = 1; // First writes to second buffer

	/* Put buffers to a known state */
	memset(&overo_dev->tx_buffer[0][0], 0xFF, PACKET_SIZE);
	memset(&overo_dev->tx_buffer[1][0], 0xFF, PACKET_SIZE);
	memset(&overo_dev->rx_buffer[0][0], 0xFF, PACKET_SIZE);
	memset(&overo_dev->rx_buffer[1][0], 0xFF, PACKET_SIZE);

	/*
	 * Enable the SPI device
	 *
	 * 1. Enable the SPI port
	 * 2. Enable DMA with circular buffered DMA (validate config)
	 * 3. Enable the DMA Tx IRQ
	 */

	// PIOS_Assert(overo_dev->cfg->dma.tx-> == CIRCULAR);
	// PIOS_Assert(overo_dev->cfg->dma.rx-> == CIRCULAR);

	/* only legal for single-slave config */
	PIOS_Assert(overo_dev->cfg->slave_count == 1);
	SPI_SSOutputCmd(overo_dev->cfg->regs, DISABLE);

	/* Initialize the GPIO pins */
	/* note __builtin_ctz() due to the difference between GPIO_PinX and GPIO_PinSourceX */
	GPIO_PinAFConfig(overo_dev->cfg->sclk.gpio, __builtin_ctz(overo_dev->cfg->sclk.init.GPIO_Pin), overo_dev->cfg->remap);
	GPIO_PinAFConfig(overo_dev->cfg->mosi.gpio, __builtin_ctz(overo_dev->cfg->mosi.init.GPIO_Pin), overo_dev->cfg->remap);
	GPIO_PinAFConfig(overo_dev->cfg->miso.gpio, __builtin_ctz(overo_dev->cfg->miso.init.GPIO_Pin), overo_dev->cfg->remap);
	GPIO_PinAFConfig(overo_dev->cfg->ssel[0].gpio, __builtin_ctz(overo_dev->cfg->ssel[0].init.GPIO_Pin), overo_dev->cfg->remap);

	GPIO_Init(overo_dev->cfg->sclk.gpio, (GPIO_InitTypeDef *)&(overo_dev->cfg->sclk.init));
	GPIO_Init(overo_dev->cfg->mosi.gpio, (GPIO_InitTypeDef *)&(overo_dev->cfg->mosi.init));
	GPIO_Init(overo_dev->cfg->miso.gpio, (GPIO_InitTypeDef *)&(overo_dev->cfg->miso.init));

	/* Configure circular buffer targets. Configure 0 to be initially active */
	DMA_InitTypeDef dma_init;

	DMA_DeInit(overo_dev->cfg->dma.rx.channel);
	dma_init = overo_dev->cfg->dma.rx.init;
	dma_init.DMA_Memory0BaseAddr = (uint32_t)overo_dev->rx_buffer[0];
	dma_init.DMA_MemoryInc = DMA_MemoryInc_Enable;
	dma_init.DMA_BufferSize = PACKET_SIZE;
	DMA_Init(overo_dev->cfg->dma.rx.channel, &dma_init);
	DMA_DoubleBufferModeConfig(overo_dev->cfg->dma.rx.channel, (uint32_t)overo_dev->rx_buffer[1], DMA_Memory_0);
	DMA_DoubleBufferModeCmd(overo_dev->cfg->dma.rx.channel, ENABLE);

	DMA_DeInit(overo_dev->cfg->dma.tx.channel);
	dma_init = overo_dev->cfg->dma.tx.init;
	dma_init.DMA_Memory0BaseAddr = (uint32_t)overo_dev->tx_buffer[0];
	dma_init.DMA_MemoryInc = DMA_MemoryInc_Enable;
	dma_init.DMA_BufferSize = PACKET_SIZE;
	DMA_Init(overo_dev->cfg->dma.tx.channel, &dma_init);
	DMA_DoubleBufferModeConfig(overo_dev->cfg->dma.tx.channel, (uint32_t)overo_dev->tx_buffer[1], DMA_Memory_0);
	DMA_DoubleBufferModeCmd(overo_dev->cfg->dma.tx.channel, ENABLE);

	/* Set the packet size */
	DMA_SetCurrDataCounter(overo_dev->cfg->dma.rx.channel, PACKET_SIZE);
	DMA_SetCurrDataCounter(overo_dev->cfg->dma.tx.channel, PACKET_SIZE);

	/* Initialize the SPI block */
	SPI_DeInit(overo_dev->cfg->regs);
	SPI_Init(overo_dev->cfg->regs, (SPI_InitTypeDef *)&(overo_dev->cfg->init));
	SPI_CalculateCRC(overo_dev->cfg->regs, DISABLE);

	/* Enable SPI */
	SPI_Cmd(overo_dev->cfg->regs, ENABLE);

	/* Enable SPI interrupts to DMA */
	SPI_I2S_DMACmd(overo_dev->cfg->regs, SPI_I2S_DMAReq_Tx | SPI_I2S_DMAReq_Rx, ENABLE);

	/* Configure DMA interrupt */
	NVIC_Init((NVIC_InitTypeDef *)&(overo_dev->cfg->dma.irq.init));
	DMA_ITConfig(overo_dev->cfg->dma.tx.channel, DMA_IT_TC, ENABLE);

	/* Enable the DMA channels */
	DMA_Cmd(overo_dev->cfg->dma.tx.channel, ENABLE);
	DMA_Cmd(overo_dev->cfg->dma.rx.channel, ENABLE);

	*overo_id = (uint32_t)overo_dev;

	return 0;

out_fail:
	return -1;
}
/* Index of the least-significant set bit of n.
 * Contract: n != 0 (__builtin_ctz is undefined for zero). */
static inline int find_lsb_set_non_zero(u32 n)
{
	const int bit_index = __builtin_ctz(n);
	return bit_index;
}
/* Trailing-zero count of a 16-bit value; a zero input yields 16. */
unsigned int foo (unsigned short x)
{
    if (x == 0)
        return 16U;
    return __builtin_ctz (x);
}
/* Map the lowest set bit of u1 to a position counted downward from 53.
 * Undefined when u1 == 0. */
int last_one1( unsigned int u1 )
{
    const int lsb = __builtin_ctz( u1 );
    return 53 - lsb;
}
/* Map the lowest set bit of u0 to a position counted downward from 26.
 * Undefined when u0 == 0. */
int last_one0( unsigned int u0 )
{
    const int lsb = __builtin_ctz( u0 );
    return 26 - lsb;
}
/* Compatibility shim: trailing-zero count via the GCC builtin.
 * Undefined for x == 0. */
static inline int compat_ctz(unsigned int x)
{
	const int tz = __builtin_ctz(x);
	return tz;
}
/* Position of the least-significant set bit of value, or 32 when no
 * bit is set (the zero case guards the undefined builtin). */
int find_first_set_bit(uint32_t value)
{
    return (value != 0) ? __builtin_ctz(value) : 32;
}
/* Compatibility wrapper around the GCC trailing-zero-count builtin.
 * Undefined for x == 0. */
static INLINE int compat_ctz(unsigned x)
{
	const int trailing_zeros = __builtin_ctz(x);
	return trailing_zeros;
}
//double g19 = __builtin_powi(2.0, 4); //float g20 = __builtin_powif(2.0f, 4); //long double g21 = __builtin_powil(2.0L, 4); #define BITSIZE(x) (sizeof(x) * 8) char clz1[__builtin_clz(1) == BITSIZE(int) - 1 ? 1 : -1]; char clz2[__builtin_clz(7) == BITSIZE(int) - 3 ? 1 : -1]; char clz3[__builtin_clz(1 << (BITSIZE(int) - 1)) == 0 ? 1 : -1]; int clz4 = __builtin_clz(0); // expected-error {{not a compile-time constant}} char clz5[__builtin_clzl(0xFL) == BITSIZE(long) - 4 ? 1 : -1]; char clz6[__builtin_clzll(0xFFLL) == BITSIZE(long long) - 8 ? 1 : -1]; char clz7[__builtin_clzs(0x1) == BITSIZE(short) - 1 ? 1 : -1]; char clz8[__builtin_clzs(0xf) == BITSIZE(short) - 4 ? 1 : -1]; char clz9[__builtin_clzs(0xfff) == BITSIZE(short) - 12 ? 1 : -1]; char ctz1[__builtin_ctz(1) == 0 ? 1 : -1]; char ctz2[__builtin_ctz(8) == 3 ? 1 : -1]; char ctz3[__builtin_ctz(1 << (BITSIZE(int) - 1)) == BITSIZE(int) - 1 ? 1 : -1]; int ctz4 = __builtin_ctz(0); // expected-error {{not a compile-time constant}} char ctz5[__builtin_ctzl(0x10L) == 4 ? 1 : -1]; char ctz6[__builtin_ctzll(0x100LL) == 8 ? 1 : -1]; char ctz7[__builtin_ctzs(1 << (BITSIZE(short) - 1)) == BITSIZE(short) - 1 ? 1 : -1]; char popcount1[__builtin_popcount(0) == 0 ? 1 : -1]; char popcount2[__builtin_popcount(0xF0F0) == 8 ? 1 : -1]; char popcount3[__builtin_popcount(~0) == BITSIZE(int) ? 1 : -1]; char popcount4[__builtin_popcount(~0L) == BITSIZE(int) ? 1 : -1]; char popcount5[__builtin_popcountl(0L) == 0 ? 1 : -1]; char popcount6[__builtin_popcountl(0xF0F0L) == 8 ? 1 : -1]; char popcount7[__builtin_popcountl(~0L) == BITSIZE(long) ? 1 : -1]; char popcount8[__builtin_popcountll(0LL) == 0 ? 1 : -1];
/* Store the leading-zero count, trailing-zero count and population
 * count of P into the globals G, H and I respectively.
 * NOTE(review): __builtin_clz/__builtin_ctz are undefined for P == 0 —
 * caller presumably guarantees nonzero; confirm. */
void foo(int P)
{
	G = __builtin_clz(P);
	H = __builtin_ctz(P);
	I = __builtin_popcount(P);
}
// Bit Scan Forward: store into *index the bit position of the lowest
// set bit of `mask`.  Result is undefined when mask == 0.
//
// Fix: the previous implementation used __builtin_ctz, which operates
// on `unsigned int` and therefore silently truncated a 64-bit size_t
// mask — any mask whose low 32 bits were all zero produced an
// undefined result.  __builtin_ctzll examines the full width.
void inline BSF (unsigned long * index, size_t & mask)
{
    *index = __builtin_ctzll (mask);
}
/* libgcc-style helper: trailing-zero count of a 32-bit value.
 * Result undefined for x == 0. */
int __ctzsi2 (uSI x)
{
  const int count = __builtin_ctz (x);
  return count;
}
/* Upload the pixels of surface `t` covered by `dirty` into the GL
 * texture described by `texture`, (re)allocating the texture when its
 * size changed and rounding dimensions up to powers of two when NPOT
 * textures are unsupported.  Returns NO_ERROR on success, the
 * initTexture() error, or INVALID_OPERATION for non-2D targets. */
status_t TextureManager::loadTexture(Texture* texture, const Region& dirty, const GGLSurface& t)
{
    if (texture->name == -1UL) {
        status_t err = initTexture(texture);
        LOGE_IF(err, "loadTexture failed in initTexture (%s)", strerror(err));
        return err;
    }

    if (texture->target != Texture::TEXTURE_2D)
        return INVALID_OPERATION;

    glBindTexture(GL_TEXTURE_2D, texture->name);

    /*
     * In OpenGL ES we can't specify a stride with glTexImage2D (however,
     * GL_UNPACK_ALIGNMENT is a limited form of stride).
     * So if the stride here isn't representable with GL_UNPACK_ALIGNMENT, we
     * need to do something reasonable (here creating a bigger texture).
     *
     * extra pixels = (((stride - width) * pixelsize) / GL_UNPACK_ALIGNMENT);
     *
     * This situation doesn't happen often, but some h/w have a limitation
     * for their framebuffer (eg: must be multiple of 8 pixels), and
     * we need to take that into account when using these buffers as
     * textures.
     *
     * This should never be a problem with POT textures
     */

    /* largest power-of-two alignment dividing the row byte stride,
     * clamped to GL's maximum of 8 */
    int unpack = __builtin_ctz(t.stride * bytesPerPixel(t.format));
    unpack = 1 << ((unpack > 3) ? 3 : unpack);
    glPixelStorei(GL_UNPACK_ALIGNMENT, unpack);

    /*
     * round to POT if needed
     */
    if (!mGLExtensions.haveNpot()) {
        texture->NPOTAdjust = true;
    }

    if (texture->NPOTAdjust) {
        // find the smallest power-of-two that will accommodate our surface
        texture->potWidth  = 1 << (31 - clz(t.width));
        texture->potHeight = 1 << (31 - clz(t.height));
        if (texture->potWidth < t.width)  texture->potWidth <<= 1;
        if (texture->potHeight < t.height) texture->potHeight <<= 1;
        texture->wScale = float(t.width)  / texture->potWidth;
        texture->hScale = float(t.height) / texture->potHeight;
    } else {
        texture->potWidth = t.width;
        texture->potHeight = t.height;
    }

    Rect bounds(dirty.bounds());
    GLvoid* data = 0;
    if (texture->width != t.width || texture->height != t.height) {
        texture->width = t.width;
        texture->height = t.height;

        // texture size changed, we need to create a new one
        bounds.set(Rect(t.width, t.height));
        if (t.width == texture->potWidth && t.height == texture->potHeight) {
            // we can do it one pass: upload pixels with glTexImage2D directly
            data = t.data;
        }

        if (t.format == HAL_PIXEL_FORMAT_RGB_565) {
            glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, texture->potWidth, texture->potHeight, 0, GL_RGB, GL_UNSIGNED_SHORT_5_6_5, data);
        } else if (t.format == HAL_PIXEL_FORMAT_RGBA_4444) {
            glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, texture->potWidth, texture->potHeight, 0, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4, data);
        } else if (t.format == HAL_PIXEL_FORMAT_RGBA_8888 || t.format == HAL_PIXEL_FORMAT_RGBX_8888) {
            glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, texture->potWidth, texture->potHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE, data);
        } else if (isSupportedYuvFormat(t.format)) {
            // just show the Y plane of YUV buffers
            glTexImage2D(GL_TEXTURE_2D, 0, GL_LUMINANCE, texture->potWidth, texture->potHeight, 0, GL_LUMINANCE, GL_UNSIGNED_BYTE, data);
        } else {
            // oops, we don't handle this format!
            LOGE("texture=%d, using format %d, which is not " "supported by the GL", texture->name, t.format);
        }
    }

    /* data == 0 here means the texture either didn't change size or
     * needs a second pass: upload only the dirty rows */
    if (!data) {
        if (t.format == HAL_PIXEL_FORMAT_RGB_565) {
            glTexSubImage2D(GL_TEXTURE_2D, 0, 0, bounds.top, t.width, bounds.height(), GL_RGB, GL_UNSIGNED_SHORT_5_6_5, t.data + bounds.top*t.stride*2);
        } else if (t.format == HAL_PIXEL_FORMAT_RGBA_4444) {
            glTexSubImage2D(GL_TEXTURE_2D, 0, 0, bounds.top, t.width, bounds.height(), GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4, t.data + bounds.top*t.stride*2);
        } else if (t.format == HAL_PIXEL_FORMAT_RGBA_8888 || t.format == HAL_PIXEL_FORMAT_RGBX_8888) {
            glTexSubImage2D(GL_TEXTURE_2D, 0, 0, bounds.top, t.width, bounds.height(), GL_RGBA, GL_UNSIGNED_BYTE, t.data + bounds.top*t.stride*4);
        } else if (isSupportedYuvFormat(t.format)) {
            // just show the Y plane of YUV buffers
            glTexSubImage2D(GL_TEXTURE_2D, 0, 0, bounds.top, t.width, bounds.height(), GL_LUMINANCE, GL_UNSIGNED_BYTE, t.data + bounds.top*t.stride);
        }
    }
    return NO_ERROR;
}
// Convert a 32-bit unsigned integer to its NUL-terminated decimal
// string in `buffer`, using the two-digit lookup table gDigitsLut for
// short values and an SSE2 8-digit conversion for 9-10 digit values.
inline void u32toa_sse2(uint32_t value, char* buffer)
{
    if (value < 10000) {
        // 1-4 digits: scalar path via the 2-digit LUT; leading zeros
        // are suppressed by the range checks.
        const uint32_t d1 = (value / 100) << 1;
        const uint32_t d2 = (value % 100) << 1;

        if (value >= 1000)
            *buffer++ = gDigitsLut[d1];
        if (value >= 100)
            *buffer++ = gDigitsLut[d1 + 1];
        if (value >= 10)
            *buffer++ = gDigitsLut[d2];
        *buffer++ = gDigitsLut[d2 + 1];

        *buffer++ = '\0';
    }
    else if (value < 100000000) {
        // 5-8 digits.  Experiment shows that this case SSE2 is slower,
        // so the vector variant is compiled out (#if 0).
#if 0
        const __m128i a = Convert8DigitsSSE2(value);

        // Convert to bytes, add '0'
        const __m128i va = _mm_add_epi8(_mm_packus_epi16(a, _mm_setzero_si128()), reinterpret_cast<const __m128i*>(kAsciiZero)[0]);

        // Count number of digit
        const unsigned mask = _mm_movemask_epi8(_mm_cmpeq_epi8(va, reinterpret_cast<const __m128i*>(kAsciiZero)[0]));
        unsigned long digit;
#ifdef _MSC_VER
        _BitScanForward(&digit, ~mask | 0x8000);
#else
        digit = __builtin_ctz(~mask | 0x8000);
#endif

        // Shift digits to the beginning
        __m128i result = ShiftDigits_SSE2(va, digit);
        //__m128i result = _mm_srl_epi64(va, _mm_cvtsi32_si128(digit * 8));
        _mm_storel_epi64(reinterpret_cast<__m128i*>(buffer), result);
        buffer[8 - digit] = '\0';
#else
        // value = bbbbcccc
        const uint32_t b = value / 10000;
        const uint32_t c = value % 10000;

        const uint32_t d1 = (b / 100) << 1;
        const uint32_t d2 = (b % 100) << 1;
        const uint32_t d3 = (c / 100) << 1;
        const uint32_t d4 = (c % 100) << 1;

        if (value >= 10000000)
            *buffer++ = gDigitsLut[d1];
        if (value >= 1000000)
            *buffer++ = gDigitsLut[d1 + 1];
        if (value >= 100000)
            *buffer++ = gDigitsLut[d2];
        *buffer++ = gDigitsLut[d2 + 1];

        *buffer++ = gDigitsLut[d3];
        *buffer++ = gDigitsLut[d3 + 1];
        *buffer++ = gDigitsLut[d4];
        *buffer++ = gDigitsLut[d4 + 1];

        *buffer++ = '\0';
#endif
    }
    else {
        // value = aabbbbbbbb in decimal
        const uint32_t a = value / 100000000; // 1 to 42
        value %= 100000000;

        // Emit the 1-2 leading digits in scalar code...
        if (a >= 10) {
            const unsigned i = a << 1;
            *buffer++ = gDigitsLut[i];
            *buffer++ = gDigitsLut[i + 1];
        }
        else
            *buffer++ = '0' + static_cast<char>(a);

        // ...then convert the remaining 8 digits with SSE2, pack them
        // into the high 8 bytes, shift down, and store.
        const __m128i b = Convert8DigitsSSE2(value);
        const __m128i ba = _mm_add_epi8(_mm_packus_epi16(_mm_setzero_si128(), b), reinterpret_cast<const __m128i*>(kAsciiZero)[0]);
        const __m128i result = _mm_srli_si128(ba, 8);
        _mm_storel_epi64(reinterpret_cast<__m128i*>(buffer), result);
        buffer[8] = '\0';
    }
}
/**
 * Initialise a single USART device
 *
 * Maps the Rx/Tx pins, configures the peripheral from `cfg`, records
 * the handle in the matching PIOS_USART_n_id global, and enables the
 * RXNE/TXE interrupts.
 *
 * @param[out] usart_id receives the opaque device handle on success
 * @param[in]  cfg      board configuration for this USART
 * @return 0 on success, -1 if device allocation failed
 */
int32_t PIOS_USART_Init(uintptr_t * usart_id, const struct pios_usart_cfg * cfg)
{
	PIOS_DEBUG_Assert(usart_id);
	PIOS_DEBUG_Assert(cfg);

	struct pios_usart_dev * usart_dev;

	usart_dev = (struct pios_usart_dev *) PIOS_USART_alloc();
	if (!usart_dev) goto out_fail;

	/* Bind the configuration to the device instance */
	usart_dev->cfg = cfg;

	/* Map pins to USART function */
	/* note __builtin_ctz() due to the difference between GPIO_PinX and GPIO_PinSourceX */
	if (usart_dev->cfg->remap) {
		if (usart_dev->cfg->rx.gpio != 0)
			GPIO_PinAFConfig(usart_dev->cfg->rx.gpio, __builtin_ctz(usart_dev->cfg->rx.init.GPIO_Pin), usart_dev->cfg->remap);
		if (usart_dev->cfg->tx.gpio != 0)
			GPIO_PinAFConfig(usart_dev->cfg->tx.gpio, __builtin_ctz(usart_dev->cfg->tx.init.GPIO_Pin), usart_dev->cfg->remap);
	}

	/* Initialize the USART Rx and Tx pins */
	if (usart_dev->cfg->rx.gpio != 0)
		GPIO_Init(usart_dev->cfg->rx.gpio, (GPIO_InitTypeDef *)&usart_dev->cfg->rx.init);
	if (usart_dev->cfg->tx.gpio != 0)
		GPIO_Init(usart_dev->cfg->tx.gpio, (GPIO_InitTypeDef *)&usart_dev->cfg->tx.init);

	/* Configure the USART */
	USART_Init(usart_dev->cfg->regs, (USART_InitTypeDef *)&usart_dev->cfg->init);

	*usart_id = (uintptr_t)usart_dev;

	/* Configure USART Interrupts: publish the handle so the shared IRQ
	 * handlers can find this device (no default case — an unknown regs
	 * value is silently skipped) */
	switch ((uint32_t)usart_dev->cfg->regs) {
	case (uint32_t)USART1:
		PIOS_USART_1_id = (uintptr_t)usart_dev;
		break;
	case (uint32_t)USART2:
		PIOS_USART_2_id = (uintptr_t)usart_dev;
		break;
	case (uint32_t)USART3:
		PIOS_USART_3_id = (uintptr_t)usart_dev;
		break;
	case (uint32_t)UART4:
		PIOS_USART_4_id = (uintptr_t)usart_dev;
		break;
	case (uint32_t)UART5:
		PIOS_USART_5_id = (uintptr_t)usart_dev;
		break;
	case (uint32_t)USART6:
		PIOS_USART_6_id = (uintptr_t)usart_dev;
		break;
	}
	NVIC_Init((NVIC_InitTypeDef *)&(usart_dev->cfg->irq.init));
	USART_ITConfig(usart_dev->cfg->regs, USART_IT_RXNE, ENABLE);
	USART_ITConfig(usart_dev->cfg->regs, USART_IT_TXE, ENABLE);

	// FIXME XXX Clear / reset uart here - sends NUL char else

	/* Enable USART */
	USART_Cmd(usart_dev->cfg->regs, ENABLE);

	return(0);

out_fail:
	return(-1);
}
/* CSI instrumentation hook invoked before every store: bucket the
 * access count by log2 of the store width.  store_id, addr and prop
 * are unused here.
 * NOTE(review): __builtin_ctz(num_bytes) is undefined if num_bytes is
 * 0 and assumes power-of-two widths — confirm instrumented sizes. */
void __csi_before_store(const csi_id_t store_id, const void *addr, const int32_t num_bytes, const uint64_t prop)
{
	times_accessed_by_size[__builtin_ctz(num_bytes)]++;
}
/**
 * Return a mask with only the lowest clear (zero) bit of n set.
 *
 * result is undefined if called with ~0 (there is no zero bit, and
 * __builtin_ctz(0) is undefined).
 */
static uint32_t get_first_zero(const uint32_t n)
{
	/* __builtin_ctz returns number of trailing zeros; applied to ~n it
	 * yields the index of the lowest 0 bit of n.
	 * Fix: shift an unsigned constant — the previous `1 << ...` shifted
	 * a signed int, which is undefined behavior when the lowest zero
	 * bit is bit 31. */
	return UINT32_C(1) << __builtin_ctz(~n);
}
/* Trailing-zero count restricted to the low 16 bits of x; yields 16
 * when they are all zero (where the raw builtin would be undefined). */
StgWord hs_ctz16(StgWord x)
{
    if ((uint16_t)x == 0)
        return 16;
    return __builtin_ctz(x);
}