/*
 * Reduce a 512-bit accumulator to a single value via bits().
 *
 * The lower and upper 256-bit halves of `accumulator` are each passed to
 * maxbitas32int(); the larger of the two 32-bit results is handed to bits()
 * and that result is returned.
 *
 * This helper performs no I/O.
 */
static uint32_t avx512maxbitas32int(const __m512i accumulator) {
  /* Lower 256 bits (cast is free) and upper 256 bits (lane index 1). */
  const uint32_t lo = maxbitas32int(_mm512_castsi512_si256(accumulator));
  const uint32_t hi = maxbitas32int(_mm512_extracti64x4_epi64(accumulator, 1));
  const uint32_t ans = lo > hi ? lo : hi;
  return bits(ans);
}
Exemplo n.º 2
0
/*
 * OR together the 32-bit values of one block starting at `begin`, reading
 * them four at a time with unaligned 128-bit loads while the element offset
 * is below SIMDBlockSize, and return maxbitas32int() of the combined vector.
 */
SIMDCOMP_PURE uint32_t maxbits(const uint32_t * begin) {
    __m128i ored = _mm_loadu_si128((const __m128i *)begin);
    uint32_t offset;
    /* The first four values are already in `ored`; fold in the remainder. */
    for (offset = 4; offset < SIMDBlockSize; offset += 4) {
        const __m128i chunk = _mm_loadu_si128((const __m128i *)(begin + offset));
        ored = _mm_or_si128(ored, chunk);
    }
    return maxbitas32int(ored);
}
Exemplo n.º 3
0
/*
 * maxbit over 128 integers (SIMDBlockSize) with provided initial value.
 *
 * initvalue: broadcast to all four 32-bit lanes and used as the "previous"
 *            vector for the first group of four inputs.
 * in:        start of the block; read with unaligned 128-bit loads.
 *
 * For each group of four inputs, Delta(current, previous) is OR-ed into an
 * accumulator, and the result of maxbitas32int() on that accumulator is
 * returned. Delta() is defined elsewhere; presumably it produces the
 * element-wise successive differences -- confirm against its definition.
 */
uint32_t simdmaxbitsd1(uint32_t initvalue, const uint32_t * in) {
    const __m128i initoffset = _mm_set1_epi32(initvalue);
    const __m128i* pin = (const __m128i*)(in);
    __m128i oldvec = _mm_loadu_si128(pin);
    __m128i accumulator = Delta(oldvec, initoffset);
    uint32_t k = 1;
    for(; 4*k < SIMDBlockSize; ++k) {
        const __m128i newvec = _mm_loadu_si128(pin+k);
        accumulator = _mm_or_si128(accumulator, Delta(newvec, oldvec));
        /* The group just processed becomes the predecessor of the next one. */
        oldvec = newvec;
    }
    return maxbitas32int(accumulator);
}