/*
 * Reduce a 512-bit accumulator to a single 32-bit result.
 *
 * The lower and upper 256-bit halves of `accumulator` are each handed to
 * maxbitas32int(); the larger of the two results is passed through bits()
 * and returned.
 *
 * NOTE(review): maxbitas32int() and bits() are defined elsewhere. If
 * maxbitas32int() already returns a bit width rather than a raw value, the
 * final bits() call would be applied twice -- confirm against its definition.
 */
static uint32_t avx512maxbitas32int(const __m512i accumulator) {
  /* Lower 256 bits: a cast, no data movement is requested. */
  const uint32_t ans1 = maxbitas32int(_mm512_castsi512_si256(accumulator));
  /* Upper 256 bits: lane index 1 of the two 256-bit lanes. */
  const uint32_t ans2 = maxbitas32int(_mm512_extracti64x4_epi64(accumulator, 1));
  const uint32_t ans = ans1 > ans2 ? ans1 : ans2;
  return bits(ans);
}
/*
 * OR together the 128-bit vectors of the block starting at `begin` and
 * return maxbitas32int() of the combined vector.
 *
 * Vectors are read with unaligned loads; vector k is read for every k with
 * 4*k < SIMDBlockSize, so `begin` must point to at least that many readable
 * 32-bit integers.
 */
SIMDCOMP_PURE uint32_t maxbits(const uint32_t * begin) {
    const __m128i *vectors = (const __m128i *)begin;
    __m128i ored = _mm_loadu_si128(vectors);
    uint32_t idx;

    /* Vector 0 seeds the accumulator; fold in the remaining ones. */
    for (idx = 1; 4 * idx < SIMDBlockSize; ++idx) {
        ored = _mm_or_si128(ored, _mm_loadu_si128(vectors + idx));
    }
    return maxbitas32int(ored);
}
/*
 * maxbit over 128 integers (SIMDBlockSize) with provided initial value.
 *
 * The first vector is combined with a broadcast of `initvalue` through
 * Delta(); every following vector is combined with its predecessor the same
 * way. All Delta() results are OR-ed together and the combined vector is
 * reduced with maxbitas32int().
 *
 * Delta() is defined elsewhere -- presumably it yields the differences
 * between successive integers; confirm against its definition.
 *
 * initvalue: value broadcast to all four lanes as the "previous" vector for
 *            the first Delta().
 * in:        input block, read with unaligned loads; vector k is read for
 *            every k with 4*k < SIMDBlockSize.
 */
uint32_t simdmaxbitsd1(uint32_t initvalue, const uint32_t * in) {
    const __m128i initoffset = _mm_set1_epi32(initvalue);
    const __m128i *pin = (const __m128i *)(in);
    __m128i newvec = _mm_loadu_si128(pin);
    __m128i accumulator = Delta(newvec, initoffset);
    __m128i oldvec = newvec;
    uint32_t k;

    for (k = 1; 4 * k < SIMDBlockSize; ++k) {
        newvec = _mm_loadu_si128(pin + k);
        accumulator = _mm_or_si128(accumulator, Delta(newvec, oldvec));
        oldvec = newvec;
    }
    /* No state is carried out of this function, so the last vector is not
       written back to initoffset. */
    return maxbitas32int(accumulator);
}