Exemplo n.º 1
0
 // Builds an 8-bit neighbourhood code for every byte lane of one 512-bit vector.
 //
 // The vector loaded from src is the per-lane threshold. Each of the eight
 // surrounding positions (src +/- 1, src +/- stride and the four diagonals) is
 // loaded and tested lane by lane with an unsigned "neighbour >= threshold"
 // compare; a lane that passes gets that neighbour's bit set in the result:
 //
 //     0x01  0x02  0x04        (addresses around src - stride)
 //     0x80   --   0x08        (addresses around src)
 //     0x40  0x20  0x10        (addresses around src + stride)
 //
 // The combined vector is written to dst through Store<false, mask>.
 //
 // align - forwarded only to the three loads at column offset -1; every other
 //         access uses Load/Store<false, ...>.
 //         NOTE(review): presumably callers arrange for src - 1 to be aligned
 //         when align is true - confirm against the call sites.
 // mask  - forwarded together with tail to every Load/Store call.
 //         NOTE(review): Load/Store are defined outside this block; tail looks
 //         like a per-byte lane mask (default -1 = all bits set) - confirm.
 template <bool align, bool mask> void LbpEstimate(const uint8_t * src, ptrdiff_t stride, uint8_t * dst, __mmask64 tail = -1)
 {
     const __m512i center = Load<false, mask>(src, tail);

     // One compare mask per neighbour, taken in the order of their bit values.
     // The extra parentheses around Load<...>(...) keep the comma between the
     // template arguments from splitting the argument list should the compare
     // intrinsic be provided as a macro.
     const __mmask64 geUpLeft    = _mm512_cmpge_epu8_mask((Load<align, mask>(src - 1 - stride, tail)), center);
     const __mmask64 geUp        = _mm512_cmpge_epu8_mask((Load<false, mask>(src - stride, tail)), center);
     const __mmask64 geUpRight   = _mm512_cmpge_epu8_mask((Load<false, mask>(src + 1 - stride, tail)), center);
     const __mmask64 geRight     = _mm512_cmpge_epu8_mask((Load<false, mask>(src + 1, tail)), center);
     const __mmask64 geDownRight = _mm512_cmpge_epu8_mask((Load<false, mask>(src + 1 + stride, tail)), center);
     const __mmask64 geDown      = _mm512_cmpge_epu8_mask((Load<false, mask>(src + stride, tail)), center);
     const __mmask64 geDownLeft  = _mm512_cmpge_epu8_mask((Load<align, mask>(src - 1 + stride, tail)), center);
     const __mmask64 geLeft      = _mm512_cmpge_epu8_mask((Load<align, mask>(src - 1, tail)), center);

     // Turn each mask into its bit value in the selected lanes (zero elsewhere)
     // and merge the eight partial vectors.
     __m512i code = _mm512_maskz_set1_epi8(geUpLeft, (char)0x01);
     code = _mm512_or_si512(code, _mm512_maskz_set1_epi8(geUp, (char)0x02));
     code = _mm512_or_si512(code, _mm512_maskz_set1_epi8(geUpRight, (char)0x04));
     code = _mm512_or_si512(code, _mm512_maskz_set1_epi8(geRight, (char)0x08));
     code = _mm512_or_si512(code, _mm512_maskz_set1_epi8(geDownRight, (char)0x10));
     code = _mm512_or_si512(code, _mm512_maskz_set1_epi8(geDown, (char)0x20));
     code = _mm512_or_si512(code, _mm512_maskz_set1_epi8(geDownLeft, (char)0x40));
     code = _mm512_or_si512(code, _mm512_maskz_set1_epi8(geLeft, (char)0x80));

     Store<false, mask>(dst, code, tail);
 }
Exemplo n.º 2
0
/*
 * Invokes the byte-broadcast intrinsics in merge-masked (_mask_) and
 * zero-masked (_maskz_) form at 512-, 256- and 128-bit width, plus the
 * unmasked form at 512-bit width only.
 *
 * Every operand (x, y, z, w, mx, my, mz) is declared outside this block and
 * each result is assigned back to one of them.
 * NOTE(review): going by the intrinsic signatures, x/y/z should be the
 * 512/256/128-bit integer vectors, w a byte-sized scalar and mx/my/mz the
 * matching lane masks - confirm against their declarations.
 */
extern void avx512bw_test(void)
{
    /* Source is the low byte of vector z. */
    x = _mm512_broadcastb_epi8(z);
    x = _mm512_mask_broadcastb_epi8(x, mx, z);
    x = _mm512_maskz_broadcastb_epi8(mx, z);
    y = _mm256_mask_broadcastb_epi8(y, my, z);
    y = _mm256_maskz_broadcastb_epi8(my, z);
    z = _mm_mask_broadcastb_epi8(z, mz, z);
    z = _mm_maskz_broadcastb_epi8(mz, z);

    /* Source is the scalar w. */
    x = _mm512_set1_epi8(w);
    x = _mm512_mask_set1_epi8(x, mx, w);
    x = _mm512_maskz_set1_epi8(mx, w);
    y = _mm256_mask_set1_epi8(y, my, w);
    y = _mm256_maskz_set1_epi8(my, w);
    z = _mm_mask_set1_epi8(z, mz, w);
    z = _mm_maskz_set1_epi8(mz, w);
}