// Horizontal sum over a 512-bit vector: applies _mm256_hsum_epi64 to the lower
// and to the upper 256-bit half of `v` and returns the sum of the two results.
// NOTE(review): _mm256_hsum_epi64 is defined outside this block; it presumably
// adds up the four 64-bit lanes of its argument -- confirm against its
// definition.
std::uint64_t _mm512_hsum_epi64(__m512i v) {
    // `auto` keeps the helper's own return type, so the addition below is
    // carried out in that type before converting to the declared return type.
    const auto lower_sum = _mm256_hsum_epi64(_mm512_extracti64x4_epi64(v, 0));
    const auto upper_sum = _mm256_hsum_epi64(_mm512_extracti64x4_epi64(v, 1));

    return lower_sum + upper_sum;
}
static __m256i popcount(const __m512i v)
{
    const __m256i lo = _mm512_extracti64x4_epi64(v, 0);
    const __m256i hi = _mm512_extracti64x4_epi64(v, 1);
    const __m256i s  = _mm256_add_epi8(avx2_popcount(lo), avx2_popcount(hi));

    return _mm256_sad_epu8(s, _mm256_setzero_si256());
}
/*
 * Applies maxbitas32int to the lower and to the upper 256-bit half of
 * `accumulator`, prints both intermediate results to stdout, and returns
 * bits() of the larger one.
 *
 * NOTE(review): maxbitas32int and bits are defined outside this block; going
 * by their names, the former presumably folds a 256-bit vector into one
 * 32-bit value and the latter returns the bit width needed for it -- confirm.
 */
static uint32_t avx512maxbitas32int(const __m512i accumulator) {
  /* The cast reinterprets the low 256 bits; the extract fetches the high 256. */
  const uint32_t ans1 = maxbitas32int(_mm512_castsi512_si256(accumulator));
  const uint32_t ans2 = maxbitas32int(_mm512_extracti64x4_epi64(accumulator, 1));
  /*
   * The space flag is only meaningful for signed conversions, so "% u" was a
   * malformed specifier; plain %u is used, with casts so the argument type
   * matches the specifier regardless of how uint32_t is defined.
   */
  printf("ans1 = %u ans2 = %u\n", (unsigned)ans1, (unsigned)ans2);
  const uint32_t ans = ans1 > ans2 ? ans1 : ans2;
  return bits(ans);
}
/*
 * Compiler test fixture: invokes the three variants of the 256-bit integer
 * extract from a 512-bit vector (unmasked, merge-masked, zero-masked), each
 * with lane selector 1, and stores every result into `y`.
 * `x` and `y` are declared outside this block (presumably a __m512i source
 * and a __m256i destination -- confirm against the file-level declarations).
 */
void extern
avx512f_test (void)
{
  /* Unmasked extract; immediate 1 selects the upper 256-bit half. */
  y = _mm512_extracti64x4_epi64 (x, 1);
  /* Merge-masked form: `y` is passed as the pass-through source, mask is 2. */
  y = _mm512_mask_extracti64x4_epi64 (y, 2, x, 1);
  /* Zero-masked form with the same mask literal 2. */
  y = _mm512_maskz_extracti64x4_epi64 (2, x, 1);
}
Example #5
0
/*
 * Negative compile test: every statement passes 256 as the final immediate to
 * a 256-bit extract or insert intrinsic.  The trailing directive comment on
 * each line records the diagnostic the compiler is expected to emit for it
 * ("the last argument must be a 1-bit immediate").  Those trailing comments
 * are test directives rather than documentation, so keep each one on the same
 * line as the call it checks.
 * The operands m256d, m256i, m512d, m512i and mmask8 are declared outside this
 * block.
 */
void
test1bit (void) {
  /* Extract of 256 bits as packed doubles: unmasked, mask, maskz variants. */
  m256d = _mm512_extractf64x4_pd (m512d, 256); /* { dg-error "the last argument must be a 1-bit immediate" } */
  m256d = _mm512_mask_extractf64x4_pd (m256d, mmask8, m512d, 256); /* { dg-error "the last argument must be a 1-bit immediate" } */
  m256d = _mm512_maskz_extractf64x4_pd (mmask8, m512d, 256); /* { dg-error "the last argument must be a 1-bit immediate" } */

  /* Extract of 256 bits as packed 64-bit integers: unmasked, mask, maskz. */
  m256i = _mm512_extracti64x4_epi64 (m512i, 256); /* { dg-error "the last argument must be a 1-bit immediate" } */
  m256i = _mm512_mask_extracti64x4_epi64 (m256i, mmask8, m512i, 256); /* { dg-error "the last argument must be a 1-bit immediate" } */
  m256i = _mm512_maskz_extracti64x4_epi64 (mmask8, m512i, 256); /* { dg-error "the last argument must be a 1-bit immediate" } */

  /* Insert of 256 bits of packed doubles: unmasked, mask, maskz. */
  m512d = _mm512_insertf64x4 (m512d, m256d, 256); /* { dg-error "the last argument must be a 1-bit immediate" } */
  m512d = _mm512_mask_insertf64x4 (m512d, mmask8, m512d, m256d, 256); /* { dg-error "the last argument must be a 1-bit immediate" } */
  m512d = _mm512_maskz_insertf64x4 (mmask8, m512d, m256d, 256); /* { dg-error "the last argument must be a 1-bit immediate" } */

  /* Insert of 256 bits of packed 64-bit integers: unmasked, mask, maskz. */
  m512i = _mm512_inserti64x4 (m512i, m256i, 256); /* { dg-error "the last argument must be a 1-bit immediate" } */
  m512i = _mm512_mask_inserti64x4 (m512i, mmask8, m512i, m256i, 256); /* { dg-error "the last argument must be a 1-bit immediate" } */
  m512i = _mm512_maskz_inserti64x4 (mmask8, m512i, m256i, 256); /* { dg-error "the last argument must be a 1-bit immediate" } */
}
/*
 * Mandelbrot-set benchmark driver.
 *
 * Iterates the Mandelbrot recurrence (at most 50 steps, escape radius 2.0)
 * over a w x h grid, where w = h = atoi(argv[1]) or 32000 when no argument is
 * given.  One bit per pixel is packed MSB-first into bytes; the last byte of a
 * row is left-padded when w is not a multiple of 8.  Only the "P4" bitmap
 * header is printed: the packed bytes are summed into a checksum instead of
 * being written, and that checksum is returned as the exit status so the
 * computation has an observable result.
 *
 * When USEAVX512 is defined, each row additionally advances a pair of 512-bit
 * vectors (a, b) <- (b, b + a), or (b, b * a) with USEHEAVYAVX512, and one
 * 32-bit element of the final vector is printed at the end.
 *
 * Returns: the byte checksum converted to int.
 */
int main(int argc, char **argv) {
  const int iter = 50;
  const double limit = 2.0;
  const double limit_sq = limit * limit; /* loop-invariant escape threshold */
  int w, h, bit_num = 0;
  /*
   * Unsigned so that the shifts and the checksum do not depend on whether
   * plain char is signed on the target.
   */
  unsigned char byte_acc = 0;
  /* Unsigned accumulator: wraps instead of overflowing on 32-bit long. */
  unsigned long byte_total = 0;
  int x, y, i;
  double Zr, Zi, Cr, Ci, Tr, Ti;

  w = h = argc > 1 ? atoi(argv[1]) : 32000;

  printf("P4\n%d %d\n", w, h);
#ifdef USEAVX512
  __m512i a = _mm512_set1_epi32(0);
  __m512i b = _mm512_set1_epi32(1);
  __m512i t;
#endif
  for (y = 0; y < h; ++y) {
#ifdef USEAVX512
    /* One step of the two-term recurrence per row. */
    t = a;
    a = b;
#ifdef USEHEAVYAVX512
    b = _mm512_mul_epi32(b, t);
#else
    b = _mm512_add_epi32(b, t);
#endif
#endif
    for (x = 0; x < w; ++x) {
      Zr = Zi = Tr = Ti = 0.0;
      /* Map the pixel to the complex plane: real in [-1.5, 0.5), imag in [-1, 1). */
      Cr = (2.0 * x / w - 1.5);
      Ci = (2.0 * y / h - 1.0);

      for (i = 0; i < iter && (Tr + Ti <= limit_sq); ++i) {
        Zi = 2.0 * Zr * Zi + Ci;
        Zr = Tr - Ti + Cr;
        Tr = Zr * Zr;
        Ti = Zi * Zi;
      }

      /* Shift in one bit: 1 when the point has not escaped. */
      byte_acc <<= 1;
      if (Tr + Ti <= limit_sq)
        byte_acc |= 0x01;

      ++bit_num;

      if (bit_num == 8) {
        /* Full byte: folded into the checksum instead of being written out. */
        byte_total += byte_acc;
        byte_acc = 0;
        bit_num = 0;
      } else if (x == w - 1) {
        /* Row ended mid-byte: left-align the bits that were collected. */
        byte_acc <<= (8 - w % 8);
        byte_total += byte_acc;
        byte_acc = 0;
        bit_num = 0;
      }
    }
  }
#ifdef USEAVX512
  printf("we used avx512 %d \n", _mm256_extract_epi32(_mm512_extracti64x4_epi64(b, 1), 7));
#else
  printf("we did not use avx512\n");
#endif
  return (int)byte_total;
}