示例#1
0
inline void avx2_hexid_to_uv_ccw(const __m256i hexid, __m256i& u, __m256i& v)
{
  // Converts eight 32-bit hex IDs (one per lane) into (u,v) coordinates,
  // numbering each ring counter-clockwise. Branch-free equivalent of the
  // scalar routine:
  //
  //   if(hexid==0) { u = v = 0; return; }
  //   positive_hexid_to_ringid_segid_runid(hexid, ringid, segid, runid);
  //   switch(segid)
  //   {
  //     case 0: u = ringid-runid; v = runid;        break;
  //     case 1: u = -runid;       v = ringid;       break;
  //     case 2: u = -ringid;      v = ringid-runid; break;
  //     case 3: u = runid-ringid; v = -runid;       break;
  //     case 4: u = runid;        v = -ringid;      break;
  //     case 5: u = ringid;       v = runid-ringid; break;
  //   }
  //
  // Instead of switching on the segment, the position within the ring is
  // peeled into six consecutive steps (the first five capped at ringid, the
  // sixth taking whatever is left). Each step moves along one fixed
  // direction, starting from (u,v) = (ringid,0).
  const __m256i ring = avx2_positive_hexid_to_ringid(hexid);
  const __m256i inner_ring = _mm256_sub_epi32(ring, _mm256_set1_epi32(1));

  // Position of the site within its ring: hexid minus the value returned by
  // avx2_ringid_to_nsites_contained for the next ring in (presumably the
  // count of sites in all inner rings -- confirm against that helper).
  __m256i remaining =
    _mm256_sub_epi32(hexid, avx2_ringid_to_nsites_contained(inner_ring));

  // Unsigned min is used so each step consumes at most `ring` sites.
  const __m256i step0 = _mm256_min_epu32(remaining, ring);
  remaining = _mm256_sub_epi32(remaining, step0);

  const __m256i step1 = _mm256_min_epu32(remaining, ring);
  remaining = _mm256_sub_epi32(remaining, step1);

  const __m256i step2 = _mm256_min_epu32(remaining, ring);
  remaining = _mm256_sub_epi32(remaining, step2);

  const __m256i step3 = _mm256_min_epu32(remaining, ring);
  remaining = _mm256_sub_epi32(remaining, step3);

  const __m256i step4 = _mm256_min_epu32(remaining, ring);
  const __m256i step5 = _mm256_sub_epi32(remaining, step4);

  // Direction of travel per step:
  //   step0: u-, v+   step1: u-       step2: v-
  //   step3: u+, v-   step4: u+       step5: v+
  // Lane arithmetic wraps modulo 2^32, so the terms can be grouped freely.
  const __m256i u_minus = _mm256_add_epi32(step0, step1);
  const __m256i u_plus  = _mm256_add_epi32(step3, step4);
  const __m256i v_plus  = _mm256_add_epi32(step0, step5);
  const __m256i v_minus = _mm256_add_epi32(step2, step3);

  const __m256i u_ring =
    _mm256_add_epi32(_mm256_sub_epi32(ring, u_minus), u_plus);
  const __m256i v_ring = _mm256_sub_epi32(v_plus, v_minus);

  // Lanes with hexid==0 are forced to u = v = 0 regardless of the above.
  const __m256i is_origin = _mm256_cmpeq_epi32(hexid, _mm256_setzero_si256());
  u = _mm256_andnot_si256(is_origin, u_ring);
  v = _mm256_andnot_si256(is_origin, v_ring);
}
示例#2
0
inline void avx2_hexid_to_uv_cw(const __m256i hexid, __m256i& u, __m256i& v)
{
  // Converts eight 32-bit hex IDs (one per lane) into (u,v) coordinates with
  // each ring numbered clockwise. The compiled path derives the result from
  // the counter-clockwise conversion via  u_cw = u_ccw + v_ccw,  v_cw = -v_ccw.
#if 0 // This code is correct but it's not worth maintaining two versions
  // Reference only (not compiled): direct six-step walk around the ring,
  // starting from (u,v) = (ringid,0). Each step consumes at most ringid sites.
  const __m256i one = _mm256_set1_epi32(1);
  __m256i ringid = avx2_positive_hexid_to_ringid(hexid);
  __m256i iring = _mm256_sub_epi32(hexid,
    avx2_ringid_to_nsites_contained(_mm256_sub_epi32(ringid,one)));

  u = ringid;
  v = _mm256_setzero_si256();

  // Step 0: v decreases
  __m256i irun = _mm256_min_epu32(iring, ringid);
  v = _mm256_sub_epi32(v, irun);
  iring = _mm256_sub_epi32(iring, irun);

  // Step 1: u decreases
  irun = _mm256_min_epu32(iring, ringid);
  u = _mm256_sub_epi32(u, irun);
  iring = _mm256_sub_epi32(iring, irun);

  // Step 2: u decreases, v increases
  irun = _mm256_min_epu32(iring, ringid);
  u = _mm256_sub_epi32(u, irun);
  v = _mm256_add_epi32(v, irun);
  iring = _mm256_sub_epi32(iring, irun);

  // Step 3: v increases
  irun = _mm256_min_epu32(iring, ringid);
  v = _mm256_add_epi32(v, irun);
  iring = _mm256_sub_epi32(iring, irun);

  // Step 4: u increases
  irun = _mm256_min_epu32(iring, ringid);
  u = _mm256_add_epi32(u, irun);
  iring = _mm256_sub_epi32(iring, irun);

  // Step 5: whatever is left moves u up and v down. This must use the
  // remainder `iring`, not the previous step's `irun`, to agree with the
  // transform of the counter-clockwise result (u = runid, v = ringid-runid).
  u = _mm256_add_epi32(u, iring);
  v = _mm256_sub_epi32(v, iring);

  // Lanes with hexid==0 are forced to u = v = 0.
  const __m256i mask = _mm256_cmpeq_epi32(hexid, _mm256_setzero_si256());
  u = _mm256_andnot_si256(mask, u);
  v = _mm256_andnot_si256(mask, v);
#else
  // hexid_to_uv_ccw(hexid, u, v);
  // u += v;
  // v = -v;
  avx2_hexid_to_uv_ccw(hexid, u, v);
  u = _mm256_add_epi32(u, v);
  // cmpeq(v,v) is all-ones (-1) in every lane, so the sign operation negates
  // every lane of v.
  v = _mm256_sign_epi32(v, _mm256_cmpeq_epi32(v, v));
#endif
}
示例#3
0
// Thin wrapper that returns _mm256_min_epu32(a, b) unchanged.
// NOTE(review): this looks like a compiler codegen test; the directive
// comment inside the body is presumably matched against the emitted IR by a
// FileCheck-style tool (here expecting a call to the AVX2 pminu.d intrinsic),
// so it and the statement below should be kept verbatim -- confirm.
__m256i test_mm256_min_epu32(__m256i a, __m256i b) {
  // CHECK: @llvm.x86.avx2.pminu.d
  return _mm256_min_epu32(a, b);
}
示例#4
0
// Thin wrapper that returns _mm256_min_epu32(a, b) unchanged.
// NOTE(review): this looks like a compiler codegen test; the directive
// comments inside the body are presumably matched against the emitted IR by a
// FileCheck-style tool. They expect an unsigned less-than compare on
// <8 x i32> followed immediately by a select between the same two operands,
// so the comments and the statement should be kept verbatim -- confirm.
__m256i test_mm256_min_epu32(__m256i a, __m256i b) {
  // CHECK-LABEL: test_mm256_min_epu32
  // CHECK:       [[CMP:%.*]] = icmp ult <8 x i32> [[X:%.*]], [[Y:%.*]]
  // CHECK-NEXT:  select <8 x i1> [[CMP]], <8 x i32> [[X]], <8 x i32> [[Y]]
  return _mm256_min_epu32(a, b);
}
示例#5
0
/* Replaces x with _mm256_min_epu32 (x, x), the lane-wise unsigned 32-bit
   minimum of x with itself.  x is not declared in this block; it must be a
   file-scope object defined elsewhere (presumably of type __m256i --
   confirm).  The storage-class specifier is written first: placing it after
   the type specifier is an obsolescent form in C.  */
extern void
avx2_test (void)
{
  x = _mm256_min_epu32 (x, x);
}