// Convert 8 hex-grid site ids to axial (u,v) coordinates, counter-clockwise
// winding, for all 8 lanes in parallel.
//
// Scalar reference implementation (what the branch-free SIMD code below
// computes):
//   if(hexid==0) { u = v = 0; return; }
//   unsigned ringid; unsigned segid; unsigned runid;
//   positive_hexid_to_ringid_segid_runid(hexid, ringid, segid, runid);
//   switch(segid)
//   {
//     case 0: u = ringid-runid; v = runid;        break;
//     case 1: u = -runid;       v = ringid;       break;
//     case 2: u = -ringid;      v = ringid-runid; break;
//     case 3: u = runid-ringid; v = -runid;       break;
//     case 4: u = runid;        v = -ringid;      break;
//     case 5: u = ringid;       v = runid-ringid; break;
//     default: assert(0);
//   }
//
// SIMD strategy: instead of branching on segid, walk the ring. iring is the
// 0-based position of the site within its ring; each of the six segments is
// at most ringid steps long, so irun = min(iring, ringid) is how far this
// lane advances along the current segment, after which iring is reduced and
// the next segment is processed. Lanes that have exhausted their iring get
// irun == 0 and are unaffected by later segments. Segment 5 needs no min:
// whatever remains of iring belongs to it.
inline void avx2_hexid_to_uv_ccw(const __m256i hexid, __m256i& u, __m256i& v)
{
  const __m256i one = _mm256_set1_epi32(1);
  __m256i ringid = avx2_positive_hexid_to_ringid(hexid);
  // iring = index of the site within its ring (0-based):
  // hexid minus the number of sites contained in all inner rings.
  __m256i iring = _mm256_sub_epi32(hexid,
    avx2_ringid_to_nsites_contained(_mm256_sub_epi32(ringid,one)));

  // Start of segment 0: (u,v) = (ringid, 0).
  u = ringid;
  v = _mm256_setzero_si256();

  // Segment 0: step direction (-1,+1).
  __m256i irun = _mm256_min_epu32(iring, ringid);
  u = _mm256_sub_epi32(u, irun);
  v = _mm256_add_epi32(v, irun);
  iring = _mm256_sub_epi32(iring, irun);

  // Segment 1: step direction (-1, 0).
  irun = _mm256_min_epu32(iring, ringid);
  u = _mm256_sub_epi32(u, irun);
  iring = _mm256_sub_epi32(iring, irun);

  // Segment 2: step direction ( 0,-1).
  irun = _mm256_min_epu32(iring, ringid);
  v = _mm256_sub_epi32(v, irun);
  iring = _mm256_sub_epi32(iring, irun);

  // Segment 3: step direction (+1,-1).
  irun = _mm256_min_epu32(iring, ringid);
  u = _mm256_add_epi32(u, irun);
  v = _mm256_sub_epi32(v, irun);
  iring = _mm256_sub_epi32(iring, irun);

  // Segment 4: step direction (+1, 0).
  irun = _mm256_min_epu32(iring, ringid);
  u = _mm256_add_epi32(u, irun);
  iring = _mm256_sub_epi32(iring, irun);

  // Segment 5: step direction ( 0,+1); takes all remaining steps.
  v = _mm256_add_epi32(v, iring);

  // Handle the hexid == 0 special case: force (u,v) = (0,0) in those lanes.
  const __m256i mask = _mm256_cmpeq_epi32(hexid, _mm256_setzero_si256());
  u = _mm256_andnot_si256(mask, u);
  v = _mm256_andnot_si256(mask, v);
}
inline void avx2_hexid_to_uv_cw(const __m256i hexid, __m256i& u, __m256i& v) { #if 0 // This code is correct but it's not worth maintaining two versions const __m256i one = _mm256_set1_epi32(1); __m256i ringid = avx2_positive_hexid_to_ringid(hexid); __m256i iring = _mm256_sub_epi32(hexid, avx2_ringid_to_nsites_contained(_mm256_sub_epi32(ringid,one))); u = ringid; v = _mm256_setzero_si256(); __m256i irun = _mm256_min_epu32(iring, ringid); v = _mm256_sub_epi32(v, irun); iring = _mm256_sub_epi32(iring, irun); irun = _mm256_min_epu32(iring, ringid); u = _mm256_sub_epi32(u, irun); iring = _mm256_sub_epi32(iring, irun); irun = _mm256_min_epu32(iring, ringid); u = _mm256_sub_epi32(u, irun); v = _mm256_add_epi32(v, irun); iring = _mm256_sub_epi32(iring, irun); irun = _mm256_min_epu32(iring, ringid); v = _mm256_add_epi32(v, irun); iring = _mm256_sub_epi32(iring, irun); irun = _mm256_min_epu32(iring, ringid); u = _mm256_add_epi32(u, irun); iring = _mm256_sub_epi32(iring, irun); u = _mm256_add_epi32(u, irun); v = _mm256_add_epi32(v, iring); const __m256i mask = _mm256_cmpeq_epi32(hexid, _mm256_setzero_si256()); u = _mm256_andnot_si256(mask, u); v = _mm256_andnot_si256(mask, v); #else // hexid_to_uv_ccw(hexid, u, v); // u += v; // v = -v; avx2_hexid_to_uv_ccw(hexid, u, v); u = _mm256_add_epi32(u, v); v = _mm256_sign_epi32(v, _mm256_cmpeq_epi32(v, v)); #endif }
// Clang codegen test: verifies that _mm256_min_epu32 lowers to the AVX2
// unsigned-minimum intrinsic (pminud). The "CHECK:" line is a FileCheck
// directive — it is the test oracle, not an ordinary comment.
__m256i test_mm256_min_epu32(__m256i a, __m256i b)
{
  // CHECK: @llvm.x86.avx2.pminu.d
  return _mm256_min_epu32(a, b);
}
// Clang codegen test (newer lowering): _mm256_min_epu32 is emitted as a
// generic icmp-ult + select on <8 x i32> rather than a target intrinsic,
// letting the backend pick pminud. The "CHECK" lines are FileCheck
// directives — they are the test oracle, not ordinary comments.
__m256i test_mm256_min_epu32(__m256i a, __m256i b)
{
  // CHECK-LABEL: test_mm256_min_epu32
  // CHECK: [[CMP:%.*]] = icmp ult <8 x i32> [[X:%.*]], [[Y:%.*]]
  // CHECK-NEXT: select <8 x i1> [[CMP]], <8 x i32> [[X]], <8 x i32> [[Y]]
  return _mm256_min_epu32(a, b);
}
// Instruction-generation test: exercise _mm256_min_epu32 on the
// file-external global x so the compiler must emit the AVX2 pminud form.
extern void avx2_test(void)
{
  x = _mm256_min_epu32(x, x);
}