Example #1: 4x4 D135 intra predictor (libvpx, NEON)
#include <stddef.h>
#include <arm_neon.h>

/* D135 (135-degree diagonal) intra predictor for a 4x4 block.
 * Edge naming: X is the top-left pixel, A..D the four above pixels,
 * I..L the four left pixels. */
void vpx_d135_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride,
                                 const uint8_t *above, const uint8_t *left) {
  /* Load X,A,B,C,D (plus three don't-care bytes) from the row above. */
  const uint8x8_t XABCD_u8 = vld1_u8(above - 1);
  const uint64x1_t XABCD = vreinterpret_u64_u8(XABCD_u8);
  /* Move X,A,B,C into the upper four bytes of the 64-bit lane. */
  const uint64x1_t ____XABC = vshl_n_u64(XABCD, 32);
  const uint32x2_t zero = vdup_n_u32(0);
  /* Load the four left pixels I,J,K,L into the low 32 bits. */
  const uint32x2_t IJKL = vld1_lane_u32((const uint32_t *)left, zero, 0);
  const uint8x8_t IJKL_u8 = vreinterpret_u8_u32(IJKL);
  /* Reverse the left column and merge into the full edge L,K,J,I,X,A,B,C. */
  const uint64x1_t LKJI____ = vreinterpret_u64_u8(vrev32_u8(IJKL_u8));
  const uint64x1_t LKJIXABC = vorr_u64(LKJI____, ____XABC);
  /* The edge advanced by one and by two pixels, for the 3-tap filter. */
  const uint8x8_t KJIXABC_ = vreinterpret_u8_u64(vshr_n_u64(LKJIXABC, 8));
  const uint8x8_t JIXABC__ = vreinterpret_u8_u64(vshr_n_u64(LKJIXABC, 16));
  const uint8_t D = vget_lane_u8(XABCD_u8, 4);
  const uint8x8_t JIXABCD_ = vset_lane_u8(D, JIXABC__, 6);
  const uint8x8_t LKJIXABC_u8 = vreinterpret_u8_u64(LKJIXABC);
  /* (a + 2*b + c + 2) >> 2: halving add of the outer taps, then a
   * rounding halving add with the centre tap. */
  const uint8x8_t avg1 = vhadd_u8(JIXABCD_, LKJIXABC_u8);
  const uint8x8_t avg2 = vrhadd_u8(avg1, KJIXABC_);
  /* Each output row is a successive 4-byte window into the filtered edge. */
  const uint64x1_t avg2_u64 = vreinterpret_u64_u8(avg2);
  const uint32x2_t r3 = vreinterpret_u32_u8(avg2);
  const uint32x2_t r2 = vreinterpret_u32_u64(vshr_n_u64(avg2_u64, 8));
  const uint32x2_t r1 = vreinterpret_u32_u64(vshr_n_u64(avg2_u64, 16));
  const uint32x2_t r0 = vreinterpret_u32_u64(vshr_n_u64(avg2_u64, 24));
  vst1_lane_u32((uint32_t *)(dst + 0 * stride), r0, 0);
  vst1_lane_u32((uint32_t *)(dst + 1 * stride), r1, 0);
  vst1_lane_u32((uint32_t *)(dst + 2 * stride), r2, 0);
  vst1_lane_u32((uint32_t *)(dst + 3 * stride), r3, 0);
}
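
The vhadd_u8/vrhadd_u8 pair above implements the usual 3-tap smoothing filter (a + 2*b + c + 2) >> 2 along the edge: the halving add averages the two outer taps with truncation, then the rounding halving add folds in the centre tap. Dropping the low bit of a + c never changes the final division by 4, so the pair is bit-exact with the scalar form. A minimal scalar reference (the helper name avg3 is ours, not from the original):

#include <stdint.h>

/* Scalar equivalent of the vhadd_u8 + vrhadd_u8 pair above. */
static uint8_t avg3(uint8_t a, uint8_t b, uint8_t c) {
  return (uint8_t)((a + 2 * b + c + 2) >> 2);
}

Row r, column c of the 4x4 output is avg3 over the edge pixels starting at index (3 - r) + c of L,K,J,I,X,A,B,C,D.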
Example #2: minimal wrapper around vshl_n_u64 (GNU-style compile test)
#include <arm_neon.h>

/* vshl_n_u64 shifts the single 64-bit lane left by an immediate;
   the count must be a compile-time constant in the range [0, 63]. */
uint64x1_t
test_vshl_n_u64 (uint64x1_t a)
{
  return vshl_n_u64 (a, 9);
}
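
A quick sanity check of the semantics, assuming it is compiled in the same file as the function above: a lane holding 1 shifted left by 9 yields 512.

#include <stdio.h>

int main (void)
{
  uint64x1_t v = vdup_n_u64 (1);
  uint64x1_t r = test_vshl_n_u64 (v);
  /* Prints 512, i.e. 1 << 9. */
  printf ("%llu\n", (unsigned long long) vget_lane_u64 (r, 0));
  return 0;
}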
Example #3: BCH 128-bit to 64-bit fold
/*
 * BCH code: fold a 128-bit input into 64 bits by XORing shifted
 * copies of the two input words.
 */
#include <arm_neon.h>

/* The original typedefs are not shown here; these definitions are
 * assumptions consistent with the intrinsics and accessors used below. */
typedef uint64x1_t ui64;
typedef uint64x2_t uv64;

uint64_t BCH128to64(uv64 in){
  register ui64 b1 = vget_low_u64(in);   /* low 64 bits of the input */
  /* Four independent accumulators expose instruction-level parallelism;
   * each shift amount below is one tap position of the fold. */
  register ui64 res_a = b1;
  register ui64 res_b = vshl_n_u64(b1, 2);
  register ui64 res_c = vshl_n_u64(b1, 7);
  register ui64 res_d = vshl_n_u64(b1, 8);

  res_a = ui64_shiftl_xor(res_a, b1, 10);
  res_b = ui64_shiftl_xor(res_b, b1, 12);
  res_c = ui64_shiftl_xor(res_c, b1, 14);
  res_d = ui64_shiftl_xor(res_d, b1, 15);
  res_a = ui64_shiftl_xor(res_a, b1, 16);
  res_b = ui64_shiftl_xor(res_b, b1, 23);
  res_c = ui64_shiftl_xor(res_c, b1, 25);
  res_d = ui64_shiftl_xor(res_d, b1, 27);
  res_a = ui64_shiftl_xor(res_a, b1, 28);
  res_b = ui64_shiftl_xor(res_b, b1, 30);
  res_c = ui64_shiftl_xor(res_c, b1, 31);
  res_d = ui64_shiftl_xor(res_d, b1, 32);
  res_a = ui64_shiftl_xor(res_a, b1, 33);
  res_b = ui64_shiftl_xor(res_b, b1, 37);
  res_c = ui64_shiftl_xor(res_c, b1, 38);
  res_d = ui64_shiftl_xor(res_d, b1, 39);
  res_a = ui64_shiftl_xor(res_a, b1, 40);
  res_b = ui64_shiftl_xor(res_b, b1, 41);
  res_c = ui64_shiftl_xor(res_c, b1, 42);
  res_d = ui64_shiftl_xor(res_d, b1, 44);
  res_a = ui64_shiftl_xor(res_a, b1, 45);
  res_b = ui64_shiftl_xor(res_b, b1, 48);
  res_c = ui64_shiftl_xor(res_c, b1, 58);
  res_d = ui64_shiftl_xor(res_d, b1, 61);
  res_a = ui64_shiftl_xor(res_a, b1, 63);
  
  register ui64 b2 = vget_high_u64(in);  /* high 64 bits of the input */
  res_b = ui64_shiftr_xor(res_b, b2, 62);
  res_c = ui64_shiftr_xor(res_c, b2, 57);
  res_d = ui64_shiftr_xor(res_d, b2, 56);
  res_a = ui64_shiftr_xor(res_a, b2, 54);
  res_b = ui64_shiftr_xor(res_b, b2, 52);
  res_c = ui64_shiftr_xor(res_c, b2, 50);
  res_d = ui64_shiftr_xor(res_d, b2, 49);
  res_a = ui64_shiftr_xor(res_a, b2, 48);
  res_b = ui64_shiftr_xor(res_b, b2, 41);
  res_c = ui64_shiftr_xor(res_c, b2, 39);
  res_d = ui64_shiftr_xor(res_d, b2, 37);
  res_a = ui64_shiftr_xor(res_a, b2, 36);
  res_b = ui64_shiftr_xor(res_b, b2, 34);
  res_c = ui64_shiftr_xor(res_c, b2, 33);
  res_d = ui64_shiftr_xor(res_d, b2, 32);
  res_a = ui64_shiftr_xor(res_a, b2, 31);
  res_b = ui64_shiftr_xor(res_b, b2, 27);
  res_c = ui64_shiftr_xor(res_c, b2, 26);
  res_d = ui64_shiftr_xor(res_d, b2, 25);
  res_a = ui64_shiftr_xor(res_a, b2, 24);
  res_b = ui64_shiftr_xor(res_b, b2, 23);
  res_c = ui64_shiftr_xor(res_c, b2, 22);
  res_d = ui64_shiftr_xor(res_d, b2, 20);
  res_a = ui64_shiftr_xor(res_a, b2, 19);
  res_b = ui64_shiftr_xor(res_b, b2, 16);
  res_c = ui64_shiftr_xor(res_c, b2, 6);
  res_d = ui64_shiftr_xor(res_d, b2, 3);
  res_a = ui64_shiftr_xor(res_a, b2, 1);

  /* Combine the four partial results. */
  ui64 res = veor_u64(veor_u64(res_a, res_b), veor_u64(res_c, res_d));

  /* Conditionally complement the result when bit 0 of the high word is
   * set (0 - 1 wraps to all ones in unsigned arithmetic). */
  return vget_lane_u64(res, 0) ^ (0 - (vget_lane_u64(b2, 0) & 1));
}
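
The helpers ui64_shiftl_xor and ui64_shiftr_xor are not defined in this example. A minimal sketch consistent with how they are called, under the same assumed typedefs: each one XORs a shifted copy of a word into an accumulator. Unlike the vshl_n_u64 immediates above, the register-form vshl_u64 accepts a runtime count, and a negative count shifts right.

/* r ^ (b << n): XOR-accumulate a left-shifted copy of b. */
static inline ui64 ui64_shiftl_xor(ui64 r, ui64 b, int n) {
  return veor_u64(r, vshl_u64(b, vdup_n_s64(n)));
}

/* r ^ (b >> n): a negative shift count in vshl_u64 shifts right. */
static inline ui64 ui64_shiftr_xor(ui64 r, ui64 b, int n) {
  return veor_u64(r, vshl_u64(b, vdup_n_s64(-n)));
}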