Example #1
0
/*
 * Row pass of a 16x8 forward DCT, expressed with the project's vector macros.
 *
 * input  - source coefficients; read as two 8-vector tiles starting at
 *          `input` and `input + 8`, each loaded with a stride argument of 16.
 *          The buffer is also used as scratch: eight vectors are stored back
 *          to `input` part-way through, so the caller's data is overwritten.
 * output - destination; two 8-vector tiles are stored at `output` and
 *          `output + 8`, each with a stride argument of 16.
 *
 * NOTE(review): the stride is presumably counted in int16_t elements and both
 * buffers presumably need whatever alignment the load/store macros require;
 * confirm against the macro definitions.
 */
void fdct16x8_1d_row(int16_t *input, int16_t *output) {
  int16_t *const input_hi = input + 8;
  int16_t *const output_hi = output + 8;
  /* lo*: first tile, later reused for the FDCT8x16_ODD results. */
  v8i16 lo0, lo1, lo2, lo3, lo4, lo5, lo6, lo7;
  /* hi*: second tile, later the second output group of BUTTERFLY_16. */
  v8i16 hi0, hi1, hi2, hi3, hi4, hi5, hi6, hi7;
  /* ev*: first output group of BUTTERFLY_16, fed to FDCT8x16_EVEN. */
  v8i16 ev0, ev1, ev2, ev3, ev4, ev5, ev6, ev7;

  /* First tile: load and transpose in place. */
  LD_SH8(input, 16, lo0, lo1, lo2, lo3, lo4, lo5, lo6, lo7);
  TRANSPOSE8x8_SH_SH(lo0, lo1, lo2, lo3, lo4, lo5, lo6, lo7, lo0, lo1, lo2, lo3,
                     lo4, lo5, lo6, lo7);

  /* Second tile: load and transpose in place. */
  LD_SH8(input_hi, 16, hi0, hi1, hi2, hi3, hi4, hi5, hi6, hi7);
  TRANSPOSE8x8_SH_SH(hi0, hi1, hi2, hi3, hi4, hi5, hi6, hi7, hi0, hi1, hi2, hi3,
                     hi4, hi5, hi6, hi7);

  /* Add 1 to every vector, then shift right by 2, four vectors at a time. */
  ADD4(lo0, 1, lo1, 1, lo2, 1, lo3, 1, lo0, lo1, lo2, lo3);
  SRA_4V(lo0, lo1, lo2, lo3, 2);
  ADD4(lo4, 1, lo5, 1, lo6, 1, lo7, 1, lo4, lo5, lo6, lo7);
  SRA_4V(lo4, lo5, lo6, lo7, 2);
  ADD4(hi0, 1, hi1, 1, hi2, 1, hi3, 1, hi0, hi1, hi2, hi3);
  SRA_4V(hi0, hi1, hi2, hi3, 2);
  ADD4(hi4, 1, hi5, 1, hi6, 1, hi7, 1, hi4, hi5, hi6, hi7);
  SRA_4V(hi4, hi5, hi6, hi7, 2);

  /* 16-input butterfly: results land in ev0..ev7 and back in hi0..hi7. */
  BUTTERFLY_16(lo0, lo1, lo2, lo3, lo4, lo5, lo6, lo7, hi0, hi1, hi2, hi3, hi4,
               hi5, hi6, hi7, ev0, ev1, ev2, ev3, ev4, ev5, ev6, ev7, hi0, hi1,
               hi2, hi3, hi4, hi5, hi6, hi7);

  /*
   * Park hi0..hi7 in the input buffer while the even half is computed, then
   * bring them back for the odd half. Presumably this is done to limit the
   * number of live vector registers; the store does overwrite `input`.
   */
  ST_SH8(hi0, hi1, hi2, hi3, hi4, hi5, hi6, hi7, input, 16);
  FDCT8x16_EVEN(ev0, ev1, ev2, ev3, ev4, ev5, ev6, ev7, ev0, ev1, ev2, ev3, ev4,
                ev5, ev6, ev7);
  LD_SH8(input, 16, hi0, hi1, hi2, hi3, hi4, hi5, hi6, hi7);
  FDCT8x16_ODD(hi0, hi1, hi2, hi3, hi4, hi5, hi6, hi7, lo0, lo1, lo2, lo3, lo4,
               lo5, lo6, lo7);

  /* Interleave even/odd results, transpose, and store the first tile. */
  TRANSPOSE8x8_SH_SH(ev0, lo0, ev1, lo1, ev2, lo2, ev3, lo3, ev0, lo0, ev1, lo1,
                     ev2, lo2, ev3, lo3);
  ST_SH8(ev0, lo0, ev1, lo1, ev2, lo2, ev3, lo3, output, 16);

  /* Same for the remaining results, stored eight elements further on. */
  TRANSPOSE8x8_SH_SH(ev4, lo4, ev5, lo5, ev6, lo6, ev7, lo7, ev4, lo4, ev5, lo5,
                     ev6, lo6, ev7, lo7);
  ST_SH8(ev4, lo4, ev5, lo5, ev6, lo6, ev7, lo7, output_hi, 16);
}
Example #2
0
/*
 * Row pass of a 16x8 forward DCT that runs FDCT_POSTPROC_2V_NEG_H on every
 * loaded vector pair before the butterfly stage.
 *
 * intermediate - source data; read as two 8-vector tiles starting at
 *                `intermediate` and `intermediate + 8`, each loaded with a
 *                stride argument of 16. It is also used as scratch: eight
 *                vectors are stored back to it part-way through, so its
 *                contents are overwritten.
 * output       - destination; two 8-vector tiles are stored at `output` and
 *                `output + 8`, each with a stride argument of 16.
 *
 * NOTE(review): the exact rounding/scaling done by FDCT_POSTPROC_2V_NEG_H is
 * defined by that macro and is not visible here; confirm before relying on it.
 */
static void postproc_fdct16x8_1d_row(int16_t *intermediate, int16_t *output) {
  int16_t *const src_hi = intermediate + 8;
  int16_t *const dst_hi = output + 8;
  /* lo*: first tile, later reused for the FDCT8x16_ODD results. */
  v8i16 lo0, lo1, lo2, lo3, lo4, lo5, lo6, lo7;
  /* hi*: second tile, later the second output group of BUTTERFLY_16. */
  v8i16 hi0, hi1, hi2, hi3, hi4, hi5, hi6, hi7;
  /* ev*: first output group of BUTTERFLY_16, fed to FDCT8x16_EVEN. */
  v8i16 ev0, ev1, ev2, ev3, ev4, ev5, ev6, ev7;

  /* First tile: load and transpose in place. */
  LD_SH8(intermediate, 16, lo0, lo1, lo2, lo3, lo4, lo5, lo6, lo7);
  TRANSPOSE8x8_SH_SH(lo0, lo1, lo2, lo3, lo4, lo5, lo6, lo7, lo0, lo1, lo2, lo3,
                     lo4, lo5, lo6, lo7);

  /* Second tile: load and transpose in place. */
  LD_SH8(src_hi, 16, hi0, hi1, hi2, hi3, hi4, hi5, hi6, hi7);
  TRANSPOSE8x8_SH_SH(hi0, hi1, hi2, hi3, hi4, hi5, hi6, hi7, hi0, hi1, hi2, hi3,
                     hi4, hi5, hi6, hi7);

  /* In-place post-processing, two vectors per macro invocation. */
  FDCT_POSTPROC_2V_NEG_H(lo0, lo1);
  FDCT_POSTPROC_2V_NEG_H(lo2, lo3);
  FDCT_POSTPROC_2V_NEG_H(lo4, lo5);
  FDCT_POSTPROC_2V_NEG_H(lo6, lo7);
  FDCT_POSTPROC_2V_NEG_H(hi0, hi1);
  FDCT_POSTPROC_2V_NEG_H(hi2, hi3);
  FDCT_POSTPROC_2V_NEG_H(hi4, hi5);
  FDCT_POSTPROC_2V_NEG_H(hi6, hi7);

  /* 16-input butterfly: results land in ev0..ev7 and back in hi0..hi7. */
  BUTTERFLY_16(lo0, lo1, lo2, lo3, lo4, lo5, lo6, lo7, hi0, hi1, hi2, hi3, hi4,
               hi5, hi6, hi7, ev0, ev1, ev2, ev3, ev4, ev5, ev6, ev7, hi0, hi1,
               hi2, hi3, hi4, hi5, hi6, hi7);

  /*
   * Park hi0..hi7 in the intermediate buffer while the even half is computed,
   * then reload them for the odd half. The store overwrites `intermediate`.
   */
  ST_SH8(hi0, hi1, hi2, hi3, hi4, hi5, hi6, hi7, intermediate, 16);
  FDCT8x16_EVEN(ev0, ev1, ev2, ev3, ev4, ev5, ev6, ev7, ev0, ev1, ev2, ev3, ev4,
                ev5, ev6, ev7);
  LD_SH8(intermediate, 16, hi0, hi1, hi2, hi3, hi4, hi5, hi6, hi7);
  FDCT8x16_ODD(hi0, hi1, hi2, hi3, hi4, hi5, hi6, hi7, lo0, lo1, lo2, lo3, lo4,
               lo5, lo6, lo7);

  /* Interleave even/odd results, transpose, and store the first tile. */
  TRANSPOSE8x8_SH_SH(ev0, lo0, ev1, lo1, ev2, lo2, ev3, lo3, ev0, lo0, ev1, lo1,
                     ev2, lo2, ev3, lo3);
  ST_SH8(ev0, lo0, ev1, lo1, ev2, lo2, ev3, lo3, output, 16);

  /* Same for the remaining results, stored eight elements further on. */
  TRANSPOSE8x8_SH_SH(ev4, lo4, ev5, lo5, ev6, lo6, ev7, lo7, ev4, lo4, ev5, lo5,
                     ev6, lo6, ev7, lo7);
  ST_SH8(ev4, lo4, ev5, lo5, ev6, lo6, ev7, lo7, dst_hi, 16);
}