void vpx_fdct16x16_msa(const int16_t *input, int16_t *output, int32_t src_stride) { int32_t i; DECLARE_ALIGNED(32, int16_t, tmp_buf[16 * 16]); /* column transform */ for (i = 0; i < 2; ++i) { fdct8x16_1d_column((input + 8 * i), (&tmp_buf[0] + 8 * i), src_stride); } /* row transform */ for (i = 0; i < 2; ++i) { fdct16x8_1d_row((&tmp_buf[0] + (128 * i)), (output + (128 * i))); } }
void av1_fht16x16_msa(const int16_t *input, int16_t *output, int32_t stride, int32_t tx_type) { DECLARE_ALIGNED(32, int16_t, tmp[256]); DECLARE_ALIGNED(32, int16_t, trans_buf[256]); DECLARE_ALIGNED(32, int16_t, tmp_buf[128]); int32_t i; int16_t *ptmpbuf = &tmp_buf[0]; int16_t *trans = &trans_buf[0]; const int32_t const_arr[29 * 4] = { 52707308, 52707308, 52707308, 52707308, -1072430300, -1072430300, -1072430300, -1072430300, 795618043, 795618043, 795618043, 795618043, -721080468, -721080468, -721080468, -721080468, 459094491, 459094491, 459094491, 459094491, -970646691, -970646691, -970646691, -970646691, 1010963856, 1010963856, 1010963856, 1010963856, -361743294, -361743294, -361743294, -361743294, 209469125, 209469125, 209469125, 209469125, -1053094788, -1053094788, -1053094788, -1053094788, 1053160324, 1053160324, 1053160324, 1053160324, 639644520, 639644520, 639644520, 639644520, -862444000, -862444000, -862444000, -862444000, 1062144356, 1062144356, 1062144356, 1062144356, -157532337, -157532337, -157532337, -157532337, 260914709, 260914709, 260914709, 260914709, -1041559667, -1041559667, -1041559667, -1041559667, 920985831, 920985831, 920985831, 920985831, -551995675, -551995675, -551995675, -551995675, 596522295, 596522295, 596522295, 596522295, 892853362, 892853362, 892853362, 892853362, -892787826, -892787826, -892787826, -892787826, 410925857, 410925857, 410925857, 410925857, -992012162, -992012162, -992012162, -992012162, 992077698, 992077698, 992077698, 992077698, 759246145, 759246145, 759246145, 759246145, -759180609, -759180609, -759180609, -759180609, -759222975, -759222975, -759222975, -759222975, 759288511, 759288511, 759288511, 759288511 }; switch (tx_type) { case DCT_DCT: /* column transform */ for (i = 0; i < 2; ++i) { fdct8x16_1d_column(input + 8 * i, tmp + 8 * i, stride); } /* row transform */ for (i = 0; i < 2; ++i) { fdct16x8_1d_row(tmp + (128 * i), output + (128 * i)); } break; case ADST_DCT: /* column transform */ for (i = 0; i < 2; ++i) { fadst16_cols_step1_msa(input + (i << 3), stride, const_arr, ptmpbuf); fadst16_cols_step2_msa(ptmpbuf, const_arr, tmp + (i << 3)); } /* row transform */ for (i = 0; i < 2; ++i) { postproc_fdct16x8_1d_row(tmp + (128 * i), output + (128 * i)); } break; case DCT_ADST: /* column transform */ for (i = 0; i < 2; ++i) { fdct8x16_1d_column(input + 8 * i, tmp + 8 * i, stride); } fadst16_transpose_postproc_msa(tmp, trans); /* row transform */ for (i = 0; i < 2; ++i) { fadst16_rows_step1_msa(trans + (i << 7), const_arr, ptmpbuf); fadst16_rows_step2_msa(ptmpbuf, const_arr, tmp + (i << 7)); } fadst16_transpose_msa(tmp, output); break; case ADST_ADST: /* column transform */ for (i = 0; i < 2; ++i) { fadst16_cols_step1_msa(input + (i << 3), stride, const_arr, ptmpbuf); fadst16_cols_step2_msa(ptmpbuf, const_arr, tmp + (i << 3)); } fadst16_transpose_postproc_msa(tmp, trans); /* row transform */ for (i = 0; i < 2; ++i) { fadst16_rows_step1_msa(trans + (i << 7), const_arr, ptmpbuf); fadst16_rows_step2_msa(ptmpbuf, const_arr, tmp + (i << 7)); } fadst16_transpose_msa(tmp, output); break; default: assert(0); break; } }