/*
 * DC-only forward transform for a 16x16 block.
 *
 * Accumulates four LD_HADD() results (one per 8-column by 8-row tile of the
 * block), halves the total with an arithmetic shift and stores it, narrowed
 * to int16_t, in out[0]. No other element of `out` is written.
 *
 * input  - pointer to the top-left sample of the block.
 * out    - destination; only out[0] is written.
 * stride - distance between vertically adjacent samples, in int16_t elements.
 *
 * NOTE(review): LD_HADD is defined outside this chunk; this code assumes it
 * returns the sum of an 8x8 tile read with row pitch `stride` -- confirm.
 */
void vpx_fdct16x16_1_msa(const int16_t *input, int16_t *out, int32_t stride) {
  /*
   * The lower pair of tiles starts eight rows below `input`. Derive that
   * offset from the caller's row pitch rather than a fixed 16 * 8, which is
   * only correct when stride == 16.
   */
  const int16_t *const top_left = input;
  const int16_t *const top_right = input + 8;
  const int16_t *const bottom_left = input + 8 * stride;
  const int16_t *const bottom_right = bottom_left + 8;
  int sum;

  sum = LD_HADD(top_left, stride);
  sum += LD_HADD(top_right, stride);
  sum += LD_HADD(bottom_left, stride);
  sum += LD_HADD(bottom_right, stride);

  out[0] = (int16_t)(sum >> 1);
}
/*
 * DC-only forward transform for a 32x32 block.
 *
 * Accumulates sixteen LD_HADD() results (a 4x4 grid of 8-column by 8-row
 * tiles), shifts the total right by 3 and stores it, narrowed to int16_t, in
 * out[0]. No other element of `out` is written.
 *
 * input  - pointer to the top-left sample of the block.
 * out    - destination; only out[0] is written.
 * stride - distance between vertically adjacent samples, in int16_t elements.
 *
 * NOTE(review): LD_HADD is defined outside this chunk; this code assumes it
 * returns the sum of an 8x8 tile read with row pitch `stride` -- confirm.
 */
void vpx_fdct32x32_1_msa(const int16_t *input, int16_t *out, int32_t stride) {
  int sum = 0;
  int band;
  int col;

  for (band = 0; band < 4; ++band) {
    /*
     * Each band is eight rows tall, so it begins 8 * band rows down. Use the
     * caller's row pitch instead of a fixed 32, which is only correct when
     * stride == 32.
     */
    const int16_t *const band_start = input + 8 * band * stride;

    for (col = 0; col < 32; col += 8) {
      const int16_t *const tile = band_start + col;

      sum += LD_HADD(tile, stride);
    }
  }

  out[0] = (int16_t)(sum >> 3);
}
/*
 * DC-only forward transform for an 8x8 block.
 *
 * Stores the result of a single LD_HADD(input, stride) call in out[0] and
 * clears out[1].
 *
 * input  - pointer to the top-left sample of the block.
 * out    - destination; out[0] and out[1] are written.
 * stride - passed through unchanged to LD_HADD.
 *
 * NOTE(review): LD_HADD is defined outside this chunk; presumably it returns
 * the sum of the 8x8 tile at `input` -- confirm.
 */
void vp9_fdct8x8_1_msa(const int16_t *input, int16_t *out, int32_t stride) {
  /* Evaluate the sum before touching `out`, as the original statement did. */
  const int dc = LD_HADD(input, stride);

  out[0] = (int16_t)dc;
  out[1] = 0;
}