/*
 * 16x16 inverse hybrid transform, SSE2 path.
 *
 * The block is processed as two halves: one loaded from input and one
 * loaded from input + 8.  Two 1-D passes, chosen by tx_type, are applied to
 * both halves in place, and the halves are then passed to
 * write_buffer_8x16() for dest and dest + 8 respectively.
 *
 *   input   - coefficient buffer read by load_buffer_8x16()
 *   dest    - output buffer handed to write_buffer_8x16()
 *   stride  - forwarded unchanged to write_buffer_8x16()
 *   tx_type - DCT_DCT, ADST_DCT, DCT_ADST or ADST_ADST
 *
 * NOTE(review): the "_add" suffix suggests the result is accumulated into
 * dest rather than overwriting it; confirm against write_buffer_8x16().
 */
void vp9_iht16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest,
                               int stride, int tx_type) {
  const tran_low_t *const right_src = input + 8;
  uint8_t *const right_dst = dest + 8;
  __m128i left[16];
  __m128i right[16];

  load_buffer_8x16(input, left);
  load_buffer_8x16(right_src, right);

  if (tx_type == DCT_DCT) {
    idct16_sse2(left, right);
    idct16_sse2(left, right);
  } else if (tx_type == ADST_DCT) {
    idct16_sse2(left, right);
    iadst16_sse2(left, right);
  } else if (tx_type == DCT_ADST) {
    iadst16_sse2(left, right);
    idct16_sse2(left, right);
  } else {
    /* Anything else is expected to be ADST_ADST.  When assertions are
     * compiled out, an unexpected value still takes this ADST/ADST path. */
    assert(tx_type == ADST_ADST);
    iadst16_sse2(left, right);
    iadst16_sse2(left, right);
  }

  write_buffer_8x16(dest, left, stride);
  write_buffer_8x16(right_dst, right, stride);
}
/*
 * 16x16 inverse hybrid transform, SSE2 path (int16_t coefficients).
 *
 * Loads the block as two halves (from input and from input + 8), applies
 * two 1-D passes selected by tx_type to both halves in place, then passes
 * the halves to write_buffer_8x16() for dest and dest + 8.
 *
 *   input   - coefficient buffer read by load_buffer_8x16()
 *   dest    - output buffer handed to write_buffer_8x16()
 *   stride  - forwarded unchanged to write_buffer_8x16()
 *   tx_type - 0..3, see the case labels below
 *
 * The case labels are annotated with block comments rather than line
 * comments: if the source lines are ever joined, a line comment would
 * comment out everything that follows it, including the transform calls
 * and the closing braces.
 */
void vp9_iht16x16_256_add_sse2(const int16_t *input, uint8_t *dest,
                               int stride, int tx_type) {
  __m128i in0[16], in1[16];

  load_buffer_8x16(input, in0);
  input += 8;
  load_buffer_8x16(input, in1);

  switch (tx_type) {
    case 0: /* DCT_DCT */
      idct16_sse2(in0, in1);
      idct16_sse2(in0, in1);
      break;
    case 1: /* ADST_DCT */
      idct16_sse2(in0, in1);
      iadst16_sse2(in0, in1);
      break;
    case 2: /* DCT_ADST */
      iadst16_sse2(in0, in1);
      idct16_sse2(in0, in1);
      break;
    case 3: /* ADST_ADST */
      iadst16_sse2(in0, in1);
      iadst16_sse2(in0, in1);
      break;
    default:
      /* Unknown transform type.  When assertions are compiled out, no
       * transform pass runs and the loaded values go straight to
       * write_buffer_8x16(). */
      assert(0);
      break;
  }

  write_buffer_8x16(dest, in0, stride);
  dest += 8;
  write_buffer_8x16(dest, in1, stride);
}