// Converts one two-row block of interleaved BGR data to planar YUV 4:2:0.
//
// Input:  two loads from the row at bgr0 and two from the row at bgr0 + bgrStride,
//         at __m256i offsets 0 and 3 (LoadBgr presumably consumes three consecutive
//         vectors per call - confirm against LoadBgr).
// Output: two __m256i vectors of luma to y0, two to y0 + yStride, and one __m256i
//         vector each to u and v.
//
// align is forwarded unchanged to LoadBgr and Store (presumably selecting aligned
// versus unaligned memory access - confirm against those helpers).
// bgrStride and yStride are byte offsets: they are added to uint8_t pointers.
template <bool align> SIMD_INLINE void BgrToYuv420p(const uint8_t * bgr0, size_t bgrStride, uint8_t * y0, size_t yStride, uint8_t * u, uint8_t * v)
{
    // Vector views of the two source rows and the two luma rows.
    __m256i * const srcTop = (__m256i*)bgr0;
    __m256i * const srcBottom = (__m256i*)(bgr0 + bgrStride);
    __m256i * const lumaTop = (__m256i*)y0;
    __m256i * const lumaBottom = (__m256i*)(y0 + yStride);

    // Channel registers, named <channel><Row><Half>.
    __m256i blueTopLo, greenTopLo, redTopLo;
    __m256i blueTopHi, greenTopHi, redTopHi;
    __m256i blueBotLo, greenBotLo, redBotLo;
    __m256i blueBotHi, greenBotHi, redBotHi;

    // Luma is produced at full resolution: one output vector per load.
    LoadBgr<align>(srcTop + 0, blueTopLo, greenTopLo, redTopLo);
    Store<align>(lumaTop + 0, BgrToY8(blueTopLo, greenTopLo, redTopLo));

    LoadBgr<align>(srcTop + 3, blueTopHi, greenTopHi, redTopHi);
    Store<align>(lumaTop + 1, BgrToY8(blueTopHi, greenTopHi, redTopHi));

    LoadBgr<align>(srcBottom + 0, blueBotLo, greenBotLo, redBotLo);
    Store<align>(lumaBottom + 0, BgrToY8(blueBotLo, greenBotLo, redBotLo));

    LoadBgr<align>(srcBottom + 3, blueBotHi, greenBotHi, redBotHi);
    Store<align>(lumaBottom + 1, BgrToY8(blueBotHi, greenBotHi, redBotHi));

    // Chroma input: Average16 combines the top-row and bottom-row vectors of each
    // channel. NOTE(review): presumably it also folds horizontally adjacent pixels
    // into 16-bit lanes, given the later PackU16ToU8 - confirm against Average16.
    __m256i blueLo = Average16(blueTopLo, blueBotLo);
    __m256i blueHi = Average16(blueTopHi, blueBotHi);
    __m256i greenLo = Average16(greenTopLo, greenBotLo);
    __m256i greenHi = Average16(greenTopHi, greenBotHi);
    __m256i redLo = Average16(redTopLo, redBotLo);
    __m256i redHi = Average16(redTopHi, redBotHi);

    // Both halves are converted, packed into a single vector and stored per plane.
    __m256i uLo = BgrToU16(blueLo, greenLo, redLo);
    __m256i uHi = BgrToU16(blueHi, greenHi, redHi);
    Store<align>((__m256i*)u, PackU16ToU8(uLo, uHi));

    __m256i vLo = BgrToV16(blueLo, greenLo, redLo);
    __m256i vHi = BgrToV16(blueHi, greenHi, redHi);
    Store<align>((__m256i*)v, PackU16ToU8(vLo, vHi));
}
// Converts one block of interleaved BGR data to planar YUV 4:4:4.
//
// Performs a single LoadBgr from bgr and writes one __m256i vector to each of
// y, u and v, so all three planes are produced at the same resolution.
//
// align is forwarded unchanged to LoadBgr and Store (presumably selecting aligned
// versus unaligned memory access - confirm against those helpers).
template <bool align> SIMD_INLINE void BgrToYuv444p(const uint8_t * bgr, uint8_t * y, uint8_t * u, uint8_t * v)
{
    __m256i b, g, r;
    LoadBgr<align>((__m256i*)bgr, b, g, r);

    // Each plane is derived from the same three channel registers; stores are
    // issued in y, u, v order.
    __m256i luma = BgrToY8(b, g, r);
    Store<align>((__m256i*)y, luma);

    __m256i chromaU = BgrToU8(b, g, r);
    Store<align>((__m256i*)u, chromaU);

    __m256i chromaV = BgrToV8(b, g, r);
    Store<align>((__m256i*)v, chromaV);
}
// Converts one block of interleaved BGR data to planar YUV 4:2:2.
//
// Input:  two LoadBgr calls on bgr, at __m128i offsets 0 and 3 (LoadBgr presumably
//         consumes three consecutive vectors per call - confirm against LoadBgr).
// Output: two __m128i vectors of luma to y, and one __m128i vector each to u and v.
//
// align is forwarded unchanged to LoadBgr and Store (presumably selecting aligned
// versus unaligned memory access - confirm against those helpers).
template <bool align> SIMD_INLINE void BgrToYuv422p(const uint8_t * bgr, uint8_t * y, uint8_t * u, uint8_t * v)
{
    __m128i blue[2], green[2], red[2];

    // Luma is produced at full resolution: one output vector per load.
    LoadBgr<align>((__m128i*)bgr + 0, blue[0], green[0], red[0]);
    Store<align>((__m128i*)y + 0, BgrToY8(blue[0], green[0], red[0]));

    LoadBgr<align>((__m128i*)bgr + 3, blue[1], green[1], red[1]);
    Store<align>((__m128i*)y + 1, BgrToY8(blue[1], green[1], red[1]));

    // The averaged value must be written back: Average16 is assumed to return its
    // result, not modify its argument. Without the assignment the chroma
    // conversion below would run on the raw 8-bit channel data instead of the
    // averaged 16-bit lanes that BgrToU16/BgrToV16 expect.
    blue[0] = Average16(blue[0]);
    blue[1] = Average16(blue[1]);
    green[0] = Average16(green[0]);
    green[1] = Average16(green[1]);
    red[0] = Average16(red[0]);
    red[1] = Average16(red[1]);

    // Both halves are converted to 16-bit chroma, then narrowed with unsigned
    // saturation into a single vector per plane.
    Store<align>((__m128i*)u, _mm_packus_epi16(BgrToU16(blue[0], green[0], red[0]), BgrToU16(blue[1], green[1], red[1])));
    Store<align>((__m128i*)v, _mm_packus_epi16(BgrToV16(blue[0], green[0], red[0]), BgrToV16(blue[1], green[1], red[1])));
}