示例#1
0
        // Converts a two-row strip of interleaved BGR data into separate Y, U and V outputs.
        //
        // bgr0 / bgrStride : first source row and the byte distance to the second row.
        // y0 / yStride     : first luma output row and the byte distance to the second row.
        // u, v             : chroma outputs; one vector is written to each per call.
        // align            : forwarded unchanged to LoadBgr and Store.
        //
        // Per call each source row is read with two LoadBgr calls (at vector offsets 0 and 3)
        // and two vectors of luma are written per row. The channels of the two rows are then
        // merged with Average16 before the chroma vectors are computed.
        template <bool align> SIMD_INLINE void BgrToYuv420p(const uint8_t * bgr0, size_t bgrStride, uint8_t * y0, size_t yStride, uint8_t * u, uint8_t * v)
        {
            // Row 0 is the row passed in; row 1 lies one stride further on.
            __m256i * const bgrRows[2] = { (__m256i*)bgr0, (__m256i*)(bgr0 + bgrStride) };
            __m256i * const yRows[2] = { (__m256i*)y0, (__m256i*)(y0 + yStride) };

            // Indexed as [row][half].
            __m256i blue[2][2], green[2][2], red[2][2];

            for (size_t row = 0; row < 2; ++row)
            {
                for (size_t half = 0; half < 2; ++half)
                {
                    // The second half of a row starts three source vectors after the first
                    // and fills the next luma vector.
                    LoadBgr<align>(bgrRows[row] + 3 * half, blue[row][half], green[row][half], red[row][half]);
                    Store<align>(yRows[row] + half, BgrToY8(blue[row][half], green[row][half], red[row][half]));
                }
            }

            // Merge the two rows channel by channel; chroma is derived from the merged values.
            const __m256i blueAvg0 = Average16(blue[0][0], blue[1][0]);
            const __m256i blueAvg1 = Average16(blue[0][1], blue[1][1]);
            const __m256i greenAvg0 = Average16(green[0][0], green[1][0]);
            const __m256i greenAvg1 = Average16(green[0][1], green[1][1]);
            const __m256i redAvg0 = Average16(red[0][0], red[1][0]);
            const __m256i redAvg1 = Average16(red[0][1], red[1][1]);

            const __m256i uLo = BgrToU16(blueAvg0, greenAvg0, redAvg0);
            const __m256i uHi = BgrToU16(blueAvg1, greenAvg1, redAvg1);
            Store<align>((__m256i*)u, PackU16ToU8(uLo, uHi));

            const __m256i vLo = BgrToV16(blueAvg0, greenAvg0, redAvg0);
            const __m256i vHi = BgrToV16(blueAvg1, greenAvg1, redAvg1);
            Store<align>((__m256i*)v, PackU16ToU8(vLo, vHi));
        }
示例#2
0
 // Produces one output vector of the 5x5 main-row pass and writes it to dst.
 //
 // The two-argument MainRowX5x5 overload is evaluated at `offset` and at `offset + A`;
 // the two results are packed into a single vector with PackU16ToU8.
 // The store always uses Store<false>, regardless of the `align` template argument.
 template <bool align, bool compensation> SIMD_INLINE void MainRowX5x5(Buffer & buffer, size_t offset, uint8_t * dst)
 {
     const __m256i firstPart = MainRowX5x5<align, compensation>(buffer, offset);
     const __m256i secondPart = MainRowX5x5<align, compensation>(buffer, offset + A);

     __m256i * const out = (__m256i*)dst;
     Store<false>(out, PackU16ToU8(firstPart, secondPart));
 }
示例#3
0
 // Computes one vector of the 5x5 main-row pass from buffer.dst.
 //
 // The pointer-based MainRowX5x5 overload is applied at `buffer.dst + offset` and again
 // HA elements further on. The two results are packed with PackU16ToU8 and the packed
 // vector is masked with K16_00FF before being returned.
 // NOTE(review): K16_00FF presumably keeps the low byte of each 16-bit lane - confirm
 // against its definition.
 template <bool align, bool compensation> SIMD_INLINE __m256i MainRowX5x5(Buffer & buffer, size_t offset)
 {
     const __m256i head = MainRowX5x5<align, compensation>(buffer.dst + offset);
     const __m256i tail = MainRowX5x5<align, compensation>(buffer.dst + offset + HA);

     const __m256i packed = PackU16ToU8(head, tail);
     return _mm256_and_si256(packed, K16_00FF);
 }
示例#4
0
 // Interpolates two vectors' worth of data from the rows bx0 and bx1 and writes one
 // packed vector to dst.
 //
 // The vector-level InterpolateY overload is called on the first and on the second
 // __m256i of each row with the same `alpha` weights. The two results are packed
 // with PackU16ToU8. The store always uses Store<false>, independent of `align`.
 template<bool align> SIMD_INLINE void InterpolateY(const uint8_t * bx0, const uint8_t * bx1, __m256i alpha[2], uint8_t * dst)
 {
     __m256i * const row0 = (__m256i*)bx0;
     __m256i * const row1 = (__m256i*)bx1;

     const __m256i first = InterpolateY<align>(row0, row1, alpha);
     const __m256i second = InterpolateY<align>(row0 + 1, row1 + 1, alpha);

     Store<false>((__m256i*)dst, PackU16ToU8(first, second));
 }
 // Converts four vectors of BGRA data into one vector of gray values.
 //
 // Each input vector goes through BgraToGray32. The four results are narrowed pairwise
 // with PackI32ToI16, and the two halves are then narrowed with PackU16ToU8.
 SIMD_INLINE __m256i BgraToGray(__m256i bgra[4])
 {
     const __m256i gray0 = BgraToGray32(bgra[0]);
     const __m256i gray1 = BgraToGray32(bgra[1]);
     const __m256i gray2 = BgraToGray32(bgra[2]);
     const __m256i gray3 = BgraToGray32(bgra[3]);

     const __m256i lowerHalf = PackI32ToI16(gray0, gray1);
     const __m256i upperHalf = PackI32ToI16(gray2, gray3);
     return PackU16ToU8(lowerHalf, upperHalf);
 }
 // Builds one packed vector from two ReduceRow16 results.
 //
 // ReduceRow16 is evaluated at `offset` and at `offset + HA`; the two results are
 // combined with PackU16ToU8, the first one passed as the `lo` argument.
 template <bool align> SIMD_INLINE __m256i ReduceRow8(const Buffer & buffer, size_t offset)
 {
     return PackU16ToU8(
         ReduceRow16<align>(buffer, offset),
         ReduceRow16<align>(buffer, offset + HA));
 }