コード例 #1
0
/*
 * Reads eight 8-byte rows from `src` (consecutive rows are `src_stride`
 * bytes apart), passes them through the TRANSPOSE_8X8 macro (defined
 * elsewhere), and writes the eight resulting 8-byte rows to `dst`
 * (consecutive rows are `dst_stride` bytes apart).
 *
 * Only the low 64 bits of each register are loaded and stored, so exactly
 * 8 bytes per row are touched on both sides.
 */
static void transpose8x8_to_dst(const uint8_t *src, ptrdiff_t src_stride,
                                uint8_t *dst, ptrdiff_t dst_stride) {
  const uint8_t *in = src;
  uint8_t *out = dst;

  /* Gather the eight source rows, stepping one stride at a time. */
  __m128i rowA = _mm_loadl_epi64((const __m128i *)in);
  in += src_stride;
  __m128i rowB = _mm_loadl_epi64((const __m128i *)in);
  in += src_stride;
  __m128i rowC = _mm_loadl_epi64((const __m128i *)in);
  in += src_stride;
  __m128i rowD = _mm_loadl_epi64((const __m128i *)in);
  in += src_stride;
  __m128i rowE = _mm_loadl_epi64((const __m128i *)in);
  in += src_stride;
  __m128i rowF = _mm_loadl_epi64((const __m128i *)in);
  in += src_stride;
  __m128i rowG = _mm_loadl_epi64((const __m128i *)in);
  in += src_stride;
  __m128i rowH = _mm_loadl_epi64((const __m128i *)in);

  /* The macro is used in place: the same registers are inputs and outputs. */
  TRANSPOSE_8X8(rowA, rowB, rowC, rowD, rowE, rowF, rowG, rowH,
                rowA, rowB, rowC, rowD, rowE, rowF, rowG, rowH);

  /* Scatter the eight result rows to the destination. */
  _mm_storel_epi64((__m128i *)out, rowA);
  out += dst_stride;
  _mm_storel_epi64((__m128i *)out, rowB);
  out += dst_stride;
  _mm_storel_epi64((__m128i *)out, rowC);
  out += dst_stride;
  _mm_storel_epi64((__m128i *)out, rowD);
  out += dst_stride;
  _mm_storel_epi64((__m128i *)out, rowE);
  out += dst_stride;
  _mm_storel_epi64((__m128i *)out, rowF);
  out += dst_stride;
  _mm_storel_epi64((__m128i *)out, rowG);
  out += dst_stride;
  _mm_storel_epi64((__m128i *)out, rowH);
}
コード例 #2
0
/*
 * Variant of __mlib_VideoDCT8x8Quantize_S16_U8 that accepts arguments which
 * are not all 8-byte aligned (NOTE(review): "_NA" presumably stands for
 * "not aligned" -- confirm against the library documentation).
 *
 * Parameters:
 *   coeffs - destination array of 64 mlib_s16 values; must not be NULL,
 *            may sit at any byte alignment.
 *   block  - source 8-bit data; must not be NULL.
 *   qtable - quantization table; this function passes entries 0..15 to
 *            Quant_ST_NA (what that macro does with them is defined
 *            elsewhere). Not checked for NULL here.
 *   stride - must be > 0; presumably the byte distance between rows of
 *            `block`, consumed by LOAD_DATA_GE_INTRA -- confirm.
 *
 * Returns:
 *   MLIB_FAILURE if block or coeffs is NULL or stride <= 0;
 *   otherwise MLIB_SUCCESS (or whatever the aligned variant returns when the
 *   call is forwarded to it).
 *
 * Most locals below are never named in this function's own statements; they
 * are presumably read and written by the macros (LOADCONSTS5,
 * LOAD_DATA_GE_INTRA, PREPARE_DATA_*, COMPUTING_DATA, ENDSCALE, Quant_ST_NA),
 * so their names and the statement order must not be changed without
 * checking the macro definitions.
 */
mlib_status
__mlib_VideoDCT8x8Quantize_S16_U8_NA(
    mlib_s16 coeffs[64],
    const mlib_u8 *block,
    const mlib_d64 qtable[64],
    mlib_s32 stride)
{
    /* Non-const alias of block; presumably used by the LOAD_DATA macro. */
    mlib_u8 *sp = (void *)block;
    mlib_d64 d00, d10, d20, d30, d40, d50, d60, d70;
    mlib_d64 d01, d11, d21, d31, d41, d51, d61, d71;
    mlib_d64 t00, t10, t20, t30, t40, t50, t60, t70, t80, t90;
    mlib_d64 t01, t11, t21, t31, t41, t51, t61, t71, t81, t91;
    mlib_d64 r00, r10, r20, r30, r40, r50, r60, r70;
    mlib_d64 r01, r11, r21, r31, r41, r51, r61, r71;
    /* 8-byte-granular store pointer derived from coeffs further below. */
    mlib_d64 *dp;
    /* Not named again in this function; presumably used inside a macro. */
    mlib_d64 w_const = vis_to_double_dup(0x4000);

    /* Byte mask for the partial stores at the two edges of the output. */
    mlib_s32 mask;
    mlib_f32 FCOS, FONE, c17, c26, c35;

    /* Reject NULL data pointers and non-positive strides. */
    if (block == NULL || coeffs == NULL || stride <= 0)
        return (MLIB_FAILURE);

    /*
     * If the low three bits of both addresses and of the stride are all
     * zero, everything is 8-byte aligned: forward to the aligned variant.
     */
    if (!(((mlib_addr)block | (mlib_addr)coeffs | stride) & 7)) {
        return (__mlib_VideoDCT8x8Quantize_S16_U8(coeffs,
                block, qtable, stride));
    }

    /*
     * Writes the value 8 to the GSR.
     * NOTE(review): presumably this selects a scale factor of 1 for the
     * pack instructions used inside the macros -- confirm against the VIS
     * manual.
     */
    vis_write_gsr(1 << 3);
    /*
     * first stage
     */

    LOADCONSTS5;
    LOAD_DATA_GE_INTRA;

    TRANSPOSE_8X8(d00, d10, d20, d30, d40, d50, d60, d70,
                  d01, d11, d21, d31, d41, d51, d61, d71);

    PREPARE_DATA_INTRA(hi, 0);
    COMPUTING_DATA(0);

    PREPARE_DATA_INTRA(lo, 1);
    TRANSPOSE(d40, d50, d60, d70, r01, r11, r21, r31);
    TRANSPOSE(d00, d10, d20, d30, r00, r10, r20, r30);
    COMPUTING_DATA(1);

    TRANSPOSE(d01, d11, d21, d31, r40, r50, r60, r70);

    /*
     * second stage
     */

    PREPARE_DATA_INTER(0);
    COMPUTING_DATA(0);

    ENDSCALE(0);
    TRANSPOSE(d41, d51, d61, d71, r41, r51, r61, r71);

    /*
     * Set up the unaligned store of the result.
     * NOTE(review): vis_alignaddr(coeffs, -1) presumably returns the 8-byte
     * aligned address at or below (coeffs - 1), so dp lies strictly before
     * coeffs (a full 8 bytes before it when coeffs is already aligned) --
     * confirm against the VIS manual.
     * mask is 0xFF shifted right by the byte distance from dp to coeffs;
     * it is used as the byte-enable mask of the leading partial store and
     * its complement for the trailing one.
     * vis_alignaddrl presumably programs the GSR alignment used by the
     * vis_faligndata calls below -- confirm.
     */
    dp = (mlib_d64 *)vis_alignaddr(coeffs, -1);
    mask = 0xFF >> ((mlib_addr)coeffs - (mlib_addr)dp);
    vis_alignaddrl((void *)coeffs, 0);

    PREPARE_DATA_INTER(1);
    COMPUTING_DATA(1);

    ENDSCALE(1);

    /*
     * Quantize each of the 16 result registers in place; register k (in
     * the order d00, d01, d10, d11, ...) is paired with qtable[k].
     */
    Quant_ST_NA(d00, d00, qtable[0]);
    Quant_ST_NA(d01, d01, qtable[1]);
    Quant_ST_NA(d10, d10, qtable[2]);
    Quant_ST_NA(d11, d11, qtable[3]);
    Quant_ST_NA(d20, d20, qtable[4]);
    Quant_ST_NA(d21, d21, qtable[5]);
    Quant_ST_NA(d30, d30, qtable[6]);
    Quant_ST_NA(d31, d31, qtable[7]);
    Quant_ST_NA(d40, d40, qtable[8]);
    Quant_ST_NA(d41, d41, qtable[9]);
    Quant_ST_NA(d50, d50, qtable[10]);
    Quant_ST_NA(d51, d51, qtable[11]);
    Quant_ST_NA(d60, d60, qtable[12]);
    Quant_ST_NA(d61, d61, qtable[13]);
    Quant_ST_NA(d70, d70, qtable[14]);
    Quant_ST_NA(d71, d71, qtable[15]);

    /*
     * Interior of the output: each aligned slot dp[1]..dp[15] is written in
     * full with data combined from two neighbouring result registers.
     */
    dp[1] = vis_faligndata(d00, d01);
    dp[2] = vis_faligndata(d01, d10);
    dp[3] = vis_faligndata(d10, d11);
    dp[4] = vis_faligndata(d11, d20);
    dp[5] = vis_faligndata(d20, d21);
    dp[6] = vis_faligndata(d21, d30);
    dp[7] = vis_faligndata(d30, d31);
    dp[8] = vis_faligndata(d31, d40);
    dp[9] = vis_faligndata(d40, d41);
    dp[10] = vis_faligndata(d41, d50);
    dp[11] = vis_faligndata(d50, d51);
    dp[12] = vis_faligndata(d51, d60);
    dp[13] = vis_faligndata(d60, d61);
    dp[14] = vis_faligndata(d61, d70);
    dp[15] = vis_faligndata(d70, d71);
    /* Trailing edge: partial store into dp[16] under the inverted mask. */
    vis_pst_8(vis_faligndata(d71, d71), dp + 16, ~mask);
    /*
     * Leading edge: only when coeffs itself is misaligned is the slot at dp
     * partially written; otherwise dp is not stored to at all.
     */
    if ((mlib_addr)coeffs & 7)
        vis_pst_8(vis_faligndata(d00, d00), dp, mask);
    return (MLIB_SUCCESS);
}
コード例 #3
0
/*
 * 8x8 DCT-plus-quantize routine for 8-bit input, writing 16-bit
 * coefficients (per the function name and signature; the arithmetic itself
 * lives in macros defined elsewhere).
 *
 * Parameters:
 *   coeffs - destination array of 64 mlib_s16 values. It is reinterpreted
 *            as an mlib_d64 pointer, so it is presumably required to be
 *            8-byte aligned -- confirm against the Quant_ST definition.
 *   block  - source 8-bit data.
 *   qtable - quantization table; this function passes entries 0..15 to
 *            Quant_ST.
 *   stride - not named directly in this function; presumably consumed by
 *            LOAD_DATA_AA_INTRA as the row stride of `block` -- confirm.
 *
 * Returns:
 *   Always MLIB_SUCCESS. No argument validation is performed here.
 *
 * Most locals below are never named in this function's own statements; they
 * are presumably read and written by the macros, so their names and the
 * statement order must not be changed without checking the macro
 * definitions.
 */
mlib_status
__mlib_VideoDCT8x8Quantize_S16_U8(
    mlib_s16 coeffs[64],
    const mlib_u8 *block,
    const mlib_d64 qtable[64],
    mlib_s32 stride)
{
    /* Non-const alias of block; presumably used by the LOAD_DATA macro. */
    mlib_u8 *sp = (void *)block;
    mlib_d64 d00, d10, d20, d30, d40, d50, d60, d70;
    mlib_d64 d01, d11, d21, d31, d41, d51, d61, d71;
    mlib_d64 t00, t10, t20, t30, t40, t50, t60, t70, t80, t90;
    mlib_d64 t01, t11, t21, t31, t41, t51, t61, t71, t81, t91;
    mlib_d64 r00, r10, r20, r30, r40, r50, r60, r70;
    mlib_d64 r01, r11, r21, r31, r41, r51, r61, r71;
    /* Output viewed as 8-byte words; presumably the target of Quant_ST. */
    mlib_d64 *dp = (mlib_d64 *)coeffs;
    mlib_f32 FCOS, FONE, c17, c26, c35;

    /*
     * Writes the value 8 to the GSR.
     * NOTE(review): presumably this selects a scale factor of 1 for the
     * pack instructions used inside the macros -- confirm against the VIS
     * manual.
     */
    vis_write_gsr(1 << 3);
    /*
     * first stage
     */

    /*
     * LOAD_DATA_AA_INTRA and LOADCONSTS5 are invoked without a trailing
     * semicolon here, so their expansions must be complete statements.
     */
    LOAD_DATA_AA_INTRA TRANSPOSE_8X8(
        d00,
        d10,
        d20,
        d30,
        d40,
        d50,
        d60,
        d70,
        d01,
        d11,
        d21,
        d31,
        d41,
        d51,
        d61,
        d71);
    LOADCONSTS5 PREPARE_DATA_INTRA(
        hi,
        0);

    COMPUTING_DATA(0);

    PREPARE_DATA_INTRA(lo, 1);
    TRANSPOSE(d40, d50, d60, d70, r01, r11, r21, r31);
    TRANSPOSE(d00, d10, d20, d30, r00, r10, r20, r30);
    COMPUTING_DATA(1);

    TRANSPOSE(d01, d11, d21, d31, r40, r50, r60, r70);

    /*
     * second stage
     */
    PREPARE_DATA_INTER(0);
    COMPUTING_DATA(0);

    TRANSPOSE(d41, d51, d61, d71, r41, r51, r61, r71);

    ENDSCALE(0);

    /*
     * Quantize and store the first half of the results (d00..d70). The
     * first argument takes the even values 0..14 and matches the qtable
     * index; presumably it is also the 8-byte slot index into dp --
     * confirm against the Quant_ST definition.
     */
    Quant_ST(0, d00, qtable[0]);
    Quant_ST(2, d10, qtable[2]);
    Quant_ST(4, d20, qtable[4]);
    Quant_ST(6, d30, qtable[6]);
    Quant_ST(8, d40, qtable[8]);
    Quant_ST(10, d50, qtable[10]);
    Quant_ST(12, d60, qtable[12]);
    Quant_ST(14, d70, qtable[14]);

    PREPARE_DATA_INTER(1);
    COMPUTING_DATA(1);

    ENDSCALE(1);

    /*
     * Quantize and store the second half of the results (d01..d71) with
     * the odd indices 1..15, interleaving with the even ones above.
     */
    Quant_ST(1, d01, qtable[1]);
    Quant_ST(3, d11, qtable[3]);
    Quant_ST(5, d21, qtable[5]);
    Quant_ST(7, d31, qtable[7]);
    Quant_ST(9, d41, qtable[9]);
    Quant_ST(11, d51, qtable[11]);
    Quant_ST(13, d61, qtable[13]);
    Quant_ST(15, d71, qtable[15]);


    return (MLIB_SUCCESS);
}