예제 #1
0
void ff_vp3_idct_add_altivec(uint8_t *dst, int stride, DCTELEM block[64])
{
    LOAD_ZERO;
    vec_u8 t, vdst;
    vec_s16 vdst_16;
    vec_u8 vdst_mask = vec_mergeh(vec_splat_u8(-1), vec_lvsl(0, dst));

    IDCT_START

    IDCT_1D(NOP, NOP)
    TRANSPOSE8(b0, b1, b2, b3, b4, b5, b6, b7);
    IDCT_1D(ADD8, SHIFT4)

#define ADD(a)\
    vdst = vec_ld(0, dst);\
    vdst_16 = (vec_s16)vec_perm(vdst, zero_u8v, vdst_mask);\
    vdst_16 = vec_adds(a, vdst_16);\
    t = vec_packsu(vdst_16, vdst_16);\
    vec_ste((vec_u32)t, 0, (unsigned int *)dst);\
    vec_ste((vec_u32)t, 4, (unsigned int *)dst);

    ADD(b0)     dst += stride;
    ADD(b1)     dst += stride;
    ADD(b2)     dst += stride;
    ADD(b3)     dst += stride;
    ADD(b4)     dst += stride;
    ADD(b5)     dst += stride;
    ADD(b6)     dst += stride;
    ADD(b7)
}
예제 #2
0
void ff_vp3_idct_put_altivec(uint8_t *dst, int stride, DCTELEM block[64])
{
    vec_u8 t;
    IDCT_START

    // pixels are signed; so add 128*16 in addition to the normal 8
    vec_s16 v2048 = vec_sl(vec_splat_s16(1), vec_splat_u16(11));
    eight = vec_add(eight, v2048);

    IDCT_1D(NOP, NOP)
    TRANSPOSE8(b0, b1, b2, b3, b4, b5, b6, b7);
    IDCT_1D(ADD8, SHIFT4)

#define PUT(a)\
    t = vec_packsu(a, a);\
    vec_ste((vec_u32)t, 0, (unsigned int *)dst);\
    vec_ste((vec_u32)t, 4, (unsigned int *)dst);

    PUT(b0)     dst += stride;
    PUT(b1)     dst += stride;
    PUT(b2)     dst += stride;
    PUT(b3)     dst += stride;
    PUT(b4)     dst += stride;
    PUT(b5)     dst += stride;
    PUT(b6)     dst += stride;
    PUT(b7)
}
예제 #3
0
void ff_vp3_idct_altivec(DCTELEM block[64])
{
    IDCT_START

    IDCT_1D(NOP, NOP)
    TRANSPOSE8(b0, b1, b2, b3, b4, b5, b6, b7);
    IDCT_1D(ADD8, SHIFT4)

    vec_st(b0, 0x00, block);
    vec_st(b1, 0x10, block);
    vec_st(b2, 0x20, block);
    vec_st(b3, 0x30, block);
    vec_st(b4, 0x40, block);
    vec_st(b5, 0x50, block);
    vec_st(b6, 0x60, block);
    vec_st(b7, 0x70, block);
}
예제 #4
0
static void vp3_idct_add_altivec(uint8_t *dst, int stride, int16_t block[64])
{
    LOAD_ZERO;
    vec_u8 t, vdst;
    vec_s16 vdst_16;
    vec_u8 vdst_mask = vec_mergeh(vec_splat_u8(-1), vec_lvsl(0, dst));

    IDCT_START

    IDCT_1D(NOP, NOP)
    TRANSPOSE8(b0, b1, b2, b3, b4, b5, b6, b7);
    IDCT_1D(ADD8, SHIFT4)

#if HAVE_BIGENDIAN
#define GET_VDST16\
    vdst = vec_ld(0, dst);\
    vdst_16 = (vec_s16)vec_perm(vdst, zero_u8v, vdst_mask);
#else
#define GET_VDST16\
    vdst = vec_vsx_ld(0,dst);\
    vdst_16 = (vec_s16)vec_mergeh(vdst, zero_u8v);
#endif

#define ADD(a)\
    GET_VDST16;\
    vdst_16 = vec_adds(a, vdst_16);\
    t = vec_packsu(vdst_16, vdst_16);\
    vec_ste((vec_u32)t, 0, (unsigned int *)dst);\
    vec_ste((vec_u32)t, 4, (unsigned int *)dst);

    ADD(b0)     dst += stride;
    ADD(b1)     dst += stride;
    ADD(b2)     dst += stride;
    ADD(b3)     dst += stride;
    ADD(b4)     dst += stride;
    ADD(b5)     dst += stride;
    ADD(b6)     dst += stride;
    ADD(b7)
    memset(block, 0, sizeof(*block) * 64);
}