Example #1
0
/*
 * Add the DC coefficient of an H.264 residual block to a size x size
 * region of the destination picture, using VMX-style vector intrinsics.
 *
 * dst:    top-left pixel of the size x size destination region
 * block:  DCTELEM coefficients; only block[0] (the DC term) is read
 * stride: byte distance between destination rows
 * size:   4 or 8 — NOTE(review): the alignment/zeroing tricks below only
 *         make sense for these two values; confirm callers never pass others
 */
static av_always_inline void h264_idct_dc_add_internal(uint8_t *dst, DCTELEM *block, int stride, int size)
{
    vec_s16 dc16;
    vec_u8 dcplus, dcminus, v0, v1, v2, v3, aligner;
    LOAD_ZERO;                    /* project macro: brings zero vectors (zero_s16v, ...) into scope */
    DECLARE_ALIGNED(16, int, dc); /* 16-byte aligned so __lvewx can load it into a vector lane */
    int i;

    /* Rounded right shift by 6: the final scaling of the DC-only inverse
       transform path. */
    dc = (block[0] + 32) >> 6;
    /* Load the scalar into one vector element, then splat it across all
       eight 16-bit lanes. */
    dc16 = __vsplth(__lvewx(&dc, 0), 1);

    if (size == 4)
        /* 4x4 case: shift so only the low half keeps the DC value and the
           other lanes become zero — the packed byte vector will then only
           modify 4 pixels per row (zero lanes add/subtract nothing). */
        dc16 = __vsldoi(dc16, zero_s16v, 8);
    /* Split the signed DC into two non-negative byte vectors:
       dcplus = saturate(dc), dcminus = saturate(-dc).  Adding dcplus and
       then subtracting dcminus with saturating unsigned byte ops clamps
       the pixels to [0,255] without widening them to 16 bits. */
    dcplus = __vpkshus(dc16, zero_s16v);
    dcminus = __vpkshus(__vsubuhm(zero_s16v, dc16), zero_s16v);

    /* __lvx/__stvx operate on 16-byte-aligned quadwords; rotate the DC
       bytes so they line up with dst's offset within that quadword.
       Bytes outside the target region correspond to zero DC lanes and are
       stored back unchanged. */
    aligner = __lvsr(0, dst);
    dcplus = __perm(dcplus, dcplus, aligner);
    dcminus = __perm(dcminus, dcminus, aligner);

    /* Four rows per iteration: one pass for 4x4, two passes for 8x8. */
    for (i = 0; i < size; i += 4) {
        v0 = __lvx(dst+0*stride, 0);
        v1 = __lvx(dst+1*stride, 0);
        v2 = __lvx(dst+2*stride, 0);
        v3 = __lvx(dst+3*stride, 0);

        /* pixel = sat(pixel + max(dc,0)) ... */
        v0 = __vaddubs(v0, dcplus);
        v1 = __vaddubs(v1, dcplus);
        v2 = __vaddubs(v2, dcplus);
        v3 = __vaddubs(v3, dcplus);

        /* ... then pixel = sat(pixel - max(-dc,0)); exactly one of the
           two operands is non-zero, so this realizes a signed add with
           unsigned saturation. */
        v0 = __vsububs(v0, dcminus);
        v1 = __vsububs(v1, dcminus);
        v2 = __vsububs(v2, dcminus);
        v3 = __vsububs(v3, dcminus);

        __stvx(v0, dst+0*stride, 0);
        __stvx(v1, dst+1*stride, 0);
        __stvx(v2, dst+2*stride, 0);
        __stvx(v3, dst+3*stride, 0);

        dst += 4*stride;
    }
}
Example #2
0
/*
 * Porter-Duff "over" for premultiplied pixel vectors:
 * result = dest * (1 - srca) + src, with per-byte saturation.
 */
static force_inline __vector4
over (__vector4 src,
	  __vector4 srca,
	  __vector4 dest)
{
	/* Scale dest by the complement of the source alpha, then fold the
	   source in with a saturating unsigned byte add. */
	return __vaddubs (src, pix_multiply (dest, negate (srca)));
}
Example #3
0
/* Per-byte saturating addition of two pixel vectors: each result byte is
 * clamped to 255 rather than wrapping. */
static force_inline __vector4
pix_add (__vector4 a, __vector4 b)
{
	__vector4 sum = __vaddubs (a, b);

	return sum;
}