static void h264_idct_add_altivec(uint8_t *dst, int16_t *block, int stride) { vec_s16 va0, va1, va2, va3; vec_s16 vz0, vz1, vz2, vz3; vec_s16 vtmp0, vtmp1, vtmp2, vtmp3; vec_u8 va_u8; vec_u32 va_u32; vec_s16 vdst_ss; const vec_u16 v6us = vec_splat_u16(6); vec_u8 vdst, vdst_orig; vec_u8 vdst_mask = vec_lvsl(0, dst); int element = ((unsigned long)dst & 0xf) >> 2; LOAD_ZERO; block[0] += 32; /* add 32 as a DC-level for rounding */ vtmp0 = vec_ld(0,block); vtmp1 = vec_sld(vtmp0, vtmp0, 8); vtmp2 = vec_ld(16,block); vtmp3 = vec_sld(vtmp2, vtmp2, 8); memset(block, 0, 16 * sizeof(int16_t)); VEC_1D_DCT(vtmp0,vtmp1,vtmp2,vtmp3,va0,va1,va2,va3); VEC_TRANSPOSE_4(va0,va1,va2,va3,vtmp0,vtmp1,vtmp2,vtmp3); VEC_1D_DCT(vtmp0,vtmp1,vtmp2,vtmp3,va0,va1,va2,va3); va0 = vec_sra(va0,v6us); va1 = vec_sra(va1,v6us); va2 = vec_sra(va2,v6us); va3 = vec_sra(va3,v6us); VEC_LOAD_U8_ADD_S16_STORE_U8(va0); dst += stride; VEC_LOAD_U8_ADD_S16_STORE_U8(va1); dst += stride; VEC_LOAD_U8_ADD_S16_STORE_U8(va2); dst += stride; VEC_LOAD_U8_ADD_S16_STORE_U8(va3); }
/*
 * 4x4 H.264 inverse transform with the result added into dst, written with
 * the double-underscore VMX intrinsics (__lvx, __vsldoi, __vsrah, ...).
 *
 * dst    - destination pixels; the add/store helper macro is applied to four
 *          successive rows, each `stride` bytes after the previous one.
 * block  - coefficient block, read as two 16-byte vectors at byte offsets
 *          0 and 16. block[0] is modified (+32) before the load.
 * stride - byte distance between consecutive rows of dst.
 *
 * The element-type annotations are block comments on purpose: the previous
 * "//" trailers swallowed the remainder of the function when the source was
 * joined onto a single line.
 *
 * Several locals (vz0..vz3, va_u8, va_u32, vdst_ss, vdst, vdst_orig,
 * vdst_mask, element) are never named directly in this body; presumably the
 * helper macros defined elsewhere pick them up by name, so they must keep
 * these exact identifiers.
 *
 * NOTE(review): unlike h264_idct_add_altivec, this variant does not zero
 * block after loading it - confirm the caller does not depend on that.
 */
static void ff_h264_idct_add_altivec(uint8_t *dst, DCTELEM *block, int stride)
{
    vec_s16 va0, va1, va2, va3;          /* s16 */
    vec_s16 vz0, vz1, vz2, vz3;          /* s16 */
    vec_s16 vtmp0, vtmp1, vtmp2, vtmp3;  /* s16 */
    vec_u8 va_u8;                        /* u8  */
    vec_u32 va_u32;                      /* u32 */
    vec_s16 vdst_ss;                     /* s16 */
    /*
     * Shift count for the final scaling.
     * NOTE(review): the sibling AltiVec routine builds this constant with
     * vec_splat_u16(6), a splat-immediate; confirm that __vsplth(6) is the
     * matching splat-immediate intrinsic on this toolchain.
     */
    const vec_u16 v6us = __vsplth(6);    /* u16 */
    vec_u8 vdst, vdst_orig;              /* u8  */
    vec_u8 vdst_mask = __lvsl(dst, 0);   /* u8  */
    /* Index of the 32-bit word that dst occupies inside its 16-byte line. */
    int element = ((unsigned long)dst & 0xf) >> 2;

    LOAD_ZERO;

    block[0] += 32; /* add 32 as a DC-level for rounding */

    /*
     * Two 16-byte loads; rotating each by 8 bytes moves its second half
     * into the leading position of a separate register.
     */
    vtmp0 = __lvx(block, 0);
    vtmp1 = __vsldoi(vtmp0, vtmp0, 8);
    vtmp2 = __lvx(block, 16);
    vtmp3 = __vsldoi(vtmp2, vtmp2, 8);

    /* Two 1-D passes with a 4x4 transpose in between. */
    VEC_1D_DCT(vtmp0, vtmp1, vtmp2, vtmp3, va0, va1, va2, va3);
    VEC_TRANSPOSE_4(va0, va1, va2, va3, vtmp0, vtmp1, vtmp2, vtmp3);
    VEC_1D_DCT(vtmp0, vtmp1, vtmp2, vtmp3, va0, va1, va2, va3);

    /* Shift each result row right by v6us. */
    va0 = __vsrah(va0, v6us);
    va1 = __vsrah(va1, v6us);
    va2 = __vsrah(va2, v6us);
    va3 = __vsrah(va3, v6us);

    /* One helper invocation per output row, stepping dst by stride. */
    VEC_LOAD_U8_ADD_S16_STORE_U8(va0);
    dst += stride;
    VEC_LOAD_U8_ADD_S16_STORE_U8(va1);
    dst += stride;
    VEC_LOAD_U8_ADD_S16_STORE_U8(va2);
    dst += stride;
    VEC_LOAD_U8_ADD_S16_STORE_U8(va3);
}