static inline void MC_avg4_8 (int height, uint8_t * dest, const uint8_t * ref, const int stride, const int cpu) { do { movq_m2r (*ref, mm0); movq_m2r (*(ref+stride+1), mm1); movq_r2r (mm0, mm7); movq_m2r (*(ref+1), mm2); pxor_r2r (mm1, mm7); movq_m2r (*(ref+stride), mm3); movq_r2r (mm2, mm6); pxor_r2r (mm3, mm6); pavg_r2r (mm1, mm0); pavg_r2r (mm3, mm2); por_r2r (mm6, mm7); movq_r2r (mm0, mm6); pxor_r2r (mm2, mm6); pand_r2r (mm6, mm7); pand_m2r (mask_one, mm7); pavg_r2r (mm2, mm0); psubusb_r2r (mm7, mm0); movq_m2r (*dest, mm1); pavg_r2r (mm1, mm0); ref += stride; movq_r2m (mm0, *dest); dest += stride; } while (--height); }
static inline void MC_put4_8 (int height, uint8_t * dest, uint8_t * ref, int stride, int cpu) { movq_m2r (*ref, mm0); movq_m2r (*(ref+1), mm1); movq_r2r (mm0, mm7); pxor_r2r (mm1, mm7); pavg_r2r (mm1, mm0); ref += stride; do { movq_m2r (*ref, mm2); movq_r2r (mm0, mm5); movq_m2r (*(ref+1), mm3); movq_r2r (mm2, mm6); pxor_r2r (mm3, mm6); pavg_r2r (mm3, mm2); por_r2r (mm6, mm7); pxor_r2r (mm2, mm5); pand_r2r (mm5, mm7); pavg_r2r (mm2, mm0); pand_m2r (mask_one, mm7); psubusb_r2r (mm7, mm0); ref += stride; movq_r2m (mm0, *dest); dest += stride; movq_r2r (mm6, mm7); // unroll ! movq_r2r (mm2, mm0); // unroll ! } while (--height); }