/*
 * mean8: MMX helper that processes eight adjacent bytes in parallel.
 *
 * Lane k (k = 0..7) uses refpix[k] as its reference value and visits
 * radius_count * radius_count samples: for every step (a, b) the sample is
 * the byte at pixel[k + a + b * row_stride].  For each sample whose absolute
 * difference from the reference is below threshold, the lane's counter is
 * incremented and the signed difference (sample - reference) is added to the
 * lane's running sum.
 *
 *   refpix        first of the eight reference bytes
 *   pixel         top-left byte of the scanned window for lane 0
 *   radius_count  number of columns and of rows scanned
 *   row_stride    distance in bytes between two rows of the window
 *   threshold     exclusive upper bound on the absolute difference
 *   diff          receives the eight per-lane difference sums
 *   count         receives the eight per-lane sample counts
 *
 * assumes diff and count each point to at least 8 writable bytes, since the
 * results are stored with 64-bit movq -- TODO confirm against callers.
 *
 * NOTE(review): counters and sums are single-byte lanes updated with
 * psubb/paddb, so they presumably wrap once a lane exceeds its 8-bit range;
 * confirm that callers keep the window small enough.
 * NOTE(review): the threshold test uses pcmpgtb, which the Intel manual
 * documents as a signed compare, so threshold presumably has to stay below
 * 128 -- confirm.
 */
static inline void mean8(unsigned char *refpix,unsigned char *pixel,int radius_count,int row_stride,int threshold,int8_t *diff,unsigned char *count)
{
    int a,b;

    pxor_r2r(mm6,mm6); // mm6 (the per-lane count) = 0
    pxor_r2r(mm7,mm7); // mm7 (the per-lane diff sum) = 0

    movq_m2r(*refpix,mm3); // mm3 = refpix[0..7]

    // Build 0x80 in every byte of mm4; XOR-ing with it turns an unsigned
    // byte into the signed value (byte - 128).
    movd_g2r(0x80808080,mm4); // low dword of mm4 = 0x80808080
    punpcklbw_r2r(mm4,mm4); // mm4 = 128 in all eight lanes

    pxor_r2r(mm4,mm3); // mm3 = refpix[0..7]-128

    // Replicate the threshold byte across all lanes: each unpack doubles
    // the number of copies.
    movd_g2r(threshold,mm5); // mm5 = threshold
    punpcklbw_r2r(mm5,mm5);
    punpcklbw_r2r(mm5,mm5);
    punpcklbw_r2r(mm5,mm5);

    for( b=0; b<radius_count; b++ ) {
        for( a=0; a<radius_count; a++ ) {
            movq_m2r(*pixel,mm0); // mm0 = pixel[0..7]
            pxor_r2r(mm4,mm0); // mm0 = pixel[0..7]-128
            movq_r2r(mm3,mm2); // mm2 = refpix[0..7]-128
            psubsb_r2r(mm0,mm2); // mm2 = refpix-pixel (signed, saturated)
            psubsb_r2r(mm3,mm0); // mm0 = pixel-refpix (signed, saturated)
            // The unsigned minimum of d and -d selects the non-negative one.
            pminub_r2r(mm0,mm2); // mm2 = abs(pixel-refpix)
            movq_r2r(mm5,mm1); // mm1 = threshold
            pcmpgtb_r2r(mm2,mm1); // mm1 = (threshold > abs(pixel-refpix)) ? -1 : 0
            // Subtracting the -1 mask adds one to every accepted lane.
            psubb_r2r(mm1,mm6); // mm6 += (threshold > abs(pixel-refpix))
            pand_r2r(mm1,mm0); // mm0 = accepted ? pixel-refpix : 0
            paddb_r2r(mm0,mm7); // mm7 += accepted ? pixel-refpix : 0

            ++pixel; // next column of the window
        }
        // Undo the radius_count column steps and move down one row.
        pixel += row_stride - radius_count;
    }

    movq_r2m(mm6,*count); // store the eight per-lane counts
    movq_r2m(mm7,*diff); // store the eight per-lane difference sums

    emms(); // leave MMX state so x87 floating point can be used again
}
VLC_MMX static int TestForMotionInBlockMMX( uint8_t *p_pix_p, uint8_t *p_pix_c, int i_pitch_prev, int i_pitch_curr, int* pi_top, int* pi_bot ) { int32_t i_motion = 0; int32_t i_top_motion = 0; int32_t i_bot_motion = 0; static alignas (8) const mmx_t bT = { .ub = { T, T, T, T, T, T, T, T } }; pxor_r2r( mm6, mm6 ); /* zero, used in psadbw */ movq_m2r( bT, mm5 ); pxor_r2r( mm3, mm3 ); /* score (top field) */ pxor_r2r( mm4, mm4 ); /* score (bottom field) */ for( int y = 0; y < 8; y+=2 ) { /* top field */ movq_m2r( *((uint64_t*)p_pix_c), mm0 ); movq_m2r( *((uint64_t*)p_pix_p), mm1 ); movq_r2r( mm0, mm2 ); psubusb_r2r( mm1, mm2 ); psubusb_r2r( mm0, mm1 ); pcmpgtb_r2r( mm5, mm2 ); pcmpgtb_r2r( mm5, mm1 ); psadbw_r2r( mm6, mm2 ); psadbw_r2r( mm6, mm1 ); paddd_r2r( mm2, mm1 ); paddd_r2r( mm1, mm3 ); /* add to top field score */ p_pix_c += i_pitch_curr; p_pix_p += i_pitch_prev; /* bottom field - handling identical to top field, except... */ movq_m2r( *((uint64_t*)p_pix_c), mm0 ); movq_m2r( *((uint64_t*)p_pix_p), mm1 ); movq_r2r( mm0, mm2 ); psubusb_r2r( mm1, mm2 ); psubusb_r2r( mm0, mm1 ); pcmpgtb_r2r( mm5, mm2 ); pcmpgtb_r2r( mm5, mm1 ); psadbw_r2r( mm6, mm2 ); psadbw_r2r( mm6, mm1 ); paddd_r2r( mm2, mm1 ); paddd_r2r( mm1, mm4 ); /* ...here we add to bottom field score */ p_pix_c += i_pitch_curr; p_pix_p += i_pitch_prev; } movq_r2r( mm3, mm7 ); /* score (total) */ paddd_r2r( mm4, mm7 ); movd_r2m( mm3, i_top_motion ); movd_r2m( mm4, i_bot_motion ); movd_r2m( mm7, i_motion ); /* The loop counts actual score * 255. */ i_top_motion /= 255; i_bot_motion /= 255; i_motion /= 255; emms(); (*pi_top) = ( i_top_motion >= 8 ); (*pi_bot) = ( i_bot_motion >= 8 ); return (i_motion >= 8); }
//VLC_MMX // sunqueen delete static int TestForMotionInBlockMMX( uint8_t *p_pix_p, uint8_t *p_pix_c, int i_pitch_prev, int i_pitch_curr, int* pi_top, int* pi_bot ) { int32_t i_motion = 0; int32_t i_top_motion = 0; int32_t i_bot_motion = 0; uint64_t ui_pix_c, ui_pix_p; // sunqueen add // static const mmx_t bT = { .ub = { T, T, T, T, T, T, T, T } }; __declspec(align(8)) static const mmx_t bT = { 0x0A0A0A0A0A0A0A0AULL }; // sunqueen modify pxor_r2r( mm6, mm6 ); /* zero, used in psadbw */ movq_m2r( bT, mm5 ); pxor_r2r( mm3, mm3 ); /* score (top field) */ pxor_r2r( mm4, mm4 ); /* score (bottom field) */ for( int y = 0; y < 8; y+=2 ) { /* top field */ // sunqueen add start ui_pix_c = *((uint64_t*)p_pix_c); movq_m2r( ui_pix_c, mm0 ); ui_pix_p = *((uint64_t*)p_pix_p); movq_m2r( ui_pix_p, mm1 ); // sunqueen add end #if 0 // sunqueen delete start movq_m2r( *((uint64_t*)p_pix_c), mm0 ); movq_m2r( *((uint64_t*)p_pix_p), mm1 ); #endif // sunqueen delete end movq_r2r( mm0, mm2 ); psubusb_r2r( mm1, mm2 ); psubusb_r2r( mm0, mm1 ); pcmpgtb_r2r( mm5, mm2 ); pcmpgtb_r2r( mm5, mm1 ); psadbw_r2r( mm6, mm2 ); psadbw_r2r( mm6, mm1 ); paddd_r2r( mm2, mm1 ); paddd_r2r( mm1, mm3 ); /* add to top field score */ p_pix_c += i_pitch_curr; p_pix_p += i_pitch_prev; /* bottom field - handling identical to top field, except... 
*/ // sunqueen add start ui_pix_c = *((uint64_t*)p_pix_c); movq_m2r( ui_pix_c, mm0 ); ui_pix_p = *((uint64_t*)p_pix_p); movq_m2r( ui_pix_p, mm1 ); // sunqueen add end #if 0 // sunqueen delete start movq_m2r( *((uint64_t*)p_pix_c), mm0 ); movq_m2r( *((uint64_t*)p_pix_p), mm1 ); #endif // sunqueen delete end movq_r2r( mm0, mm2 ); psubusb_r2r( mm1, mm2 ); psubusb_r2r( mm0, mm1 ); pcmpgtb_r2r( mm5, mm2 ); pcmpgtb_r2r( mm5, mm1 ); psadbw_r2r( mm6, mm2 ); psadbw_r2r( mm6, mm1 ); paddd_r2r( mm2, mm1 ); paddd_r2r( mm1, mm4 ); /* ...here we add to bottom field score */ p_pix_c += i_pitch_curr; p_pix_p += i_pitch_prev; } movq_r2r( mm3, mm7 ); /* score (total) */ paddd_r2r( mm4, mm7 ); movd_r2m( mm3, i_top_motion ); movd_r2m( mm4, i_bot_motion ); movd_r2m( mm7, i_motion ); /* The loop counts actual score * 255. */ i_top_motion /= 255; i_bot_motion /= 255; i_motion /= 255; emms(); (*pi_top) = ( i_top_motion >= 8 ); (*pi_bot) = ( i_bot_motion >= 8 ); return (i_motion >= 8); }
/** * Internal helper function for EstimateNumBlocksWithMotion(): * estimates whether there is motion in the given 8x8 block on one plane * between two images. The block as a whole and its fields are evaluated * separately, and use different motion thresholds. * * This is a low-level function only used by EstimateNumBlocksWithMotion(). * There is no need to call this function manually. * * For interpretation of pi_top and pi_bot, it is assumed that the block * starts on an even-numbered line (belonging to the top field). * * The b_mmx parameter avoids the need to call vlc_CPU() separately * for each block. * * @param[in] p_pix_p Base pointer to the block in previous picture * @param[in] p_pix_c Base pointer to the same block in current picture * @param i_pitch_prev i_pitch of previous picture * @param i_pitch_curr i_pitch of current picture * @param b_mmx (vlc_CPU() & CPU_CAPABILITY_MMXEXT) or false. * @param[out] pi_top 1 if top field of the block had motion, 0 if no * @param[out] pi_bot 1 if bottom field of the block had motion, 0 if no * @return 1 if the block had motion, 0 if no * @see EstimateNumBlocksWithMotion() */ static inline int TestForMotionInBlock( uint8_t *p_pix_p, uint8_t *p_pix_c, int i_pitch_prev, int i_pitch_curr, bool b_mmx, int* pi_top, int* pi_bot ) { /* Pixel luma/chroma difference threshold to detect motion. */ #define T 10 int32_t i_motion = 0; int32_t i_top_motion = 0; int32_t i_bot_motion = 0; /* See below for the C version to see more quickly what this does. 
*/ #ifdef CAN_COMPILE_MMXEXT if( b_mmx ) { static const mmx_t bT = { .ub = { T, T, T, T, T, T, T, T } }; pxor_r2r( mm6, mm6 ); /* zero, used in psadbw */ movq_m2r( bT, mm5 ); pxor_r2r( mm3, mm3 ); /* score (top field) */ pxor_r2r( mm4, mm4 ); /* score (bottom field) */ for( int y = 0; y < 8; y+=2 ) { /* top field */ movq_m2r( *((uint64_t*)p_pix_c), mm0 ); movq_m2r( *((uint64_t*)p_pix_p), mm1 ); movq_r2r( mm0, mm2 ); psubusb_r2r( mm1, mm2 ); psubusb_r2r( mm0, mm1 ); pcmpgtb_r2r( mm5, mm2 ); pcmpgtb_r2r( mm5, mm1 ); psadbw_r2r( mm6, mm2 ); psadbw_r2r( mm6, mm1 ); paddd_r2r( mm2, mm1 ); paddd_r2r( mm1, mm3 ); /* add to top field score */ p_pix_c += i_pitch_curr; p_pix_p += i_pitch_prev; /* bottom field - handling identical to top field, except... */ movq_m2r( *((uint64_t*)p_pix_c), mm0 ); movq_m2r( *((uint64_t*)p_pix_p), mm1 ); movq_r2r( mm0, mm2 ); psubusb_r2r( mm1, mm2 ); psubusb_r2r( mm0, mm1 ); pcmpgtb_r2r( mm5, mm2 ); pcmpgtb_r2r( mm5, mm1 ); psadbw_r2r( mm6, mm2 ); psadbw_r2r( mm6, mm1 ); paddd_r2r( mm2, mm1 ); paddd_r2r( mm1, mm4 ); /* ...here we add to bottom field score */ p_pix_c += i_pitch_curr; p_pix_p += i_pitch_prev; } movq_r2r( mm3, mm7 ); /* score (total) */ paddd_r2r( mm4, mm7 ); movd_r2m( mm3, i_top_motion ); movd_r2m( mm4, i_bot_motion ); movd_r2m( mm7, i_motion ); /* The loop counts actual score * 255. */ i_top_motion /= 255; i_bot_motion /= 255; i_motion /= 255; emms(); } else #endif { for( int y = 0; y < 8; ++y )