/**
 * I420_R5G5B5_MMX: convert a planar YUV 4:2:0 (I420) picture to 15-bit
 * RGB 5-5-5 using the MMX conversion macros, with optional horizontal
 * and vertical scaling controlled by the converter context.
 *
 * pHandle : converter context (PSOFTCONV) supplying rectangles, pitches,
 *           margins and the scratch/offset buffers.
 * p_y     : source luma (Y) plane.
 * p_u     : source chroma (U) plane (half resolution in both directions).
 * p_v     : source chroma (V) plane (half resolution in both directions).
 * p_d     : destination buffer of 16-bit pixels; writing starts at
 *           pconv->dst_offset bytes into it.
 *
 * NOTE(review): the MMX_* / SCALE_* macros expand to code that references
 * the locals below by name (p_y, p_u, p_v, p_buffer, p_offset, i_x, i_y,
 * i_scale_count, ...). Do not rename or remove any of these declarations.
 */
void I420_R5G5B5_MMX(void * pHandle, uint8_t *p_y, uint8_t *p_u, uint8_t *p_v, uint16_t *p_d)
{
    PSOFTCONV pconv = pHandle;
    if (!pHandle) return;  /* no context: nothing to convert */

    int b_hscale = pconv->b_hscale;             /* horizontal scaling type */
    unsigned int i_vscale = pconv->i_vscale;    /* vertical scaling type */
    unsigned int i_x, i_y;                      /* horizontal and vertical indexes */
    CC_RECT * dst_rect = &pconv->dst_dis_rect;  /* destination display rectangle */
    CC_RECT * src_rect = &pconv->src_rect;      /* source rectangle */
    int dst_stride = pconv->dst_pitch;          /* destination pitch (consumed by SCALE_* macros) */
    uint16_t *p_pic = (uint16_t *)((uint8_t *)p_d + pconv->dst_offset); /* first destination pixel */
    int i_right_margin = pconv->i_right_margin; /* destination right margin — presumably bytes; confirm against SCALE_* */
    int i_rewind;                               /* shortfall when width is not a multiple of 8 */
    int i_scale_count;                          /* scale modulo counter */
    int i_chroma_width = WIDTHOF(src_rect) / 2; /* chroma width */
    uint16_t * p_pic_start;                     /* beginning of the current line for copy */
    /* Conversion buffer pointer */
    uint16_t * p_buffer_start = pconv->pConvBuffer;
    uint16_t * p_buffer;
    /* Offset array pointer */
    int * p_offset_start = pconv->pOffsetBuffer;
    int * p_offset;
    const int i_source_margin = pconv->i_source_margin;     /* luma padding per source line */
    const int i_source_margin_c = pconv->i_source_margin_c; /* chroma padding per chroma line */

    /*
     * Perform conversion
     */
    i_scale_count = ( i_vscale == 1 ) ? HEIGHTOF(dst_rect) : HEIGHTOF(src_rect);

    /* The MMX kernel converts 8 luma pixels per call; if the width is not a
     * multiple of 8, remember how far to rewind to redo the last group. */
    if( WIDTHOF(src_rect) & 7 )
    {
        i_rewind = 8 - ( WIDTHOF(src_rect) & 7 );
    }
    else
    {
        i_rewind = 0;
    }

    for( i_y = 0; i_y < HEIGHTOF(src_rect); i_y++ )
    {
        p_pic_start = p_pic;
        /* With horizontal scaling enabled, convert into the scratch buffer
         * first; otherwise write straight into the destination line. */
        p_buffer = b_hscale ? p_buffer_start : p_pic;

        /* 8 pixels per iteration: 8 Y samples, 4 U and 4 V samples. */
        for ( i_x = WIDTHOF(src_rect) / 8; i_x--; )
        {
            MMX_CALL ( MMX_INIT_16 MMX_YUV_MUL MMX_YUV_ADD MMX_UNPACK_15 );
            p_y += 8;
            p_u += 4;
            p_v += 4;
            p_buffer += 8;
        }

        /* Here we do some unaligned reads and duplicate conversions, but
         * at least we have all the pixels */
        if( i_rewind )
        {
            p_y -= i_rewind;
            p_u -= i_rewind >> 1;
            p_v -= i_rewind >> 1;
            p_buffer -= i_rewind;
            MMX_CALL ( MMX_INIT_16 MMX_YUV_MUL MMX_YUV_ADD MMX_UNPACK_15 );
            p_y += 8;
            p_u += 4;
            p_v += 4;
            p_buffer += 8;
        }

        SCALE_WIDTH;        /* horizontal scaling / copy into the picture */
        SCALE_HEIGHT( 2 );  /* vertical scaling; 2 presumably = bytes per pixel — TODO confirm */

        /* Skip line padding; chroma planes only advance every other
         * line because of 4:2:0 subsampling. */
        p_y += i_source_margin;
        if( i_y % 2 )
        {
            p_u += i_source_margin_c;
            p_v += i_source_margin_c;
        }
    }

    /* re-enable FPU registers */
    MMX_END;
}
/**
 * I420_R5G5B5: convert a planar YUV 4:2:0 picture to 15-bit RGB 5-5-5
 * through the filter_t interface, using SSE2 when compiled with SSE2
 * support (aligned fast path when pitches and pointers are 16-byte
 * aligned, unaligned path otherwise).
 *
 * p_filter : the conversion filter; p_sys supplies the scratch buffer
 *            and offset array, fmt_in/fmt_out supply the geometry.
 * p_src    : source I420 picture (Y/U/V planes).
 * p_dest   : destination picture of 16-bit pixels.
 *
 * NOTE(review): this chunk of the file is truncated — the function body
 * below ends inside the SSE2-unaligned loop header. The SSE2_* / SCALE_*
 * macros reference the locals by name; keep all declarations intact.
 */
VLC_TARGET
void I420_R5G5B5( filter_t *p_filter, picture_t *p_src, picture_t *p_dest )
{
    /* We got this one from the old arguments */
    uint16_t *p_pic = (uint16_t*)p_dest->p->p_pixels;
    uint8_t *p_y = p_src->Y_PIXELS;
    uint8_t *p_u = p_src->U_PIXELS;
    uint8_t *p_v = p_src->V_PIXELS;

    bool b_hscale;          /* horizontal scaling type */
    unsigned int i_vscale;  /* vertical scaling type */
    unsigned int i_x, i_y;  /* horizontal and vertical indexes */

    int i_right_margin;
    int i_rewind;           /* pixels to step back for the last partial 16-pixel group */
    int i_scale_count;      /* scale modulo counter */
    int i_chroma_width = (p_filter->fmt_in.video.i_x_offset + p_filter->fmt_in.video.i_visible_width) / 2; /* chroma width */
    uint16_t * p_pic_start; /* beginning of the current line for copy */

    /* Conversion buffer pointer */
    uint16_t * p_buffer_start = (uint16_t*)p_filter->p_sys->p_buffer;
    uint16_t * p_buffer;

    /* Offset array pointer */
    int * p_offset_start = p_filter->p_sys->p_offset;
    int * p_offset;

    /* Padding past the visible part of each source line (offset folded in). */
    const int i_source_margin = p_src->p[0].i_pitch
                                 - p_src->p[0].i_visible_pitch
                                 - p_filter->fmt_in.video.i_x_offset;
    const int i_source_margin_c = p_src->p[1].i_pitch
                                 - p_src->p[1].i_visible_pitch
                                 - ( p_filter->fmt_in.video.i_x_offset / 2 );

    i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;

    /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
     * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
     * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
    SetOffset( (p_filter->fmt_in.video.i_x_offset +
               p_filter->fmt_in.video.i_visible_width),
               (p_filter->fmt_in.video.i_y_offset +
               p_filter->fmt_in.video.i_visible_height),
               (p_filter->fmt_out.video.i_x_offset +
               p_filter->fmt_out.video.i_visible_width),
               (p_filter->fmt_out.video.i_y_offset +
               p_filter->fmt_out.video.i_visible_height),
               &b_hscale, &i_vscale, p_offset_start );

    /*
     * Perform conversion
     */
    i_scale_count = ( i_vscale == 1 ) ?
                    (p_filter->fmt_out.video.i_y_offset +
                    p_filter->fmt_out.video.i_visible_height) :
                    (p_filter->fmt_in.video.i_y_offset +
                    p_filter->fmt_in.video.i_visible_height);

#ifdef SSE2

    /* The SSE2 kernel converts 16 pixels per call; rewind distance for a
     * width that is not a multiple of 16. */
    i_rewind = (-(p_filter->fmt_in.video.i_x_offset +
                p_filter->fmt_in.video.i_visible_width)) & 15;

    /*
    ** SSE2 128 bits fetch/store instructions are faster
    ** if memory access is 16 bytes aligned
    */

    p_buffer = b_hscale ? p_buffer_start : p_pic;
    if( 0 == (15 & (p_src->p[Y_PLANE].i_pitch|
                    p_dest->p->i_pitch|
                    ((intptr_t)p_y)|
                    ((intptr_t)p_buffer))) )
    {
        /* use faster SSE2 aligned fetch and store */
        for( i_y = 0;
             i_y < (p_filter->fmt_in.video.i_y_offset +
                   p_filter->fmt_in.video.i_visible_height);
             i_y++ )
        {
            p_pic_start = p_pic;

            /* 16 pixels per iteration: 16 Y samples, 8 U and 8 V samples. */
            for ( i_x = (p_filter->fmt_in.video.i_x_offset +
                        p_filter->fmt_in.video.i_visible_width)/16;
                  i_x--; )
            {
                SSE2_CALL ( SSE2_INIT_16_ALIGNED
                            SSE2_YUV_MUL
                            SSE2_YUV_ADD
                            SSE2_UNPACK_15_ALIGNED );
                p_y += 16;
                p_u += 8;
                p_v += 8;
                p_buffer += 16;
            }

            /* Here we do some unaligned reads and duplicate conversions, but
             * at least we have all the pixels */
            if( i_rewind )
            {
                p_y -= i_rewind;
                p_u -= i_rewind >> 1;
                p_v -= i_rewind >> 1;
                p_buffer -= i_rewind;

                SSE2_CALL ( SSE2_INIT_16_UNALIGNED
                            SSE2_YUV_MUL
                            SSE2_YUV_ADD
                            SSE2_UNPACK_15_UNALIGNED );
                p_y += 16;
                p_u += 8;
                p_v += 8;
            }

            SCALE_WIDTH;            /* horizontal scaling / copy to the picture */
            SCALE_HEIGHT( 420, 2 ); /* vertical scaling for 4:2:0, 16-bit pixels */

            /* Skip line padding; chroma advances every other line (4:2:0). */
            p_y += i_source_margin;
            if( i_y % 2 )
            {
                p_u += i_source_margin_c;
                p_v += i_source_margin_c;
            }
            p_buffer = b_hscale ? p_buffer_start : p_pic;
        }
    }
    else
    {
        /* use slower SSE2 unaligned fetch and store */
        for( i_y = 0;
             i_y < (p_filter->fmt_in.video.i_y_offset +
                   p_filter->fmt_in.video.i_visible_height);
             i_y++ )
        /* NOTE(review): source chunk is truncated at this point — the
         * remainder of the unaligned loop and the function is not visible
         * in this extract. */
/**
 * I420_RGB32: convert a planar YUV 4:2:0 (I420) picture to 32-bit RGB
 * using the precomputed lookup table p_sys->p_rgb32, with optional
 * horizontal and vertical scaling.
 *
 * p_filter : the conversion filter; p_sys supplies the RGB lookup table,
 *            scratch buffer and offset array, fmt_in/fmt_out the geometry.
 * p_src    : source I420 picture (Y/U/V planes).
 * p_dest   : destination picture of 32-bit pixels.
 *
 * NOTE(review): CONVERT_YUV_PIXEL / CONVERT_Y_PIXEL and the SCALE_*
 * macros reference the locals below by name (p_y, p_u, p_v, p_buffer,
 * i_uval, i_vval, i_red, i_green, i_blue, p_yuv, p_ybase, p_offset, ...).
 * Keep all declarations and their names intact.
 */
void I420_RGB32( filter_t *p_filter, picture_t *p_src, picture_t *p_dest )
{
    /* We got this one from the old arguments */
    uint32_t *p_pic = (uint32_t*)p_dest->p->p_pixels;
    uint8_t *p_y = p_src->Y_PIXELS;
    uint8_t *p_u = p_src->U_PIXELS;
    uint8_t *p_v = p_src->V_PIXELS;

    bool b_hscale;          /* horizontal scaling type */
    unsigned int i_vscale;  /* vertical scaling type */
    unsigned int i_x, i_y;  /* horizontal and vertical indexes */

    int i_right_margin;
    int i_rewind;           /* pixels to step back for the last partial 8-pixel group */
    int i_scale_count;      /* scale modulo counter */
    int i_chroma_width = (p_filter->fmt_in.video.i_x_offset + p_filter->fmt_in.video.i_visible_width) / 2; /* chroma width */
    uint32_t * p_pic_start; /* beginning of the current line for copy */

    int i_uval, i_vval;     /* U and V samples */
    int i_red, i_green, i_blue; /* U and V modified samples */
    uint32_t * p_yuv = p_filter->p_sys->p_rgb32; /* YUV->RGB32 lookup table */
    uint32_t * p_ybase;     /* Y dependant conversion table */

    /* Conversion buffer pointer */
    uint32_t * p_buffer_start = (uint32_t*)p_filter->p_sys->p_buffer;
    uint32_t * p_buffer;

    /* Offset array pointer */
    int * p_offset_start = p_filter->p_sys->p_offset;
    int * p_offset;

    /* Padding past the visible part of each source line (offset folded in). */
    const int i_source_margin = p_src->p[0].i_pitch
                                 - p_src->p[0].i_visible_pitch
                                 - p_filter->fmt_in.video.i_x_offset;
    const int i_source_margin_c = p_src->p[1].i_pitch
                                 - p_src->p[1].i_visible_pitch
                                 - ( p_filter->fmt_in.video.i_x_offset / 2 );

    i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;

    /* Eight pixels are converted per loop pass; rewind distance when the
     * width is not a multiple of 8. */
    i_rewind = (-(p_filter->fmt_in.video.i_x_offset +
                p_filter->fmt_in.video.i_visible_width)) & 7;

    /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
     * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
     * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
    SetOffset( (p_filter->fmt_in.video.i_x_offset +
               p_filter->fmt_in.video.i_visible_width),
               (p_filter->fmt_in.video.i_y_offset +
               p_filter->fmt_in.video.i_visible_height),
               (p_filter->fmt_out.video.i_x_offset +
               p_filter->fmt_out.video.i_visible_width),
               (p_filter->fmt_out.video.i_y_offset +
               p_filter->fmt_out.video.i_visible_height),
               &b_hscale, &i_vscale, p_offset_start );

    /*
     * Perform conversion
     */
    i_scale_count = ( i_vscale == 1 ) ?
                    (p_filter->fmt_out.video.i_y_offset +
                    p_filter->fmt_out.video.i_visible_height) :
                    (p_filter->fmt_in.video.i_y_offset +
                    p_filter->fmt_in.video.i_visible_height);

    for( i_y = 0;
         i_y < (p_filter->fmt_in.video.i_y_offset +
               p_filter->fmt_in.video.i_visible_height);
         i_y++ )
    {
        p_pic_start = p_pic;
        /* With horizontal scaling enabled, convert into the scratch buffer
         * first; otherwise write straight into the destination line. */
        p_buffer = b_hscale ? p_buffer_start : p_pic;

        /* 8 pixels per iteration: each CONVERT_YUV_PIXEL/CONVERT_Y_PIXEL
         * pair handles 2 pixels sharing one U/V sample (4:2:0). The
         * argument 4 is presumably bytes per output pixel — TODO confirm
         * against the macro definition. */
        for ( i_x = (p_filter->fmt_in.video.i_x_offset +
                    p_filter->fmt_in.video.i_visible_width) / 8;
              i_x--; )
        {
            CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
            CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
            CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
            CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
        }

        /* Here we do some unaligned reads and duplicate conversions, but
         * at least we have all the pixels */
        if( i_rewind )
        {
            p_y -= i_rewind;
            p_u -= i_rewind >> 1;
            p_v -= i_rewind >> 1;
            p_buffer -= i_rewind;
            CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
            CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
            CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
            CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
        }

        SCALE_WIDTH;            /* horizontal scaling / copy to the picture */
        SCALE_HEIGHT( 420, 4 ); /* vertical scaling for 4:2:0, 32-bit pixels */

        /* Skip line padding; chroma planes advance every other line (4:2:0). */
        p_y += i_source_margin;
        if( i_y % 2 )
        {
            p_u += i_source_margin_c;
            p_v += i_source_margin_c;
        }
    }
}