Example #1
void I420_R5G5B5_MMX(void *pHandle, uint8_t *p_y, uint8_t *p_u, uint8_t *p_v, uint16_t *p_d)
{
    if (!pHandle)
        return;
    PSOFTCONV pconv = pHandle;

    int          b_hscale = pconv->b_hscale;                 /* horizontal scaling type */
    unsigned int i_vscale = pconv->i_vscale;                 /* vertical scaling type */
    unsigned int i_x, i_y;                /* horizontal and vertical indexes */
    CC_RECT *   dst_rect   = &pconv->dst_dis_rect;
    CC_RECT *   src_rect   = &pconv->src_rect;
    int         dst_stride = pconv->dst_pitch;

    uint16_t *  p_pic = (uint16_t *)((uint8_t *)p_d + pconv->dst_offset);
    int         i_right_margin = pconv->i_right_margin;
    int         i_rewind;
    int         i_scale_count;                       /* scale modulo counter */
    int         i_chroma_width = WIDTHOF(src_rect) / 2; /* chroma width */
    uint16_t *  p_pic_start;       /* beginning of the current line for copy */

    /* Conversion buffer pointer */
    uint16_t *  p_buffer_start = pconv->pConvBuffer;
    uint16_t *  p_buffer;

    /* Offset array pointer */
    int *       p_offset_start = pconv->pOffsetBuffer;
    int *       p_offset;

    const int i_source_margin = pconv->i_source_margin;
    const int i_source_margin_c = pconv->i_source_margin_c;

    /*
     * Perform conversion
     */
    i_scale_count = ( i_vscale == 1 ) ?
                    HEIGHTOF(dst_rect) :
                    HEIGHTOF(src_rect);

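    /* When the visible width is not a multiple of 8, i_rewind is how far the
     * last 8-pixel MMX block must step back so the tail of the row is still
     * covered by a full block. */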
    if( WIDTHOF(src_rect) & 7 )
    {
        i_rewind = 8 - ( WIDTHOF(src_rect) & 7 );
    }
    else
    {
        i_rewind = 0;
    }

    for( i_y = 0; i_y < HEIGHTOF(src_rect); i_y++ )
    {
        p_pic_start = p_pic;
        p_buffer = b_hscale ? p_buffer_start : p_pic;

        for ( i_x = WIDTHOF(src_rect) / 8; i_x--; )
        {
            MMX_CALL (
                MMX_INIT_16
                MMX_YUV_MUL
                MMX_YUV_ADD
                MMX_UNPACK_15
            );
            p_y += 8;
            p_u += 4;
            p_v += 4;
            p_buffer += 8;
        }

        /* Here we do some unaligned reads and duplicate conversions, but
         * at least we have all the pixels */
        if( i_rewind )
        {
            p_y -= i_rewind;
            p_u -= i_rewind >> 1;
            p_v -= i_rewind >> 1;
            p_buffer -= i_rewind;

            MMX_CALL (
                MMX_INIT_16
                MMX_YUV_MUL
                MMX_YUV_ADD
                MMX_UNPACK_15
            );
            p_y += 8;
            p_u += 4;
            p_v += 4;
            p_buffer += 8;
        }
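        /* SCALE_WIDTH applies the optional horizontal stretch of the line just
         * converted; SCALE_HEIGHT( 2 ) handles vertical scaling and advances
         * p_pic past the right margin (2 bytes per output pixel). */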
        SCALE_WIDTH;
        SCALE_HEIGHT( 2 );

        p_y += i_source_margin;
        if( i_y % 2 )
        {
            p_u += i_source_margin_c;
            p_v += i_source_margin_c;
        }
    }
    /* re-enable FPU registers */
    MMX_END;
}
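Each MMX_CALL above converts eight pixels per pass: MMX_YUV_MUL and MMX_YUV_ADD do the fixed-point colour-matrix math and MMX_UNPACK_15 packs the result into 15-bit RGB555. For reference, here is a plain-C sketch of the per-pixel arithmetic being vectorized; the helper names and the integer BT.601 coefficients are illustrative assumptions, not the exact constants the macros use:

#include <stdint.h>

/* Hypothetical scalar reference for one 2x1 pixel pair. The real
 * MMX_YUV_MUL/MMX_YUV_ADD/MMX_UNPACK_15 macros use their own fixed-point
 * constants and saturation; the coefficients below are a common BT.601
 * integer approximation used only to show the shape of the computation. */
static inline uint8_t clamp8(int v)
{
    return (uint8_t)(v < 0 ? 0 : v > 255 ? 255 : v);
}

static inline uint16_t pack_rgb555(uint8_t r, uint8_t g, uint8_t b)
{
    /* 0RRRRRGGGGGBBBBB: keep the top 5 bits of each channel */
    return (uint16_t)(((r & 0xF8) << 7) | ((g & 0xF8) << 2) | (b >> 3));
}

static void yuv_pair_to_rgb555(const uint8_t y[2], uint8_t u, uint8_t v,
                               uint16_t out[2])
{
    int cu = u - 128, cv = v - 128;
    for (int i = 0; i < 2; i++) {
        int c = (y[i] - 16) * 298;                     /* luma scale          */
        int r = (c + 409 * cv + 128) >> 8;             /* R = Y + 1.596 V     */
        int g = (c - 100 * cu - 208 * cv + 128) >> 8;  /* G = Y - .39U - .81V */
        int b = (c + 516 * cu + 128) >> 8;             /* B = Y + 2.018 U     */
        out[i] = pack_rgb555(clamp8(r), clamp8(g), clamp8(b));
    }
}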
Example #2
VLC_TARGET
void I420_R5G5B5( filter_t *p_filter, picture_t *p_src, picture_t *p_dest )
{
    /* We got this one from the old arguments */
    uint16_t *p_pic = (uint16_t*)p_dest->p->p_pixels;
    uint8_t  *p_y   = p_src->Y_PIXELS;
    uint8_t  *p_u   = p_src->U_PIXELS;
    uint8_t  *p_v   = p_src->V_PIXELS;

    bool  b_hscale;                         /* horizontal scaling type */
    unsigned int i_vscale;                          /* vertical scaling type */
    unsigned int i_x, i_y;                /* horizontal and vertical indexes */

    int         i_right_margin;
    int         i_rewind;
    int         i_scale_count;                       /* scale modulo counter */
    int         i_chroma_width = (p_filter->fmt_in.video.i_x_offset + p_filter->fmt_in.video.i_visible_width) / 2; /* chroma width */
    uint16_t *  p_pic_start;       /* beginning of the current line for copy */

    /* Conversion buffer pointer */
    uint16_t *  p_buffer_start = (uint16_t*)p_filter->p_sys->p_buffer;
    uint16_t *  p_buffer;

    /* Offset array pointer */
    int *       p_offset_start = p_filter->p_sys->p_offset;
    int *       p_offset;

    const int i_source_margin = p_src->p[0].i_pitch
                                 - p_src->p[0].i_visible_pitch
                                 - p_filter->fmt_in.video.i_x_offset;
    const int i_source_margin_c = p_src->p[1].i_pitch
                                 - p_src->p[1].i_visible_pitch
                                 - ( p_filter->fmt_in.video.i_x_offset / 2 );

    i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;

    /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
     * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
     * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
    SetOffset( (p_filter->fmt_in.video.i_x_offset + p_filter->fmt_in.video.i_visible_width),
               (p_filter->fmt_in.video.i_y_offset + p_filter->fmt_in.video.i_visible_height),
               (p_filter->fmt_out.video.i_x_offset + p_filter->fmt_out.video.i_visible_width),
               (p_filter->fmt_out.video.i_y_offset + p_filter->fmt_out.video.i_visible_height),
               &b_hscale, &i_vscale, p_offset_start );


    /*
     * Perform conversion
     */
    i_scale_count = ( i_vscale == 1 ) ?
                    (p_filter->fmt_out.video.i_y_offset + p_filter->fmt_out.video.i_visible_height) :
                    (p_filter->fmt_in.video.i_y_offset + p_filter->fmt_in.video.i_visible_height);

#ifdef SSE2

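    /* (-w) & 15 is the shortfall to the next multiple of 16, or 0 when the
     * width is already a multiple of 16 */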
    i_rewind = (-(p_filter->fmt_in.video.i_x_offset + p_filter->fmt_in.video.i_visible_width)) & 15;

    /*
    ** SSE2 128-bit fetch/store instructions are faster
    ** when memory accesses are 16-byte aligned
    */

    p_buffer = b_hscale ? p_buffer_start : p_pic;
    if( 0 == (15 & (p_src->p[Y_PLANE].i_pitch|
                    p_dest->p->i_pitch|
                    ((intptr_t)p_y)|
                    ((intptr_t)p_buffer))) )
    {
        /* use faster SSE2 aligned fetch and store */
        for( i_y = 0; i_y < (p_filter->fmt_in.video.i_y_offset + p_filter->fmt_in.video.i_visible_height); i_y++ )
        {
            p_pic_start = p_pic;

            for ( i_x = (p_filter->fmt_in.video.i_x_offset + p_filter->fmt_in.video.i_visible_width)/16; i_x--; )
            {
                SSE2_CALL (
                    SSE2_INIT_16_ALIGNED
                    SSE2_YUV_MUL
                    SSE2_YUV_ADD
                    SSE2_UNPACK_15_ALIGNED
                );
                p_y += 16;
                p_u += 8;
                p_v += 8;
                p_buffer += 16;
            }
            /* Here we do some unaligned reads and duplicate conversions, but
             * at least we have all the pixels */
            if( i_rewind )
            {
                p_y -= i_rewind;
                p_u -= i_rewind >> 1;
                p_v -= i_rewind >> 1;
                p_buffer -= i_rewind;

                SSE2_CALL (
                    SSE2_INIT_16_UNALIGNED
                    SSE2_YUV_MUL
                    SSE2_YUV_ADD
                    SSE2_UNPACK_15_UNALIGNED
                );
                p_y += 16;
                p_u += 8;
                p_v += 8;
            }
            SCALE_WIDTH;
            SCALE_HEIGHT( 420, 2 );

            p_y += i_source_margin;
            if( i_y % 2 )
            {
                p_u += i_source_margin_c;
                p_v += i_source_margin_c;
            }
            p_buffer = b_hscale ? p_buffer_start : p_pic;
        }
    }
    else
    {
        /* use slower SSE2 unaligned fetch and store */
        for( i_y = 0; i_y < (p_filter->fmt_in.video.i_y_offset + p_filter->fmt_in.video.i_visible_height); i_y++ )
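The aligned/unaligned split at the top of the SSE2 path hinges on a single bit test: OR-ing both pitches and both base pointers and masking with 15 yields zero only when every one of them is a multiple of 16. A standalone sketch of that check, with hypothetical names:

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/* Illustrative helper: true only when both pitches and both base pointers
 * are 16-byte aligned, i.e. the fast SSE2 aligned load/store path is safe. */
static bool can_use_aligned_sse2(size_t src_pitch, size_t dst_pitch,
                                 const void *p_src, const void *p_dst)
{
    return (((uintptr_t)src_pitch | (uintptr_t)dst_pitch |
             (uintptr_t)p_src | (uintptr_t)p_dst) & 15) == 0;
}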
Example #3
void I420_RGB32( filter_t *p_filter, picture_t *p_src, picture_t *p_dest )
{
    /* We got this one from the old arguments */
    uint32_t *p_pic = (uint32_t*)p_dest->p->p_pixels;
    uint8_t  *p_y   = p_src->Y_PIXELS;
    uint8_t  *p_u   = p_src->U_PIXELS;
    uint8_t  *p_v   = p_src->V_PIXELS;

    bool  b_hscale;                         /* horizontal scaling type */
    unsigned int i_vscale;                          /* vertical scaling type */
    unsigned int i_x, i_y;                /* horizontal and vertical indexes */

    int         i_right_margin;
    int         i_rewind;
    int         i_scale_count;                       /* scale modulo counter */
    int         i_chroma_width = (p_filter->fmt_in.video.i_x_offset + p_filter->fmt_in.video.i_visible_width) / 2; /* chroma width */
    uint32_t *  p_pic_start;       /* beginning of the current line for copy */
    int         i_uval, i_vval;                           /* U and V samples */
    int         i_red, i_green, i_blue;          /* U and V modified samples */
    uint32_t *  p_yuv = p_filter->p_sys->p_rgb32;
    uint32_t *  p_ybase;                     /* Y dependent conversion table */

    /* Conversion buffer pointer */
    uint32_t *  p_buffer_start = (uint32_t*)p_filter->p_sys->p_buffer;
    uint32_t *  p_buffer;

    /* Offset array pointer */
    int *       p_offset_start = p_filter->p_sys->p_offset;
    int *       p_offset;

    const int i_source_margin = p_src->p[0].i_pitch
                                 - p_src->p[0].i_visible_pitch
                                 - p_filter->fmt_in.video.i_x_offset;
    const int i_source_margin_c = p_src->p[1].i_pitch
                                 - p_src->p[1].i_visible_pitch
                                 - ( p_filter->fmt_in.video.i_x_offset / 2 );

    i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
    i_rewind = (-(p_filter->fmt_in.video.i_x_offset + p_filter->fmt_in.video.i_visible_width)) & 7;

    /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
     * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
     * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
    SetOffset( (p_filter->fmt_in.video.i_x_offset + p_filter->fmt_in.video.i_visible_width),
               (p_filter->fmt_in.video.i_y_offset + p_filter->fmt_in.video.i_visible_height),
               (p_filter->fmt_out.video.i_x_offset + p_filter->fmt_out.video.i_visible_width),
               (p_filter->fmt_out.video.i_y_offset + p_filter->fmt_out.video.i_visible_height),
               &b_hscale, &i_vscale, p_offset_start );

    /*
     * Perform conversion
     */
    i_scale_count = ( i_vscale == 1 ) ?
                    (p_filter->fmt_out.video.i_y_offset + p_filter->fmt_out.video.i_visible_height) :
                    (p_filter->fmt_in.video.i_y_offset + p_filter->fmt_in.video.i_visible_height);
    for( i_y = 0; i_y < (p_filter->fmt_in.video.i_y_offset + p_filter->fmt_in.video.i_visible_height); i_y++ )
    {
        p_pic_start = p_pic;
        p_buffer = b_hscale ? p_buffer_start : p_pic;

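        /* Eight pixels per iteration: CONVERT_YUV_PIXEL fetches fresh U/V and
         * recomputes i_red/i_green/i_blue, CONVERT_Y_PIXEL reuses them for the
         * second Y sample of the pair. */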
        for ( i_x = (p_filter->fmt_in.video.i_x_offset + p_filter->fmt_in.video.i_visible_width) / 8; i_x--; )
        {
            CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
            CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
            CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
            CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
        }

        /* Here we do some unaligned reads and duplicate conversions, but
         * at least we have all the pixels */
        if( i_rewind )
        {
            p_y -= i_rewind;
            p_u -= i_rewind >> 1;
            p_v -= i_rewind >> 1;
            p_buffer -= i_rewind;
            CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
            CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
            CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
            CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
        }
        SCALE_WIDTH;
        SCALE_HEIGHT( 420, 4 );

        p_y += i_source_margin;
        if( i_y % 2 )
        {
            p_u += i_source_margin_c;
            p_v += i_source_margin_c;
        }
    }
}
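The `i_y % 2` step at the bottom of every row is the 4:2:0 bookkeeping: one U/V sample covers a 2x2 block of Y, so the chroma pointers only move on to their next row after every second luma row. A minimal standalone sketch of that plane walk; the function and parameter names are illustrative, and it advances by the full pitch because, unlike the loops above, it has not already stepped through the visible pixels:

#include <stdint.h>

/* Illustrative I420 plane walk: Y is sampled per pixel, U/V once per 2x2
 * block, so chroma row pointers advance only after every other luma row. */
static void walk_i420(const uint8_t *p_y, const uint8_t *p_u,
                      const uint8_t *p_v, int width, int height,
                      int y_pitch, int c_pitch)
{
    for (int y = 0; y < height; y++) {
        for (int x = 0; x < width; x++) {
            uint8_t luma     = p_y[x];
            uint8_t chroma_u = p_u[x / 2];   /* one U/V sample per two luma columns */
            uint8_t chroma_v = p_v[x / 2];
            (void)luma; (void)chroma_u; (void)chroma_v;   /* convert + store here */
        }
        p_y += y_pitch;      /* luma advances every row */
        if (y % 2) {         /* chroma advances every other row */
            p_u += c_pitch;
            p_v += c_pitch;
        }
    }
}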