/*
 * 8x16 variant of the "InterpAveX" video kernel (per the function name).
 * The pixel arithmetic itself lives in the MLIB_V_VIDEOCOPY8 and
 * MLIB_V_VIDEOINTERPAVG8 macros, which are defined outside this view and
 * pick up the locals declared below by name -- do not rename or remove a
 * local without checking those macros.
 *
 *   curr_block   - block that is aliased through `dd` for the macros
 *   ref_block    - reference data; addressed at ref_block and ref_block + 1
 *   frame_stride - not referenced directly in this body (the macros may use it)
 *   field_stride - not referenced directly in this body (the macros may use it)
 *
 * Always returns MLIB_SUCCESS.
 */
mlib_status
__mlib_VideoInterpAveX_U8_U8_8x16(
    mlib_u8 *curr_block,
    const mlib_u8 *ref_block,
    mlib_s32 frame_stride,
    mlib_s32 field_stride)
{
    mlib_s32 y;
    mlib_d64 *dd, ss0[16], *sp1, *sp2, s1hi, s1lo, s2hi, s2lo, s2;
    /* All-ones bit pattern, i.e. -1 in every 16-bit lane. */
    mlib_d64 mthree = vis_fone();
    mlib_f32 fzero = vis_fzeros();
    /* Constant consumed by the macros; NOTE(review): meaning not visible here. */
    mlib_f32 fexpd2 = vis_to_float(0x1000200);

    /* mthree + (mthree + mthree): each 16-bit lane becomes 3 * (-1) = -3. */
    mthree = vis_fpadd16(mthree, vis_fpadd16(mthree, mthree));

    dd = (mlib_d64 *)curr_block;

    /*
     * 8-byte-aligned address at or below ref_block; as a side effect the
     * GSR alignment offset is set to the low three bits of ref_block.
     */
    sp1 = (mlib_d64 *)vis_alignaddr((void *)ref_block, 0);

    /*
     * First pass over 16 rows. Presumably fills ss0[] from sp1 using the
     * alignment set above -- TODO confirm against the macro definition.
     */
#pragma pipeloop(0)
    MLIB_V_VIDEOCOPY8(16);

    /*
     * Re-program the GSR: scale factor 5 (bits 3 and up) and the alignment
     * offset of ref_block + 1 (low three bits), then take the matching
     * 8-byte-aligned base address for the one-byte-shifted reads.
     */
    vis_write_gsr((5 << 3) + ((mlib_s32)(ref_block + 1) & 7));
    sp2 = (mlib_d64 *)((mlib_addr)(ref_block + 1) & ~7);

    /* Second pass over 16 rows; the interpolate/average step is in the macro. */
#pragma pipeloop(0)
    MLIB_V_VIDEOINTERPAVG8(16);

    return (MLIB_SUCCESS);
}
mlib_status
__mlib_VideoInterpX_U8_U8(
	mlib_u8 *curr_block,
	const mlib_u8 *ref_block,
	mlib_s32 width,
	mlib_s32 height,
	mlib_s32 frame_stride,
	mlib_s32 field_stride)
{
	mlib_d64 s0, s1, s2, s3, s4, s5, s6, s7, d0, d1, d2, d3, *sd, *dd;
	mlib_d64 dzero = vis_fzero();
	const mlib_f32 fm1 = vis_to_float(0x100);
	mlib_f32 fzero = vis_read_hi(dzero);
	mlib_d64 rounder = vis_fone();
	mlib_s32 y;

	vis_write_gsr((6 << 3) + ((mlib_u32)ref_block & 7));
	dd = (mlib_d64 *)curr_block;
	sd = (mlib_d64 *)((mlib_addr)ref_block & ~7);

	if (width == 8) {
		y = height >> 2;

		if (((mlib_s32)(ref_block + 1) & 7)) {
			do {
				s0 = sd[0];
				s1 = sd[1];
				sd = (mlib_d64 *)((mlib_u8 *)sd + field_stride);
				d0 = vis_faligndata(s0, s1);
				s2 = sd[0];
				s3 = sd[1];
				sd = (mlib_d64 *)((mlib_u8 *)sd + field_stride);
				d1 = vis_faligndata(s2, s3);
				s4 = sd[0];
				s5 = sd[1];
				sd = (mlib_d64 *)((mlib_u8 *)sd + field_stride);
				d2 = vis_faligndata(s4, s5);
				s6 = sd[0];
				s7 = sd[1];
				sd = (mlib_d64 *)((mlib_u8 *)sd + field_stride);
				d3 = vis_faligndata(s6, s7);
				vis_alignaddr((void *)(ref_block + 1), 0);
				s0 = vis_faligndata(s0, s1);
				s1 = vis_faligndata(s2, s3);
				s2 = vis_faligndata(s4, s5);
				s3 = vis_faligndata(s6, s7);

				MLIB_V_VIDEOINTERP(d0, d0, s0);
				MLIB_V_VIDEOINTERP(d1, d1, s1);
				MLIB_V_VIDEOINTERP(d2, d2, s2);
				MLIB_V_VIDEOINTERP4(d3, d3, s3);

				*dd = d0;
				dd = (mlib_d64 *)((mlib_u8 *)dd + field_stride);
				*dd = d1;
				dd = (mlib_d64 *)((mlib_u8 *)dd + field_stride);
				*dd = d2;
				dd = (mlib_d64 *)((mlib_u8 *)dd + field_stride);
				*dd = d3;
				dd = (mlib_d64 *)((mlib_u8 *)dd + field_stride);

				vis_alignaddr((void *)ref_block, 0);
			} while (--y);
		} else {
			do {
				s0 = sd[0];
				s1 = sd[1];
				sd = (mlib_d64 *)((mlib_u8 *)sd + field_stride);
				d0 = vis_faligndata(s0, s1);
				s2 = sd[0];
				s3 = sd[1];
				sd = (mlib_d64 *)((mlib_u8 *)sd + field_stride);
				d1 = vis_faligndata(s2, s3);
				s4 = sd[0];
				s5 = sd[1];
				sd = (mlib_d64 *)((mlib_u8 *)sd + field_stride);
				d2 = vis_faligndata(s4, s5);
				s6 = sd[0];
				s7 = sd[1];
				sd = (mlib_d64 *)((mlib_u8 *)sd + field_stride);
				d3 = vis_faligndata(s6, s7);

				MLIB_V_VIDEOINTERP4(d0, d0, s1);
				MLIB_V_VIDEOINTERP4(d1, d1, s3);
				MLIB_V_VIDEOINTERP4(d2, d2, s5);
				MLIB_V_VIDEOINTERP4(d3, d3, s7);

				*dd = d0;
				dd = (mlib_d64 *)((mlib_u8 *)dd + field_stride);
				*dd = d1;
				dd = (mlib_d64 *)((mlib_u8 *)dd + field_stride);
				*dd = d2;
				dd = (mlib_d64 *)((mlib_u8 *)dd + field_stride);
				*dd = d3;
				dd = (mlib_d64 *)((mlib_u8 *)dd + field_stride);

			} while (--y);
		}
	} else {
	mlib_u8 *dst,
	const mlib_u8 *src,
	mlib_s32 width,
	mlib_s32 height,
	mlib_s32 dst_stride,
	mlib_s32 src_stride)
{
	mlib_s32 x, y, x4 = width >> 2;
	mlib_d64 *sl1, *sl2, s1hi, s1lo, s2hi, s2lo, s1, s2;
	mlib_d64 done = vis_to_double_dup(0x1000100);
	mlib_d64 dmask;
	mlib_f32 *dp;
	mlib_f32 frnd = vis_to_float(0x40404040);
	mlib_s32 src_stride2 = 2 * src_stride;

	dmask = vis_fpadd16(done, vis_fone());
	vis_write_gsr(7 << 3);
	sl1 = (mlib_d64 *)src;
	sl2 = (mlib_d64 *)(src + src_stride);
	dp = (mlib_f32 *)dst;

	for (y = 0; y < height; y++) {
#pragma pipeloop(0)
		for (x = 0; x < x4; x++) {
			s1 = sl1[x];
			s2 = sl2[x];
			s1lo = vis_fand(s1, dmask);
			s1hi = vis_fmul8sux16(s1, done);
			s2lo = vis_fand(s2, dmask);
			s2hi = vis_fmul8sux16(s2, done);
			s1lo = vis_fpadd16(s1lo, s2lo);
Exemple #4
0
/*
 * Converts a rectangle of 3-byte-per-pixel source data into 4-byte-per-pixel
 * destination data (ThreeByteBgr -> IntArgb per the function name).
 *
 * srcBase, dstBase, width, height, pSrcInfo and pDstInfo are not declared
 * here; presumably they are the parameters supplied by BLIT_PARAMS.
 * GBR_PIXEL, BGR_TO_ARGB and PTR_ADD are macros defined outside this view
 * that use the locals below (src, s_0, sd0..sd2, dd0..dd3, ...) by name.
 */
void ADD_SUFF(ThreeByteBgrToIntArgbConvert)(BLIT_PARAMS)
{
    mlib_s32 dstScan = pDstInfo->scanStride;
    mlib_s32 srcScan = pSrcInfo->scanStride;
    mlib_d64 *sp;
    mlib_d64 s_0;
    mlib_d64 s0, s1, s2, s3, sd0, sd1, sd2, dd0, dd1, dd2, dd3;
    mlib_s32 i, i0, j;

    /* Narrow rows: plain per-pixel conversion, no VIS path. */
    if (width < 16) {
	for (j = 0; j < height; j++) {
	    mlib_u8  *src = srcBase;
	    mlib_s32 *dst = dstBase;

	    for (i = 0; i < width; i++) {
		dst[i] = GBR_PIXEL(i);
	    }

	    /* Step both base pointers to the next scanline. */
	    PTR_ADD(dstBase, dstScan);
	    PTR_ADD(srcBase, srcScan);
	}
	return;    
    }

    /*
     * Rows are back-to-back in both buffers (no padding), so the whole
     * rectangle can be processed as one long row.
     */
    if (srcScan == 3*width && dstScan == 4*width) {
	width *= height;
	height = 1;
    }

    /* All-ones constant; presumably read by BGR_TO_ARGB -- TODO confirm. */
    s_0 = vis_fone();

    for (j = 0; j < height; j++) {
	mlib_u8  *src = srcBase;
	mlib_f32 *dst = dstBase;

	i = i0 = 0;

	/*
	 * dst is not 8-byte aligned: convert the first pixel on its own so
	 * that the 8-byte stores below start at dst + 1 (assumes dst is at
	 * least 4-byte aligned).
	 */
	if ((mlib_s32)dst & 7) {
	    ((mlib_s32*)dst)[i] = GBR_PIXEL(i);
	    i0 = 1;
	}

	/*
	 * Aligned address covering src + 3*i0; also sets the GSR alignment
	 * offset used by every vis_faligndata() in the loop. s3 is primed
	 * with the first aligned word.
	 */
	sp = vis_alignaddr(src, 3*i0);
	s3 = *sp++;

	/*
	 * Main loop: 8 pixels per iteration -- three more aligned source
	 * words are loaded (the last one is carried over as s0 of the next
	 * iteration) and four 8-byte results are stored.
	 */
#pragma pipeloop(0)
	for (i = i0; i <= (mlib_s32)width - 8; i += 8) {
	    s0 = s3;
	    s1 = *sp++;
	    s2 = *sp++;
	    s3 = *sp++;
	    /* Realign to get the 24 source bytes of these 8 pixels. */
	    sd0 = vis_faligndata(s0, s1);
	    sd1 = vis_faligndata(s1, s2);
	    sd2 = vis_faligndata(s2, s3);

	    /* Macro: presumably expands sd0..sd2 into dd0..dd3. */
	    BGR_TO_ARGB

	    *(mlib_d64*)(dst + i    ) = dd0;
	    *(mlib_d64*)(dst + i + 2) = dd1;
	    *(mlib_d64*)(dst + i + 4) = dd2;
	    *(mlib_d64*)(dst + i + 6) = dd3;
	}

	/* Remaining pixels (fewer than 8) one at a time. */
	for (; i < width; i++) {
	    ((mlib_s32*)dst)[i] = GBR_PIXEL(i);
	}

	PTR_ADD(dstBase, dstScan);
	PTR_ADD(srcBase, srcScan);
    }
}
Exemple #5
0
/*
 * Point-sampled scaling conversion from 3-byte-per-pixel source data to
 * 4-byte-per-pixel destination data (ThreeByteBgr -> IntArgb per the
 * function name).
 *
 * For every destination pixel the source column index is
 * (fixed-point x location) >> shift, stepped by sxinc; the source row is
 * (syloc >> shift), stepped by syinc per destination row.
 *
 * srcBase, dstBase, width, height, sxloc, syloc, sxinc, syinc, shift,
 * pSrcInfo and pDstInfo are not declared here; presumably they are the
 * parameters supplied by SCALE_PARAMS. GBR_PIXEL and PTR_ADD are macros
 * defined outside this view; GBR_PIXEL reads the local `src`.
 */
void ADD_SUFF(ThreeByteBgrToIntArgbScaleConvert)(SCALE_PARAMS)
{
    mlib_s32 dstScan = pDstInfo->scanStride;
    mlib_s32 srcScan = pSrcInfo->scanStride;
    mlib_d64 dd, maskFF;
    mlib_s32 i, i0, i1, j;

    /* Narrow rows: plain per-pixel conversion, no VIS path. */
    if (width < 16) {
	for (j = 0; j < height; j++) {
	    mlib_u8  *src = srcBase;
	    mlib_s32 *dst = dstBase;
	    mlib_s32 *dst_end = dst + width;
	    mlib_s32 tmpsxloc = sxloc;

	    /* Select the source scanline for this destination row. */
	    PTR_ADD(src, (syloc >> shift) * srcScan);

	    for (; dst < dst_end; dst++) {
		i = tmpsxloc >> shift;
		tmpsxloc += sxinc;
		*(mlib_s32*)dst = GBR_PIXEL(i);
	    }

	    PTR_ADD(dstBase, dstScan);
	    syloc += syinc;
	}
	return;
    }

    /* All-ones word: its last byte (0xFF) is shifted in as the fourth byte. */
    maskFF = vis_fone();

    /*
     * Seed the accumulator so the first vis_faligndata() below does not read
     * an indeterminate value. The seed never reaches memory: each group of
     * eight one-byte shifts replaces all eight bytes of dd.
     */
    dd = maskFF;

    /*
     * GSR alignment offset 7: vis_faligndata(a, b) now yields the last byte
     * of `a` followed by the first seven bytes of `b`, i.e. it pushes one
     * byte into the front of the accumulator.
     */
    vis_alignaddr(NULL, 7);

    for (j = 0; j < height; j++) {
	mlib_u8  *src = srcBase;
	mlib_f32 *dst = dstBase;
	mlib_f32 *dst_end = dst + width;
	mlib_s32 tmpsxloc = sxloc;

	PTR_ADD(src, (syloc >> shift) * srcScan);

	/*
	 * dst is not 8-byte aligned: emit one pixel on its own so the 8-byte
	 * stores below are aligned (assumes dst is at least 4-byte aligned).
	 */
	if ((mlib_s32)dst & 7) {
	    i = tmpsxloc >> shift;
	    tmpsxloc += sxinc;
	    *(mlib_s32*)dst = GBR_PIXEL(i);
	    dst++;
	}

	/*
	 * Two destination pixels per iteration. Bytes are pushed in reverse
	 * memory order (second pixel first), so the stored word reads:
	 *   FF, src[3*i0+2], src[3*i0+1], src[3*i0],
	 *   FF, src[3*i1+2], src[3*i1+1], src[3*i1]
	 */
#pragma pipeloop(0)
	for (; dst <= dst_end - 2; dst += 2) {
	    i0 = tmpsxloc >> shift;
	    i1 = (tmpsxloc + sxinc) >> shift;
	    tmpsxloc += 2*sxinc;

	    dd = vis_faligndata(vis_ld_u8(src + 3*i1    ), dd);
	    dd = vis_faligndata(vis_ld_u8(src + 3*i1 + 1), dd);
	    dd = vis_faligndata(vis_ld_u8(src + 3*i1 + 2), dd);
	    dd = vis_faligndata(maskFF, dd);
	    dd = vis_faligndata(vis_ld_u8(src + 3*i0    ), dd);
	    dd = vis_faligndata(vis_ld_u8(src + 3*i0 + 1), dd);
	    dd = vis_faligndata(vis_ld_u8(src + 3*i0 + 2), dd);
	    dd = vis_faligndata(maskFF, dd);

	    *(mlib_d64*)dst = dd;
	}

	/* At most one trailing pixel is left. */
	for (; dst < dst_end; dst++) {
	    i = tmpsxloc >> shift;
	    tmpsxloc += sxinc;
	    *(mlib_s32*)dst = GBR_PIXEL(i);
	}

	PTR_ADD(dstBase, dstScan);
	syloc += syinc;
    }
}
/*
 * 16x16 variant of the "InterpAveX" video kernel (per the function name).
 * Each loop iteration handles two 16-byte-wide rows; eight iterations cover
 * 16 rows. For every 8-byte group the macro receives the current contents of
 * the destination (d*), the reference data starting at ref_block (sd*) and
 * the reference data starting at ref_block + 1, and the result is stored
 * back through dd. The arithmetic is in MLIB_V_VIDEOINTERPAVG /
 * MLIB_V_VIDEOINTERPAVG0, which are defined outside this view and presumably
 * use fm2, fzero and rounder by name.
 *
 *   curr_block   - block read and rewritten in place through `dd`
 *   ref_block    - reference data; may have any byte alignment
 *   frame_stride - not referenced directly in this body
 *   field_stride - byte distance between consecutive rows of both blocks
 *
 * Always returns MLIB_SUCCESS.
 */
mlib_status
__mlib_VideoInterpAveX_U8_U8_16x16(
    mlib_u8 *curr_block,
    const mlib_u8 *ref_block,
    mlib_s32 frame_stride,
    mlib_s32 field_stride)
{
    mlib_d64 s0, s1, s2, s3, s4, s5, s6;
    mlib_d64 sd0, sd1, sd2, sd3, d0, d1, d2, d3;
    mlib_d64 *sd, *dd;
    mlib_d64 dzero = vis_fzero();
    /* Constant consumed by the macros; NOTE(review): meaning not visible here. */
    const mlib_f32 fm2 = vis_to_float(0x1000200);
    mlib_f32 fzero = vis_read_hi(dzero);
    /* 0 - (-1) = 1 in every 16-bit lane. */
    mlib_d64 rounder = vis_fpsub16(dzero, vis_fone());
    mlib_s32 y;

    /* (1 + 1) + 1 = 3 in every 16-bit lane. */
    rounder = vis_fpadd16(vis_fpadd16(rounder, rounder), rounder);
    /* GSR: scale factor 5, alignment offset = low three bits of ref_block. */
    vis_write_gsr((5 << 3) + ((mlib_u32)ref_block & 7));
    dd = (mlib_d64 *)curr_block;
    /* 8-byte-aligned address at or below ref_block. */
    sd = (mlib_d64 *)((mlib_addr)ref_block & ~7);

    /* 8 iterations x 2 rows per iteration. */
    y = 8;

    if (((mlib_s32)(ref_block + 1) & 7)) {
        /*
         * ref_block + 1 is not 8-byte aligned, so the one-byte-shifted data
         * sits in the same three aligned words as the unshifted data and is
         * extracted with a second alignment offset.
         */
        do {
            /* Row A: three aligned words -> 16 bytes starting at ref_block. */
            s0 = sd[0];
            s1 = sd[1];
            s2 = sd[2];
            sd0 = vis_faligndata(s0, s1);
            sd1 = vis_faligndata(s1, s2);
            sd = (mlib_d64 *)((mlib_u8 *)sd + field_stride);
            /* Row B: same for the next row. */
            s4 = sd[0];
            s5 = sd[1];
            s6 = sd[2];
            sd2 = vis_faligndata(s4, s5);
            sd3 = vis_faligndata(s5, s6);
            /* Switch the GSR alignment offset to that of ref_block + 1. */
            vis_alignaddr((void *)(ref_block + 1), 0);
            sd = (mlib_d64 *)((mlib_u8 *)sd + field_stride);
            /* Current destination contents for both rows. */
            d0 = dd[0];
            d1 = dd[1];
            d2 = ((mlib_d64 *)((mlib_u8 *)dd + field_stride))[0];
            d3 = ((mlib_d64 *)((mlib_u8 *)dd + field_stride))[1];
            /* Same words re-extracted one byte further along. */
            s0 = vis_faligndata(s0, s1);
            s1 = vis_faligndata(s1, s2);
            s2 = vis_faligndata(s4, s5);
            s3 = vis_faligndata(s5, s6);

            MLIB_V_VIDEOINTERPAVG(d0, sd0, s0);
            MLIB_V_VIDEOINTERPAVG(d1, sd1, s1);
            MLIB_V_VIDEOINTERPAVG(d2, sd2, s2);
            MLIB_V_VIDEOINTERPAVG(d3, sd3, s3);

            dd[0] = d0;
            dd[1] = d1;
            dd = (mlib_d64 *)((mlib_u8 *)dd + field_stride);
            dd[0] = d2;
            dd[1] = d3;
            dd = (mlib_d64 *)((mlib_u8 *)dd + field_stride);
            /* Restore the alignment offset of ref_block for the next pass. */
            vis_alignaddr((void *)ref_block, 0);
        } while (--y);
    } else {
        /*
         * ref_block + 1 is 8-byte aligned: the shifted data is exactly the
         * second and third aligned words, so no re-extraction is needed.
         */
        do {
            s0 = sd[0];
            s1 = sd[1];
            s2 = sd[2];
            sd0 = vis_faligndata(s0, s1);
            sd1 = vis_faligndata(s1, s2);
            sd = (mlib_d64 *)((mlib_u8 *)sd + field_stride);
            s4 = sd[0];
            s5 = sd[1];
            s6 = sd[2];
            sd2 = vis_faligndata(s4, s5);
            sd3 = vis_faligndata(s5, s6);
            sd = (mlib_d64 *)((mlib_u8 *)sd + field_stride);
            d0 = dd[0];
            d1 = dd[1];
            d2 = ((mlib_d64 *)((mlib_u8 *)dd + field_stride))[0];
            d3 = ((mlib_d64 *)((mlib_u8 *)dd + field_stride))[1];

            /*
             * NOTE(review): only this first call uses the ...AVG0 variant;
             * the other three (and all four in the branch above) use
             * ...AVG. The macro bodies are not visible here -- confirm this
             * asymmetry is intended.
             */
            MLIB_V_VIDEOINTERPAVG0(d0, sd0, s1);
            MLIB_V_VIDEOINTERPAVG(d1, sd1, s2);
            MLIB_V_VIDEOINTERPAVG(d2, sd2, s5);
            MLIB_V_VIDEOINTERPAVG(d3, sd3, s6);

            dd[0] = d0;
            dd[1] = d1;
            dd = (mlib_d64 *)((mlib_u8 *)dd + field_stride);
            dd[0] = d2;
            dd[1] = d3;
            dd = (mlib_d64 *)((mlib_u8 *)dd + field_stride);
        } while (--y);
    }
    return (MLIB_SUCCESS);
}