/*
 * __mlib_VideoInterpAveX_U8_U8_8x16
 *
 * VIS code path for an 8x16 block: runs MLIB_V_VIDEOCOPY8 over 16 rows with
 * the alignment state taken from ref_block, then MLIB_V_VIDEOINTERPAVG8 over
 * 16 rows with the alignment state taken from ref_block + 1.
 *
 *   curr_block    block that is read/written through 'dd' (8-byte accesses)
 *   ref_block     reference block; may start at any byte offset
 *   frame_stride  not referenced by name in this body
 *   field_stride  not referenced by name in this body
 *
 * Always returns MLIB_SUCCESS.
 *
 * NOTE(review): both macros are defined outside this function.  Most of the
 * locals below (y, ss0, s1hi, s1lo, s2hi, s2lo, s2, mthree, fzero, fexpd2)
 * and presumably the stride parameters are only touched inside those macros,
 * so their names must not be changed here.
 */
mlib_status
__mlib_VideoInterpAveX_U8_U8_8x16(
	mlib_u8 *curr_block,
	const mlib_u8 *ref_block,
	mlib_s32 frame_stride,
	mlib_s32 field_stride)
{
	mlib_s32 y;
	mlib_d64 *dd, ss0[16], *sp1, *sp2, s1hi, s1lo, s2hi, s2lo, s2;
	mlib_d64 mthree = vis_fone();
	mlib_f32 fzero = vis_fzeros();
	mlib_f32 fexpd2 = vis_to_float(0x1000200);

	/* mthree = 3 * vis_fone() in every 16-bit lane (x + (x + x)) */
	mthree = vis_fpadd16(mthree, vis_fpadd16(mthree, mthree));

	dd = (mlib_d64 *)curr_block;

	/* aligned base of ref_block; also sets the GSR align offset */
	sp1 = (mlib_d64 *)vis_alignaddr((void *)ref_block, 0);

#pragma pipeloop(0)
	MLIB_V_VIDEOCOPY8(16);

	/*
	 * GSR: scale field = 5, align field = byte offset of (ref_block + 1)
	 * inside its 8-byte word.  The pointer is converted through mlib_addr
	 * (as for sp2 below) rather than a 32-bit integer so that no pointer
	 * truncation happens on 64-bit builds; the resulting value is the same.
	 */
	vis_write_gsr((5 << 3) +
	    (mlib_s32)((mlib_addr)(ref_block + 1) & 7));
	sp2 = (mlib_d64 *)((mlib_addr)(ref_block + 1) & ~7);

#pragma pipeloop(0)
	MLIB_V_VIDEOINTERPAVG8(16);

	return (MLIB_SUCCESS);
}
mlib_status __mlib_VideoInterpX_U8_U8( mlib_u8 *curr_block, const mlib_u8 *ref_block, mlib_s32 width, mlib_s32 height, mlib_s32 frame_stride, mlib_s32 field_stride) { mlib_d64 s0, s1, s2, s3, s4, s5, s6, s7, d0, d1, d2, d3, *sd, *dd; mlib_d64 dzero = vis_fzero(); const mlib_f32 fm1 = vis_to_float(0x100); mlib_f32 fzero = vis_read_hi(dzero); mlib_d64 rounder = vis_fone(); mlib_s32 y; vis_write_gsr((6 << 3) + ((mlib_u32)ref_block & 7)); dd = (mlib_d64 *)curr_block; sd = (mlib_d64 *)((mlib_addr)ref_block & ~7); if (width == 8) { y = height >> 2; if (((mlib_s32)(ref_block + 1) & 7)) { do { s0 = sd[0]; s1 = sd[1]; sd = (mlib_d64 *)((mlib_u8 *)sd + field_stride); d0 = vis_faligndata(s0, s1); s2 = sd[0]; s3 = sd[1]; sd = (mlib_d64 *)((mlib_u8 *)sd + field_stride); d1 = vis_faligndata(s2, s3); s4 = sd[0]; s5 = sd[1]; sd = (mlib_d64 *)((mlib_u8 *)sd + field_stride); d2 = vis_faligndata(s4, s5); s6 = sd[0]; s7 = sd[1]; sd = (mlib_d64 *)((mlib_u8 *)sd + field_stride); d3 = vis_faligndata(s6, s7); vis_alignaddr((void *)(ref_block + 1), 0); s0 = vis_faligndata(s0, s1); s1 = vis_faligndata(s2, s3); s2 = vis_faligndata(s4, s5); s3 = vis_faligndata(s6, s7); MLIB_V_VIDEOINTERP(d0, d0, s0); MLIB_V_VIDEOINTERP(d1, d1, s1); MLIB_V_VIDEOINTERP(d2, d2, s2); MLIB_V_VIDEOINTERP4(d3, d3, s3); *dd = d0; dd = (mlib_d64 *)((mlib_u8 *)dd + field_stride); *dd = d1; dd = (mlib_d64 *)((mlib_u8 *)dd + field_stride); *dd = d2; dd = (mlib_d64 *)((mlib_u8 *)dd + field_stride); *dd = d3; dd = (mlib_d64 *)((mlib_u8 *)dd + field_stride); vis_alignaddr((void *)ref_block, 0); } while (--y); } else { do { s0 = sd[0]; s1 = sd[1]; sd = (mlib_d64 *)((mlib_u8 *)sd + field_stride); d0 = vis_faligndata(s0, s1); s2 = sd[0]; s3 = sd[1]; sd = (mlib_d64 *)((mlib_u8 *)sd + field_stride); d1 = vis_faligndata(s2, s3); s4 = sd[0]; s5 = sd[1]; sd = (mlib_d64 *)((mlib_u8 *)sd + field_stride); d2 = vis_faligndata(s4, s5); s6 = sd[0]; s7 = sd[1]; sd = (mlib_d64 *)((mlib_u8 *)sd + field_stride); d3 = vis_faligndata(s6, s7); 
MLIB_V_VIDEOINTERP4(d0, d0, s1); MLIB_V_VIDEOINTERP4(d1, d1, s3); MLIB_V_VIDEOINTERP4(d2, d2, s5); MLIB_V_VIDEOINTERP4(d3, d3, s7); *dd = d0; dd = (mlib_d64 *)((mlib_u8 *)dd + field_stride); *dd = d1; dd = (mlib_d64 *)((mlib_u8 *)dd + field_stride); *dd = d2; dd = (mlib_d64 *)((mlib_u8 *)dd + field_stride); *dd = d3; dd = (mlib_d64 *)((mlib_u8 *)dd + field_stride); } while (--y); } } else {
mlib_u8 *dst, const mlib_u8 *src, mlib_s32 width, mlib_s32 height, mlib_s32 dst_stride, mlib_s32 src_stride) { mlib_s32 x, y, x4 = width >> 2; mlib_d64 *sl1, *sl2, s1hi, s1lo, s2hi, s2lo, s1, s2; mlib_d64 done = vis_to_double_dup(0x1000100); mlib_d64 dmask; mlib_f32 *dp; mlib_f32 frnd = vis_to_float(0x40404040); mlib_s32 src_stride2 = 2 * src_stride; dmask = vis_fpadd16(done, vis_fone()); vis_write_gsr(7 << 3); sl1 = (mlib_d64 *)src; sl2 = (mlib_d64 *)(src + src_stride); dp = (mlib_f32 *)dst; for (y = 0; y < height; y++) { #pragma pipeloop(0) for (x = 0; x < x4; x++) { s1 = sl1[x]; s2 = sl2[x]; s1lo = vis_fand(s1, dmask); s1hi = vis_fmul8sux16(s1, done); s2lo = vis_fand(s2, dmask); s2hi = vis_fmul8sux16(s2, done); s1lo = vis_fpadd16(s1lo, s2lo);
void ADD_SUFF(ThreeByteBgrToIntArgbConvert)(BLIT_PARAMS) { mlib_s32 dstScan = pDstInfo->scanStride; mlib_s32 srcScan = pSrcInfo->scanStride; mlib_d64 *sp; mlib_d64 s_0; mlib_d64 s0, s1, s2, s3, sd0, sd1, sd2, dd0, dd1, dd2, dd3; mlib_s32 i, i0, j; if (width < 16) { for (j = 0; j < height; j++) { mlib_u8 *src = srcBase; mlib_s32 *dst = dstBase; for (i = 0; i < width; i++) { dst[i] = GBR_PIXEL(i); } PTR_ADD(dstBase, dstScan); PTR_ADD(srcBase, srcScan); } return; } if (srcScan == 3*width && dstScan == 4*width) { width *= height; height = 1; } s_0 = vis_fone(); for (j = 0; j < height; j++) { mlib_u8 *src = srcBase; mlib_f32 *dst = dstBase; i = i0 = 0; if ((mlib_s32)dst & 7) { ((mlib_s32*)dst)[i] = GBR_PIXEL(i); i0 = 1; } sp = vis_alignaddr(src, 3*i0); s3 = *sp++; #pragma pipeloop(0) for (i = i0; i <= (mlib_s32)width - 8; i += 8) { s0 = s3; s1 = *sp++; s2 = *sp++; s3 = *sp++; sd0 = vis_faligndata(s0, s1); sd1 = vis_faligndata(s1, s2); sd2 = vis_faligndata(s2, s3); BGR_TO_ARGB *(mlib_d64*)(dst + i ) = dd0; *(mlib_d64*)(dst + i + 2) = dd1; *(mlib_d64*)(dst + i + 4) = dd2; *(mlib_d64*)(dst + i + 6) = dd3; } for (; i < width; i++) { ((mlib_s32*)dst)[i] = GBR_PIXEL(i); } PTR_ADD(dstBase, dstScan); PTR_ADD(srcBase, srcScan); } }
void ADD_SUFF(ThreeByteBgrToIntArgbScaleConvert)(SCALE_PARAMS) { mlib_s32 dstScan = pDstInfo->scanStride; mlib_s32 srcScan = pSrcInfo->scanStride; mlib_d64 dd, maskFF; mlib_s32 i, i0, i1, j; if (width < 16) { for (j = 0; j < height; j++) { mlib_u8 *src = srcBase; mlib_s32 *dst = dstBase; mlib_s32 *dst_end = dst + width; mlib_s32 tmpsxloc = sxloc; PTR_ADD(src, (syloc >> shift) * srcScan); for (; dst < dst_end; dst++) { i = tmpsxloc >> shift; tmpsxloc += sxinc; *(mlib_s32*)dst = GBR_PIXEL(i); } PTR_ADD(dstBase, dstScan); syloc += syinc; } return; } maskFF = vis_fone(); vis_alignaddr(NULL, 7); for (j = 0; j < height; j++) { mlib_u8 *src = srcBase; mlib_f32 *dst = dstBase; mlib_f32 *dst_end = dst + width; mlib_s32 tmpsxloc = sxloc; PTR_ADD(src, (syloc >> shift) * srcScan); if ((mlib_s32)dst & 7) { i = tmpsxloc >> shift; tmpsxloc += sxinc; *(mlib_s32*)dst = GBR_PIXEL(i); dst++; } #pragma pipeloop(0) for (; dst <= dst_end - 2; dst += 2) { i0 = tmpsxloc >> shift; i1 = (tmpsxloc + sxinc) >> shift; tmpsxloc += 2*sxinc; dd = vis_faligndata(vis_ld_u8(src + 3*i1 ), dd); dd = vis_faligndata(vis_ld_u8(src + 3*i1 + 1), dd); dd = vis_faligndata(vis_ld_u8(src + 3*i1 + 2), dd); dd = vis_faligndata(maskFF, dd); dd = vis_faligndata(vis_ld_u8(src + 3*i0 ), dd); dd = vis_faligndata(vis_ld_u8(src + 3*i0 + 1), dd); dd = vis_faligndata(vis_ld_u8(src + 3*i0 + 2), dd); dd = vis_faligndata(maskFF, dd); *(mlib_d64*)dst = dd; } for (; dst < dst_end; dst++) { i = tmpsxloc >> shift; tmpsxloc += sxinc; *(mlib_s32*)dst = GBR_PIXEL(i); } PTR_ADD(dstBase, dstScan); syloc += syinc; }
/*
 * __mlib_VideoInterpAveX_U8_U8_16x16
 *
 * VIS code path for a 16x16 block.  Each of the 8 loop iterations handles
 * two rows; a row is two 8-byte words of curr_block.  For every output word
 * the code builds the reference data aligned at ref_block (sd0..sd3) and the
 * reference data aligned at ref_block + 1, reads the current contents of
 * curr_block (d0..d3), and hands the three values to MLIB_V_VIDEOINTERPAVG,
 * whose result is stored back to curr_block.
 *
 *   curr_block    block that is read and overwritten (8-byte accesses)
 *   ref_block     reference block; may start at any byte offset
 *   frame_stride  not referenced by name in this body
 *   field_stride  byte distance between consecutive rows, applied to both
 *                 curr_block and ref_block
 *
 * Always returns MLIB_SUCCESS.
 *
 * NOTE(review): the MLIB_V_VIDEOINTERPAVG* macros are defined outside this
 * function; fm2, fzero and rounder are presumably consumed by them, so the
 * local names are kept.
 */
mlib_status
__mlib_VideoInterpAveX_U8_U8_16x16(
	mlib_u8 *curr_block,
	const mlib_u8 *ref_block,
	mlib_s32 frame_stride,
	mlib_s32 field_stride)
{
	mlib_d64 s0, s1, s2, s3, s4, s5, s6;
	mlib_d64 sd0, sd1, sd2, sd3, d0, d1, d2, d3;
	mlib_d64 *sd, *dd;
	mlib_d64 dzero = vis_fzero();
	const mlib_f32 fm2 = vis_to_float(0x1000200);
	mlib_f32 fzero = vis_read_hi(dzero);
	mlib_d64 rounder = vis_fpsub16(dzero, vis_fone());
	mlib_s32 y;

	/* rounder = 3 * (0 - vis_fone()) in every 16-bit lane */
	rounder = vis_fpadd16(vis_fpadd16(rounder, rounder), rounder);

	/*
	 * GSR: scale field = 5, align field = byte offset of ref_block inside
	 * its 8-byte word.  Pointers are converted through mlib_addr (as for
	 * 'sd' below) instead of a 32-bit integer so that nothing is truncated
	 * on 64-bit builds; the low three bits are unchanged.
	 */
	vis_write_gsr((5 << 3) + (mlib_u32)((mlib_addr)ref_block & 7));

	dd = (mlib_d64 *)curr_block;
	sd = (mlib_d64 *)((mlib_addr)ref_block & ~7);

	y = 8;

	if (((mlib_addr)(ref_block + 1) & 7) != 0) {
		/*
		 * ref_block + 1 is not 8-byte aligned: the same three loaded
		 * words per row serve both alignments, so the GSR align
		 * offset is switched to ref_block + 1 in the middle of the
		 * iteration and restored at its end.
		 */
		do {
			/* row 0, aligned at ref_block */
			s0 = sd[0];
			s1 = sd[1];
			s2 = sd[2];
			sd0 = vis_faligndata(s0, s1);
			sd1 = vis_faligndata(s1, s2);
			sd = (mlib_d64 *)((mlib_u8 *)sd + field_stride);

			/* row 1, aligned at ref_block */
			s4 = sd[0];
			s5 = sd[1];
			s6 = sd[2];
			sd2 = vis_faligndata(s4, s5);
			sd3 = vis_faligndata(s5, s6);

			/* switch align offset to ref_block + 1 */
			vis_alignaddr((void *)(ref_block + 1), 0);
			sd = (mlib_d64 *)((mlib_u8 *)sd + field_stride);

			/* current destination contents for both rows */
			d0 = dd[0];
			d1 = dd[1];
			d2 = ((mlib_d64 *)((mlib_u8 *)dd + field_stride))[0];
			d3 = ((mlib_d64 *)((mlib_u8 *)dd + field_stride))[1];

			/* same words, aligned at ref_block + 1 */
			s0 = vis_faligndata(s0, s1);
			s1 = vis_faligndata(s1, s2);
			s2 = vis_faligndata(s4, s5);
			s3 = vis_faligndata(s5, s6);

			MLIB_V_VIDEOINTERPAVG(d0, sd0, s0);
			MLIB_V_VIDEOINTERPAVG(d1, sd1, s1);
			MLIB_V_VIDEOINTERPAVG(d2, sd2, s2);
			MLIB_V_VIDEOINTERPAVG(d3, sd3, s3);

			dd[0] = d0;
			dd[1] = d1;
			dd = (mlib_d64 *)((mlib_u8 *)dd + field_stride);
			dd[0] = d2;
			dd[1] = d3;
			dd = (mlib_d64 *)((mlib_u8 *)dd + field_stride);

			/* restore align offset for the next iteration */
			vis_alignaddr((void *)ref_block, 0);
		} while (--y);
	} else {
		/*
		 * ref_block + 1 is 8-byte aligned: the data at ref_block + 1
		 * is exactly the next loaded word (s1/s2, s5/s6), so no
		 * second alignment pass is needed.
		 */
		do {
			/* row 0, aligned at ref_block */
			s0 = sd[0];
			s1 = sd[1];
			s2 = sd[2];
			sd0 = vis_faligndata(s0, s1);
			sd1 = vis_faligndata(s1, s2);
			sd = (mlib_d64 *)((mlib_u8 *)sd + field_stride);

			/* row 1, aligned at ref_block */
			s4 = sd[0];
			s5 = sd[1];
			s6 = sd[2];
			sd2 = vis_faligndata(s4, s5);
			sd3 = vis_faligndata(s5, s6);
			sd = (mlib_d64 *)((mlib_u8 *)sd + field_stride);

			/* current destination contents for both rows */
			d0 = dd[0];
			d1 = dd[1];
			d2 = ((mlib_d64 *)((mlib_u8 *)dd + field_stride))[0];
			d3 = ((mlib_d64 *)((mlib_u8 *)dd + field_stride))[1];

			/*
			 * NOTE(review): the first call uses
			 * MLIB_V_VIDEOINTERPAVG0 while the other three (and
			 * all four in the branch above) use
			 * MLIB_V_VIDEOINTERPAVG - confirm this is intended.
			 */
			MLIB_V_VIDEOINTERPAVG0(d0, sd0, s1);
			MLIB_V_VIDEOINTERPAVG(d1, sd1, s2);
			MLIB_V_VIDEOINTERPAVG(d2, sd2, s5);
			MLIB_V_VIDEOINTERPAVG(d3, sd3, s6);

			dd[0] = d0;
			dd[1] = d1;
			dd = (mlib_d64 *)((mlib_u8 *)dd + field_stride);
			dd[0] = d2;
			dd[1] = d3;
			dd = (mlib_d64 *)((mlib_u8 *)dd + field_stride);
		} while (--y);
	}

	return (MLIB_SUCCESS);
}