/*
 * mlib_ImageLineXor8000 - write src[0 .. size-1], XOR-ed with the 64-bit
 * constant vis_to_double_dup(0x80008000), to dst[0 .. size-1].
 *
 *   src   source bytes (any alignment)
 *   dst   destination bytes (any alignment)
 *   size  number of bytes to process
 *
 * The destination is written in 8-byte aligned blocks.  The source is
 * re-aligned to those blocks with VIS_ALIGNADDR / vis_faligndata, and the
 * partial first and last blocks are written with vis_pst_8 under a
 * vis_edge8 mask, so bytes outside [dst, dst + size) are not stored.
 *
 * NOTE(review): the XOR constant is applied per aligned destination
 * block, so dst is presumably expected to be 2-byte aligned for 0x8000
 * to line up with each 16-bit sample -- confirm against the callers.
 */
void
mlib_ImageLineXor8000(
    const mlib_u8 *src,
    mlib_u8 *dst,
    mlib_s32 size)
{
	mlib_d64 sign_flip = vis_to_double_dup(0x80008000);
	mlib_u8 *last_byte;
	mlib_d64 *out_blk;
	mlib_d64 *in_blk;
	mlib_d64 older, newer;
	mlib_s32 pos;
	mlib_s32 edge;

	/*
	 * out_blk is the aligned 8-byte block holding dst[0]; pos is the
	 * (zero or negative) offset of that block relative to dst.
	 */
	out_blk = (mlib_d64 *)((mlib_addr)dst & (~7));
	pos = (mlib_addr)out_blk - (mlib_addr)dst;
	last_byte = (mlib_u8 *)dst + size - 1;

	/* align the source to the same offset; this also sets GSR.align */
	in_blk = (mlib_d64 *)VIS_ALIGNADDR(src, pos);

	/* byte mask for the first destination block */
	edge = vis_edge8(dst, last_byte);

	/* may lie partly before src, hence the non-faulting load */
	newer = vis_ld_d64_nf(in_blk);

	if (edge != 0xff) {
		/* first block is partial: store only the bytes inside dst */
		older = newer;
		in_blk++;
		newer = vis_ld_d64_nf(in_blk);
		older = vis_fxor(vis_faligndata(older, newer), sign_flip);
		vis_pst_8(older, out_blk, edge);
		out_blk++;
		pos += 8;
	}

	/*
	 * Full blocks.  The bound is size - 16 (not size - 8) so that the
	 * plain load of in_blk[1] stays inside the source range.
	 */
#pragma pipeloop(0)
	for (; pos <= (size - 16); pos += 8) {
		older = newer;
		newer = in_blk[1];
		in_blk++;
		*out_blk = vis_fxor(vis_faligndata(older, newer), sign_flip);
		out_blk++;
	}

	/* last full block: the look-ahead load may run past src */
	if (pos <= (size - 8)) {
		older = newer;
		in_blk++;
		newer = vis_ld_d64_nf(in_blk);
		*out_blk = vis_fxor(vis_faligndata(older, newer), sign_flip);
		out_blk++;
		pos += 8;
	}

	/* trailing partial block, stored under an edge mask */
	if (pos < size) {
		older = vis_fxor(
		    vis_faligndata(newer, vis_ld_d64_nf(in_blk + 1)),
		    sign_flip);
		edge = vis_edge8(out_blk, last_byte);
		vis_pst_8(older, out_blk, edge);
	}
}
mlib_status mlib_ImageChannelMerge2_S16( mlib_s16 *dst_s16_0, const mlib_s16 *src_s16_0, const mlib_s16 *src_s16_1, mlib_s32 height, mlib_s32 width, mlib_s32 dst_stride, mlib_s32 src0_stride, mlib_s32 src1_stride) { mlib_s32 i, j, k, n = width * 2; mlib_u32 *dp, s0, s1; mlib_s16 *f_ptr, *s_ptr; mlib_d64 *sd0_ptr, *sd1_ptr; mlib_d64 sd0, sd1, sd2, sd3, s0h, s1h, s0l, s1l; mlib_u32 bm0 = 0x018923ab; mlib_u32 bm1 = 0x45cd67ef; mlib_d64 dd0, dd1, dd2, dd3; for (j = 0; j < height; j++) { i = 0; if ((mlib_addr)dst_s16_0 & 3) { dst_s16_0[0] = src_s16_0[0]; f_ptr = (mlib_s16 *)(src_s16_1); s_ptr = (mlib_s16 *)(src_s16_0 + 1); i++; } else { f_ptr = (mlib_s16 *)src_s16_0; s_ptr = (mlib_s16 *)src_s16_1; } if (((mlib_addr)(dst_s16_0 + i) & 7) && (i < (n - 1))) { dst_s16_0[i + 0] = (*f_ptr++); dst_s16_0[i + 1] = (*s_ptr++); i += 2; } dp = (mlib_u32 *)(dst_s16_0 + i); if ((((mlib_addr)f_ptr & 7) == 0) && (((mlib_addr)s_ptr & 7) == 0)) { #pragma pipeloop(0) for (; i < (n - 15); i += 16) { sd0 = ((mlib_d64 *)f_ptr)[0]; sd1 = ((mlib_d64 *)s_ptr)[0]; sd2 = ((mlib_d64 *)f_ptr)[1]; sd3 = ((mlib_d64 *)s_ptr)[1]; vis_write_bmask(bm0, 0); dd0 = vis_bshuffle(sd0, sd1); dd2 = vis_bshuffle(sd2, sd3); vis_write_bmask(bm1, 0); dd1 = vis_bshuffle(sd0, sd1); dd3 = vis_bshuffle(sd2, sd3); ((mlib_d64 *)dp)[0] = dd0; ((mlib_d64 *)dp)[1] = dd1; ((mlib_d64 *)dp)[2] = dd2; ((mlib_d64 *)dp)[3] = dd3; f_ptr += 8; s_ptr += 8; dp += 8; } } else if (((mlib_addr)f_ptr & 7) == ((mlib_addr)s_ptr & 7)) { mlib_d64 s0h, s1h, s0l, s1l; sd0_ptr = VIS_ALIGNADDR(f_ptr, 0); sd1_ptr = VIS_ALIGNADDR(s_ptr, 0); s0h = (*sd0_ptr++); s1h = (*sd1_ptr++); #pragma pipeloop(0) for (; i < (n - 7); i += 8) { s0l = (*sd0_ptr++); s1l = (*sd1_ptr++); sd0 = vis_faligndata(s0h, s0l); sd1 = vis_faligndata(s1h, s1l); vis_write_bmask(bm0, 0); dd0 = vis_bshuffle(sd0, sd1); vis_write_bmask(bm1, 0); dd1 = vis_bshuffle(sd0, sd1); ((mlib_d64 *)dp)[0] = dd0; ((mlib_d64 *)dp)[1] = dd1; s0h = s0l; s1h = s1l; f_ptr += 4; s_ptr += 4; dp += 4; } 
} else { sd0_ptr = VIS_ALIGNADDR(f_ptr, 0); sd1_ptr = VIS_ALIGNADDR(s_ptr, 0); s0h = vis_ld_d64_nf(sd0_ptr); sd0_ptr++; s1h = vis_ld_d64_nf(sd1_ptr); sd1_ptr++; #pragma pipeloop(0) for (k = 0; i < (n - 7); i += 8, k++) { VIS_ALIGNADDR(f_ptr, 0); s0l = vis_ld_d64_nf(sd0_ptr); sd0_ptr++; sd0 = vis_faligndata(s0h, s0l); VIS_ALIGNADDR(s_ptr, 0); s1l = vis_ld_d64_nf(sd1_ptr); sd1_ptr++; sd1 = vis_faligndata(s1h, s1l); vis_write_bmask(bm0, 0); dd0 = vis_bshuffle(sd0, sd1); vis_write_bmask(bm1, 0); dd1 = vis_bshuffle(sd0, sd1); ((mlib_d64 *)dp)[0] = dd0; ((mlib_d64 *)dp)[1] = dd1; s0h = s0l; s1h = s1l; dp += 4; } f_ptr += (k << 2); s_ptr += (k << 2); } for (; i < (n - 1); i += 2) { s0 = (mlib_u16)((*f_ptr++)); s1 = (mlib_u16)((*s_ptr++)); (*dp++) = (s0 << 16) + s1; } if (i < n) dst_s16_0[i] = *f_ptr; src_s16_0 += src0_stride; src_s16_1 += src1_stride; dst_s16_0 += dst_stride; } return (MLIB_SUCCESS); }
mlib_status mlib_ImageChannelMerge3_S16( mlib_s16 *dst_s16_0, const mlib_s16 *src_s16_0, const mlib_s16 *src_s16_1, const mlib_s16 *src_s16_2, mlib_s32 height, mlib_s32 width, mlib_s32 dst_stride, mlib_s32 src0_stride, mlib_s32 src1_stride, mlib_s32 src2_stride) { mlib_s32 i, j, k, n = width * 3; mlib_f32 *dp; mlib_u32 bm0 = 0x0189ff23; mlib_u32 bm2 = 0xabff45cd; mlib_u32 bm4 = 0xff67efff; mlib_u32 bm1 = 0x01238967; mlib_u32 bm3 = 0x01ab4567; mlib_u32 bm5 = 0xcd2345ef; mlib_d64 sd0, sd1, sd2; mlib_d64 dd0, xx0, dd1, xx1; mlib_d64 dd2, xx2; vis_write_gsr(8 << 3); for (j = 0; j < height; j++) { i = 0; k = 0; for (; (i < (n - 2)) && ((mlib_addr)(dst_s16_0 + i) & 7); i += 3, k++) { dst_s16_0[i + 0] = src_s16_0[k]; dst_s16_0[i + 1] = src_s16_1[k]; dst_s16_0[i + 2] = src_s16_2[k]; } dp = (mlib_f32 *)(dst_s16_0 + i); if (((mlib_addr)(src_s16_0 + k) & 7) || ((mlib_addr)(src_s16_1 + k) & 7) || ((mlib_addr)(src_s16_2 + k) & 7)) { mlib_d64 s0h, s0l, s1h, s1l, s2h, s2l; mlib_d64 *sp0; mlib_d64 *sp1; mlib_d64 *sp2; sp0 = VIS_ALIGNADDR((src_s16_0 + k), 0); s0h = vis_ld_d64_nf(sp0); sp0++; sp1 = VIS_ALIGNADDR((src_s16_1 + k), 0); s1h = vis_ld_d64_nf(sp1); sp1++; sp2 = VIS_ALIGNADDR((src_s16_2 + k), 0); s2h = vis_ld_d64_nf(sp2); sp2++; #pragma pipeloop(0) for (; i < (n - 11); i += 12, k += 4) { s0l = vis_ld_d64_nf(sp0); sp0++; s1l = vis_ld_d64_nf(sp1); sp1++; s2l = vis_ld_d64_nf(sp2); sp2++; VIS_ALIGNADDR((src_s16_0 + k), 0); sd0 = vis_faligndata(s0h, s0l); VIS_ALIGNADDR((src_s16_1 + k), 0); sd1 = vis_faligndata(s1h, s1l); VIS_ALIGNADDR((src_s16_2 + k), 0); sd2 = vis_faligndata(s2h, s2l); vis_write_bmask(bm0, 0); xx0 = vis_bshuffle(sd0, sd1); vis_write_bmask(bm1, 0); dd0 = vis_bshuffle(xx0, sd2); vis_write_bmask(bm2, 0); xx1 = vis_bshuffle(sd0, sd1); vis_write_bmask(bm3, 0); dd1 = vis_bshuffle(xx1, sd2); vis_write_bmask(bm4, 0); xx2 = vis_bshuffle(sd0, sd1); vis_write_bmask(bm5, 0); dd2 = vis_bshuffle(xx2, sd2); ((mlib_d64 *)dp)[0] = dd0; ((mlib_d64 *)dp)[1] = dd1; ((mlib_d64 
*)dp)[2] = dd2; dp += 6; s0h = s0l; s1h = s1l; s2h = s2l; } } else { #pragma pipeloop(0) for (; i < (n - 11); i += 12, k += 4) { sd0 = *((mlib_d64 *)(src_s16_0 + k)); sd1 = *((mlib_d64 *)(src_s16_1 + k)); sd2 = *((mlib_d64 *)(src_s16_2 + k)); vis_write_bmask(bm0, 0); xx0 = vis_bshuffle(sd0, sd1); vis_write_bmask(bm1, 0); dd0 = vis_bshuffle(xx0, sd2); vis_write_bmask(bm2, 0); xx1 = vis_bshuffle(sd0, sd1); vis_write_bmask(bm3, 0); dd1 = vis_bshuffle(xx1, sd2); vis_write_bmask(bm4, 0); xx2 = vis_bshuffle(sd0, sd1); vis_write_bmask(bm5, 0); dd2 = vis_bshuffle(xx2, sd2); ((mlib_d64 *)dp)[0] = dd0; ((mlib_d64 *)dp)[1] = dd1; ((mlib_d64 *)dp)[2] = dd2; dp += 6; } } for (; i < (n - 2); i += 3, k++) { dst_s16_0[i + 0] = src_s16_0[k]; dst_s16_0[i + 1] = src_s16_1[k]; dst_s16_0[i + 2] = src_s16_2[k]; } dst_s16_0 += dst_stride; src_s16_0 += src0_stride; src_s16_1 += src1_stride; src_s16_2 += src2_stride; } return (MLIB_SUCCESS); }
mlib_status mlib_ImageChannelMerge4_S16( mlib_s16 *dst_s16_0, const mlib_s16 *src_s16_0, const mlib_s16 *src_s16_1, const mlib_s16 *src_s16_2, const mlib_s16 *src_s16_3, mlib_s32 height, mlib_s32 width, mlib_s32 dst_stride, mlib_s32 src0_stride, mlib_s32 src1_stride, mlib_s32 src2_stride, mlib_s32 src3_stride) { mlib_s32 i, j, n = width << 2; mlib_s16 *fi_ptr, *se_ptr, *th_ptr, *fo_ptr; mlib_d64 *dp; for (j = 0; j < height; j++) { i = 0; if ((mlib_addr)(dst_s16_0 + i) & 7) { dst_s16_0[i++] = src_s16_0[0]; if ((mlib_addr)(dst_s16_0 + i) & 7) { dst_s16_0[i++] = src_s16_1[0]; if ((mlib_addr)(dst_s16_0 + i) & 7) { dst_s16_0[i++] = src_s16_2[0]; } } } if (i == 0) { fi_ptr = (mlib_s16 *)src_s16_0; se_ptr = (mlib_s16 *)src_s16_1; th_ptr = (mlib_s16 *)src_s16_2; fo_ptr = (mlib_s16 *)src_s16_3; } else if (i == 1) { fi_ptr = (mlib_s16 *)src_s16_1; se_ptr = (mlib_s16 *)src_s16_2; th_ptr = (mlib_s16 *)src_s16_3; fo_ptr = (mlib_s16 *)(src_s16_0 + 1); } else if (i == 2) { fi_ptr = (mlib_s16 *)src_s16_2; se_ptr = (mlib_s16 *)src_s16_3; th_ptr = (mlib_s16 *)(src_s16_0 + 1); fo_ptr = (mlib_s16 *)(src_s16_1 + 1); } else if (i == 3) { fi_ptr = (mlib_s16 *)src_s16_3; se_ptr = (mlib_s16 *)(src_s16_0 + 1); th_ptr = (mlib_s16 *)(src_s16_1 + 1); fo_ptr = (mlib_s16 *)(src_s16_2 + 1); } dp = (mlib_d64 *)(dst_s16_0 + i); if ((n - i) > 16) { if (((mlib_addr)fi_ptr & 7) || ((mlib_addr)se_ptr & 7) || ((mlib_addr)th_ptr & 7) || ((mlib_addr)fo_ptr & 7)) { mlib_d64 sd0, sd1, sd2, sd3; mlib_d64 dd0, dd1, dd2, dd3, dr02, dr13; mlib_d64 s0h, s0l, s1h, s1l, s2h, s2l, s3h, s3l; mlib_d64 *sp0; mlib_d64 *sp1; mlib_d64 *sp2; mlib_d64 *sp3; sp0 = VIS_ALIGNADDR(fi_ptr, 0); s0h = (*sp0++); sp1 = VIS_ALIGNADDR(se_ptr, 0); s1h = (*sp1++); sp2 = VIS_ALIGNADDR(th_ptr, 0); s2h = (*sp2++); sp3 = VIS_ALIGNADDR(fo_ptr, 0); s3h = (*sp3++); #pragma pipeloop(0) for (; i < (n - 15); i += 16) { s0l = vis_ld_d64_nf(sp0); sp0++; s1l = vis_ld_d64_nf(sp1); sp1++; s2l = vis_ld_d64_nf(sp2); sp2++; s3l = vis_ld_d64_nf(sp3); 
sp3++; VIS_ALIGNADDR(fi_ptr, 0); sd0 = vis_faligndata(s0h, s0l); VIS_ALIGNADDR(se_ptr, 0); sd1 = vis_faligndata(s1h, s1l); VIS_ALIGNADDR(th_ptr, 0); sd2 = vis_faligndata(s2h, s2l); VIS_ALIGNADDR(fo_ptr, 0); sd3 = vis_faligndata(s3h, s3l); dr02 = vis_fpmerge(vis_read_hi(sd0), vis_read_hi(sd2)); dr13 = vis_fpmerge(vis_read_hi(sd1), vis_read_hi(sd3)); dd0 = vis_fpmerge(vis_read_hi(dr02), vis_read_hi(dr13)); dp[0] = vis_fpmerge(vis_read_hi(dd0), vis_read_lo(dd0)); dd1 = vis_fpmerge(vis_read_lo(dr02), vis_read_lo(dr13)); dp[1] = vis_fpmerge(vis_read_hi(dd1), vis_read_lo(dd1)); dr02 = vis_fpmerge(vis_read_lo(sd0), vis_read_lo(sd2)); dr13 = vis_fpmerge(vis_read_lo(sd1), vis_read_lo(sd3)); dd2 = vis_fpmerge(vis_read_hi(dr02), vis_read_hi(dr13)); dp[2] = vis_fpmerge(vis_read_hi(dd2), vis_read_lo(dd2)); dd3 = vis_fpmerge(vis_read_lo(dr02), vis_read_lo(dr13)); dp[3] = vis_fpmerge(vis_read_hi(dd3), vis_read_lo(dd3)); dp += 4; s0h = s0l; s1h = s1l; s2h = s2l; s3h = s3l; fi_ptr += 4; se_ptr += 4; th_ptr += 4; fo_ptr += 4; } } else { mlib_d64 sd0, sd1, sd2, sd3; mlib_d64 dd0, dd1, dd2, dd3, dr02, dr13; #pragma pipeloop(0) for (; i < (n - 15); i += 16) { sd0 = ((mlib_d64 *)fi_ptr)[0]; sd1 = ((mlib_d64 *)se_ptr)[0]; sd2 = ((mlib_d64 *)th_ptr)[0]; sd3 = ((mlib_d64 *)fo_ptr)[0]; dr02 = vis_fpmerge(vis_read_hi(sd0), vis_read_hi(sd2)); dr13 = vis_fpmerge(vis_read_hi(sd1), vis_read_hi(sd3)); dd0 = vis_fpmerge(vis_read_hi(dr02), vis_read_hi(dr13)); dp[0] = vis_fpmerge(vis_read_hi(dd0), vis_read_lo(dd0)); dd1 = vis_fpmerge(vis_read_lo(dr02), vis_read_lo(dr13)); dp[1] = vis_fpmerge(vis_read_hi(dd1), vis_read_lo(dd1)); dr02 = vis_fpmerge(vis_read_lo(sd0), vis_read_lo(sd2)); dr13 = vis_fpmerge(vis_read_lo(sd1), vis_read_lo(sd3)); dd2 = vis_fpmerge(vis_read_hi(dr02), vis_read_hi(dr13)); dp[2] = vis_fpmerge(vis_read_hi(dd2), vis_read_lo(dd2)); dd3 = vis_fpmerge(vis_read_lo(dr02), vis_read_lo(dr13)); dp[3] = vis_fpmerge(vis_read_hi(dd3), vis_read_lo(dd3)); dp += 4; fi_ptr += 4; se_ptr += 4; 
th_ptr += 4; fo_ptr += 4; } } } #pragma pipeloop(0) for (; i < (n - 3); i += 4) { dst_s16_0[i + 0] = (*fi_ptr++); dst_s16_0[i + 1] = (*se_ptr++); dst_s16_0[i + 2] = (*th_ptr++); dst_s16_0[i + 3] = (*fo_ptr++); } if (i < (n - 2)) { dst_s16_0[i + 0] = *fi_ptr; dst_s16_0[i + 1] = *se_ptr; dst_s16_0[i + 2] = *th_ptr; } else if (i < (n - 1)) { dst_s16_0[i + 0] = *fi_ptr; dst_s16_0[i + 1] = *se_ptr; } else if (i < n) { dst_s16_0[i + 0] = *fi_ptr; } dst_s16_0 += dst_stride; src_s16_0 += src0_stride; src_s16_1 += src1_stride; src_s16_2 += src2_stride; src_s16_3 += src3_stride; } return (MLIB_SUCCESS); }
mask = mask0 >> offset; src = da[0]; da[0] = (src & (~mask)) | (sa[0] & mask); da++; sa++; size = size - 8 + offset; b_size = size >> 3; /* size in bytes */ /* prepare the destination addresses */ dp = (mlib_d64 *) ((mlib_addr) da & (~7)); j = (mlib_addr) dp - (mlib_addr) da; dend = da + b_size - 1; /* prepare the source address */ sp = (mlib_d64 *) VIS_ALIGNADDR(sa, j); /* generate edge mask for the start point */ emask = vis_edge8(da, dend); s1 = vis_ld_d64_nf(sp); if (emask != 0xff) { s0 = s1; s1 = vis_ld_d64_nf(sp+1); s0 = vis_faligndata(s0, s1); vis_pst_8(s0, dp++, emask); sp++; j += 8; } #pragma pipeloop(0) for (; j <= (b_size - 8); j += 8) {