/*
 * Two-channel single-input lookup: U16 source samples, S16 destination.
 *
 * Each source sample selects one entry of table[0] and one entry of
 * table[1].  The table[0] entry of the first sample is stored as a scalar,
 * after which the output is produced in 8-byte groups laid out as
 *     table[1][a], table[0][b], table[1][b], table[0][c]
 * for three consecutive samples a, b, c.  The table[1] entry of the final
 * sample is again stored as a scalar.
 *
 * src    source samples
 * dst    destination; dst + 1 is cast to mlib_d64 * and written with
 *        8-byte stores
 * xsize  number of source samples consumed
 * table  table[0] / table[1] are the per-channel lookup tables
 *
 * NOTE(review): dst + 1 is presumably 8-byte aligned and xsize >= 1;
 * the first sample is read unconditionally -- confirm against the caller.
 */
void mlib_v_ImageLookUpSI_U16_S16_2_D1(
    const mlib_u16 *src,
    mlib_s16 *dst,
    mlib_s32 xsize,
    const mlib_s16 **table)
{
    const mlib_s16 *lut0 = table[0];
    const mlib_s16 *lut1 = table[1];
    mlib_u16 *in = (mlib_u16 *)src;     /* read cursor */
    mlib_s16 *out16 = dst;              /* scalar write cursor */
    mlib_d64 *out64;                    /* 8-byte write cursor */
    mlib_s32 off_a, off_b, off_c;       /* byte offsets of three samples */
    mlib_d64 h0, h1, h2, h3;            /* loaded table entries */
    mlib_d64 pair_lo, pair_hi;          /* two halfwords each, see below */
    mlib_s32 k;

    /*
     * With an align offset of 6 each vis_faligndata() below is expected to
     * push one loaded halfword into the top of its accumulator (standard
     * VIS behaviour -- not visible in this file).
     */
    vis_alignaddr((void *)0, 6);

    /* Leading scalar: channel 0 of the first sample. */
    off_a = *in;
    in++;
    *out16 = lut0[off_a];
    out16++;
    out64 = (mlib_d64 *)out16;
    xsize--;
    off_a <<= 1;                        /* index -> byte offset */

    if (xsize >= 2) {
        off_b = in[0] << 1;
        off_c = in[1] << 1;
        in += 2;

        /*
         * Byte mask for vis_bshuffle(); presumably picks the upper four
         * bytes of each operand -- confirm against the VIS2 manual.
         */
        vis_write_bmask(0x012389ab, 0);

#pragma pipeloop(0)
        for (k = 0; k <= xsize - 4; k += 2) {
            h3 = VIS_LD_U16_I(lut0, off_c);
            h2 = VIS_LD_U16_I(lut1, off_b);
            h1 = VIS_LD_U16_I(lut0, off_b);
            h0 = VIS_LD_U16_I(lut1, off_a);
            pair_hi = vis_faligndata(h3, pair_hi);
            pair_hi = vis_faligndata(h2, pair_hi);
            pair_lo = vis_faligndata(h1, pair_lo);
            pair_lo = vis_faligndata(h0, pair_lo);
            /* Slide the three-sample window forward by two samples. */
            off_a = off_c;
            off_b = in[0] << 1;
            off_c = in[1] << 1;
            *out64 = vis_bshuffle(pair_lo, pair_hi);
            out64++;
            in += 2;
        }

        /* Drain the samples already fetched by the loop (or the prologue). */
        h3 = VIS_LD_U16_I(lut0, off_c);
        h2 = VIS_LD_U16_I(lut1, off_b);
        h1 = VIS_LD_U16_I(lut0, off_b);
        h0 = VIS_LD_U16_I(lut1, off_a);
        pair_hi = vis_faligndata(h3, pair_hi);
        pair_hi = vis_faligndata(h2, pair_hi);
        pair_lo = vis_faligndata(h1, pair_lo);
        pair_lo = vis_faligndata(h0, pair_lo);
        off_a = off_c;
        *out64 = vis_bshuffle(pair_lo, pair_hi);
        out64++;
    }

    out16 = (mlib_s16 *)out64;

    if (xsize & 1) {
        /* One sample left over: emit a 4-byte group. */
        off_b = in[0] << 1;
        h1 = VIS_LD_U16_I(lut0, off_b);
        h0 = VIS_LD_U16_I(lut1, off_a);
        pair_lo = vis_faligndata(h1, pair_lo);
        pair_lo = vis_faligndata(h0, pair_lo);
        *(mlib_f32 *)out64 = vis_read_hi(pair_lo);
        off_a = off_b;
        out16 += 2;
    }

    /* Trailing scalar: channel 1 of the last sample. */
    off_a >>= 1;                        /* byte offset -> index */
    *out16 = lut1[off_a];
}
/*
 * Four-channel single-input lookup: S32 source samples, S16 destination,
 * output stream shifted by three halfwords.
 *
 * Every 8-byte store holds
 *     table[3][prev], table[0][cur], table[1][cur], table[2][cur]
 * for two consecutive samples prev, cur.  After the 8-byte stores one
 * scalar table[3] entry for the last sample is written.  The table[0..2]
 * entries of the very first sample are not written here (presumably the
 * caller handles them -- confirm).
 *
 * src    source samples; xsize + 1 of them are read when xsize >= 1
 * dst    destination, written through an mlib_d64 pointer
 * xsize  number of 8-byte groups to emit
 * table  four lookup tables; each pointer is advanced by 2^31 entries
 *        before being indexed with the signed sample value
 */
void mlib_v_ImageLookUpSI_S32_S16_4_DstOff3_D1(
    const mlib_s32 *src,
    mlib_s16 *dst,
    mlib_s32 xsize,
    const mlib_s16 **table)
{
    const mlib_s16 *lut0 = table[0] + (mlib_u32)2147483648u;
    const mlib_s16 *lut1 = table[1] + (mlib_u32)2147483648u;
    const mlib_s16 *lut2 = table[2] + (mlib_u32)2147483648u;
    const mlib_s16 *lut3 = table[3] + (mlib_u32)2147483648u;
    mlib_s32 *in = (mlib_s32 *)src;         /* read cursor */
    mlib_d64 *out64 = (mlib_d64 *)dst;      /* 8-byte write cursor */
    mlib_s16 *tail;                         /* final scalar position */
    mlib_s32 prev, cur;                     /* two adjacent samples */
    mlib_d64 h0, h1, h2, h3;                /* loaded table entries */
    mlib_d64 packed;                        /* four packed halfwords */
    mlib_s32 k;

    /*
     * Align offset 6: each vis_faligndata() is expected to push one
     * halfword into the top of the accumulator (standard VIS behaviour --
     * not visible in this file).
     */
    vis_alignaddr((void *)0, 6);

    prev = *in;
    in++;

    if (xsize >= 1) {
        cur = *in;
        in++;

#pragma pipeloop(0)
        for (k = 0; k <= xsize - 2; k++) {
            h3 = VIS_LD_U16_I(lut2, ((mlib_addr)2 * cur));
            h2 = VIS_LD_U16_I(lut1, ((mlib_addr)2 * cur));
            h1 = VIS_LD_U16_I(lut0, ((mlib_addr)2 * cur));
            h0 = VIS_LD_U16_I(lut3, ((mlib_addr)2 * prev));
            packed = vis_faligndata(h3, packed);
            packed = vis_faligndata(h2, packed);
            packed = vis_faligndata(h1, packed);
            packed = vis_faligndata(h0, packed);
            prev = cur;
            cur = *in;
            in++;
            *out64 = packed;
            out64++;
        }

        /* Last group uses the two samples already held in prev / cur. */
        h3 = VIS_LD_U16_I(lut2, ((mlib_addr)2 * cur));
        h2 = VIS_LD_U16_I(lut1, ((mlib_addr)2 * cur));
        h1 = VIS_LD_U16_I(lut0, ((mlib_addr)2 * cur));
        h0 = VIS_LD_U16_I(lut3, ((mlib_addr)2 * prev));
        packed = vis_faligndata(h3, packed);
        packed = vis_faligndata(h2, packed);
        packed = vis_faligndata(h1, packed);
        packed = vis_faligndata(h0, packed);
        prev = cur;
        *out64 = packed;
        out64++;
    }

    /* Trailing scalar: channel 3 of the last sample. */
    tail = (mlib_s16 *)out64;
    tail[0] = lut3[prev];
}
/*
 * Two-channel single-input lookup: S32 source samples, S16 destination
 * written with 8-byte stores.
 *
 * Two samples a, b yield one 8-byte group
 *     table[0][a], table[1][a], table[0][b], table[1][b].
 * An odd trailing sample yields one 4-byte group.
 *
 * src    source samples
 * dst    destination, written through an mlib_d64 pointer
 *        (NOTE(review): presumably 8-byte aligned, per the function
 *        name -- confirm against the caller)
 * xsize  number of source samples
 * table  two lookup tables; each pointer is advanced by 2^31 entries
 *        before being indexed with the signed sample value
 */
void mlib_v_ImageLookUpSI_S32_S16_2_DstA8D1(
    const mlib_s32 *src,
    mlib_s16 *dst,
    mlib_s32 xsize,
    const mlib_s16 **table)
{
    const mlib_s16 *lut0 = table[0] + (mlib_u32)2147483648u;
    const mlib_s16 *lut1 = table[1] + (mlib_u32)2147483648u;
    mlib_s32 *in = (mlib_s32 *)src;         /* read cursor */
    mlib_d64 *out64 = (mlib_d64 *)dst;      /* 8-byte write cursor */
    mlib_s32 first, second;                 /* current sample pair */
    mlib_d64 h0, h1, h2, h3;                /* loaded table entries */
    mlib_d64 packed;                        /* four packed halfwords */
    mlib_s32 k;

    /*
     * Align offset 6: each vis_faligndata() is expected to push one
     * halfword into the top of the accumulator (standard VIS behaviour --
     * not visible in this file).
     */
    vis_alignaddr((void *)0, 6);

    if (xsize >= 2) {
        first = in[0];
        second = in[1];
        in += 2;

#pragma pipeloop(0)
        for (k = 0; k <= xsize - 4; k += 2) {
            h3 = VIS_LD_U16_I(lut1, ((mlib_addr)2 * second));
            h2 = VIS_LD_U16_I(lut0, ((mlib_addr)2 * second));
            h1 = VIS_LD_U16_I(lut1, ((mlib_addr)2 * first));
            h0 = VIS_LD_U16_I(lut0, ((mlib_addr)2 * first));
            packed = vis_faligndata(h3, packed);
            packed = vis_faligndata(h2, packed);
            packed = vis_faligndata(h1, packed);
            packed = vis_faligndata(h0, packed);
            /* Fetch the pair for the next group before storing. */
            first = in[0];
            second = in[1];
            *out64 = packed;
            out64++;
            in += 2;
        }

        /* Emit the pair that is already loaded. */
        h3 = VIS_LD_U16_I(lut1, ((mlib_addr)2 * second));
        h2 = VIS_LD_U16_I(lut0, ((mlib_addr)2 * second));
        h1 = VIS_LD_U16_I(lut1, ((mlib_addr)2 * first));
        h0 = VIS_LD_U16_I(lut0, ((mlib_addr)2 * first));
        packed = vis_faligndata(h3, packed);
        packed = vis_faligndata(h2, packed);
        packed = vis_faligndata(h1, packed);
        packed = vis_faligndata(h0, packed);
        *out64 = packed;
        out64++;
    }

    if (xsize & 1) {
        /* Odd sample count: the last sample becomes a 4-byte store. */
        first = in[0];
        h1 = VIS_LD_U16_I(lut1, ((mlib_addr)2 * first));
        h0 = VIS_LD_U16_I(lut0, ((mlib_addr)2 * first));
        packed = vis_faligndata(h1, packed);
        packed = vis_faligndata(h0, packed);
        *(mlib_f32 *)out64 = vis_read_hi(packed);
    }
}
/*
 * Four-channel single-input lookup: S16 source samples, S16 destination,
 * output stream shifted by two halfwords.
 *
 * Every 8-byte store holds
 *     table[2][prev], table[3][prev], table[0][cur], table[1][cur]
 * for two consecutive samples prev, cur.  After the 8-byte stores the
 * table[2] and table[3] entries of the last sample are written as two
 * scalars.  The table[0] / table[1] entries of the very first sample are
 * not written here (presumably the caller handles them -- confirm).
 *
 * src    source samples; xsize + 1 of them are read when xsize >= 1
 * dst    destination, written through an mlib_d64 pointer
 * xsize  number of 8-byte groups to emit
 * table  four lookup tables; each pointer is advanced by 32768 entries
 *        before being indexed with the signed sample value
 *
 * Byte offsets are formed with "2 * sample" rather than "sample << 1":
 * samples may be negative, and left-shifting a negative signed value is
 * undefined behaviour in C.  The result is identical for every value a
 * 16-bit sample can take.
 */
void mlib_v_ImageLookUpSI_S16_S16_4_DstOff2_D1(const mlib_s16 *src,
                                               mlib_s16 *dst,
                                               mlib_s32 xsize,
                                               const mlib_s16 **table)
{
  mlib_s16 *sp;                /* pointer to source data */
  mlib_s32 s0, s1;             /* byte offsets of two adjacent samples */
  mlib_s16 *dl;                /* pointer to start of destination */
  mlib_d64 *dp;                /* aligned pointer to destination */
  mlib_d64 t0, t1, t2, t3;     /* loaded table entries */
  mlib_d64 acc;                /* four packed halfwords */
  mlib_s32 i;                  /* loop variable */
  const mlib_s16 *tab0 = &table[0][32768];
  const mlib_s16 *tab1 = &table[1][32768];
  const mlib_s16 *tab2 = &table[2][32768];
  const mlib_s16 *tab3 = &table[3][32768];

  sp = (void *)src;
  dl = dst;
  dp = (mlib_d64 *) dl;

  /*
   * Align offset 6: each vis_faligndata() is expected to push one halfword
   * into the top of the accumulator (standard VIS behaviour -- not visible
   * in this file).
   */
  vis_alignaddr((void *) 0, 6);

  s0 = 2 * (*sp++);

  if (xsize >= 1) {
    s1 = 2 * (*sp++);

#pragma pipeloop(0)
    for (i = 0; i <= xsize - 2; i++) {
      t3 = VIS_LD_U16_I(tab1, s1);
      t2 = VIS_LD_U16_I(tab0, s1);
      t1 = VIS_LD_U16_I(tab3, s0);
      t0 = VIS_LD_U16_I(tab2, s0);
      acc = vis_faligndata(t3, acc);
      acc = vis_faligndata(t2, acc);
      acc = vis_faligndata(t1, acc);
      acc = vis_faligndata(t0, acc);
      s0 = s1;
      s1 = 2 * (*sp++);
      *dp++ = acc;
    }

    /* Last group uses the two samples already held in s0 / s1. */
    t3 = VIS_LD_U16_I(tab1, s1);
    t2 = VIS_LD_U16_I(tab0, s1);
    t1 = VIS_LD_U16_I(tab3, s0);
    t0 = VIS_LD_U16_I(tab2, s0);
    acc = vis_faligndata(t3, acc);
    acc = vis_faligndata(t2, acc);
    acc = vis_faligndata(t1, acc);
    acc = vis_faligndata(t0, acc);
    s0 = s1;
    *dp++ = acc;
  }

  /* Trailing scalars: channels 2 and 3 of the last sample. */
  dl = (mlib_s16 *) dp;
  s0 /= 2;                     /* s0 is always even, so this is exact */
  dl[0] = tab2[s0];
  dl[1] = tab3[s0];
}
/*
 * Three-channel single-input lookup: S32 source samples, S16 destination.
 *
 * Each source sample p produces table[0][p], table[1][p], table[2][p].
 * Four samples (twelve halfwords) are packed into three 8-byte stores;
 * any samples left after the groups of four are written one halfword at
 * a time.
 *
 * src    source samples
 * dst    destination, written through an mlib_d64 pointer for the
 *        groups of four (NOTE(review): presumably 8-byte aligned --
 *        confirm against the caller)
 * xsize  number of source samples
 * table  three lookup tables; each pointer is advanced by 2^31 entries
 *        before being indexed with the signed sample value
 */
void mlib_v_ImageLookUpSI_S32_S16_3_D1(
    const mlib_s32 *src,
    mlib_s16 *dst,
    mlib_s32 xsize,
    const mlib_s16 **table)
{
    const mlib_s16 *lut0 = table[0] + (mlib_u32)2147483648u;
    const mlib_s16 *lut1 = table[1] + (mlib_u32)2147483648u;
    const mlib_s16 *lut2 = table[2] + (mlib_u32)2147483648u;
    mlib_s32 *in = (mlib_s32 *)src;         /* read cursor */
    mlib_d64 *out64 = (mlib_d64 *)dst;      /* 8-byte write cursor */
    mlib_s16 *out16;                        /* scalar write cursor */
    mlib_s32 p0, p1, p2, p3;                /* four adjacent samples */
    mlib_d64 h0, h1, h2, h3;                /* loaded table entries */
    mlib_d64 w0, w1, w2;                    /* three packed 8-byte words */
    mlib_s32 k = 0;                         /* samples consumed so far */

    /*
     * Align offset 6: each vis_faligndata() is expected to push one
     * halfword into the top of its accumulator (standard VIS behaviour --
     * not visible in this file).
     */
    vis_alignaddr((void *)0, 6);

    if (xsize >= 4) {
        p0 = in[0];
        p1 = in[1];
        p2 = in[2];
        p3 = in[3];
        in += 4;

#pragma pipeloop(0)
        for (k = 0; k <= xsize - 8; k += 4) {
            /* word 0: p0.ch0 p0.ch1 p0.ch2 p1.ch0 */
            h3 = VIS_LD_U16_I(lut0, ((mlib_addr)2 * p1));
            h2 = VIS_LD_U16_I(lut2, ((mlib_addr)2 * p0));
            h1 = VIS_LD_U16_I(lut1, ((mlib_addr)2 * p0));
            h0 = VIS_LD_U16_I(lut0, ((mlib_addr)2 * p0));
            w0 = vis_faligndata(h3, w0);
            w0 = vis_faligndata(h2, w0);
            w0 = vis_faligndata(h1, w0);
            w0 = vis_faligndata(h0, w0);

            /* word 1: p1.ch1 p1.ch2 p2.ch0 p2.ch1 */
            h3 = VIS_LD_U16_I(lut1, ((mlib_addr)2 * p2));
            h2 = VIS_LD_U16_I(lut0, ((mlib_addr)2 * p2));
            h1 = VIS_LD_U16_I(lut2, ((mlib_addr)2 * p1));
            h0 = VIS_LD_U16_I(lut1, ((mlib_addr)2 * p1));
            w1 = vis_faligndata(h3, w1);
            w1 = vis_faligndata(h2, w1);
            w1 = vis_faligndata(h1, w1);
            w1 = vis_faligndata(h0, w1);

            /* word 2: p2.ch2 p3.ch0 p3.ch1 p3.ch2 */
            h3 = VIS_LD_U16_I(lut2, ((mlib_addr)2 * p3));
            h2 = VIS_LD_U16_I(lut1, ((mlib_addr)2 * p3));
            h1 = VIS_LD_U16_I(lut0, ((mlib_addr)2 * p3));
            h0 = VIS_LD_U16_I(lut2, ((mlib_addr)2 * p2));
            w2 = vis_faligndata(h3, w2);
            w2 = vis_faligndata(h2, w2);
            w2 = vis_faligndata(h1, w2);
            w2 = vis_faligndata(h0, w2);

            /* Fetch the next four samples, then store this group. */
            p0 = in[0];
            p1 = in[1];
            p2 = in[2];
            p3 = in[3];
            out64[0] = w0;
            out64[1] = w1;
            out64[2] = w2;
            out64 += 3;
            in += 4;
        }

        /* Emit the four samples that are already loaded. */
        h3 = VIS_LD_U16_I(lut0, ((mlib_addr)2 * p1));
        h2 = VIS_LD_U16_I(lut2, ((mlib_addr)2 * p0));
        h1 = VIS_LD_U16_I(lut1, ((mlib_addr)2 * p0));
        h0 = VIS_LD_U16_I(lut0, ((mlib_addr)2 * p0));
        w0 = vis_faligndata(h3, w0);
        w0 = vis_faligndata(h2, w0);
        w0 = vis_faligndata(h1, w0);
        w0 = vis_faligndata(h0, w0);

        h3 = VIS_LD_U16_I(lut1, ((mlib_addr)2 * p2));
        h2 = VIS_LD_U16_I(lut0, ((mlib_addr)2 * p2));
        h1 = VIS_LD_U16_I(lut2, ((mlib_addr)2 * p1));
        h0 = VIS_LD_U16_I(lut1, ((mlib_addr)2 * p1));
        w1 = vis_faligndata(h3, w1);
        w1 = vis_faligndata(h2, w1);
        w1 = vis_faligndata(h1, w1);
        w1 = vis_faligndata(h0, w1);

        h3 = VIS_LD_U16_I(lut2, ((mlib_addr)2 * p3));
        h2 = VIS_LD_U16_I(lut1, ((mlib_addr)2 * p3));
        h1 = VIS_LD_U16_I(lut0, ((mlib_addr)2 * p3));
        h0 = VIS_LD_U16_I(lut2, ((mlib_addr)2 * p2));
        w2 = vis_faligndata(h3, w2);
        w2 = vis_faligndata(h2, w2);
        w2 = vis_faligndata(h1, w2);
        w2 = vis_faligndata(h0, w2);

        out64[0] = w0;
        out64[1] = w1;
        out64[2] = w2;
        out64 += 3;
        k += 4;
    }

    /* Remaining samples (fewer than four): plain scalar lookups. */
    out16 = (mlib_s16 *)out64;

#pragma pipeloop(0)
    for (; k < xsize; k++) {
        p0 = *in;
        in++;
        out16[0] = lut0[p0];
        out16[1] = lut1[p0];
        out16[2] = lut2[p0];
        out16 += 3;
    }
}
/*
 * Element-wise lookup, U16 -> U16, cycling through four tables.
 *
 * Output element j is tableN[src[j]] with N = j mod 4.  Four results are
 * packed into each 8-byte store; a final group of one to three elements
 * is written with an edge-masked partial store.
 *
 * src                  source elements
 * dst                  destination, written through an mlib_d64 pointer
 *                      (NOTE(review): presumably 8-byte aligned --
 *                      confirm against the caller)
 * xsize                number of elements
 * table0 .. table3     lookup tables for element positions 0..3 of each
 *                      group of four
 */
void mlib_v_ImageLookUp_U16_U16_124_D1(const mlib_u16 *src,
                                       mlib_u16 *dst,
                                       mlib_s32 xsize,
                                       const mlib_u16 *table0,
                                       const mlib_u16 *table1,
                                       const mlib_u16 *table2,
                                       const mlib_u16 *table3)
{
    mlib_u16 *in = (mlib_u16 *)src;         /* read cursor */
    mlib_u16 *last = dst + xsize - 1;       /* last destination element */
    mlib_d64 *out64 = (mlib_d64 *)dst;      /* 8-byte write cursor */
    mlib_s32 a, b, c, d;                    /* four adjacent elements */
    mlib_d64 h0, h1, h2, h3;                /* loaded table entries */
    mlib_d64 packed;                        /* four packed halfwords */
    mlib_s32 mask;                          /* edge mask for the tail */
    mlib_s32 k, left;

    /*
     * Align offset 6: each vis_faligndata() is expected to push one
     * halfword into the top of the accumulator (standard VIS behaviour --
     * not visible in this file).
     */
    vis_alignaddr((void *)0, 6);

    if (xsize >= 4) {
        a = in[0];
        b = in[1];
        c = in[2];
        d = in[3];
        in += 4;

#pragma pipeloop(0)
        for (k = 0; k <= xsize - 8; k += 4) {
            h3 = VIS_LD_U16_I(table3, 2 * d);
            h2 = VIS_LD_U16_I(table2, 2 * c);
            h1 = VIS_LD_U16_I(table1, 2 * b);
            h0 = VIS_LD_U16_I(table0, 2 * a);
            packed = vis_faligndata(h3, packed);
            packed = vis_faligndata(h2, packed);
            packed = vis_faligndata(h1, packed);
            packed = vis_faligndata(h0, packed);
            /* Fetch the next four elements, then store this group. */
            a = in[0];
            b = in[1];
            c = in[2];
            d = in[3];
            *out64 = packed;
            out64++;
            in += 4;
        }

        /* Emit the four elements that are already loaded. */
        h3 = VIS_LD_U16_I(table3, 2 * d);
        h2 = VIS_LD_U16_I(table2, 2 * c);
        h1 = VIS_LD_U16_I(table1, 2 * b);
        h0 = VIS_LD_U16_I(table0, 2 * a);
        packed = vis_faligndata(h3, packed);
        packed = vis_faligndata(h2, packed);
        packed = vis_faligndata(h1, packed);
        packed = vis_faligndata(h0, packed);
        *out64 = packed;
        out64++;
    }

    if ((mlib_addr)out64 <= (mlib_addr)last) {
        /*
         * One to three elements remain.  Walk the source backwards from
         * the last element so the earliest element is pushed last and
         * ends up first in the accumulator.
         */
        left = (mlib_u16 *)last - (mlib_u16 *)out64;
        in += left;
        left++;

        switch (left) {
        case 3:
            a = (mlib_s32)*in;
            in--;
            h0 = VIS_LD_U16_I(table2, 2 * a);
            packed = vis_faligndata(h0, packed);
            /* fallthrough */
        case 2:
            a = (mlib_s32)*in;
            in--;
            h0 = VIS_LD_U16_I(table1, 2 * a);
            packed = vis_faligndata(h0, packed);
            /* fallthrough */
        case 1:
            a = (mlib_s32)*in;
            in--;
            h0 = VIS_LD_U16_I(table0, 2 * a);
            packed = vis_faligndata(h0, packed);
            break;
        default:
            break;
        }

        /* Partial store limited to the elements up to "last". */
        mask = vis_edge16(out64, last);
        vis_pst_16(packed, out64, mask);
    }
}
/*
 * Element-wise lookup, S16 -> S16, cycling through three tables.
 *
 * Output element j is tableN[src[j]] with N = j mod 3.  Four results are
 * packed into each 8-byte store; because four is not a multiple of three
 * the three table pointers are rotated after every store so that table0
 * always belongs to the first element of the next group.  A final group
 * of one to three elements is written with an edge-masked partial store.
 *
 * src                  source elements (signed; may be negative)
 * dst                  destination, written through an mlib_d64 pointer
 *                      (NOTE(review): presumably 8-byte aligned --
 *                      confirm against the caller)
 * xsize                number of elements
 * table0 .. table2     lookup tables, indexed directly with the signed
 *                      element value (presumably pre-biased by the
 *                      caller -- confirm)
 *
 * Byte offsets are formed with "2 * value" rather than "value << 1":
 * elements may be negative, and left-shifting a negative signed value is
 * undefined behaviour in C.  The result is identical for every value a
 * 16-bit element can take.
 */
void mlib_v_ImageLookUp_S16_S16_3_D1(
    const mlib_s16 *src,
    mlib_s16 *dst,
    mlib_s32 xsize,
    const mlib_s16 *table0,
    const mlib_s16 *table1,
    const mlib_s16 *table2)
{
    mlib_s16 *sp;                   /* pointer to source data */
    mlib_s32 s0, s1, s2, s3;        /* byte offsets of four elements */
    mlib_s16 *dl;                   /* pointer to start of destination */
    mlib_s16 *dend;                 /* pointer to last destination element */
    mlib_d64 *dp;                   /* aligned pointer to destination */
    mlib_d64 t0, t1, t2, t3;        /* loaded table entries */
    mlib_d64 acc0, acc1;            /* two packed halfwords each */
    mlib_s32 emask;                 /* edge mask */
    mlib_s32 i, num;                /* loop variable, tail length */
    const mlib_s16 *table;          /* scratch for table rotation */

    dl = dst;
    sp = (void *)src;
    dp = (mlib_d64 *)dl;
    dend = dl + xsize - 1;

    /*
     * Align offset 6: each vis_faligndata() is expected to push one
     * halfword into the top of its accumulator (standard VIS behaviour --
     * not visible in this file).
     */
    vis_alignaddr((void *)0, 6);

    if (xsize >= 4) {
        s0 = 2 * sp[0];
        s1 = 2 * sp[1];
        s2 = 2 * sp[2];
        s3 = 2 * sp[3];
        sp += 4;

        /*
         * Byte mask for vis_bshuffle(); presumably picks the upper four
         * bytes of each operand -- confirm against the VIS2 manual.
         */
        vis_write_bmask(0x012389ab, 0);

#pragma pipeloop(0)
        for (i = 0; i <= xsize - 8; i += 4, sp += 4) {
            t3 = VIS_LD_U16_I(table0, s3);
            t2 = VIS_LD_U16_I(table2, s2);
            t1 = VIS_LD_U16_I(table1, s1);
            t0 = VIS_LD_U16_I(table0, s0);
            acc1 = vis_faligndata(t3, acc1);
            acc1 = vis_faligndata(t2, acc1);
            acc0 = vis_faligndata(t1, acc0);
            acc0 = vis_faligndata(t0, acc0);
            s0 = 2 * sp[0];
            s1 = 2 * sp[1];
            s2 = 2 * sp[2];
            s3 = 2 * sp[3];
            (*dp++) = vis_bshuffle(acc0, acc1);
            /* Rotate tables: the next group starts one channel later. */
            table = table0;
            table0 = table1;
            table1 = table2;
            table2 = table;
        }

        /* Emit the four elements that are already loaded. */
        t3 = VIS_LD_U16_I(table0, s3);
        t2 = VIS_LD_U16_I(table2, s2);
        t1 = VIS_LD_U16_I(table1, s1);
        t0 = VIS_LD_U16_I(table0, s0);
        acc1 = vis_faligndata(t3, acc1);
        acc1 = vis_faligndata(t2, acc1);
        acc0 = vis_faligndata(t1, acc0);
        acc0 = vis_faligndata(t0, acc0);
        (*dp++) = vis_bshuffle(acc0, acc1);
        table = table0;
        table0 = table1;
        table1 = table2;
        table2 = table;
    }

    if ((mlib_addr)dp <= (mlib_addr)dend) {
        /*
         * One to three elements remain.  Walk the source backwards from
         * the last element so the earliest element is pushed last and
         * ends up first in the accumulator.
         */
        num = (mlib_s16 *)dend - (mlib_s16 *)dp;
        sp += num;
        num++;

        if (num == 1) {
            s0 = (mlib_s32)*sp;
            sp--;
            t0 = VIS_LD_U16_I(table0, 2 * s0);
            acc0 = vis_faligndata(t0, acc0);
        } else if (num == 2) {
            s0 = (mlib_s32)*sp;
            sp--;
            t0 = VIS_LD_U16_I(table1, 2 * s0);
            acc0 = vis_faligndata(t0, acc0);

            s0 = (mlib_s32)*sp;
            sp--;
            t0 = VIS_LD_U16_I(table0, 2 * s0);
            acc0 = vis_faligndata(t0, acc0);
        } else if (num == 3) {
            s0 = (mlib_s32)*sp;
            sp--;
            t0 = VIS_LD_U16_I(table2, 2 * s0);
            acc0 = vis_faligndata(t0, acc0);

            s0 = (mlib_s32)*sp;
            sp--;
            t0 = VIS_LD_U16_I(table1, 2 * s0);
            acc0 = vis_faligndata(t0, acc0);

            s0 = (mlib_s32)*sp;
            sp--;
            t0 = VIS_LD_U16_I(table0, 2 * s0);
            acc0 = vis_faligndata(t0, acc0);
        }

        /* Partial store limited to the elements up to dend. */
        emask = vis_edge16(dp, dend);
        vis_pst_16(acc0, dp, emask);
    }
}
/*
 * Two-channel single-input lookup: S32 source samples, S16 destination
 * written with 8-byte stores.  This variant assembles each store from two
 * half-filled accumulators via vis_bshuffle().
 *
 * Two samples a, b yield one 8-byte group
 *     table[0][a], table[1][a], table[0][b], table[1][b].
 * An odd trailing sample yields one 4-byte group.
 *
 * src    source samples
 * dst    destination, written through an mlib_d64 pointer
 *        (NOTE(review): presumably 8-byte aligned, per the function
 *        name -- confirm against the caller)
 * xsize  number of source samples
 * table  two lookup tables; each is offset by HALF_U64 bytes (macro
 *        defined outside this block) before being indexed with the
 *        signed sample value
 */
void mlib_v_ImageLookUpSI_S32_S16_2_DstA8D1(
    const mlib_s32 *src,
    mlib_s16 *dst,
    mlib_s32 xsize,
    const mlib_s16 **table)
{
    const mlib_s16 *lut0 = (void *)&(((mlib_u8 **)table)[0][HALF_U64]);
    const mlib_s16 *lut1 = (void *)&(((mlib_u8 **)table)[1][HALF_U64]);
    mlib_s32 *in = (mlib_s32 *)src;         /* read cursor */
    mlib_d64 *out64 = (mlib_d64 *)dst;      /* 8-byte write cursor */
    mlib_s32 first, second;                 /* current sample pair */
    mlib_d64 h0, h1, h2, h3;                /* loaded table entries */
    mlib_d64 pair_lo, pair_hi;              /* two halfwords each */
    mlib_s32 k;

    /*
     * Align offset 6: each vis_faligndata() is expected to push one
     * halfword into the top of its accumulator (standard VIS behaviour --
     * not visible in this file).
     */
    vis_alignaddr((void *)0, 6);

    if (xsize >= 2) {
        first = in[0];
        second = in[1];
        in += 2;

        /*
         * Byte mask for vis_bshuffle(); presumably picks the upper four
         * bytes of each operand -- confirm against the VIS2 manual.
         */
        vis_write_bmask(0x012389ab, 0);

#pragma pipeloop(0)
        for (k = 0; k <= xsize - 4; k += 2) {
            h3 = VIS_LD_U16_I(lut1, ((mlib_addr)2 * second));
            h2 = VIS_LD_U16_I(lut0, ((mlib_addr)2 * second));
            h1 = VIS_LD_U16_I(lut1, ((mlib_addr)2 * first));
            h0 = VIS_LD_U16_I(lut0, ((mlib_addr)2 * first));
            pair_hi = vis_faligndata(h3, pair_hi);
            pair_hi = vis_faligndata(h2, pair_hi);
            pair_lo = vis_faligndata(h1, pair_lo);
            pair_lo = vis_faligndata(h0, pair_lo);
            /* Fetch the pair for the next group before storing. */
            first = in[0];
            second = in[1];
            *out64 = vis_bshuffle(pair_lo, pair_hi);
            out64++;
            in += 2;
        }

        /* Emit the pair that is already loaded. */
        h3 = VIS_LD_U16_I(lut1, ((mlib_addr)2 * second));
        h2 = VIS_LD_U16_I(lut0, ((mlib_addr)2 * second));
        h1 = VIS_LD_U16_I(lut1, ((mlib_addr)2 * first));
        h0 = VIS_LD_U16_I(lut0, ((mlib_addr)2 * first));
        pair_hi = vis_faligndata(h3, pair_hi);
        pair_hi = vis_faligndata(h2, pair_hi);
        pair_lo = vis_faligndata(h1, pair_lo);
        pair_lo = vis_faligndata(h0, pair_lo);
        *out64 = vis_bshuffle(pair_lo, pair_hi);
        out64++;
    }

    if (xsize & 1) {
        /* Odd sample count: the last sample becomes a 4-byte store. */
        first = in[0];
        h1 = VIS_LD_U16_I(lut1, ((mlib_addr)2 * first));
        h0 = VIS_LD_U16_I(lut0, ((mlib_addr)2 * first));
        pair_lo = vis_faligndata(h1, pair_lo);
        pair_lo = vis_faligndata(h0, pair_lo);
        *(mlib_f32 *)out64 = vis_read_hi(pair_lo);
    }
}
/*
 * Four-channel single-input lookup: S32 source samples, S16 destination,
 * output stream shifted by three halfwords.  This variant assembles each
 * store from two half-filled accumulators via vis_bshuffle().
 *
 * Every 8-byte store holds
 *     table[3][prev], table[0][cur], table[1][cur], table[2][cur]
 * for two consecutive samples prev, cur.  After the 8-byte stores one
 * scalar table[3] entry for the last sample is written.  The table[0..2]
 * entries of the very first sample are not written here (presumably the
 * caller handles them -- confirm).
 *
 * src    source samples; xsize + 1 of them are read when xsize >= 1
 * dst    destination, written through an mlib_d64 pointer
 * xsize  number of 8-byte groups to emit
 * table  four lookup tables; each is offset by HALF_U64 bytes (macro
 *        defined outside this block) before being indexed with the
 *        signed sample value
 */
void mlib_v_ImageLookUpSI_S32_S16_4_DstOff3_D1(
    const mlib_s32 *src,
    mlib_s16 *dst,
    mlib_s32 xsize,
    const mlib_s16 **table)
{
    const mlib_s16 *lut0 = (void *)&(((mlib_u8 **)table)[0][HALF_U64]);
    const mlib_s16 *lut1 = (void *)&(((mlib_u8 **)table)[1][HALF_U64]);
    const mlib_s16 *lut2 = (void *)&(((mlib_u8 **)table)[2][HALF_U64]);
    const mlib_s16 *lut3 = (void *)&(((mlib_u8 **)table)[3][HALF_U64]);
    mlib_s32 *in = (mlib_s32 *)src;         /* read cursor */
    mlib_d64 *out64 = (mlib_d64 *)dst;      /* 8-byte write cursor */
    mlib_s16 *tail;                         /* final scalar position */
    mlib_s32 prev, cur;                     /* two adjacent samples */
    mlib_d64 h0, h1, h2, h3;                /* loaded table entries */
    mlib_d64 pair_lo, pair_hi;              /* two halfwords each */
    mlib_s32 k;

    /*
     * Align offset 6: each vis_faligndata() is expected to push one
     * halfword into the top of its accumulator (standard VIS behaviour --
     * not visible in this file).
     */
    vis_alignaddr((void *)0, 6);

    prev = *in;
    in++;

    if (xsize >= 1) {
        cur = *in;
        in++;

        /*
         * Byte mask for vis_bshuffle(); presumably picks the upper four
         * bytes of each operand -- confirm against the VIS2 manual.
         */
        vis_write_bmask(0x012389ab, 0);

#pragma pipeloop(0)
        for (k = 0; k <= xsize - 2; k++) {
            h3 = VIS_LD_U16_I(lut2, ((mlib_addr)2 * cur));
            h2 = VIS_LD_U16_I(lut1, ((mlib_addr)2 * cur));
            h1 = VIS_LD_U16_I(lut0, ((mlib_addr)2 * cur));
            h0 = VIS_LD_U16_I(lut3, ((mlib_addr)2 * prev));
            pair_hi = vis_faligndata(h3, pair_hi);
            pair_hi = vis_faligndata(h2, pair_hi);
            pair_lo = vis_faligndata(h1, pair_lo);
            pair_lo = vis_faligndata(h0, pair_lo);
            prev = cur;
            cur = *in;
            in++;
            *out64 = vis_bshuffle(pair_lo, pair_hi);
            out64++;
        }

        /* Last group uses the two samples already held in prev / cur. */
        h3 = VIS_LD_U16_I(lut2, ((mlib_addr)2 * cur));
        h2 = VIS_LD_U16_I(lut1, ((mlib_addr)2 * cur));
        h1 = VIS_LD_U16_I(lut0, ((mlib_addr)2 * cur));
        h0 = VIS_LD_U16_I(lut3, ((mlib_addr)2 * prev));
        pair_hi = vis_faligndata(h3, pair_hi);
        pair_hi = vis_faligndata(h2, pair_hi);
        pair_lo = vis_faligndata(h1, pair_lo);
        pair_lo = vis_faligndata(h0, pair_lo);
        prev = cur;
        *out64 = vis_bshuffle(pair_lo, pair_hi);
        out64++;
    }

    /* Trailing scalar: channel 3 of the last sample. */
    tail = (mlib_s16 *)out64;
    tail[0] = lut3[prev];
}
/*
 * Three-channel single-input lookup: S32 source samples, S16 destination.
 * This variant assembles each 8-byte store from two half-filled
 * accumulators via vis_bshuffle().
 *
 * Each source sample p produces table[0][p], table[1][p], table[2][p].
 * Four samples (twelve halfwords) are packed into three 8-byte stores;
 * any samples left after the groups of four are written one halfword at
 * a time.
 *
 * src    source samples
 * dst    destination, written through an mlib_d64 pointer for the
 *        groups of four (NOTE(review): presumably 8-byte aligned --
 *        confirm against the caller)
 * xsize  number of source samples
 * table  three lookup tables; each is offset by HALF_U64 bytes (macro
 *        defined outside this block) before being indexed with the
 *        signed sample value
 */
void mlib_v_ImageLookUpSI_S32_S16_3_D1(
    const mlib_s32 *src,
    mlib_s16 *dst,
    mlib_s32 xsize,
    const mlib_s16 **table)
{
    const mlib_s16 *lut0 = (void *)&(((mlib_u8 **)table)[0][HALF_U64]);
    const mlib_s16 *lut1 = (void *)&(((mlib_u8 **)table)[1][HALF_U64]);
    const mlib_s16 *lut2 = (void *)&(((mlib_u8 **)table)[2][HALF_U64]);
    mlib_s32 *in = (mlib_s32 *)src;         /* read cursor */
    mlib_d64 *out64 = (mlib_d64 *)dst;      /* 8-byte write cursor */
    mlib_s16 *out16;                        /* scalar write cursor */
    mlib_s32 p0, p1, p2, p3;                /* four adjacent samples */
    mlib_d64 h0, h1, h2, h3;                /* loaded table entries */
    mlib_d64 lo0, lo1, lo2;                 /* first two halfwords of each word */
    mlib_d64 hi0, hi1, hi2;                 /* last two halfwords of each word */
    mlib_s32 k = 0;                         /* samples consumed so far */

    /*
     * Align offset 6: each vis_faligndata() is expected to push one
     * halfword into the top of its accumulator (standard VIS behaviour --
     * not visible in this file).
     */
    vis_alignaddr((void *)0, 6);

    if (xsize >= 4) {
        p0 = in[0];
        p1 = in[1];
        p2 = in[2];
        p3 = in[3];
        in += 4;

        /*
         * Byte mask for vis_bshuffle(); presumably picks the upper four
         * bytes of each operand -- confirm against the VIS2 manual.
         */
        vis_write_bmask(0x012389ab, 0);

#pragma pipeloop(0)
        for (k = 0; k <= xsize - 8; k += 4) {
            /* word 0: p0.ch0 p0.ch1 | p0.ch2 p1.ch0 */
            h3 = VIS_LD_U16_I(lut0, ((mlib_addr)2 * p1));
            h2 = VIS_LD_U16_I(lut2, ((mlib_addr)2 * p0));
            h1 = VIS_LD_U16_I(lut1, ((mlib_addr)2 * p0));
            h0 = VIS_LD_U16_I(lut0, ((mlib_addr)2 * p0));
            hi0 = vis_faligndata(h3, hi0);
            hi0 = vis_faligndata(h2, hi0);
            lo0 = vis_faligndata(h1, lo0);
            lo0 = vis_faligndata(h0, lo0);

            /* word 1: p1.ch1 p1.ch2 | p2.ch0 p2.ch1 */
            h3 = VIS_LD_U16_I(lut1, ((mlib_addr)2 * p2));
            h2 = VIS_LD_U16_I(lut0, ((mlib_addr)2 * p2));
            h1 = VIS_LD_U16_I(lut2, ((mlib_addr)2 * p1));
            h0 = VIS_LD_U16_I(lut1, ((mlib_addr)2 * p1));
            hi1 = vis_faligndata(h3, hi1);
            hi1 = vis_faligndata(h2, hi1);
            lo1 = vis_faligndata(h1, lo1);
            lo1 = vis_faligndata(h0, lo1);

            /* word 2: p2.ch2 p3.ch0 | p3.ch1 p3.ch2 */
            h3 = VIS_LD_U16_I(lut2, ((mlib_addr)2 * p3));
            h2 = VIS_LD_U16_I(lut1, ((mlib_addr)2 * p3));
            h1 = VIS_LD_U16_I(lut0, ((mlib_addr)2 * p3));
            h0 = VIS_LD_U16_I(lut2, ((mlib_addr)2 * p2));
            hi2 = vis_faligndata(h3, hi2);
            hi2 = vis_faligndata(h2, hi2);
            lo2 = vis_faligndata(h1, lo2);
            lo2 = vis_faligndata(h0, lo2);

            /* Fetch the next four samples, then store this group. */
            p0 = in[0];
            p1 = in[1];
            p2 = in[2];
            p3 = in[3];
            *out64 = vis_bshuffle(lo0, hi0);
            out64++;
            *out64 = vis_bshuffle(lo1, hi1);
            out64++;
            *out64 = vis_bshuffle(lo2, hi2);
            out64++;
            in += 4;
        }

        /* Emit the four samples that are already loaded. */
        h3 = VIS_LD_U16_I(lut0, ((mlib_addr)2 * p1));
        h2 = VIS_LD_U16_I(lut2, ((mlib_addr)2 * p0));
        h1 = VIS_LD_U16_I(lut1, ((mlib_addr)2 * p0));
        h0 = VIS_LD_U16_I(lut0, ((mlib_addr)2 * p0));
        hi0 = vis_faligndata(h3, hi0);
        hi0 = vis_faligndata(h2, hi0);
        lo0 = vis_faligndata(h1, lo0);
        lo0 = vis_faligndata(h0, lo0);

        h3 = VIS_LD_U16_I(lut1, ((mlib_addr)2 * p2));
        h2 = VIS_LD_U16_I(lut0, ((mlib_addr)2 * p2));
        h1 = VIS_LD_U16_I(lut2, ((mlib_addr)2 * p1));
        h0 = VIS_LD_U16_I(lut1, ((mlib_addr)2 * p1));
        hi1 = vis_faligndata(h3, hi1);
        hi1 = vis_faligndata(h2, hi1);
        lo1 = vis_faligndata(h1, lo1);
        lo1 = vis_faligndata(h0, lo1);

        h3 = VIS_LD_U16_I(lut2, ((mlib_addr)2 * p3));
        h2 = VIS_LD_U16_I(lut1, ((mlib_addr)2 * p3));
        h1 = VIS_LD_U16_I(lut0, ((mlib_addr)2 * p3));
        h0 = VIS_LD_U16_I(lut2, ((mlib_addr)2 * p2));
        hi2 = vis_faligndata(h3, hi2);
        hi2 = vis_faligndata(h2, hi2);
        lo2 = vis_faligndata(h1, lo2);
        lo2 = vis_faligndata(h0, lo2);

        *out64 = vis_bshuffle(lo0, hi0);
        out64++;
        *out64 = vis_bshuffle(lo1, hi1);
        out64++;
        *out64 = vis_bshuffle(lo2, hi2);
        out64++;
        k += 4;
    }

    /* Remaining samples (fewer than four): plain scalar lookups. */
    out16 = (mlib_s16 *)out64;

#pragma pipeloop(0)
    for (; k < xsize; k++) {
        p0 = *in;
        in++;
        out16[0] = lut0[p0];
        out16[1] = lut1[p0];
        out16[2] = lut2[p0];
        out16 += 3;
    }
}