/*
 * Table look-up over rows of four interleaved channels.
 *
 * Every row holds xsize * 4 elements; element i of a row is translated
 * through table[i % 4].  Each table is addressed from its entry 32768, so a
 * source value v selects table[c][32768 + v].
 *
 *   src, slb      first source row and the distance in BYTES between rows
 *   dst, dlb      first destination row and the distance in BYTES between rows
 *   xsize, ysize  pixels per row and number of rows
 *   table         four look-up tables, one per channel
 *
 * The elements in front of the next 8-byte boundary of the destination row
 * (at most three, assuming 2-byte elements) are translated here one by one.
 * The rest of the row is passed to mlib_v_ImageLookUp_S16_S16_124_D1, which
 * presumably expects an 8-byte aligned destination (not visible from this
 * function).
 */
void mlib_v_ImageLookUp_S16_S16_4(mlib_s16 *src, mlib_s32 slb,
                                  mlib_s16 *dst, mlib_s32 dlb,
                                  mlib_s32 xsize, mlib_s32 ysize,
                                  mlib_s16 **table)
{
  mlib_s16  *src_row = src;
  mlib_s16  *dst_row = dst;
  mlib_s32  row;

  for (row = 0; row < ysize; row++) {
    mlib_s16  *in = src_row;
    mlib_s16  *out = dst_row;
    mlib_s16  *lut[4];
    mlib_s32  remaining = xsize * 4;
    mlib_s32  lead, k;

    /* table bases, biased so that signed source values index them directly */
    lut[0] = &table[0][32768];
    lut[1] = &table[1][32768];
    lut[2] = &table[2][32768];
    lut[3] = &table[3][32768];

    /* elements up to the next 8-byte boundary of the destination: 0..3 */
    lead = ((8 - ((mlib_addr)out & 7)) & 7) >> 1;

    /* never more than the row holds, and nothing for a non-positive length */
    if (lead > remaining) {
      lead = remaining;
    }
    if (lead < 0) {
      lead = 0;
    }

    /* leading element k belongs to channel k */
    for (k = 0; k < lead; k++) {
      *out++ = lut[k][in[k]];
    }
    in += lead;
    remaining -= lead;

    if (remaining > 0) {
      /*
       * The first remaining element belongs to channel `lead`, so the tables
       * are handed over rotated by that amount.
       */
      mlib_v_ImageLookUp_S16_S16_124_D1(in, out, remaining,
                                        lut[lead & 3],
                                        lut[(lead + 1) & 3],
                                        lut[(lead + 2) & 3],
                                        lut[(lead + 3) & 3]);
    }

    /* row strides are expressed in bytes */
    src_row = (mlib_s16 *) ((mlib_u8 *) src_row + slb);
    dst_row = (mlib_s16 *) ((mlib_u8 *) dst_row + dlb);
  }
}
/*
 * Table look-up over rows of two interleaved channels.
 *
 * Every row holds xsize * 2 elements; even elements of a row go through
 * table[0] and odd elements through table[1].  Each table is addressed from
 * its entry 32768, so a source value v selects table[c][32768 + v].
 *
 *   src, slb      first source row and the distance in BYTES between rows
 *   dst, dlb      first destination row and the distance in BYTES between rows
 *   xsize, ysize  pixels per row and number of rows
 *   table         two look-up tables, one per channel
 *
 * The elements in front of the next 8-byte boundary of the destination row
 * (at most three, assuming 2-byte elements) are translated here.  The rest
 * of the row is passed to mlib_v_ImageLookUp_S16_S16_124_D1 with the table
 * pair repeated to fill its four table arguments.
 */
void
mlib_v_ImageLookUp_S16_S16_2(
    const mlib_s16 *src,
    mlib_s32 slb,
    mlib_s16 *dst,
    mlib_s32 dlb,
    mlib_s32 xsize,
    mlib_s32 ysize,
    const mlib_s16 **table)
{
	mlib_s16 *src_row = (void *)src;
	mlib_s16 *dst_row = dst;
	mlib_s32 row;

	for (row = 0; row < ysize; row++) {
		mlib_s16 *in = src_row;
		mlib_s16 *out = dst_row;
		/* cur: table for the next element; nxt: for the one after */
		const mlib_s16 *cur = &table[0][32768];
		const mlib_s16 *nxt = &table[1][32768];
		mlib_s32 remaining = xsize * 2;
		/* elements up to the next 8-byte boundary of the destination */
		mlib_s32 lead = ((8 - ((mlib_addr)out & 7)) & 7) >> 1;

		/* do not run past the end of the row */
		if (lead > remaining) {
			lead = remaining;
		}

		/* complete pairs in front of the boundary */
		while (lead >= 2) {
			(*out++) = cur[in[0]];
			(*out++) = nxt[in[1]];
			in += 2;
			remaining -= 2;
			lead -= 2;
		}

		/*
		 * One element left over: translate it and exchange the
		 * tables, because the following element is of the other
		 * channel.
		 */
		if (lead == 1) {
			const mlib_s16 *swap = cur;

			(*out++) = cur[in[0]];
			in++;
			remaining--;
			cur = nxt;
			nxt = swap;
		}

		if (remaining > 0) {
			mlib_v_ImageLookUp_S16_S16_124_D1(in, out, remaining,
			    cur, nxt, cur, nxt);
		}

		/* row strides are expressed in bytes */
		src_row = (mlib_s16 *)((mlib_u8 *)src_row + slb);
		dst_row = (mlib_s16 *)((mlib_u8 *)dst_row + dlb);
	}
}
/*
 * Table look-up over rows of a single channel.
 *
 * Every row holds xsize elements, all translated through table[0].  The
 * table is addressed from its entry 32768, so a source value v selects
 * table[0][32768 + v].
 *
 *   src, slb      first source row and the distance in BYTES between rows
 *   dst, dlb      first destination row and the distance in BYTES between rows
 *   xsize, ysize  elements per row and number of rows
 *   table         only table[0] is used
 *
 * The elements in front of the next 8-byte boundary of the destination row
 * (at most three, assuming 2-byte elements) are translated here.  The rest
 * of the row is passed to mlib_v_ImageLookUp_S16_S16_124_D1 with the same
 * table in all four table arguments.
 */
void
mlib_v_ImageLookUp_S16_S16_1(
    const mlib_s16 *src,
    mlib_s32 slb,
    mlib_s16 *dst,
    mlib_s32 dlb,
    mlib_s32 xsize,
    mlib_s32 ysize,
    const mlib_s16 **table)
{
	const mlib_s16 *lut = &table[0][32768];
	mlib_s16 *src_row = (void *)src;
	mlib_s16 *dst_row = dst;
	mlib_s32 row;

	for (row = 0; row < ysize; row++) {
		mlib_s16 *in = src_row;
		mlib_s16 *out = dst_row;
		mlib_s32 remaining = xsize;
		/* elements up to the next 8-byte boundary of the destination */
		mlib_s32 lead = ((8 - ((mlib_addr)out & 7)) & 7) >> 1;

		/* do not run past the end of the row */
		if (lead > remaining) {
			lead = remaining;
		}

		/* leading elements, one at a time */
		while (lead-- > 0) {
			(*out++) = lut[*in++];
			remaining--;
		}

		if (remaining > 0) {
			mlib_v_ImageLookUp_S16_S16_124_D1(in, out, remaining,
			    lut, lut, lut, lut);
		}

		/* row strides are expressed in bytes */
		src_row = (mlib_s16 *)((mlib_u8 *)src_row + slb);
		dst_row = (mlib_s16 *)((mlib_u8 *)dst_row + dlb);
	}
}