コード例 #1
0
/*
 * Single-input lookup of one row, U16 source -> S16 destination, two output
 * values per source sample:
 *   dst[2 * k]     = table[0][src[k]]
 *   dst[2 * k + 1] = table[1][src[k]]
 *
 * src   - source row; src[0] is read unconditionally, so xsize must be >= 1
 * dst   - destination row; dst[0] is stored as a scalar and everything that
 *         follows is stored through an mlib_d64 pointer, so dst + 1 is
 *         presumably required to be 8-byte aligned (confirm with the caller)
 * xsize - number of source samples
 * table - table[0] and table[1] are the two lookup tables
 *
 * The main loop is software-pipelined: the samples for the next iteration
 * are fetched at the end of the current one, and the loop body is repeated
 * once after the loop so the look-ahead never reads a sample that is not
 * consumed.
 */
void
mlib_v_ImageLookUpSI_U16_S16_2_D1(
    const mlib_u16 *src,
    mlib_s16 *dst,
    mlib_s32 xsize,
    const mlib_s16 **table)
{
/* pointer to source data */
	mlib_u16 *sp;

/* source samples, doubled into byte offsets for VIS_LD_U16_I */
	mlib_s32 s0, s1, s2;

/* pointer to start of destination */
	mlib_s16 *dl;

/* aligned pointer to destination */
	mlib_d64 *dp;

/* values loaded from the tables */
	mlib_d64 t0, t1, t2, t3;

/*
 * Shift accumulators.  Zero-initialized so that vis_faligndata never reads
 * an indeterminate value; only bytes shifted in from t0..t3 are ever stored.
 */
	mlib_d64 acc0 = 0.0;
	mlib_d64 acc1 = 0.0;

/* loop variable */
	mlib_s32 i;
	const mlib_s16 *tab0 = &table[0][0];
	const mlib_s16 *tab1 = &table[1][0];

	sp = (void *)src;
	dl = dst;

	/* align offset 6: each vis_faligndata shifts one 16-bit value in */
	vis_alignaddr((void *)0, 6);

	/* first output value is written as a scalar, the rest through dp */
	s0 = (*sp++);
	(*dl++) = tab0[s0];
	dp = (mlib_d64 *)dl;
	xsize--;
	s0 <<= 1;

	if (xsize >= 2) {

		s1 = (sp[0] << 1);
		s2 = (sp[1] << 1);
		sp += 2;

		/* bshuffle picks bytes 0-3 of acc0 and bytes 0-3 of acc1 */
		vis_write_bmask(0x012389ab, 0);

#pragma pipeloop(0)
		for (i = 0; i <= xsize - 4; i += 2, sp += 2) {
			t3 = VIS_LD_U16_I(tab0, s2);
			t2 = VIS_LD_U16_I(tab1, s1);
			t1 = VIS_LD_U16_I(tab0, s1);
			t0 = VIS_LD_U16_I(tab1, s0);
			acc1 = vis_faligndata(t3, acc1);
			acc1 = vis_faligndata(t2, acc1);
			acc0 = vis_faligndata(t1, acc0);
			acc0 = vis_faligndata(t0, acc0);
			/* s2 still owes its table[1] value to the next word */
			s0 = s2;
			s1 = (sp[0] << 1);
			s2 = (sp[1] << 1);
			(*dp++) = vis_bshuffle(acc0, acc1);
		}

		/* pipeline epilogue: same body without the look-ahead loads */
		t3 = VIS_LD_U16_I(tab0, s2);
		t2 = VIS_LD_U16_I(tab1, s1);
		t1 = VIS_LD_U16_I(tab0, s1);
		t0 = VIS_LD_U16_I(tab1, s0);
		acc1 = vis_faligndata(t3, acc1);
		acc1 = vis_faligndata(t2, acc1);
		acc0 = vis_faligndata(t1, acc0);
		acc0 = vis_faligndata(t0, acc0);
		s0 = s2;
		(*dp++) = vis_bshuffle(acc0, acc1);
	}

	dl = (mlib_s16 *)dp;

	/* one source sample left over: emit two values as a 32-bit store */
	if ((xsize & 1) != 0) {
		s1 = (sp[0] << 1);
		t1 = VIS_LD_U16_I(tab0, s1);
		t0 = VIS_LD_U16_I(tab1, s0);
		acc0 = vis_faligndata(t1, acc0);
		acc0 = vis_faligndata(t0, acc0);
		*(mlib_f32 *)dp = vis_read_hi(acc0);
		s0 = s1;
		dl += 2;
	}

	/* the last sample's table[1] value closes the row */
	s0 >>= 1;
	*dl = tab1[s0];
}
コード例 #2
0
/*
 * Single-input lookup of one row, S32 source -> S16 destination, four output
 * channels, for a destination that starts on the last channel of a pixel.
 *
 * Each 8-byte word written holds
 *   table[3][src[k]], table[0][src[k+1]], table[1][src[k+1]],
 *   table[2][src[k+1]]
 * and the row is closed with a scalar store of table[3][last sample].
 *
 * src   - source row; xsize + 1 samples are read (src[0] unconditionally)
 * dst   - destination; written through an mlib_d64 pointer from the start,
 *         so it is presumably 8-byte aligned (confirm with the caller)
 * xsize - number of 8-byte words to produce; 4 * xsize + 1 values are written
 * table - four lookup tables; indices are signed 32-bit samples, so each
 *         table pointer is biased by 2^31 elements
 *
 * NOTE(review): the first three channels of src[0] are presumably written by
 * the caller before this routine is entered.
 */
void
mlib_v_ImageLookUpSI_S32_S16_4_DstOff3_D1(
    const mlib_s32 *src,
    mlib_s16 *dst,
    mlib_s32 xsize,
    const mlib_s16 **table)
{
    /* pointer to source data */
    mlib_s32 *sp;

    /* source data: s0 is the previous sample, s1 the current one */
    mlib_s32 s0, s1;

    /* pointer to start of destination */
    mlib_s16 *dl;

    /* aligned pointer to destination */
    mlib_d64 *dp;

    /* values loaded from the tables */
    mlib_d64 t0, t1, t2, t3;

    /*
     * Shift accumulator.  Zero-initialized so that vis_faligndata never
     * reads an indeterminate value; all eight bytes are replaced by four
     * shifts before each store.
     */
    mlib_d64 acc = 0.0;

    /* loop variable */
    mlib_s32 i;
    const mlib_s16 *tab0 = &table[0][(mlib_u32)2147483648u];
    const mlib_s16 *tab1 = &table[1][(mlib_u32)2147483648u];
    const mlib_s16 *tab2 = &table[2][(mlib_u32)2147483648u];
    const mlib_s16 *tab3 = &table[3][(mlib_u32)2147483648u];

    sp = (void *)src;
    dl = dst;
    dp = (mlib_d64 *)dl;

    /* align offset 6: each vis_faligndata shifts one 16-bit value in */
    vis_alignaddr((void *)0, 6);

    s0 = (*sp++);

    if (xsize >= 1) {

        s1 = (*sp++);

#pragma pipeloop(0)
        for (i = 0; i <= xsize - 2; i++) {
            /* table offsets are in bytes, hence the factor of two */
            t3 = VIS_LD_U16_I(tab2, ((mlib_addr)2 * s1));
            t2 = VIS_LD_U16_I(tab1, ((mlib_addr)2 * s1));
            t1 = VIS_LD_U16_I(tab0, ((mlib_addr)2 * s1));
            t0 = VIS_LD_U16_I(tab3, ((mlib_addr)2 * s0));
            acc = vis_faligndata(t3, acc);
            acc = vis_faligndata(t2, acc);
            acc = vis_faligndata(t1, acc);
            acc = vis_faligndata(t0, acc);
            s0 = s1;
            s1 = (*sp++);
            (*dp++) = acc;
        }

        /* pipeline epilogue: same body without the look-ahead load */
        t3 = VIS_LD_U16_I(tab2, ((mlib_addr)2 * s1));
        t2 = VIS_LD_U16_I(tab1, ((mlib_addr)2 * s1));
        t1 = VIS_LD_U16_I(tab0, ((mlib_addr)2 * s1));
        t0 = VIS_LD_U16_I(tab3, ((mlib_addr)2 * s0));
        acc = vis_faligndata(t3, acc);
        acc = vis_faligndata(t2, acc);
        acc = vis_faligndata(t1, acc);
        acc = vis_faligndata(t0, acc);
        s0 = s1;
        (*dp++) = acc;
    }

    dl = (mlib_s16 *)dp;

    /* last channel of the final sample */
    dl[0] = tab3[s0];
}
コード例 #3
0
/*
 * Single-input lookup of one row, S32 source -> S16 destination, two output
 * values per source sample:
 *   dst[2 * k]     = table[0][src[k]]
 *   dst[2 * k + 1] = table[1][src[k]]
 *
 * src   - source row of xsize samples
 * dst   - destination row; written through an mlib_d64 pointer from its
 *         first element, so it is presumably 8-byte aligned (the "A8" in
 *         the name; confirm with the caller)
 * xsize - number of source samples; nothing is read or written for
 *         xsize <= 0
 * table - two lookup tables; indices are signed 32-bit samples, so each
 *         table pointer is biased by 2^31 elements
 *
 * Two samples fill one 8-byte word.  The main loop is software-pipelined
 * (next pair fetched at the end of each iteration, body repeated once after
 * the loop), and an odd trailing sample is stored as a 32-bit half word.
 */
void
mlib_v_ImageLookUpSI_S32_S16_2_DstA8D1(
    const mlib_s32 *src,
    mlib_s16 *dst,
    mlib_s32 xsize,
    const mlib_s16 **table)
{
    /* pointer to source data */
    mlib_s32 *sp;

    /* source data */
    mlib_s32 s0, s1;

    /* pointer to start of destination */
    mlib_s16 *dl;

    /* aligned pointer to destination */
    mlib_d64 *dp;

    /* values loaded from the tables */
    mlib_d64 t0, t1, t2, t3;

    /*
     * Shift accumulator.  Zero-initialized so that vis_faligndata never
     * reads an indeterminate value; only bytes shifted in from t0..t3 are
     * ever stored.
     */
    mlib_d64 acc = 0.0;

    /* loop variable */
    mlib_s32 i;
    const mlib_s16 *tab0 = &table[0][(mlib_u32)2147483648u];
    const mlib_s16 *tab1 = &table[1][(mlib_u32)2147483648u];

    sp = (void *)src;
    dl = dst;
    dp = (mlib_d64 *)dl;

    /* align offset 6: each vis_faligndata shifts one 16-bit value in */
    vis_alignaddr((void *)0, 6);

    if (xsize >= 2) {

        s0 = sp[0];
        s1 = sp[1];
        sp += 2;

#pragma pipeloop(0)
        for (i = 0; i <= xsize - 4; i += 2, sp += 2) {
            /* table offsets are in bytes, hence the factor of two */
            t3 = VIS_LD_U16_I(tab1, ((mlib_addr)2 * s1));
            t2 = VIS_LD_U16_I(tab0, ((mlib_addr)2 * s1));
            t1 = VIS_LD_U16_I(tab1, ((mlib_addr)2 * s0));
            t0 = VIS_LD_U16_I(tab0, ((mlib_addr)2 * s0));
            acc = vis_faligndata(t3, acc);
            acc = vis_faligndata(t2, acc);
            acc = vis_faligndata(t1, acc);
            acc = vis_faligndata(t0, acc);
            s0 = sp[0];
            s1 = sp[1];
            (*dp++) = acc;
        }

        /* pipeline epilogue: same body without the look-ahead loads */
        t3 = VIS_LD_U16_I(tab1, ((mlib_addr)2 * s1));
        t2 = VIS_LD_U16_I(tab0, ((mlib_addr)2 * s1));
        t1 = VIS_LD_U16_I(tab1, ((mlib_addr)2 * s0));
        t0 = VIS_LD_U16_I(tab0, ((mlib_addr)2 * s0));
        acc = vis_faligndata(t3, acc);
        acc = vis_faligndata(t2, acc);
        acc = vis_faligndata(t1, acc);
        acc = vis_faligndata(t0, acc);
        (*dp++) = acc;
    }

    /* odd trailing sample: two values go out as one 32-bit store */
    if ((xsize & 1) != 0) {
        s0 = sp[0];
        t1 = VIS_LD_U16_I(tab1, ((mlib_addr)2 * s0));
        t0 = VIS_LD_U16_I(tab0, ((mlib_addr)2 * s0));
        acc = vis_faligndata(t1, acc);
        acc = vis_faligndata(t0, acc);
        *(mlib_f32 *)dp = vis_read_hi(acc);
    }
}
/*
 * Single-input lookup of one row, S16 source -> S16 destination, four output
 * channels, for a destination that starts on the third channel of a pixel.
 *
 * Each 8-byte word written holds
 *   table[2][src[k]], table[3][src[k]], table[0][src[k+1]], table[1][src[k+1]]
 * and the row is closed with scalar stores of table[2] and table[3] for the
 * last sample.
 *
 * src   - source row; xsize + 1 samples are read (src[0] unconditionally)
 * dst   - destination; written through an mlib_d64 pointer from the start,
 *         so it is presumably 8-byte aligned (confirm with the caller)
 * xsize - number of 8-byte words to produce; 4 * xsize + 2 values are written
 * table - four lookup tables; indices are signed 16-bit samples, so each
 *         table pointer is biased by 32768 elements
 *
 * Samples are doubled with a multiplication rather than a left shift because
 * they may be negative, and left-shifting a negative value is undefined in C.
 */
void mlib_v_ImageLookUpSI_S16_S16_4_DstOff2_D1(const mlib_s16 *src,
                                               mlib_s16       *dst,
                                               mlib_s32       xsize,
                                               const mlib_s16 **table)
{
  mlib_s16 *sp;              /* pointer to source data */
  mlib_s32 s0, s1;           /* source data as byte offsets (2 * sample) */
  mlib_s16 *dl;              /* pointer to start of destination */
  mlib_d64 *dp;              /* aligned pointer to destination */
  mlib_d64 t0, t1, t2, t3;   /* values loaded from the tables */
  mlib_d64 acc = 0.0;        /* shift accumulator; zeroed so that */
                             /* vis_faligndata never reads an */
                             /* indeterminate value */
  mlib_s32 i;                /* loop variable */
  const mlib_s16 *tab0 = &table[0][32768];
  const mlib_s16 *tab1 = &table[1][32768];
  const mlib_s16 *tab2 = &table[2][32768];
  const mlib_s16 *tab3 = &table[3][32768];

  sp   = (void *)src;
  dl   = dst;
  dp   = (mlib_d64 *) dl;

  /* align offset 6: each vis_faligndata shifts one 16-bit value in */
  vis_alignaddr((void *) 0, 6);

  s0 = (*sp++) * 2;

  if (xsize >= 1) {

    s1 = (*sp++) * 2;

#pragma pipeloop(0)
    for(i = 0; i <= xsize - 2; i++) {
      t3 = VIS_LD_U16_I(tab1, s1);
      t2 = VIS_LD_U16_I(tab0, s1);
      t1 = VIS_LD_U16_I(tab3, s0);
      t0 = VIS_LD_U16_I(tab2, s0);
      acc = vis_faligndata(t3, acc);
      acc = vis_faligndata(t2, acc);
      acc = vis_faligndata(t1, acc);
      acc = vis_faligndata(t0, acc);
      s0 = s1;
      s1 = (*sp++) * 2;
      *dp++ = acc;
    }

    /* pipeline epilogue: same body without the look-ahead load */
    t3 = VIS_LD_U16_I(tab1, s1);
    t2 = VIS_LD_U16_I(tab0, s1);
    t1 = VIS_LD_U16_I(tab3, s0);
    t0 = VIS_LD_U16_I(tab2, s0);
    acc = vis_faligndata(t3, acc);
    acc = vis_faligndata(t2, acc);
    acc = vis_faligndata(t1, acc);
    acc = vis_faligndata(t0, acc);
    s0 = s1;
    *dp++ = acc;
  }

  dl = (mlib_s16*)dp;

  /* s0 is always even here, so the division recovers the sample exactly */
  s0 /= 2;

  dl[0] = tab2[s0];
  dl[1] = tab3[s0];
}
コード例 #5
0
/*
 * Single-input lookup of one row, S32 source -> S16 destination, three
 * output values per source sample:
 *   dst[3 * k + c] = table[c][src[k]],  c = 0, 1, 2
 *
 * src   - source row of xsize samples
 * dst   - destination row; the vector part is written through an mlib_d64
 *         pointer from the first element, so dst is presumably 8-byte
 *         aligned (confirm with the caller)
 * xsize - number of source samples
 * table - three lookup tables; indices are signed 32-bit samples, so each
 *         table pointer is biased by 2^31 elements
 *
 * Four samples produce twelve values, i.e. exactly three 8-byte words, so
 * the vector loop consumes samples in groups of four.  It is software-
 * pipelined (next group fetched at the end of each iteration, body repeated
 * once after the loop).  The up-to-three leftover samples are handled by a
 * scalar loop.
 */
void
mlib_v_ImageLookUpSI_S32_S16_3_D1(
    const mlib_s32 *src,
    mlib_s16 *dst,
    mlib_s32 xsize,
    const mlib_s16 **table)
{
    /* pointer to source data */
    mlib_s32 *sp;

    /* pointer to start of destination */
    mlib_s16 *dl;

    /* aligned pointer to destination */
    mlib_d64 *dp;

    /* values loaded from the tables */
    mlib_d64 t0, t1, t2, t3;

    /*
     * Shift accumulators, one per output word of a group.  Zero-initialized
     * so that vis_faligndata never reads an indeterminate value; all eight
     * bytes of each are replaced by four shifts before each store.
     */
    mlib_d64 acc0 = 0.0, acc1 = 0.0, acc2 = 0.0;

    /* loop variable: number of source samples already consumed */
    mlib_s32 i;
    const mlib_s16 *tab0 = &table[0][(mlib_u32)2147483648u];
    const mlib_s16 *tab1 = &table[1][(mlib_u32)2147483648u];
    const mlib_s16 *tab2 = &table[2][(mlib_u32)2147483648u];

    /* the four source samples of the current group */
    mlib_s32 s00, s01, s02, s03;

    sp = (void *)src;
    dl = dst;
    dp = (mlib_d64 *)dl;

    /* align offset 6: each vis_faligndata shifts one 16-bit value in */
    vis_alignaddr((void *)0, 6);

    i = 0;

    if (xsize >= 4) {

        s00 = sp[0];
        s01 = sp[1];
        s02 = sp[2];
        s03 = sp[3];
        sp += 4;

#pragma pipeloop(0)
        for (i = 0; i <= xsize - 8; i += 4, sp += 4) {
            /* word 0: all three channels of s00, first channel of s01 */
            t3 = VIS_LD_U16_I(tab0, ((mlib_addr)2 * s01));
            t2 = VIS_LD_U16_I(tab2, ((mlib_addr)2 * s00));
            t1 = VIS_LD_U16_I(tab1, ((mlib_addr)2 * s00));
            t0 = VIS_LD_U16_I(tab0, ((mlib_addr)2 * s00));
            acc0 = vis_faligndata(t3, acc0);
            acc0 = vis_faligndata(t2, acc0);
            acc0 = vis_faligndata(t1, acc0);
            acc0 = vis_faligndata(t0, acc0);
            /* word 1: rest of s01, first two channels of s02 */
            t3 = VIS_LD_U16_I(tab1, ((mlib_addr)2 * s02));
            t2 = VIS_LD_U16_I(tab0, ((mlib_addr)2 * s02));
            t1 = VIS_LD_U16_I(tab2, ((mlib_addr)2 * s01));
            t0 = VIS_LD_U16_I(tab1, ((mlib_addr)2 * s01));
            acc1 = vis_faligndata(t3, acc1);
            acc1 = vis_faligndata(t2, acc1);
            acc1 = vis_faligndata(t1, acc1);
            acc1 = vis_faligndata(t0, acc1);
            /* word 2: last channel of s02, all three channels of s03 */
            t3 = VIS_LD_U16_I(tab2, ((mlib_addr)2 * s03));
            t2 = VIS_LD_U16_I(tab1, ((mlib_addr)2 * s03));
            t1 = VIS_LD_U16_I(tab0, ((mlib_addr)2 * s03));
            t0 = VIS_LD_U16_I(tab2, ((mlib_addr)2 * s02));
            acc2 = vis_faligndata(t3, acc2);
            acc2 = vis_faligndata(t2, acc2);
            acc2 = vis_faligndata(t1, acc2);
            acc2 = vis_faligndata(t0, acc2);
            s00 = sp[0];
            s01 = sp[1];
            s02 = sp[2];
            s03 = sp[3];
            (*dp++) = acc0;
            (*dp++) = acc1;
            (*dp++) = acc2;
        }

        /* pipeline epilogue: same body without the look-ahead loads */
        t3 = VIS_LD_U16_I(tab0, ((mlib_addr)2 * s01));
        t2 = VIS_LD_U16_I(tab2, ((mlib_addr)2 * s00));
        t1 = VIS_LD_U16_I(tab1, ((mlib_addr)2 * s00));
        t0 = VIS_LD_U16_I(tab0, ((mlib_addr)2 * s00));
        acc0 = vis_faligndata(t3, acc0);
        acc0 = vis_faligndata(t2, acc0);
        acc0 = vis_faligndata(t1, acc0);
        acc0 = vis_faligndata(t0, acc0);
        t3 = VIS_LD_U16_I(tab1, ((mlib_addr)2 * s02));
        t2 = VIS_LD_U16_I(tab0, ((mlib_addr)2 * s02));
        t1 = VIS_LD_U16_I(tab2, ((mlib_addr)2 * s01));
        t0 = VIS_LD_U16_I(tab1, ((mlib_addr)2 * s01));
        acc1 = vis_faligndata(t3, acc1);
        acc1 = vis_faligndata(t2, acc1);
        acc1 = vis_faligndata(t1, acc1);
        acc1 = vis_faligndata(t0, acc1);
        t3 = VIS_LD_U16_I(tab2, ((mlib_addr)2 * s03));
        t2 = VIS_LD_U16_I(tab1, ((mlib_addr)2 * s03));
        t1 = VIS_LD_U16_I(tab0, ((mlib_addr)2 * s03));
        t0 = VIS_LD_U16_I(tab2, ((mlib_addr)2 * s02));
        acc2 = vis_faligndata(t3, acc2);
        acc2 = vis_faligndata(t2, acc2);
        acc2 = vis_faligndata(t1, acc2);
        acc2 = vis_faligndata(t0, acc2);
        (*dp++) = acc0;
        (*dp++) = acc1;
        (*dp++) = acc2;
        i += 4;
    }

    dl = (mlib_s16 *)dp;

    /* scalar tail for the samples that do not fill a group of four */
#pragma pipeloop(0)
    for (; i < xsize; i++) {
        s00 = sp[0];
        dl[0] = tab0[s00];
        dl[1] = tab1[s00];
        dl[2] = tab2[s00];
        dl += 3;
        sp++;
    }
}
コード例 #6
0
/*
 * Lookup of one row, U16 source -> U16 destination, with four tables applied
 * cyclically to consecutive samples:
 *   dst[k] = table<k mod 4>[src[k]]
 *
 * src     - source row of xsize samples
 * dst     - destination row; written through an mlib_d64 pointer from its
 *           first element, so it is presumably 8-byte aligned (confirm with
 *           the caller)
 * xsize   - number of samples; nothing is read or written for xsize <= 0
 * table0..table3 - lookup tables for sample positions 0..3 within each
 *           group of four
 *
 * Full groups of four are handled by a software-pipelined loop (next group
 * fetched at the end of each iteration, body repeated once after the loop).
 * A remainder of one to three samples is assembled in the accumulator and
 * written with an edge-masked partial store so nothing past dend is touched.
 */
void mlib_v_ImageLookUp_U16_U16_124_D1(const mlib_u16 *src,
                                       mlib_u16       *dst,
                                       mlib_s32       xsize,
                                       const mlib_u16 *table0,
                                       const mlib_u16 *table1,
                                       const mlib_u16 *table2,
                                       const mlib_u16 *table3)
{
  mlib_u16 *sp;            /* pointer to source data */
  mlib_s32 s0, s1, s2, s3; /* source data */
  mlib_u16 *dl;            /* pointer to start of destination */
  mlib_u16 *dend;          /* pointer to end of destination */
  mlib_d64 *dp;            /* aligned pointer to destination */
  mlib_d64 t0, t1, t2, t3; /* values loaded from the tables */
  mlib_d64 acc0 = 0.0;     /* shift accumulator; zeroed so that */
                           /* vis_faligndata never reads an */
                           /* indeterminate value */
  mlib_s32 emask;          /* edge mask */
  mlib_s32 i, num;         /* loop variable, tail sample count */

  dl   = dst;
  sp   = (void *)src;
  dp   = (mlib_d64 *) dl;
  dend = dl + xsize - 1;

  /* align offset 6: each vis_faligndata shifts one 16-bit value in */
  vis_alignaddr((void *) 0, 6);

  if (xsize >= 4) {

    s0 = sp[0];
    s1 = sp[1];
    s2 = sp[2];
    s3 = sp[3];
    sp += 4;

#pragma pipeloop(0)
    for(i = 0; i <= xsize - 8; i+=4, sp += 4) {
      /* table offsets are in bytes, hence the factor of two */
      t3 = VIS_LD_U16_I(table3, 2*s3);
      t2 = VIS_LD_U16_I(table2, 2*s2);
      t1 = VIS_LD_U16_I(table1, 2*s1);
      t0 = VIS_LD_U16_I(table0, 2*s0);
      acc0 = vis_faligndata(t3, acc0);
      acc0 = vis_faligndata(t2, acc0);
      acc0 = vis_faligndata(t1, acc0);
      acc0 = vis_faligndata(t0, acc0);
      s0 = sp[0];
      s1 = sp[1];
      s2 = sp[2];
      s3 = sp[3];
      *dp++ = acc0;
    }

    /* pipeline epilogue: same body without the look-ahead loads */
    t3 = VIS_LD_U16_I(table3, 2*s3);
    t2 = VIS_LD_U16_I(table2, 2*s2);
    t1 = VIS_LD_U16_I(table1, 2*s1);
    t0 = VIS_LD_U16_I(table0, 2*s0);
    acc0 = vis_faligndata(t3, acc0);
    acc0 = vis_faligndata(t2, acc0);
    acc0 = vis_faligndata(t1, acc0);
    acc0 = vis_faligndata(t0, acc0);
    *dp++ = acc0;
  }

  /* one to three samples remain */
  if ((mlib_addr) dp <= (mlib_addr) dend) {

    /* point sp at the last sample; the tail is gathered back to front */
    num = (mlib_u16*) dend - (mlib_u16*) dp;
    sp  += num;
    num ++;

    if (num == 1) {
      s0 = (mlib_s32) *sp;
      sp --;

      t0  = VIS_LD_U16_I(table0, 2*s0);
      acc0 = vis_faligndata(t0, acc0);
    } else if (num  == 2) {
      s0 = (mlib_s32) *sp;
      sp --;

      t0  = VIS_LD_U16_I(table1, 2*s0);
      acc0 = vis_faligndata(t0, acc0);

      s0 = (mlib_s32) *sp;
      sp --;

      t0  = VIS_LD_U16_I(table0, 2*s0);
      acc0 = vis_faligndata(t0, acc0);
    } else if (num == 3) {
      s0 = (mlib_s32) *sp;
      sp --;

      t0  = VIS_LD_U16_I(table2, 2*s0);
      acc0 = vis_faligndata(t0, acc0);

      s0 = (mlib_s32) *sp;
      sp --;

      t0  = VIS_LD_U16_I(table1, 2*s0);
      acc0 = vis_faligndata(t0, acc0);

      s0 = (mlib_s32) *sp;
      sp --;

      t0  = VIS_LD_U16_I(table0, 2*s0);
      acc0 = vis_faligndata(t0, acc0);
    }

    /* store only the 16-bit lanes that lie between dp and dend */
    emask = vis_edge16(dp, dend);
    vis_pst_16(acc0, dp, emask);
  }
}
コード例 #7
0
/*
 * Lookup of one row, S16 source -> S16 destination, with three tables
 * applied cyclically to consecutive samples:
 *   dst[k] = table<k mod 3>[src[k]]
 *
 * src     - source row of xsize samples
 * dst     - destination row; written through an mlib_d64 pointer from its
 *           first element, so it is presumably 8-byte aligned (confirm with
 *           the caller)
 * xsize   - number of samples; nothing is read or written for xsize <= 0
 * table0..table2 - lookup tables for the three sample positions; they are
 *           indexed with signed sample values, so the pointers presumably
 *           address the middle of each table (confirm with the caller)
 *
 * Each 8-byte word holds four samples, one more than the table period, so
 * the three table pointers are rotated by one position after every word.
 * Full words are handled by a software-pipelined loop; a remainder of one to
 * three samples is written with an edge-masked partial store.
 *
 * Samples are doubled with a multiplication rather than a left shift because
 * they may be negative, and left-shifting a negative value is undefined in C.
 */
void
mlib_v_ImageLookUp_S16_S16_3_D1(
    const mlib_s16 *src,
    mlib_s16 *dst,
    mlib_s32 xsize,
    const mlib_s16 *table0,
    const mlib_s16 *table1,
    const mlib_s16 *table2)
{
/* pointer to source data */
	mlib_s16 *sp;

/* source data as byte offsets (2 * sample) */
	mlib_s32 s0, s1, s2, s3;

/* pointer to start of destination */
	mlib_s16 *dl;

/* pointer to end of destination */
	mlib_s16 *dend;

/* aligned pointer to destination */
	mlib_d64 *dp;

/* values loaded from the tables */
	mlib_d64 t0, t1, t2, t3;

/*
 * Shift accumulators.  Zero-initialized so that vis_faligndata never reads
 * an indeterminate value; only bytes shifted in from t0..t3 are ever stored.
 */
	mlib_d64 acc0 = 0.0;
	mlib_d64 acc1 = 0.0;

/* edge mask */
	mlib_s32 emask;

/* loop variable, tail sample count */
	mlib_s32 i, num;

/* scratch pointer used while rotating the tables */
	const mlib_s16 *table;

	dl = dst;
	sp = (void *)src;
	dp = (mlib_d64 *)dl;
	dend = dl + xsize - 1;

	/* align offset 6: each vis_faligndata shifts one 16-bit value in */
	vis_alignaddr((void *)0, 6);

	if (xsize >= 4) {

		s0 = sp[0] * 2;
		s1 = sp[1] * 2;
		s2 = sp[2] * 2;
		s3 = sp[3] * 2;
		sp += 4;

		/* bshuffle picks bytes 0-3 of acc0 and bytes 0-3 of acc1 */
		vis_write_bmask(0x012389ab, 0);

#pragma pipeloop(0)
		for (i = 0; i <= xsize - 8; i += 4, sp += 4) {
			/* the fourth sample wraps around to table0 */
			t3 = VIS_LD_U16_I(table0, s3);
			t2 = VIS_LD_U16_I(table2, s2);
			t1 = VIS_LD_U16_I(table1, s1);
			t0 = VIS_LD_U16_I(table0, s0);
			acc1 = vis_faligndata(t3, acc1);
			acc1 = vis_faligndata(t2, acc1);
			acc0 = vis_faligndata(t1, acc0);
			acc0 = vis_faligndata(t0, acc0);
			s0 = sp[0] * 2;
			s1 = sp[1] * 2;
			s2 = sp[2] * 2;
			s3 = sp[3] * 2;
			(*dp++) = vis_bshuffle(acc0, acc1);
			/* rotate so table0 matches the next word's first sample */
			table = table0;
			table0 = table1;
			table1 = table2;
			table2 = table;
		}

		/* pipeline epilogue: same body without the look-ahead loads */
		t3 = VIS_LD_U16_I(table0, s3);
		t2 = VIS_LD_U16_I(table2, s2);
		t1 = VIS_LD_U16_I(table1, s1);
		t0 = VIS_LD_U16_I(table0, s0);
		acc1 = vis_faligndata(t3, acc1);
		acc1 = vis_faligndata(t2, acc1);
		acc0 = vis_faligndata(t1, acc0);
		acc0 = vis_faligndata(t0, acc0);
		(*dp++) = vis_bshuffle(acc0, acc1);
		table = table0;
		table0 = table1;
		table1 = table2;
		table2 = table;
	}

	/* one to three samples remain */
	if ((mlib_addr)dp <= (mlib_addr)dend) {

		/* point sp at the last sample; the tail is gathered back to front */
		num = (mlib_s16 *)dend - (mlib_s16 *)dp;
		sp += num;
		num++;

		if (num == 1) {
			s0 = (mlib_s32)*sp;
			sp--;

			t0 = VIS_LD_U16_I(table0, s0 * 2);
			acc0 = vis_faligndata(t0, acc0);
		} else if (num == 2) {
			s0 = (mlib_s32)*sp;
			sp--;

			t0 = VIS_LD_U16_I(table1, s0 * 2);
			acc0 = vis_faligndata(t0, acc0);

			s0 = (mlib_s32)*sp;
			sp--;

			t0 = VIS_LD_U16_I(table0, s0 * 2);
			acc0 = vis_faligndata(t0, acc0);
		} else if (num == 3) {
			s0 = (mlib_s32)*sp;
			sp--;

			t0 = VIS_LD_U16_I(table2, s0 * 2);
			acc0 = vis_faligndata(t0, acc0);

			s0 = (mlib_s32)*sp;
			sp--;

			t0 = VIS_LD_U16_I(table1, s0 * 2);
			acc0 = vis_faligndata(t0, acc0);

			s0 = (mlib_s32)*sp;
			sp--;

			t0 = VIS_LD_U16_I(table0, s0 * 2);
			acc0 = vis_faligndata(t0, acc0);
		}

		/* store only the 16-bit lanes that lie between dp and dend */
		emask = vis_edge16(dp, dend);
		vis_pst_16(acc0, dp, emask);
	}
}
コード例 #8
0
/*
 * Single-input lookup of one row, S32 source -> S16 destination, two output
 * values per source sample (byte-shuffle variant):
 *   dst[2 * k]     = table[0][src[k]]
 *   dst[2 * k + 1] = table[1][src[k]]
 *
 * src   - source row of xsize samples
 * dst   - destination row; written through an mlib_d64 pointer from its
 *         first element, so it is presumably 8-byte aligned (the "A8" in
 *         the name; confirm with the caller)
 * xsize - number of source samples; nothing is read or written for
 *         xsize <= 0
 * table - two lookup tables; each table pointer is advanced by HALF_U64
 *         bytes (defined outside this block, presumably the offset of the
 *         entry for sample value 0) so signed samples can index it
 *
 * Each output word is built in two independent accumulators (two shifts
 * each) and merged with vis_bshuffle.  The main loop is software-pipelined,
 * and an odd trailing sample is stored as a 32-bit half word.
 */
void
mlib_v_ImageLookUpSI_S32_S16_2_DstA8D1(
    const mlib_s32 *src,
    mlib_s16 *dst,
    mlib_s32 xsize,
    const mlib_s16 **table)
{
/* pointer to source data */
	mlib_s32 *sp;

/* source data */
	mlib_s32 s0, s1;

/* pointer to start of destination */
	mlib_s16 *dl;

/* aligned pointer to destination */
	mlib_d64 *dp;

/* values loaded from the tables */
	mlib_d64 t0, t1, t2, t3;

/*
 * Shift accumulators.  Zero-initialized so that vis_faligndata never reads
 * an indeterminate value; only bytes shifted in from t0..t3 are ever stored.
 */
	mlib_d64 acc0 = 0.0;
	mlib_d64 acc1 = 0.0;

/* loop variable */
	mlib_s32 i;
	const mlib_s16 *tab0 = (void *)&(((mlib_u8 **)table)[0][HALF_U64]);
	const mlib_s16 *tab1 = (void *)&(((mlib_u8 **)table)[1][HALF_U64]);

	sp = (void *)src;
	dl = dst;
	dp = (mlib_d64 *)dl;

	/* align offset 6: each vis_faligndata shifts one 16-bit value in */
	vis_alignaddr((void *)0, 6);

	if (xsize >= 2) {

		s0 = sp[0];
		s1 = sp[1];
		sp += 2;

		/* bshuffle picks bytes 0-3 of acc0 and bytes 0-3 of acc1 */
		vis_write_bmask(0x012389ab, 0);

#pragma pipeloop(0)
		for (i = 0; i <= xsize - 4; i += 2, sp += 2) {
			/* table offsets are in bytes, hence the factor of two */
			t3 = VIS_LD_U16_I(tab1, ((mlib_addr)2 * s1));
			t2 = VIS_LD_U16_I(tab0, ((mlib_addr)2 * s1));
			t1 = VIS_LD_U16_I(tab1, ((mlib_addr)2 * s0));
			t0 = VIS_LD_U16_I(tab0, ((mlib_addr)2 * s0));
			acc1 = vis_faligndata(t3, acc1);
			acc1 = vis_faligndata(t2, acc1);
			acc0 = vis_faligndata(t1, acc0);
			acc0 = vis_faligndata(t0, acc0);
			s0 = sp[0];
			s1 = sp[1];
			(*dp++) = vis_bshuffle(acc0, acc1);
		}

		/* pipeline epilogue: same body without the look-ahead loads */
		t3 = VIS_LD_U16_I(tab1, ((mlib_addr)2 * s1));
		t2 = VIS_LD_U16_I(tab0, ((mlib_addr)2 * s1));
		t1 = VIS_LD_U16_I(tab1, ((mlib_addr)2 * s0));
		t0 = VIS_LD_U16_I(tab0, ((mlib_addr)2 * s0));
		acc1 = vis_faligndata(t3, acc1);
		acc1 = vis_faligndata(t2, acc1);
		acc0 = vis_faligndata(t1, acc0);
		acc0 = vis_faligndata(t0, acc0);
		(*dp++) = vis_bshuffle(acc0, acc1);
	}

	/* odd trailing sample: two values go out as one 32-bit store */
	if ((xsize & 1) != 0) {
		s0 = sp[0];
		t1 = VIS_LD_U16_I(tab1, ((mlib_addr)2 * s0));
		t0 = VIS_LD_U16_I(tab0, ((mlib_addr)2 * s0));
		acc0 = vis_faligndata(t1, acc0);
		acc0 = vis_faligndata(t0, acc0);
		*(mlib_f32 *)dp = vis_read_hi(acc0);
	}
}
コード例 #9
0
/*
 * Single-input lookup of one row, S32 source -> S16 destination, four output
 * channels, for a destination that starts on the last channel of a pixel
 * (byte-shuffle variant).
 *
 * Each 8-byte word written holds
 *   table[3][src[k]], table[0][src[k+1]], table[1][src[k+1]],
 *   table[2][src[k+1]]
 * and the row is closed with a scalar store of table[3][last sample].
 *
 * src   - source row; xsize + 1 samples are read (src[0] unconditionally)
 * dst   - destination; written through an mlib_d64 pointer from the start,
 *         so it is presumably 8-byte aligned (confirm with the caller)
 * xsize - number of 8-byte words to produce; 4 * xsize + 1 values are written
 * table - four lookup tables; each table pointer is advanced by HALF_U64
 *         bytes (defined outside this block, presumably the offset of the
 *         entry for sample value 0) so signed samples can index it
 *
 * NOTE(review): the first three channels of src[0] are presumably written by
 * the caller before this routine is entered.
 */
void
mlib_v_ImageLookUpSI_S32_S16_4_DstOff3_D1(
    const mlib_s32 *src,
    mlib_s16 *dst,
    mlib_s32 xsize,
    const mlib_s16 **table)
{
/* pointer to source data */
	mlib_s32 *sp;

/* source data: s0 is the previous sample, s1 the current one */
	mlib_s32 s0, s1;

/* pointer to start of destination */
	mlib_s16 *dl;

/* aligned pointer to destination */
	mlib_d64 *dp;

/* values loaded from the tables */
	mlib_d64 t0, t1, t2, t3;

/*
 * Shift accumulators.  Zero-initialized so that vis_faligndata never reads
 * an indeterminate value; only bytes shifted in from t0..t3 are ever stored.
 */
	mlib_d64 acc0 = 0.0;
	mlib_d64 acc1 = 0.0;

/* loop variable */
	mlib_s32 i;
	const mlib_s16 *tab0 = (void *)&(((mlib_u8 **)table)[0][HALF_U64]);
	const mlib_s16 *tab1 = (void *)&(((mlib_u8 **)table)[1][HALF_U64]);
	const mlib_s16 *tab2 = (void *)&(((mlib_u8 **)table)[2][HALF_U64]);
	const mlib_s16 *tab3 = (void *)&(((mlib_u8 **)table)[3][HALF_U64]);

	sp = (void *)src;
	dl = dst;
	dp = (mlib_d64 *)dl;

	/* align offset 6: each vis_faligndata shifts one 16-bit value in */
	vis_alignaddr((void *)0, 6);

	s0 = (*sp++);

	if (xsize >= 1) {

		s1 = (*sp++);

		/* bshuffle picks bytes 0-3 of acc0 and bytes 0-3 of acc1 */
		vis_write_bmask(0x012389ab, 0);

#pragma pipeloop(0)
		for (i = 0; i <= xsize - 2; i++) {
			/* table offsets are in bytes, hence the factor of two */
			t3 = VIS_LD_U16_I(tab2, ((mlib_addr)2 * s1));
			t2 = VIS_LD_U16_I(tab1, ((mlib_addr)2 * s1));
			t1 = VIS_LD_U16_I(tab0, ((mlib_addr)2 * s1));
			t0 = VIS_LD_U16_I(tab3, ((mlib_addr)2 * s0));
			acc1 = vis_faligndata(t3, acc1);
			acc1 = vis_faligndata(t2, acc1);
			acc0 = vis_faligndata(t1, acc0);
			acc0 = vis_faligndata(t0, acc0);
			s0 = s1;
			s1 = (*sp++);
			(*dp++) = vis_bshuffle(acc0, acc1);
		}

		/* pipeline epilogue: same body without the look-ahead load */
		t3 = VIS_LD_U16_I(tab2, ((mlib_addr)2 * s1));
		t2 = VIS_LD_U16_I(tab1, ((mlib_addr)2 * s1));
		t1 = VIS_LD_U16_I(tab0, ((mlib_addr)2 * s1));
		t0 = VIS_LD_U16_I(tab3, ((mlib_addr)2 * s0));
		acc1 = vis_faligndata(t3, acc1);
		acc1 = vis_faligndata(t2, acc1);
		acc0 = vis_faligndata(t1, acc0);
		acc0 = vis_faligndata(t0, acc0);
		s0 = s1;
		(*dp++) = vis_bshuffle(acc0, acc1);
	}

	dl = (mlib_s16 *)dp;

	/* last channel of the final sample */
	dl[0] = tab3[s0];
}
コード例 #10
0
/*
 * Single-input lookup of one row, S32 source -> S16 destination, three
 * output values per source sample (byte-shuffle variant):
 *   dst[3 * k + c] = table[c][src[k]],  c = 0, 1, 2
 *
 * src   - source row of xsize samples
 * dst   - destination row; the vector part is written through an mlib_d64
 *         pointer from the first element, so dst is presumably 8-byte
 *         aligned (confirm with the caller)
 * xsize - number of source samples
 * table - three lookup tables; each table pointer is advanced by HALF_U64
 *         bytes (defined outside this block, presumably the offset of the
 *         entry for sample value 0) so signed samples can index it
 *
 * Four samples produce twelve values, i.e. exactly three 8-byte words.  Each
 * word is built in a pair of accumulators (acc0x holds its first two values,
 * acc1x its last two) and merged with vis_bshuffle.  The vector loop is
 * software-pipelined; the up-to-three leftover samples are handled by a
 * scalar loop.
 */
void
mlib_v_ImageLookUpSI_S32_S16_3_D1(
    const mlib_s32 *src,
    mlib_s16 *dst,
    mlib_s32 xsize,
    const mlib_s16 **table)
{
/* pointer to source data */
	mlib_s32 *sp;

/* pointer to start of destination */
	mlib_s16 *dl;

/* aligned pointer to destination */
	mlib_d64 *dp;

/* values loaded from the tables */
	mlib_d64 t0, t1, t2, t3;

/*
 * Shift accumulators for the first halves of the three output words.
 * Zero-initialized so that vis_faligndata never reads an indeterminate
 * value; only bytes shifted in from t0..t3 are ever stored.
 */
	mlib_d64 acc00 = 0.0, acc01 = 0.0, acc02 = 0.0;

/* shift accumulators for the second halves, zeroed for the same reason */
	mlib_d64 acc10 = 0.0, acc11 = 0.0, acc12 = 0.0;

/* loop variable: number of source samples already consumed */
	mlib_s32 i;
	const mlib_s16 *tab0 = (void *)&(((mlib_u8 **)table)[0][HALF_U64]);
	const mlib_s16 *tab1 = (void *)&(((mlib_u8 **)table)[1][HALF_U64]);
	const mlib_s16 *tab2 = (void *)&(((mlib_u8 **)table)[2][HALF_U64]);

/* the four source samples of the current group */
	mlib_s32 s00, s01, s02, s03;

	sp = (void *)src;
	dl = dst;
	dp = (mlib_d64 *)dl;

	/* align offset 6: each vis_faligndata shifts one 16-bit value in */
	vis_alignaddr((void *)0, 6);

	i = 0;

	if (xsize >= 4) {

		s00 = sp[0];
		s01 = sp[1];
		s02 = sp[2];
		s03 = sp[3];
		sp += 4;

		/* bshuffle picks bytes 0-3 of each of its two operands */
		vis_write_bmask(0x012389ab, 0);

#pragma pipeloop(0)
		for (i = 0; i <= xsize - 8; i += 4, sp += 4) {
			/* word 0: all three channels of s00, first of s01 */
			t3 = VIS_LD_U16_I(tab0, ((mlib_addr)2 * s01));
			t2 = VIS_LD_U16_I(tab2, ((mlib_addr)2 * s00));
			t1 = VIS_LD_U16_I(tab1, ((mlib_addr)2 * s00));
			t0 = VIS_LD_U16_I(tab0, ((mlib_addr)2 * s00));
			acc10 = vis_faligndata(t3, acc10);
			acc10 = vis_faligndata(t2, acc10);
			acc00 = vis_faligndata(t1, acc00);
			acc00 = vis_faligndata(t0, acc00);
			/* word 1: rest of s01, first two channels of s02 */
			t3 = VIS_LD_U16_I(tab1, ((mlib_addr)2 * s02));
			t2 = VIS_LD_U16_I(tab0, ((mlib_addr)2 * s02));
			t1 = VIS_LD_U16_I(tab2, ((mlib_addr)2 * s01));
			t0 = VIS_LD_U16_I(tab1, ((mlib_addr)2 * s01));
			acc11 = vis_faligndata(t3, acc11);
			acc11 = vis_faligndata(t2, acc11);
			acc01 = vis_faligndata(t1, acc01);
			acc01 = vis_faligndata(t0, acc01);
			/* word 2: last channel of s02, all three of s03 */
			t3 = VIS_LD_U16_I(tab2, ((mlib_addr)2 * s03));
			t2 = VIS_LD_U16_I(tab1, ((mlib_addr)2 * s03));
			t1 = VIS_LD_U16_I(tab0, ((mlib_addr)2 * s03));
			t0 = VIS_LD_U16_I(tab2, ((mlib_addr)2 * s02));
			acc12 = vis_faligndata(t3, acc12);
			acc12 = vis_faligndata(t2, acc12);
			acc02 = vis_faligndata(t1, acc02);
			acc02 = vis_faligndata(t0, acc02);
			s00 = sp[0];
			s01 = sp[1];
			s02 = sp[2];
			s03 = sp[3];
			(*dp++) = vis_bshuffle(acc00, acc10);
			(*dp++) = vis_bshuffle(acc01, acc11);
			(*dp++) = vis_bshuffle(acc02, acc12);
		}

		/* pipeline epilogue: same body without the look-ahead loads */
		t3 = VIS_LD_U16_I(tab0, ((mlib_addr)2 * s01));
		t2 = VIS_LD_U16_I(tab2, ((mlib_addr)2 * s00));
		t1 = VIS_LD_U16_I(tab1, ((mlib_addr)2 * s00));
		t0 = VIS_LD_U16_I(tab0, ((mlib_addr)2 * s00));
		acc10 = vis_faligndata(t3, acc10);
		acc10 = vis_faligndata(t2, acc10);
		acc00 = vis_faligndata(t1, acc00);
		acc00 = vis_faligndata(t0, acc00);
		t3 = VIS_LD_U16_I(tab1, ((mlib_addr)2 * s02));
		t2 = VIS_LD_U16_I(tab0, ((mlib_addr)2 * s02));
		t1 = VIS_LD_U16_I(tab2, ((mlib_addr)2 * s01));
		t0 = VIS_LD_U16_I(tab1, ((mlib_addr)2 * s01));
		acc11 = vis_faligndata(t3, acc11);
		acc11 = vis_faligndata(t2, acc11);
		acc01 = vis_faligndata(t1, acc01);
		acc01 = vis_faligndata(t0, acc01);
		t3 = VIS_LD_U16_I(tab2, ((mlib_addr)2 * s03));
		t2 = VIS_LD_U16_I(tab1, ((mlib_addr)2 * s03));
		t1 = VIS_LD_U16_I(tab0, ((mlib_addr)2 * s03));
		t0 = VIS_LD_U16_I(tab2, ((mlib_addr)2 * s02));
		acc12 = vis_faligndata(t3, acc12);
		acc12 = vis_faligndata(t2, acc12);
		acc02 = vis_faligndata(t1, acc02);
		acc02 = vis_faligndata(t0, acc02);
		(*dp++) = vis_bshuffle(acc00, acc10);
		(*dp++) = vis_bshuffle(acc01, acc11);
		(*dp++) = vis_bshuffle(acc02, acc12);
		i += 4;
	}

	dl = (mlib_s16 *)dp;

	/* scalar tail for the samples that do not fill a group of four */
#pragma pipeloop(0)
	for (; i < xsize; i++) {
		s00 = sp[0];
		dl[0] = tab0[s00];
		dl[1] = tab1[s00];
		dl[2] = tab2[s00];
		dl += 3;
		sp++;
	}
}