C++ (Cpp) vec_mradds 예제들

예제 #1

0

파일 보기

파일: yuv2rgb_altivec.c 프로젝트: Brainiarc7/libav

/*
 * Convert one vector of 16-bit Y, U and V samples to R, G and B, using the
 * coefficients stored in the SwsContext (c->CY, c->OY, ...).
 *
 * NOTE: this excerpt is truncated by the page it was taken from; the rest of
 * the body (and the closing brace) is not shown here.
 */
static inline void cvtyuvtoRGB(SwsContext *c, vector signed short Y,
                               vector signed short U, vector signed short V,
                               vector signed short *R, vector signed short *G,
                               vector signed short *B)
{
    vector signed short vx, ux, uvx;

    /* Scale luma by c->CY and add c->OY (multiply-round-add, saturating). */
    Y = vec_mradds(Y, c->CY, c->OY);
    /* Subtract 128 from every U lane (element 0 of the literal is splatted). */
    U = vec_sub(U, (vector signed short)
                       vec_splat((vector signed short) { 128 }, 0));

예제 #2

0

파일 보기

파일: yuv2rgb_altivec.c 프로젝트: Brainiarc7/libav

/*
 * Convert one vector of 16-bit Y, U and V samples to R, G and B.
 *
 * c        supplies the coefficients: CY/OY (luma scale and addend),
 *          CSHIFT (chroma pre-shift), CBU, CRV, CGU and CGV.
 * Y, U, V  input samples, one 16-bit lane per sample.
 * R, G, B  receive the results; they are written in the order B, R, G.
 *
 * vec_mradds(a, b, c) computes ((a * b + 0x4000) >> 15) + c per lane with
 * saturation.
 */
static inline void cvtyuvtoRGB(SwsContext *c, vector signed short Y,
                               vector signed short U, vector signed short V,
                               vector signed short *R, vector signed short *G,
                               vector signed short *B)
{
    /* 128 in every lane: the bias removed from both chroma channels. */
    const vector signed short chroma_bias =
        (vector signed short) vec_splat((vector signed short) { 128 }, 0);

    /* Scaled luma, shared by all three output channels. */
    const vector signed short luma = vec_mradds(Y, c->CY, c->OY);

    /* Chroma re-centred around zero. */
    const vector signed short cb = vec_sub(U, chroma_bias);
    const vector signed short cr = vec_sub(V, chroma_bias);

    /* B = ((CBU * (u << CSHIFT) + 0x4000) >> 15) + luma */
    *B = vec_mradds(vec_sl(cb, c->CSHIFT), c->CBU, luma);

    /* R = ((CRV * (v << CSHIFT) + 0x4000) >> 15) + luma */
    *R = vec_mradds(vec_sl(cr, c->CSHIFT), c->CRV, luma);

    /* G = luma + CGU * u + CGV * v, accumulated in two rounded steps. */
    *G = vec_mradds(cr, c->CGV, vec_mradds(cb, c->CGU, luma));
}

/*
 * ------------------------------------------------------------------------------
 * CS converters
 * ------------------------------------------------------------------------------
 */

예제 #3

0

파일 보기

파일: PixConvertAltivec.cpp 프로젝트: Jackovic/Gem

/*
 * Convert packed 4-byte pixels to interleaved 8-bit U Y V Y output using
 * AltiVec.
 *
 * bgradata   source pixels; reinterpreted as an array of 16-byte vectors.
 * BGRA_size  controls the iteration count: the loop runs BGRA_size / 16
 *            times, and each iteration reads four input vectors (64 bytes)
 *            and writes two output vectors (32 bytes).
 *            NOTE(review): that consumption pattern suggests BGRA_size is a
 *            pixel count rather than a byte count -- confirm against callers.
 * pixels     destination; reinterpreted as an array of 16-byte vectors.
 *
 * Both buffers are accessed through vector pointers, so they presumably have
 * to be 16-byte aligned -- TODO confirm.
 *
 * NOTE(review): the local names (r*, g*, b*, tr*, tg*, tb*) do not match the
 * data they hold; see the comments at the splats and at the unpack step.
 */
void BGRA_to_YCbCr_altivec(const unsigned char *bgradata, size_t BGRA_size,
                           unsigned char *pixels)
{
  vector signed short  r0, r1, r2, g0, g1, g2, b0, b1, b2, c0, c16, c128;
  vector unsigned char z0, tc0, tc1, tc2, tc3;
  vector signed short tr0, tr1, tg0, tg1, tb0, tb1;
  vector signed short t0, t1, t2, t3, t4, t5;
  vector signed short u1, u2, uAvg, v1, v2, vAvg, out1, out2, out3, out4, uv1, uv2;
  unsigned int i;

  const vector unsigned char	*BGRA_ptr = reinterpret_cast<const vector unsigned char*>( bgradata);
  vector unsigned char	*UYVY_ptr = reinterpret_cast<vector unsigned char*>( pixels);

  /* Permutation vectors used to de-interleave the 4-byte pixels of two
     input vectors (8 pixels).  Byte offset 0 of each pixel is never used
     for data. */
  vector unsigned char vPerm1 =
    static_cast<vector unsigned char>( 3,  7, 11, 15, 19, 23, 27, 31, // byte 3 of pixels 0..7 (labelled B below)
                            2,  6, 10, 14, 18, 22, 26, 30  /* byte 2 of pixels 0..7 (labelled G) */);
  vector unsigned char vPerm2 =
    static_cast<vector unsigned char>( 1,  5,  9, 13, 17, 21, 25, 29, /* byte 1 of pixels 0..7 (labelled R) */
                            0,  0,  0,  0,  0,  0,  0,  0  /* don't care */);

  /* Load the equation constants (fixed-point weights; vec_mradds shifts
     the product right by 15). */
  vector signed short vConst1 =
    static_cast<vector signed short>( 8432,  16425,  3176,
                           -4818,  -9527, 14345,
                           0,      0 );
  vector signed short vConst2 =
    static_cast<vector signed short>( 14345, -12045, -2300,
                           16, 128, 0, 0, 0 );

  /* avgPerm1 gathers the even-numbered 16-bit elements of two vectors,
     avgPerm2 the odd-numbered ones, so that neighbours can be averaged. */
  vector unsigned char avgPerm1 =
    static_cast<vector unsigned char>(  0,  1,  4,  5,  8,  9, 12, 13,
                             16, 17, 20, 21, 24, 25, 28, 29 );
  vector unsigned char avgPerm2 =
    static_cast<vector unsigned char>(  2,  3,  6,  7, 10, 11, 14, 15,
                             18, 19, 22, 23, 26, 27, 30, 31 );
  /* Perm1 interleaves 16-bit elements 0..3 of two vectors (a0 b0 a1 b1 ...),
     Perm2 does the same for elements 4..7. */
  vector unsigned char Perm1 =
    static_cast<vector unsigned char>( 0, 1, 16, 17, 2, 3, 18, 19,
                            4, 5, 20, 21, 6, 7, 22, 23 );
  vector unsigned char Perm2 =
    static_cast<vector unsigned char>(  8,  9, 24, 25, 10, 11, 26, 27,
                             12, 13, 28, 29, 14, 15, 30, 31 );

  /* Splat the weights.  The indices are reversed within each triple, so the
     r* variables receive the third weight of each row and the b* variables
     the first.  This matches the tr* / tb* vectors below, whose names are
     swapped in the same way. */
  r0 = vec_splat( vConst1, 2 ); /*   3176 */
  g0 = vec_splat( vConst1, 1 ); /*  16425 */
  b0 = vec_splat( vConst1, 0 ); /*   8432 */
  r1 = vec_splat( vConst1, 5 ); /*  14345 */
  g1 = vec_splat( vConst1, 4 ); /*  -9527 */
  b1 = vec_splat( vConst1, 3 ); /*  -4818 */
  r2 = vec_splat( vConst2, 2 ); /*  -2300 */
  g2 = vec_splat( vConst2, 1 ); /* -12045 */
  b2 = vec_splat( vConst2, 0 ); /*  14345 */
  c16  = vec_splat( vConst2, 3 ); /*  16 */
  c128 = vec_splat( vConst2, 4 ); /* 128 */
  c0 = static_cast<vector signed short> (0); /*   0 */
  z0 = static_cast<vector unsigned char> (0); /*  0 */

  for ( i = 0; i < (BGRA_size/sizeof(vector unsigned char)); i++ ) {

    /* Load the 4 input vectors (16 pixels) and separate the three
       colour channels from the interleaved format. */
    const vector unsigned char *vec1 = BGRA_ptr++;
    const vector unsigned char *vec2 = BGRA_ptr++;
    const vector unsigned char *vec3 = BGRA_ptr++;
    const vector unsigned char *vec4 = BGRA_ptr++;

    tc0 = vec_perm( *vec1, *vec2, vPerm1 ); // B0..B7  G0..G7
    tc1 = vec_perm( *vec1, *vec2, vPerm2 ); // R0..R7
    tc2 = vec_perm( *vec3, *vec4, vPerm1 ); // B8..B15 G8..G15
    tc3 = vec_perm( *vec3, *vec4, vPerm2 ); // R8..R15

    /* Unpack to 16 bit arithmetic for conversion by interleaving zero
       bytes with the channel bytes.  Going by the labels on the permutes
       above, tr* actually holds the B bytes and tb* the R bytes; the
       weights in r* and b* are swapped to match. */
    tr0 = static_cast<vector signed short>(vec_mergeh( z0, tc0 ));  /* tr0 = high half of tc0 (B0 .. B7)  */
    tg0 = static_cast<vector signed short>(vec_mergel( z0, tc0 ));  /* tg0 = low half of tc0  (G0 .. G7)  */
    tb0 = static_cast<vector signed short>(vec_mergeh( z0, tc1 ));  /* tb0 = high half of tc1 (R0 .. R7)  */
    tr1 = static_cast<vector signed short>(vec_mergeh( z0, tc2 ));  /* tr1 = high half of tc2 (B8 .. B15) */
    tg1 = static_cast<vector signed short>(vec_mergel( z0, tc2 ));  /* tg1 = low half of tc2  (G8 .. G15) */
    tb1 = static_cast<vector signed short>(vec_mergeh( z0, tc3 ));  /* tb1 = high half of tc3 (R8 .. R15) */

    /* Convert the first eight pixels.  vec_mradds keeps the rounded
       32 bit product shifted right by 15 (a divide by 32768) and then
       adds the third operand with saturation. */

    t0 = vec_mradds( tr0, r0, c0 ); /* tr0 *   3176 */
    t1 = vec_mradds( tr0, r1, c0 ); /* tr0 *  14345 */
    t2 = vec_mradds( tr0, r2, c0 ); /* tr0 *  -2300 */

    t0 = vec_mradds( tg0, g0, t0 ); /* += tg0 *  16425 */
    t1 = vec_mradds( tg0, g1, t1 ); /* += tg0 *  -9527 */
    t2 = vec_mradds( tg0, g2, t2 ); /* += tg0 * -12045 */

    t0 = vec_mradds( tb0, b0, t0 ); /* += tb0 *  8432 */
    t1 = vec_mradds( tb0, b1, t1 ); /* += tb0 * -4818 */
    t2 = vec_mradds( tb0, b2, t2 ); /* += tb0 * 14345 */

    /* Convert the next eight pixels. */
    t3 = vec_mradds( tr1, r0, c0 ); /* tr1 *   3176 */
    t4 = vec_mradds( tr1, r1, c0 ); /* tr1 *  14345 */
    t5 = vec_mradds( tr1, r2, c0 ); /* tr1 *  -2300 */

    t3 = vec_mradds( tg1, g0, t3 ); /* += tg1 *  16425 */
    t4 = vec_mradds( tg1, g1, t4 ); /* += tg1 *  -9527 */
    t5 = vec_mradds( tg1, g2, t5 ); /* += tg1 * -12045 */

    t3 = vec_mradds( tb1, b0, t3 ); /* += tb1 *  8432 */
    t4 = vec_mradds( tb1, b1, t4 ); /* += tb1 * -4818 */
    t5 = vec_mradds( tb1, b2, t5 ); /* += tb1 * 14345 */

    /* Add the constants: 16 to the t0/t3 (luma) sums, 128 to the rest. */
    t0 = vec_adds( t0, c16 );
    t3 = vec_adds( t3, c16 );
    t1 = vec_adds( t1, c128 );
    t4 = vec_adds( t4, c128 );
    t2 = vec_adds( t2, c128 );
    t5 = vec_adds( t5, c128 );

    /* Average each pair of horizontally adjacent chroma values, giving
       eight U and eight V values for the sixteen pixels. */
    u1 = vec_perm( t1, t4, avgPerm1 ); // even-numbered U's
    u2 = vec_perm( t1, t4, avgPerm2 ); // odd-numbered U's
    uAvg = vec_avg( u1, u2 );
    v1 = vec_perm( t2, t5, avgPerm1 ); // even-numbered V's
    v2 = vec_perm( t2, t5, avgPerm2 ); // odd-numbered V's
    vAvg = vec_avg( v1, v2 );

    /* Interleave U with V, then the U/V stream with luma, producing the
       element order U Y V Y ... */
    uv1 = vec_perm( uAvg, vAvg, Perm1 );
    uv2 = vec_perm( uAvg, vAvg, Perm2 );
    out1 = vec_perm( uv1, t0, Perm1 );
    out2 = vec_perm( uv1, t0, Perm2 );
    out3 = vec_perm( uv2, t3, Perm1 );
    out4 = vec_perm( uv2, t3, Perm2 );

    *UYVY_ptr = vec_packsu( out1, out2 );	// saturate and pack down to unsigned chars
    UYVY_ptr++;
    *UYVY_ptr = vec_packsu( out3, out4 );
    UYVY_ptr++;
  }
}

예제 #4

0

파일 보기

파일: dct.c 프로젝트: 0day-ci/gcc

/*
 * Forward DCT on an 8x8 block of 16-bit samples using AltiVec.
 *
 * The transform runs once over the eight columns, transposes the block with
 * transpose_vmx(), runs again over the eight rows, and finally multiplies
 * every row by the matching postscale vector.
 *
 * input      rows 0..7 hold the samples.  input[8] is also read: its
 *            elements 0..6 are the multiplication constants, so the caller
 *            must supply at least nine vectors.
 * output     receives the eight result vectors.
 * postscale  eight vectors of per-element scale factors.
 *
 * vec_mradds(a, b, c) computes ((a * b + 0x4000) >> 15) + c per lane with
 * saturation.
 */
void
dct_vmx (vector signed short *input, vector signed short *output,
	 vector signed short *postscale)
{
  vector signed short mul0, mul1, mul2, mul3, mul4, mul5, mul6;
  vector signed short v0, v1, v2, v3, v4, v5, v6, v7, v8, v9;
  vector signed short v20, v21, v22, v23, v24, v25, v26, v27;
  /* Explicit zero addend.  The previous code produced it by subtracting an
     uninitialized variable from itself, which reads an indeterminate value. */
  const vector signed short zero = vec_splat_s16 (0);
  vector signed short in[8], out[8];

  /* Splat multiplication constants */
  mul0 = vec_splat(input[8],0);
  mul1 = vec_splat(input[8],1);
  mul2 = vec_splat(input[8],2);
  mul3 = vec_splat(input[8],3);
  mul4 = vec_splat(input[8],4);
  mul5 = vec_splat(input[8],5);
  mul6 = vec_splat(input[8],6);

  /* Perform DCT on the eight columns */

  /*********** Stage 1 ***********/

  v8 = vec_adds (input[0], input[7]);
  v9 = vec_subs (input[0], input[7]);
  v0 = vec_adds (input[1], input[6]);
  v7 = vec_subs (input[1], input[6]);
  v1 = vec_adds (input[2], input[5]);
  v6 = vec_subs (input[2], input[5]);
  v2 = vec_adds (input[3], input[4]);
  v5 = vec_subs (input[3], input[4]);

  /*********** Stage 2 ***********/

  /* Top */
  v3 = vec_adds (v8, v2);		/* (V0+V7) + (V3+V4) */
  v4 = vec_subs (v8, v2);		/* (V0+V7) - (V3+V4) */
  v2 = vec_adds (v0, v1);		/* (V1+V6) + (V2+V5) */
  v8 = vec_subs (v0, v1);		/* (V1+V6) - (V2+V5) */

  /* Bottom */
  v0 = vec_subs (v7, v6);		/* (V1-V6) - (V2-V5) */
  v1 = vec_adds (v7, v6);		/* (V1-V6) + (V2-V5) */

  /*********** Stage 3 ***********/

  /* Top */
  in[0] = vec_adds (v3, v2);		/* y0 = v3 + v2 */
  in[4] = vec_subs (v3, v2);		/* y4 = v3 - v2 */
  in[2] = vec_mradds (v8, mul2, v4);	/* y2 = v8 * a0 + v4 */
  /* This pass adds mul6 where the row pass below adds zero; presumably
     element 6 of the constant vector is 0 -- TODO confirm.  */
  v6 = vec_mradds (v4, mul2, mul6);
  in[6] = vec_subs (v6, v8);		/* y6 = v4 * a0 - v8 */

  /* Bottom */
  v6 = vec_mradds (v0, mul0, v5);	/* v6 = v0 * (c4) + v5 */
  v7 = vec_mradds (v0, mul4, v5);	/* v7 = v0 * (-c4) + v5 */
  v2 = vec_mradds (v1, mul4, v9);	/* v2 = v1 * (-c4) + v9 */
  v3 = vec_mradds (v1, mul0, v9);	/* v3 = v1 * (c4) + v9 */

  /*********** Stage 4 ***********/

  /* Bottom */
  in[1] = vec_mradds (v6, mul3, v3);	/* y1 = v6 * (a1) + v3 */
  v23 = vec_mradds (v3, mul3, mul6);
  in[7] = vec_subs (v23, v6);		/* y7 = v3 * (a1) - v6 */
  in[5] = vec_mradds (v2, mul1, v7);	/* y5 = v2 * (a2) + v7 */
  in[3] = vec_mradds (v7, mul5, v2);	/* y3 = v7 * (-a2) + v2 */

  transpose_vmx (in, out);

  /* Perform DCT on the eight rows */

  /*********** Stage 1 ***********/

  v8 = vec_adds (out[0], out[7]);
  v9 = vec_subs (out[0], out[7]);
  v0 = vec_adds (out[1], out[6]);
  v7 = vec_subs (out[1], out[6]);
  v1 = vec_adds (out[2], out[5]);
  v6 = vec_subs (out[2], out[5]);
  v2 = vec_adds (out[3], out[4]);
  v5 = vec_subs (out[3], out[4]);

  /*********** Stage 2 ***********/

  /* Top */
  v3 = vec_adds (v8, v2);		/* (V0+V7) + (V3+V4) */
  v4 = vec_subs (v8, v2);		/* (V0+V7) - (V3+V4) */
  v2 = vec_adds (v0, v1);		/* (V1+V6) + (V2+V5) */
  v8 = vec_subs (v0, v1);		/* (V1+V6) - (V2+V5) */

  /* Bottom */
  v0 = vec_subs (v7, v6);		/* (V1-V6) - (V2-V5) */
  v1 = vec_adds (v7, v6);		/* (V1-V6) + (V2-V5) */

  /*********** Stage 3 ***********/

  /* Top */
  v20 = vec_adds (v3, v2);		/* y0 = v3 + v2 */
  v24 = vec_subs (v3, v2);		/* y4 = v3 - v2 */
  v22 = vec_mradds (v8, mul2, v4);	/* y2 = v8 * a0 + v4 */
  v6 = vec_mradds (v4, mul2, zero);
  v26 = vec_subs (v6, v8);		/* y6 = v4 * a0 - v8 */

  /* Bottom */
  v6 = vec_mradds (v0, mul0, v5);	/* v6 = v0 * (c4) + v5 */
  v7 = vec_mradds (v0, mul4, v5);	/* v7 = v0 * (-c4) + v5 */
  v2 = vec_mradds (v1, mul4, v9);	/* v2 = v1 * (-c4) + v9 */
  v3 = vec_mradds (v1, mul0, v9);	/* v3 = v1 * (c4) + v9 */

  /*********** Stage 4 ***********/

  /* Bottom */
  v21 = vec_mradds (v6, mul3, v3);	/* y1 = v6 * (a1) + v3 */
  v23 = vec_mradds (v3, mul3, zero);
  v27 = vec_subs (v23, v6);		/* y7 = v3 * (a1) - v6 */
  v25 = vec_mradds (v2, mul1, v7);	/* y5 = v2 * (a2) + v7 */
  v23 = vec_mradds (v7, mul5, v2);	/* y3 = v7 * (-a2) + v2 */

  /* Post-scale and store results */

  output[0] = vec_mradds (postscale[0], v20, zero);
  output[2] = vec_mradds (postscale[2], v22, zero);
  output[4] = vec_mradds (postscale[4], v24, zero);
  output[6] = vec_mradds (postscale[6], v26, zero);
  output[1] = vec_mradds (postscale[1], v21, zero);
  output[3] = vec_mradds (postscale[3], v23, zero);
  output[5] = vec_mradds (postscale[5], v25, zero);
  output[7] = vec_mradds (postscale[7], v27, zero);
}

예제 #5

0

파일 보기

파일: PixConvertAltivec.cpp 프로젝트: Jackovic/Gem

void BGR_to_YCbCr_altivec(const unsigned char *bgrdata, size_t BGR_size,
                          unsigned char *pixels)
{
  vector signed short  r0, r1, r2, g0, g1, g2, b0, b1, b2, c0, c16, c128;
  vector unsigned char z0, tc0, tc1, tc2, tc3;
  vector signed short tr0, tr1, tg0, tg1, tb0, tb1;
  vector signed short t0, t1, t2, t3, t4, t5;
  unsigned int i;

  const vector unsigned char	*BGR_ptr = reinterpret_cast<const vector unsigned char*>( bgrdata);
  vector unsigned char	*YCC_ptr = reinterpret_cast<vector unsigned char*>( pixels);

  /* Permutation vector is used to extract the interleaved RGB. */
  vector unsigned char vPerm1 =
    static_cast<vector unsigned char>( 0,  3,  6,  9, 12, 15, 18, 21, /* R0..R7    */
                            1,  4,  7, 10, 13, 16, 19, 22  /* G0..G7    */);
  vector unsigned char vPerm2 =
    static_cast<vector unsigned char>( 2,  5,  8, 11, 14, 17, 20, 23, /* B0..B7    */
                            0,  0,  0,  0,  0,  0,  0,  0  /* dont care */);
  vector unsigned char vPerm3 =
    static_cast<vector unsigned char>( 8, 11, 14, 17, 20, 23, 26, 29, /* R8..R15   */
                            9, 12, 15, 18, 21, 24, 27, 30  /* G8..G15   */);
  vector unsigned char vPerm4 =
    static_cast<vector unsigned char>(10, 13, 16, 19, 22, 25, 28, 31, /* B8..B15   */
                           0,  0,  0,  0,  0,  0,  0,  0  /* dont care */);

  /* Load the equation constants. */
  vector signed short vConst1 =
    static_cast<vector signed short>( 8432,  16425,  3176,
                           -4818,  -9527, 14345,
                           0,      0 );

  vector signed short vConst2 =
    static_cast<vector signed short>( 14345, -12045, -2300,
                           16, 128, 0, 0, 0 );

  r0 = vec_splat( vConst1, 0 ); /*  8432 */
  g0 = vec_splat( vConst1, 1 ); /* 16425 */
  b0 = vec_splat( vConst1, 2 ); /*  3176 */
  r1 = vec_splat( vConst1, 3 ); /* -4818 */
  g1 = vec_splat( vConst1, 4 ); /* -9527 */
  b1 = vec_splat( vConst1, 5 ); /* 14345 */
  r2 = vec_splat( vConst2, 0 ); /* 14345 */
  g2 = vec_splat( vConst2, 1 ); /*-12045 */
  b2 = vec_splat( vConst2, 2 ); /* -2300 */
  c16  = vec_splat( vConst2, 3 ); /*  16 */
  c128 = vec_splat( vConst2, 4 ); /* 128 */
  c0 = static_cast<vector signed short> (0); /*   0 */
  z0 = static_cast<vector unsigned char> (0); /*  0 */

  for ( i = 0; i < (BGR_size/sizeof(vector unsigned char)); i+=3 ) {

    /* Load the 3 RGB input vectors and seperate into red,
       green and blue from the interleaved format. */
    tc0 = vec_perm( BGR_ptr[i], BGR_ptr[i+1], vPerm1 );   /* R0..R7  G0..G7  */
    tc1 = vec_perm( BGR_ptr[i], BGR_ptr[i+1], vPerm2 );   /* B0..B7          */
    tc2 = vec_perm( BGR_ptr[i+1], BGR_ptr[i+2], vPerm3 ); /* R8..R15 G8..G15 */
    tc3 = vec_perm( BGR_ptr[i+1], BGR_ptr[i+2], vPerm4 ); /* B8..B15         */

    /* Unpack to 16 bit arithmatic for converstion. */
    tr0 = static_cast<vector signed short>(vec_mergeh( z0, tc0 ));  /* tr0 = R0 .. R7  */
    tg0 = static_cast<vector signed short>(vec_mergel( z0, tc0 ));  /* tg0 = G0 .. G7  */
    tb0 = static_cast<vector signed short>(vec_mergeh( z0, tc1 ));  /* tb0 = B0 .. B7  */
    tr1 = static_cast<vector signed short>(vec_mergeh( z0, tc2 ));  /* tr0 = R8 .. R15 */
    tg1 = static_cast<vector signed short>(vec_mergel( z0, tc2 ));  /* tg0 = G8 .. G15 */
    tb1 = static_cast<vector signed short>(vec_mergeh( z0, tc3 ));  /* tb0 = B8 .. B15 */

    /* Convert the first three input vectors.  Note that
       only the top 17 bits of the 32 bit product are
       stored.  This is the same as doing the divide by 32768. */

    t0 = vec_mradds( tr0, r0, c0 ); /* (R0 .. R7) *  8432 */
    t1 = vec_mradds( tr0, r1, c0 ); /* (R0 .. R7) * -4818 */
    t2 = vec_mradds( tr0, r2, c0 ); /* (R0 .. R7) * 14345 */

    t0 = vec_mradds( tg0, g0, t0 ); /* += (G0 .. G7) *  16425 */
    t1 = vec_mradds( tg0, g1, t1 ); /* += (G0 .. G7) *  -9527 */
    t2 = vec_mradds( tg0, g2, t2 ); /* += (G0 .. G7) * -12045 */

    t0 = vec_mradds( tb0, b0, t0 ); /* += (B0 .. B7) *  3176 */
    t1 = vec_mradds( tb0, b1, t1 ); /* += (B0 .. B7) * 14345 */
    t2 = vec_mradds( tb0, b2, t2 ); /* += (B0 .. B7) * -2300 */

    /* Convert the next three input vectors. */
    t3 = vec_mradds( tr1, r0, c0 ); /* (R8 .. R15) *  8432 */
    t4 = vec_mradds( tr1, r1, c0 ); /* (R8 .. R15) * -4818 */
    t5 = vec_mradds( tr1, r2, c0 ); /* (R8 .. R15) * 14345 */

    t3 = vec_mradds( tg1, g0, t3 ); /* += (G8 .. G15) *  16425 */
    t4 = vec_mradds( tg1, g1, t4 ); /* += (G8 .. G15) *  -9527 */
    t5 = vec_mradds( tg1, g2, t5 ); /* += (G8 .. G15) * -12045 */

    t3 = vec_mradds( tb1, b0, t3 ); /* += (B8 .. B15) *  3176 */
    t4 = vec_mradds( tb1, b1, t4 ); /* += (B8 .. B15) * 14345 */
    t5 = vec_mradds( tb1, b2, t5 ); /* += (B8 .. B15) * -2300 */

    /* Add the constants. */
    t0 = vec_adds( t0, c16 );
    t3 = vec_adds( t3, c16 );
    t1 = vec_adds( t1, c128 );
    t4 = vec_adds( t4, c128 );
    t2 = vec_adds( t2, c128 );
    t5 = vec_adds( t5, c128 );

    /* Pack the results, and store them. */
    YCC_ptr[i]   = vec_packsu( t0, t3 );  /*  Y0 .. Y15  */
    YCC_ptr[i+1] = vec_packsu( t1, t4 );  /* Cb0 .. Cb15 */
    YCC_ptr[i+2] = vec_packsu( t2, t5 );  /* Cr0 .. Cr15 */

  }
}