Example #1
0
/*
 * fast_idct_8 -- in-place 8-point 1-D butterfly over a strided vector
 * (an inverse DCT, going by the function name).
 *
 * in:     base pointer.  The eight samples in[0], in[stride], ...,
 *         in[7 * stride] are all read first and then overwritten with
 *         the eight results.
 * stride: distance between consecutive samples, counted in shorts.
 *
 * The even-indexed samples (0, 2, 4, 6) are folded into tmp20..tmp23 and
 * the odd-indexed samples (1, 3, 5, 7) into tmp50..tmp53.  Output k is
 * "even term + odd term" and output 7-k is "even term - odd term".
 *
 * INT32, the COS_x_y / SIN_x_y / OCOS_x_y / OSIN_x_y constants and the
 * UNFIXH, UNFIXO and OVERSH macros are defined outside this block --
 * presumably fixed-point constants and scaling helpers; confirm against
 * their definitions.
 *
 * NOTE(review): no return type is visible on the definition line below
 * and the body returns nothing; confirm the declaration specifiers in
 * the full file.
 */
fast_idct_8 (short *in, int stride)
{
    INT32 tmp10, tmp11, tmp12, tmp13;
    INT32 tmp20, tmp21, tmp22, tmp23;
    INT32 tmp30, tmp31;
    INT32 tmp40, tmp41, tmp42, tmp43;
    INT32 tmp50, tmp51, tmp52, tmp53;
    INT32 in0, in1, in2, in3, in4, in5, in6, in7;

    /* Load all eight samples up front so the stores below cannot
     * disturb inputs that are still needed. */
    in0 = in[       0];
    in1 = in[stride  ];
    in2 = in[stride*2];
    in3 = in[stride*3];
    in4 = in[stride*4];
    in5 = in[stride*5];
    in6 = in[stride*6];
    in7 = in[stride*7];

    /* Even half: samples 0/4 and 2/6. */
    tmp10 = (in0 + in4) * COS_1_4;
    tmp11 = (in0 - in4) * COS_1_4;
    tmp12 = in2 * SIN_1_8 - in6 * COS_1_8;
    tmp13 = in6 * SIN_1_8 + in2 * COS_1_8;

    tmp20 = tmp10 + tmp13;
    tmp21 = tmp11 + tmp12;
    tmp22 = tmp11 - tmp12;
    tmp23 = tmp10 - tmp13;

    /* Odd half: samples 3/5 are combined first, then mixed with 1 and 7. */
    tmp30 = UNFIXO((in3 + in5) * COS_1_4);
    tmp31 = UNFIXO((in3 - in5) * COS_1_4);

    tmp40 = OVERSH(in1) + tmp30;
    tmp41 = OVERSH(in7) + tmp31;
    tmp42 = OVERSH(in1) - tmp30;
    tmp43 = OVERSH(in7) - tmp31;

    tmp50 = tmp40 * OCOS_1_16 + tmp41 * OSIN_1_16;
    tmp51 = tmp40 * OSIN_1_16 - tmp41 * OCOS_1_16;
    tmp52 = tmp42 * OCOS_5_16 + tmp43 * OSIN_5_16;
    tmp53 = tmp42 * OSIN_5_16 - tmp43 * OCOS_5_16;

    /* Final butterfly: outputs k and 7-k share the same even and odd
     * terms, with the odd term's sign flipped. */
    in[       0] = UNFIXH(tmp20 + tmp50);
    in[stride  ] = UNFIXH(tmp21 + tmp53);
    in[stride*2] = UNFIXH(tmp22 + tmp52);
    in[stride*3] = UNFIXH(tmp23 + tmp51);
    in[stride*4] = UNFIXH(tmp23 - tmp51);
    in[stride*5] = UNFIXH(tmp22 - tmp52);
    in[stride*6] = UNFIXH(tmp21 - tmp53);
    in[stride*7] = UNFIXH(tmp20 - tmp50);
}
Example #2
0
/*
__inline__ void mp_fwd_dct_fast(data2d, dest2d)
    Block data2d, dest2d;
    */
/*
 * mp_fwd_dct_fast -- two-pass 8x8 transform of data2d into dest2d
 * (a forward DCT, going by the function name).
 *
 * data2d: input block, read as a flat int32 array, eight consecutive
 *         samples per 1-D transform.
 * dest2d: output block, written as a flat int32 array at the positions
 *         supplied by the external zigzag[][] table.
 *
 * Pass one runs an 8-point 1-D transform over each row of data2d and
 * stores result k of row r at workspace[k * DCTSIZE + r], so the
 * intermediate block is written transposed.  Pass two runs the same
 * 1-D transform over each row of workspace and stores result k of
 * row c at dest[zigzag[k][c]].
 *
 * The trig constants and the UNFIXH, UNFIXO, OVERSHIFT and SHIFT_TEMPS
 * macros are defined outside this function -- presumably fixed-point
 * scaling helpers; confirm against their definitions.
 */
__inline__ void mp_fwd_dct_fast(Block data2d, Block dest2d)
{
    int32 *src_row;                 /* eight inputs of the current 1-D pass */
    int32 *col_out;                 /* pass one: top of the workspace column */
    int32 *dst = (int32 *) dest2d;  /* pass two: flat view of the output */
    int32 workspace[DCTSIZE_SQ];
    int row, col;
    /* first stage: sums and differences of mirrored samples */
    int32 sum07, sum16, sum25, sum34;
    int32 dif07, dif16, dif25, dif34;
    /* even half */
    int32 ev0, ev1, ev2, ev3;
    /* odd half */
    int32 rot_sum, rot_dif;
    int32 q34p, q34m, q07m, q07p;

    SHIFT_TEMPS;

    /* ---- pass one: rows of data2d -> columns of workspace ---- */
    for (row = 0, src_row = (int32 *) data2d, col_out = workspace;
         row < DCTSIZE;
         row++, src_row += DCTSIZE, col_out++) {

        /* Range notes carried over from the original author: these
         * eight values lie in -512..+512 (-256..+256 for I-blocks). */
        sum07 = src_row[0] + src_row[7];
        sum16 = src_row[1] + src_row[6];
        sum25 = src_row[2] + src_row[5];
        sum34 = src_row[3] + src_row[4];
        dif34 = src_row[3] - src_row[4];
        dif25 = src_row[2] - src_row[5];
        dif16 = src_row[1] - src_row[6];
        dif07 = src_row[0] - src_row[7];

        /* Original author's note: -1024..+1024 (-512..+512 for I-blocks). */
        ev0 = sum07 + sum34;
        ev1 = sum16 + sum25;
        ev2 = sum16 - sum25;
        ev3 = sum07 - sum34;

        col_out[0]           = (int32) UNFIXH((ev0 + ev1) * SIN_1_4);
        col_out[DCTSIZE * 4] = (int32) UNFIXH((ev0 - ev1) * COS_1_4);

        col_out[DCTSIZE * 2] = (int32) UNFIXH(ev3 * COS_1_8 + ev2 * SIN_1_8);
        col_out[DCTSIZE * 6] = (int32) UNFIXH(ev3 * SIN_1_8 - ev2 * COS_1_8);

        rot_sum = UNFIXO((dif16 + dif25) * SIN_1_4);
        rot_dif = UNFIXO((dif16 - dif25) * COS_1_4);

        /* Per the original comment, dif34 and dif07 are brought to the
         * same OVERSCALE scaling as rot_sum / rot_dif before mixing. */
        OVERSHIFT(dif34);
        OVERSHIFT(dif07);

        q34p = dif34 + rot_dif;
        q34m = dif34 - rot_dif;
        q07m = dif07 - rot_sum;
        q07p = dif07 + rot_sum;

        col_out[DCTSIZE]     = (int32) UNFIXH(q07p * OCOS_1_16 + q34p * OSIN_1_16);
        col_out[DCTSIZE * 7] = (int32) UNFIXH(q07p * OCOS_7_16 - q34p * OSIN_7_16);
        col_out[DCTSIZE * 5] = (int32) UNFIXH(q07m * OCOS_5_16 + q34m * OSIN_5_16);
        col_out[DCTSIZE * 3] = (int32) UNFIXH(q07m * OCOS_3_16 - q34m * OSIN_3_16);
    }

    /* ---- pass two: rows of workspace -> dest via zigzag[k][col] ---- */
    for (col = 0, src_row = workspace;
         col < DCTSIZE;
         col++, src_row += DCTSIZE) {

        sum07 = src_row[0] + src_row[7];
        sum16 = src_row[1] + src_row[6];
        sum25 = src_row[2] + src_row[5];
        sum34 = src_row[3] + src_row[4];
        dif34 = src_row[3] - src_row[4];
        dif25 = src_row[2] - src_row[5];
        dif16 = src_row[1] - src_row[6];
        dif07 = src_row[0] - src_row[7];

        ev0 = sum07 + sum34;
        ev1 = sum16 + sum25;
        ev2 = sum16 - sum25;
        ev3 = sum07 - sum34;

        dst[ zigzag[0][col] ] = (int32) UNFIXH((ev0 + ev1) * SIN_1_4);
        dst[ zigzag[4][col] ] = (int32) UNFIXH((ev0 - ev1) * COS_1_4);

        dst[ zigzag[2][col] ] = (int32) UNFIXH(ev3 * COS_1_8 + ev2 * SIN_1_8);
        dst[ zigzag[6][col] ] = (int32) UNFIXH(ev3 * SIN_1_8 - ev2 * COS_1_8);

        rot_sum = UNFIXO((dif16 + dif25) * SIN_1_4);
        rot_dif = UNFIXO((dif16 - dif25) * COS_1_4);

        OVERSHIFT(dif34);
        OVERSHIFT(dif07);

        q34p = dif34 + rot_dif;
        q34m = dif34 - rot_dif;
        q07m = dif07 - rot_sum;
        q07p = dif07 + rot_sum;

        dst[ zigzag[1][col] ] = (int32) UNFIXH(q07p * OCOS_1_16 + q34p * OSIN_1_16);
        dst[ zigzag[7][col] ] = (int32) UNFIXH(q07p * OCOS_7_16 - q34p * OSIN_7_16);
        dst[ zigzag[5][col] ] = (int32) UNFIXH(q07m * OCOS_5_16 + q34m * OSIN_5_16);
        dst[ zigzag[3][col] ] = (int32) UNFIXH(q07m * OCOS_3_16 - q34m * OSIN_3_16);
    }
}