예제 #1
0
/*
 * __mlib_VideoIDCT8x8_S16_S16_Q1
 *
 * Runs IDCT1 over the input coefficients into a local 64-entry
 * workspace, then runs IDCT2 from that workspace into the output block.
 * Judging by the name this is an 8x8 inverse DCT from mlib_s16
 * coefficients to mlib_s16 samples; IDCT1 and IDCT2 themselves are
 * defined outside this view.
 *
 *	block	destination buffer (handed to IDCT2 via blockPtr)
 *	coeffs	source coefficients (handed to IDCT1 via coeffPtr)
 *
 * Always returns MLIB_SUCCESS; no argument validation is done here.
 *
 * NOTE(review): x0..x8, str and i are never referenced directly in this
 * body. Presumably IDCT1/IDCT2 are macros that use them by name, so do
 * not rename or remove them without checking the macro definitions.
 */
mlib_status
__mlib_VideoIDCT8x8_S16_S16_Q1(
	mlib_s16 *block,
	const mlib_s16 *coeffs)
{
	const mlib_s16 *coeffPtr = coeffs;
	mlib_s16 *blockPtr = block;
/* intermediate buffer shared by the two passes */
	mlib_s64 workspace[64];
	mlib_s64 *workPtr = workspace;
/* not used directly below - see NOTE(review) in the header comment */
	mlib_s64 x0, x1, x2, x3, x4, x5, x6, x7, x8;
	mlib_s32 str = 8;
	mlib_s32 i;

/*
 * first pass: coefficients -> workspace
 * (meaning of the third argument, 16384, is not visible here - TODO confirm)
 */
	IDCT1(coeffPtr, workPtr, 16384);

/*
 * rewind to the start of the workspace before the second pass;
 * presumably IDCT1 advances workPtr - confirm against its definition
 */
	workPtr = workspace;

/*
 * second pass: workspace -> block
 * (meaning of the third argument, 2048, is not visible here - TODO confirm)
 */
	IDCT2(workPtr, blockPtr, 2048);

	return (MLIB_SUCCESS);
}
예제 #2
0
/*
 * __mlib_VideoIDCT8x8_U8_S16_Q1
 *
 * Runs IDCT1 over the input coefficients into a local 64-entry
 * workspace, then performs a second pass written out inline below that
 * produces eight output lines of eight mlib_u8 values each.
 * Judging by the name this is an 8x8 inverse DCT from mlib_s16
 * coefficients to mlib_u8 samples; IDCT1 is defined outside this view.
 *
 *	block	destination; 8 values are stored per line
 *	coeffs	source coefficients (handed to IDCT1 via coeffPtr)
 *	stride	number of mlib_u8 elements between the starts of
 *		consecutive output lines (added to blockPtr per iteration)
 *
 * Always returns MLIB_SUCCESS; no argument validation is done here.
 *
 * NOTE(review): the second pass reads only inPtr[8 * 0] .. inPtr[8 * 3]
 * for each output line. Together with the _Q1 suffix this suggests that
 * the remaining workspace entries are expected to be zero (input limited
 * to the first quadrant) - confirm against the library documentation.
 *
 * NOTE(review): str is never referenced directly in this body, and
 * x0..x8 / i may also be used by the IDCT1 expansion; do not rename or
 * remove them without checking the macro definition.
 */
mlib_status
__mlib_VideoIDCT8x8_U8_S16_Q1(
	mlib_u8 *block,
	const mlib_s16 *coeffs,
	mlib_s32 stride)
{
	const mlib_s16 *coeffPtr = coeffs;
	mlib_u8 *blockPtr = block;
/* intermediate buffer between the first pass and the loop below */
	mlib_s64 workspace[64];
	mlib_s64 *workPtr = workspace;
	mlib_s64 x0, x1, x2, x3, x4, x5, x6, x7, x8;
	mlib_s32 i;
	mlib_s32 str = 8;

/* read cursor into workspace for the second pass */
	mlib_s64 *inPtr;

/*
 * first pass: coefficients -> workspace
 * (meaning of the third argument, -128 * 8, is not visible here -
 * TODO confirm)
 */
	IDCT1(coeffPtr, workPtr, (-128 * 8));

	inPtr = workspace;

/* second pass: one output line of 8 values per iteration */
	for (i = 0; i < 8; i++) {
/* first stage: scale the two odd-indexed inputs that are read (1 and 3) */
		x4 = RCOS_1_16 * inPtr[8 * 1];
		x5 = RCOS_7_16 * inPtr[8 * 1];

		x6 = RCOS_3_16 * inPtr[8 * 3];
		x7 = RCOS_5_16 * inPtr[8 * 3];

/*
 * second stage
 * x8 and x0 receive the same value: both are derived from inPtr[8 * 0]
 * only (presumably the inPtr[8 * 4] term of a full transform is taken
 * to be zero here - confirm)
 */
		x8 = RTWOSQRT2 * (inPtr[8 * 0]);
		x0 = RTWOSQRT2 * (inPtr[8 * 0]);

		x2 = RCOS_6_16 * inPtr[8 * 2];
		x3 = RCOS_2_16 * inPtr[8 * 2];

/* x4 and x5 are overwritten in place; x1 and x6 keep the plain sum/difference */
		x1 = x4 + x6;
		x4 = ROUND(COS_4_16 * (x4 - x6));
		x6 = x5 - x7;
		x5 = ROUND(COS_4_16 * (x5 + x7));

/* third stage: combine the even terms, then the rotated odd terms */
		x7 = x8 + x3;
		x8 -= x3;
		x3 = x0 + x2;
		x0 -= x2;
		x2 = (x4 + x5);
		x4 -= x5;

/*
 * fourth stage: outputs 0..3 are sums, outputs 7..4 are the matching
 * differences; each is stored through SATURATE (defined elsewhere,
 * presumably clamping to the mlib_u8 range - confirm)
 */
		SATURATE(x7 + x1, blockPtr[0]);
		SATURATE(x3 + x2, blockPtr[1]);
		SATURATE(x0 + x4, blockPtr[2]);
		SATURATE(x8 + x6, blockPtr[3]);
		SATURATE(x8 - x6, blockPtr[4]);
		SATURATE(x0 - x4, blockPtr[5]);
		SATURATE(x3 - x2, blockPtr[6]);
		SATURATE(x7 - x1, blockPtr[7]);

/* next workspace element in, next output line out */
		inPtr++;
		blockPtr += stride;
	}

	return (MLIB_SUCCESS);
}
예제 #3
0
파일: idctfst.c 프로젝트: duke1102/psxdev
/*
 * IDCT - two-pass inverse DCT performed in place on `block`.
 *
 * block  coefficient array; it is read and overwritten with the result.
 *        Pass 1 treats elements DCTSIZE apart as one column, pass 2
 *        treats DCTSIZE consecutive elements as one row.
 * k      when equal to 1 the work is delegated to IDCT1(block) and this
 *        function returns immediately; any other value runs the two
 *        passes below.
 *
 * MULTIPLY, DESCALE, RANGE, the FIX_* constants, DCTSIZE and PASS1_BITS
 * are defined outside this function.
 */
void IDCT(BLOCK *block,int k)
{
  int n;

  /* Special case handled entirely by IDCT1(). */
  if (k == 1) {
    IDCT1(block);
    return;
  }

  /* Pass 1: transform each column; results go back into the same column
   * and are left unscaled for pass 2. */
  for (n = 0; n < DCTSIZE; n++) {
    BLOCK *col = block + n;
    const BLOCK c0 = col[DCTSIZE*0];
    const BLOCK c1 = col[DCTSIZE*1];
    const BLOCK c2 = col[DCTSIZE*2];
    const BLOCK c3 = col[DCTSIZE*3];
    const BLOCK c4 = col[DCTSIZE*4];
    const BLOCK c5 = col[DCTSIZE*5];
    const BLOCK c6 = col[DCTSIZE*6];
    const BLOCK c7 = col[DCTSIZE*7];
    int sum04, diff04, sum26, rot26;
    int even0, even1, even2, even3;
    int sum35, diff35, sum17, diff17, twist;
    int odd4, odd5, odd6, odd7;

    /* Shortcut: when every AC term of the column is zero, each output
     * equals the DC term, so the arithmetic below can be skipped. */
    if ((c1 | c2 | c3 | c4 | c5 | c6 | c7) == 0) {
      col[DCTSIZE*7] = c0;
      col[DCTSIZE*6] = c0;
      col[DCTSIZE*5] = c0;
      col[DCTSIZE*4] = c0;
      col[DCTSIZE*3] = c0;
      col[DCTSIZE*2] = c0;
      col[DCTSIZE*1] = c0;
      col[DCTSIZE*0] = c0;
      continue;
    }

    /* Even-indexed inputs (0, 2, 4, 6). */
    sum04  = c0 + c4;
    diff04 = c0 - c4;
    sum26  = c2 + c6;
    rot26  = MULTIPLY(c2 - c6, FIX_1_414213562) - sum26;

    even0 = sum04 + sum26;
    even3 = sum04 - sum26;
    even1 = diff04 + rot26;
    even2 = diff04 - rot26;

    /* Odd-indexed inputs (1, 3, 5, 7). */
    sum35  = c3 + c5;
    diff35 = c3 - c5;
    sum17  = c1 + c7;
    diff17 = c1 - c7;

    twist = MULTIPLY(diff17 - diff35, FIX_1_847759065);
    odd7 = sum17 + sum35;
    odd6 = MULTIPLY(diff35, FIX_2_613125930) + twist - odd7;
    odd5 = MULTIPLY(sum17 - sum35, FIX_1_414213562) - odd6;
    odd4 = MULTIPLY(diff17, FIX_1_082392200) - twist + odd5;

    /* Butterfly the even and odd halves into the eight outputs. */
    col[DCTSIZE*0] = (even0 + odd7);
    col[DCTSIZE*1] = (even1 + odd6);
    col[DCTSIZE*2] = (even2 + odd5);
    col[DCTSIZE*3] = (even3 - odd4);
    col[DCTSIZE*4] = (even3 + odd4);
    col[DCTSIZE*5] = (even2 - odd5);
    col[DCTSIZE*6] = (even1 - odd6);
    col[DCTSIZE*7] = (even0 - odd7);
  }

  /* Pass 2: transform each row of the pass-1 result in place.  Every
   * output is descaled by PASS1_BITS+3 bits (the factor of 8 plus the
   * pass-1 scaling) and then passed through RANGE. */
  for (n = 0; n < DCTSIZE; n++) {
    BLOCK *row = block + DCTSIZE*n;
    const BLOCK r0 = row[0];
    const BLOCK r1 = row[1];
    const BLOCK r2 = row[2];
    const BLOCK r3 = row[3];
    const BLOCK r4 = row[4];
    const BLOCK r5 = row[5];
    const BLOCK r6 = row[6];
    const BLOCK r7 = row[7];
    int sum04, diff04, sum26, rot26;
    int even0, even1, even2, even3;
    int sum35, diff35, sum17, diff17, twist;
    int odd4, odd5, odd6, odd7;

    /* Same all-AC-zero shortcut as in pass 1.  Pass 1 tends to fill in
     * AC terms, so it fires less often here; define NO_ZERO_ROW_TEST to
     * compile the check out where the test costs more than it saves. */
#ifndef NO_ZERO_ROW_TEST
    if ((r1 | r2 | r3 | r4 | r5 | r6 | r7) == 0) {
      const BLOCK flat = RANGE(DESCALE(r0, PASS1_BITS+3));

      row[7] = flat;
      row[6] = flat;
      row[5] = flat;
      row[4] = flat;
      row[3] = flat;
      row[2] = flat;
      row[1] = flat;
      row[0] = flat;
      continue;
    }
#endif

    /* Even-indexed inputs (0, 2, 4, 6). */
    sum04  = r0 + r4;
    diff04 = r0 - r4;
    sum26  = r2 + r6;
    rot26  = MULTIPLY(r2 - r6, FIX_1_414213562) - sum26;

    even0 = sum04 + sum26;
    even3 = sum04 - sum26;
    even1 = diff04 + rot26;
    even2 = diff04 - rot26;

    /* Odd-indexed inputs (1, 3, 5, 7). */
    sum35  = r3 + r5;
    diff35 = r3 - r5;
    sum17  = r1 + r7;
    diff17 = r1 - r7;

    twist = MULTIPLY(diff17 - diff35, FIX_1_847759065);
    odd7 = sum17 + sum35;
    odd6 = MULTIPLY(diff35, FIX_2_613125930) + twist - odd7;
    odd5 = MULTIPLY(sum17 - sum35, FIX_1_414213562) - odd6;
    odd4 = MULTIPLY(diff17, FIX_1_082392200) - twist + odd5;

    /* Final outputs: descale and range-limit each value. */
    row[0] = RANGE(DESCALE(even0 + odd7, PASS1_BITS+3));
    row[1] = RANGE(DESCALE(even1 + odd6, PASS1_BITS+3));
    row[2] = RANGE(DESCALE(even2 + odd5, PASS1_BITS+3));
    row[3] = RANGE(DESCALE(even3 - odd4, PASS1_BITS+3));
    row[4] = RANGE(DESCALE(even3 + odd4, PASS1_BITS+3));
    row[5] = RANGE(DESCALE(even2 - odd5, PASS1_BITS+3));
    row[6] = RANGE(DESCALE(even1 - odd6, PASS1_BITS+3));
    row[7] = RANGE(DESCALE(even0 - odd7, PASS1_BITS+3));
  }
}