void ChenIDct(int *x, int *y) { register int i; register int *aptr; register int a0,a1,a2,a3; register int b0,b1,b2,b3; register int c0,c1,c2,c3; /* Loop over columns */ for(i=0; i<8; i++) { aptr = x+i; b0 = LS(*aptr,2); aptr += 8; a0 = LS(*aptr,2); aptr += 8; b2 = LS(*aptr,2); aptr += 8; a1 = LS(*aptr,2); aptr += 8; b1 = LS(*aptr,2); aptr += 8; a2 = LS(*aptr,2); aptr += 8; b3 = LS(*aptr,2); aptr += 8; a3 = LS(*aptr,2); /* Split into even mode b0 = x0 b1 = x4 b2 = x2 b3 = x6. And the odd terms a0 = x1 a1 = x3 a2 = x5 a3 = x7. */ c0 = MSCALE((c7d16*a0)-(c1d16*a3)); c1 = MSCALE((c3d16*a2)-(c5d16*a1)); c2 = MSCALE((c3d16*a1)+(c5d16*a2)); c3 = MSCALE((c1d16*a0)+(c7d16*a3)); /* First Butterfly on even terms.*/ a0 = MSCALE(c1d4*(b0+b1)); a1 = MSCALE(c1d4*(b0-b1)); a2 = MSCALE((c3d8*b2)-(c1d8*b3)); a3 = MSCALE((c1d8*b2)+(c3d8*b3)); b0 = a0+a3; b1 = a1+a2; b2 = a1-a2; b3 = a0-a3; /* Second Butterfly */ a0 = c0+c1; a1 = c0-c1; a2 = c3-c2; a3 = c3+c2; c0 = a0; c1 = MSCALE(c1d4*(a2-a1)); c2 = MSCALE(c1d4*(a2+a1)); c3 = a3; aptr = y+i; *aptr = b0+c3; aptr += 8; *aptr = b1+c2; aptr += 8; *aptr = b2+c1; aptr += 8; *aptr = b3+c0; aptr += 8; *aptr = b3-c0; aptr += 8; *aptr = b2-c1; aptr += 8; *aptr = b1-c2; aptr += 8; *aptr = b0-c3; } /* Loop over rows */ for(i=0; i<8; i++) { aptr = y+LS(i,3); b0 = *(aptr++); a0 = *(aptr++); b2 = *(aptr++); a1 = *(aptr++); b1 = *(aptr++); a2 = *(aptr++); b3 = *(aptr++); a3 = *(aptr); /* Split into even mode b0 = x0 b1 = x4 b2 = x2 b3 = x6. And the odd terms a0 = x1 a1 = x3 a2 = x5 a3 = x7. */ c0 = MSCALE((c7d16*a0)-(c1d16*a3)); c1 = MSCALE((c3d16*a2)-(c5d16*a1)); c2 = MSCALE((c3d16*a1)+(c5d16*a2)); c3 = MSCALE((c1d16*a0)+(c7d16*a3)); /* First Butterfly on even terms.*/ a0 = MSCALE(c1d4*(b0+b1)); a1 = MSCALE(c1d4*(b0-b1)); a2 = MSCALE((c3d8*b2)-(c1d8*b3)); a3 = MSCALE((c1d8*b2)+(c3d8*b3)); /* Calculate last set of b's */ b0 = a0+a3; b1 = a1+a2; b2 = a1-a2; b3 = a0-a3; /* Second Butterfly */ a0 = c0+c1; a1 = c0-c1; a2 = c3-c2; a3 = c3+c2; c0 = a0; c1 = MSCALE(c1d4*(a2-a1)); c2 = MSCALE(c1d4*(a2+a1)); c3 = a3; aptr = y+LS(i,3); *(aptr++) = b0+c3; *(aptr++) = b1+c2; *(aptr++) = b2+c1; *(aptr++) = b3+c0; *(aptr++) = b3-c0; *(aptr++) = b2-c1; *(aptr++) = b1-c2; *(aptr) = b0-c3; } /* Retrieve correct accuracy. We have additional factor of 16 that must be removed. */ for(i=0,aptr=y; i<64; i++,aptr++) *aptr = (((*aptr<0) ? (*aptr-8) : (*aptr+8)) /16); }
/* * Decode one block */ __attribute__((always_inline))void decode_block(int comp_no, int *out_buf, int *HuffBuff) { int QuantBuff[DCTSIZE2]; unsigned int* p_quant_tbl; DecodeHuffMCU(HuffBuff, comp_no); IZigzagMatrix(HuffBuff,QuantBuff); p_quant_tbl = &p_jinfo_quant_tbl_quantval[(int)p_jinfo_comps_info_quant_tbl_no[comp_no]][DCTSIZE2]; IQuantize(QuantBuff,(int *)p_quant_tbl); //ChenIDct(QuantBuff, out_buf); int *x = QuantBuff; int *y = out_buf; register int i; register int *aptr; register int a0,a1,a2,a3; register int b0,b1,b2,b3; register int c0,c1,c2,c3; /* Loop over columns */ for(i=0;i<8;i++) { aptr = x+i; b0 = LS(*aptr,2); aptr += 8; a0 = LS(*aptr,2); aptr += 8; b2 = LS(*aptr,2); aptr += 8; a1 = LS(*aptr,2); aptr += 8; b1 = LS(*aptr,2); aptr += 8; a2 = LS(*aptr,2); aptr += 8; b3 = LS(*aptr,2); aptr += 8; a3 = LS(*aptr,2); /* Split into even mode b0 = x0 b1 = x4 b2 = x2 b3 = x6. And the odd terms a0 = x1 a1 = x3 a2 = x5 a3 = x7. */ c0 = MSCALE((c7d16*a0)-(c1d16*a3)); c1 = MSCALE((c3d16*a2)-(c5d16*a1)); c2 = MSCALE((c3d16*a1)+(c5d16*a2)); c3 = MSCALE((c1d16*a0)+(c7d16*a3)); /* First Butterfly on even terms.*/ a0 = MSCALE(c1d4*(b0+b1)); a1 = MSCALE(c1d4*(b0-b1)); a2 = MSCALE((c3d8*b2)-(c1d8*b3)); a3 = MSCALE((c1d8*b2)+(c3d8*b3)); b0 = a0+a3; b1 = a1+a2; b2 = a1-a2; b3 = a0-a3; /* Second Butterfly */ a0 = c0+c1; a1 = c0-c1; a2 = c3-c2; a3 = c3+c2; c0 = a0; c1 = MSCALE(c1d4*(a2-a1)); c2 = MSCALE(c1d4*(a2+a1)); c3 = a3; aptr = y+i; *aptr = b0+c3; aptr += 8; *aptr = b1+c2; aptr += 8; *aptr = b2+c1; aptr += 8; *aptr = b3+c0; aptr += 8; *aptr = b3-c0; aptr += 8; *aptr = b2-c1; aptr += 8; *aptr = b1-c2; aptr += 8; *aptr = b0-c3; } /* Loop over rows */ for(i=0;i<8;i++) { aptr = y+LS(i,3); b0 = *(aptr++); a0 = *(aptr++); b2 = *(aptr++); a1 = *(aptr++); b1 = *(aptr++); a2 = *(aptr++); b3 = *(aptr++); a3 = *(aptr); /* Split into even mode b0 = x0 b1 = x4 b2 = x2 b3 = x6. And the odd terms a0 = x1 a1 = x3 a2 = x5 a3 = x7. */ c0 = MSCALE((c7d16*a0)-(c1d16*a3)); c1 = MSCALE((c3d16*a2)-(c5d16*a1)); c2 = MSCALE((c3d16*a1)+(c5d16*a2)); c3 = MSCALE((c1d16*a0)+(c7d16*a3)); /* First Butterfly on even terms.*/ a0 = MSCALE(c1d4*(b0+b1)); a1 = MSCALE(c1d4*(b0-b1)); a2 = MSCALE((c3d8*b2)-(c1d8*b3)); a3 = MSCALE((c1d8*b2)+(c3d8*b3)); /* Calculate last set of b's */ b0 = a0+a3; b1 = a1+a2; b2 = a1-a2; b3 = a0-a3; /* Second Butterfly */ a0 = c0+c1; a1 = c0-c1; a2 = c3-c2; a3 = c3+c2; c0 = a0; c1 = MSCALE(c1d4*(a2-a1)); c2 = MSCALE(c1d4*(a2+a1)); c3 = a3; aptr = y+LS(i,3); *(aptr++) = b0+c3; *(aptr++) = b1+c2; *(aptr++) = b2+c1; *(aptr++) = b3+c0; *(aptr++) = b3-c0; *(aptr++) = b2-c1; *(aptr++) = b1-c2; *(aptr) = b0-c3; } /* Retrieve correct accuracy. We have additional factor of 16 that must be removed. */ for(i=0,aptr=y;i<64;i++,aptr++) *aptr = (((*aptr<0) ? (*aptr-8) : (*aptr+8)) /16); PostshiftIDctMatrix(out_buf,IDCT_SHIFT); BoundIDctMatrix(out_buf,IDCT_BOUNT); }
/* Chen forward DCT algorithm */ void chendct(int in_block[64], int out_block[64]) { int i, aptr; int a0, a1, a2, a3; int b0, b1, b2, b3; int c0, c1, c2, c3; int v0, v1, v2, v3, v4, v5, v6, v7; for (i = 0; i < 8; i++) { aptr = i; v0 = in_block[aptr]; aptr += 8; v1 = in_block[aptr]; aptr += 8; v2 = in_block[aptr]; aptr += 8; v3 = in_block[aptr]; aptr += 8; v4 = in_block[aptr]; aptr += 8; v5 = in_block[aptr]; aptr += 8; v6 = in_block[aptr]; aptr += 8; v7 = in_block[aptr]; a0 = LS((v0 + v7), 2); c3 = LS((v0 - v7), 2); a1 = LS((v1 + v6), 2); c2 = LS((v1 - v6), 2); a2 = LS((v2 + v5), 2); c1 = LS((v2 - v5), 2); a3 = LS((v3 + v4), 2); c0 = LS((v3 - v4), 2); b0 = a0 + a3; b1 = a1 + a2; b2 = a1 - a2; b3 = a0 - a3; out_block[i] = MSCALE(c1d4 * (b0 + b1)); out_block[i + 32] = MSCALE(c1d4 * (b0 - b1)); out_block[i + 16] = MSCALE((c3d8 * b2) + (c1d8 * b3)); out_block[i + 48] = MSCALE((c3d8 * b3) - (c1d8 * b2)); b0 = MSCALE(c1d4 * (c2 - c1)); b1 = MSCALE(c1d4 * (c2 + c1)); a0 = c0 + b0; a1 = c0 - b0; a2 = c3 - b1; a3 = c3 + b1; out_block[i + 8] = MSCALE((c7d16 * a0) + (c1d16 * a3)); out_block[i + 24] = MSCALE((c3d16 * a2) - (c5d16 * a1)); out_block[i + 40] = MSCALE((c3d16 * a1) + (c5d16 * a2)); out_block[i + 56] = MSCALE((c7d16 * a3) - (c1d16 * a0)); } for (i = 0; i < 8; i++) { aptr = LS(i, 3); v0 = out_block[aptr]; aptr++; v1 = out_block[aptr]; aptr++; v2 = out_block[aptr]; aptr++; v3 = out_block[aptr]; aptr++; v4 = out_block[aptr]; aptr++; v5 = out_block[aptr]; aptr++; v6 = out_block[aptr]; aptr++; v7 = out_block[aptr]; c3 = RS((v0 - v7), 1); a0 = RS((v0 + v7), 1); c2 = RS((v1 - v6), 1); a1 = RS((v1 + v6), 1); c1 = RS((v2 - v5), 1); a2 = RS((v2 + v5), 1); c0 = RS((v3 - v4), 1); a3 = RS((v3 + v4), 1); b0 = a0 + a3; b1 = a1 + a2; b2 = a1 - a2; b3 = a0 - a3; aptr = LS(i, 3); out_block[aptr] = MSCALE(c1d4 * (b0 + b1)); out_block[aptr + 4] = MSCALE(c1d4 * (b0 - b1)); out_block[aptr + 2] = MSCALE((c3d8 * b2) + (c1d8 * b3)); out_block[aptr + 6] = MSCALE((c3d8 * b3) - (c1d8 * b2)); b0 = MSCALE(c1d4 * (c2 - c1)); b1 = MSCALE(c1d4 * (c2 + c1)); a0 = c0 + b0; a1 = c0 - b0; a2 = c3 - b1; a3 = c3 + b1; out_block[aptr + 1] = MSCALE((c7d16 * a0) + (c1d16 * a3)); out_block[aptr + 3] = MSCALE((c3d16 * a2) - (c5d16 * a1)); out_block[aptr + 5] = MSCALE((c3d16 * a1) + (c5d16 * a2)); out_block[aptr + 7] = MSCALE((c7d16 * a3) - (c1d16 * a0)); } }