int main(int argc, char *argv[]) { printf("Linear Regression\n"); printf("=================\n\n"); // Check for a filename if (argc == 2) { FILE *input = fopen(argv[1], "r"); if (input == NULL) { perror(argv[1]); exit(1); } else { DataSet theData = load_data(input); fclose(input); LinRegResult linReg = linear_regression(theData); clean(theData); printf("\nSlope \tm = %15.6e\n", DESCALE(linReg.m)); // print slope printf("y-intercept\tb = %15.6e\n", DESCALE(linReg.b)); // print y-intercept printf("Correlation\tr = %15.6e\n", DESCALE(linReg.r)); // print correlation } } else { printf("ERROR: Must enter filename after ./linReg\n"); } return 0; }
int dotProd(int *a, int *b, int n) { double dotProd = 0; int result = 0; int i; for (i = 0; i < n; i++) { dotProd += DESCALE(a[i]) * DESCALE(b[i]); } result = dotProd * SCALE; return result; }
ff_fdct248_islow (DCTELEM * data) { int_fast32_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; int_fast32_t tmp10, tmp11, tmp12, tmp13; int_fast32_t z1; DCTELEM *dataptr; int ctr; row_fdct(data); /* Pass 2: process columns. * We remove the PASS1_BITS scaling, but leave the results scaled up * by an overall factor of 8. */ dataptr = data; for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*1]; tmp1 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3]; tmp2 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*5]; tmp3 = dataptr[DCTSIZE*6] + dataptr[DCTSIZE*7]; tmp4 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*1]; tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3]; tmp6 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*5]; tmp7 = dataptr[DCTSIZE*6] - dataptr[DCTSIZE*7]; tmp10 = tmp0 + tmp3; tmp11 = tmp1 + tmp2; tmp12 = tmp1 - tmp2; tmp13 = tmp0 - tmp3; dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS); dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS); z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865), CONST_BITS+PASS1_BITS); dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065), CONST_BITS+PASS1_BITS); tmp10 = tmp4 + tmp7; tmp11 = tmp5 + tmp6; tmp12 = tmp5 - tmp6; tmp13 = tmp4 - tmp7; dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS); dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS); z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865), CONST_BITS+PASS1_BITS); dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065), CONST_BITS+PASS1_BITS); dataptr++; /* advance pointer to next column */ } }
/* Inverse 2-D Discrete Cosine Transform. */ void IDCT(const FBlock * input, PBlock * output) { int Y[64]; int k, l; /* Pass 1: process rows. */ for (k = 0; k < 8; k++) { /* Prescale k-th row: */ for (l = 0; l < 8; l++) Y(k, l) = SCALE(input->block[k][l], S_BITS); /* 1-D IDCT on k-th row: */ idct_1d(&Y(k, 0)); /* Result Y is scaled up by factor sqrt(8)*2^S_BITS. */ } /* Pass 2: process columns. */ for (l = 0; l < 8; l++) { int Yc[8]; for (k = 0; k < 8; k++) Yc[k] = Y(k, l); /* 1-D IDCT on l-th column: */ idct_1d(Yc); /* Result is once more scaled up by a factor sqrt(8). */ for (k = 0; k < 8; k++) { int r = 128 + DESCALE(Yc[k], S_BITS + 3); /* includes level shift */ /* Clip to 8 bits unsigned: */ r = r > 0 ? (r < 255 ? r : 255) : 0; X(k, l) = r; } } }
static void idct1(int *block) { int i, val; val = RANGE(DESCALE(block[0], PASS1_BITS+3)); for(i=0;i<DCTSIZE2;i++) block[i]=val; }
LinRegResult linear_regression(DataSet theData) { LinRegResult result; int n = theData.n; // number of data points double sumx = DESCALE(sum(theData.x, n)); // sum of x double sumxx = DESCALE(dotProd(theData.x, theData.x, n)); // sum of each x squared double sumy = DESCALE(sum(theData.y, n)); // sum of y double sumyy = DESCALE(dotProd(theData.y, theData.y, n)); // sum of each y squared double sumxy = DESCALE(dotProd(theData.x, theData.y, n)); // sum of each x * y double m, b, r; // Compute least-squares best fit straight line m = (n * sumxy - sumx * sumy) / (n * sumxx - sqr(sumx)); // slope b = (sumy * sumxx - (sumx * sumxy)) / (n * sumxx - sqr(sumx)); // y-intercept r = (sumxy - sumx * sumy / n) / sqrt((sumxx - sqr(sumx) / n) * (sumyy - sqr(sumy)/ n)); // correlation result.m = m * SCALE; result.b = b * SCALE; result.r = r * SCALE; return result; }
/* * Perform dequantization and inverse DCT on one block of coefficients, * producing a reduced-size 1x1 output block. */ void JPEG_SWIDCT_1X1(int16 *coef_block, uint8 *output_buf, const int32 *quantptr) { int32 dcval; /* We hardly need an inverse DCT routine for this: just take the * average pixel value, which is one-eighth of the DC coefficient. */ dcval = DEQUANTIZE(coef_block[0], quantptr[0]); dcval = (int32) DESCALE((int32) dcval, 3); output_buf[0]/*[0]*/ = s_pClip_table[dcval+128]; }
static void idct(int* tempptr, const jpgd_block_t* dataptr) { // will be optimized at compile time to either an array access, or 0 #define ACCESS_COL(x) (((x) < NONZERO_COLS) ? (int)dataptr[x] : 0) const int z2 = ACCESS_COL(2); const int z3 = ACCESS_COL(6); const int z1 = MULTIPLY(z2 + z3, FIX_0_541196100); const int tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065); const int tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865); const int tmp0 = (ACCESS_COL(0) + ACCESS_COL(4)) << CONST_BITS; const int tmp1 = (ACCESS_COL(0) - ACCESS_COL(4)) << CONST_BITS; const int tmp10 = tmp0 + tmp3; const int tmp13 = tmp0 - tmp3; const int tmp11 = tmp1 + tmp2; const int tmp12 = tmp1 - tmp2; const int atmp0 = ACCESS_COL(7); const int atmp1 = ACCESS_COL(5); const int atmp2 = ACCESS_COL(3); const int atmp3 = ACCESS_COL(1); const int bz1 = atmp0 + atmp3; const int bz2 = atmp1 + atmp2; const int bz3 = atmp0 + atmp2; const int bz4 = atmp1 + atmp3; const int bz5 = MULTIPLY(bz3 + bz4, FIX_1_175875602); const int az1 = MULTIPLY(bz1, - FIX_0_899976223); const int az2 = MULTIPLY(bz2, - FIX_2_562915447); const int az3 = MULTIPLY(bz3, - FIX_1_961570560) + bz5; const int az4 = MULTIPLY(bz4, - FIX_0_390180644) + bz5; const int btmp0 = MULTIPLY(atmp0, FIX_0_298631336) + az1 + az3; const int btmp1 = MULTIPLY(atmp1, FIX_2_053119869) + az2 + az4; const int btmp2 = MULTIPLY(atmp2, FIX_3_072711026) + az2 + az3; const int btmp3 = MULTIPLY(atmp3, FIX_1_501321110) + az1 + az4; tempptr[0] = DESCALE(tmp10 + btmp3, CONST_BITS-PASS1_BITS); tempptr[7] = DESCALE(tmp10 - btmp3, CONST_BITS-PASS1_BITS); tempptr[1] = DESCALE(tmp11 + btmp2, CONST_BITS-PASS1_BITS); tempptr[6] = DESCALE(tmp11 - btmp2, CONST_BITS-PASS1_BITS); tempptr[2] = DESCALE(tmp12 + btmp1, CONST_BITS-PASS1_BITS); tempptr[5] = DESCALE(tmp12 - btmp1, CONST_BITS-PASS1_BITS); tempptr[3] = DESCALE(tmp13 + btmp0, CONST_BITS-PASS1_BITS); tempptr[4] = DESCALE(tmp13 - btmp0, CONST_BITS-PASS1_BITS); }
static void dct_1d(dct_data_t *src, dct_data_t *dst) { unsigned int k, n; int tmp; const dct_data_t dct_coeff_table[DCT_SIZE][DCT_SIZE] = { #include "dct_coeff_table.txt" }; for (k = 0; k < DCT_SIZE; k++) { for(n = 0, tmp = 0; n < DCT_SIZE; n++) { int coeff = (int)dct_coeff_table[k][n]; tmp += src[n] * coeff; } dst[k] = DESCALE(tmp, CONST_BITS); } }
openexif_jpeg_idct_1x1 (oe_j_decompress_ptr cinfo, openexif_jpeg_component_info * compptr, OE_JCOEFPTR coef_block, OE_JSAMPARRAY output_buf, OE_JDIMENSION output_col) { int dcval; ISLOW_MULT_TYPE * quantptr; OE_JSAMPLE *range_limit = IDCT_range_limit(cinfo); SHIFT_TEMPS /* We hardly need an inverse DCT routine for this: just take the * average pixel value, which is one-eighth of the DC coefficient. */ quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; dcval = DEQUANTIZE(coef_block[0], quantptr[0]); dcval = (int) DESCALE((INT32) dcval, 3); output_buf[0][output_col] = range_limit[dcval & RANGE_MASK]; }
METHODDEF void start_pass_fdctmgr (j_compress_ptr cinfo) { my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct; int ci, qtblno, i; jpeg_component_info *compptr; JQUANT_TBL * qtbl; #ifdef DCT_ISLOW_SUPPORTED DCTELEM * dtbl; #endif for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; ci++, compptr++) { qtblno = compptr->quant_tbl_no; /* Make sure specified quantization table is present */ if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS || cinfo->quant_tbl_ptrs[qtblno] == NULL) ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno); qtbl = cinfo->quant_tbl_ptrs[qtblno]; /* Compute divisors for this quant table */ /* We may do this more than once for same table, but it's not a big deal */ switch (cinfo->dct_method) { #ifdef DCT_ISLOW_SUPPORTED case JDCT_ISLOW: /* For LL&M IDCT method, divisors are equal to raw quantization * coefficients multiplied by 8 (to counteract scaling). */ if (fdct->divisors[qtblno] == NULL) { fdct->divisors[qtblno] = (DCTELEM *) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, DCTSIZE2 * SIZEOF(DCTELEM)); } dtbl = fdct->divisors[qtblno]; for (i = 0; i < DCTSIZE2; i++) { dtbl[i] = ((DCTELEM) qtbl->quantval[jpeg_zigzag_order[i]]) << 3; } break; #endif #ifdef DCT_IFAST_SUPPORTED case JDCT_IFAST: { /* For AA&N IDCT method, divisors are equal to quantization * coefficients scaled by scalefactor[row]*scalefactor[col], where * scalefactor[0] = 1 * scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7 * We apply a further scale factor of 8. */ #define CONST_BITS 14 static const INT16 aanscales[DCTSIZE2] = { /* precomputed values scaled up by 14 bits: in natural order */ 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520, 22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270, 21407, 29692, 27969, 25172, 21407, 16819, 11585, 5906, 19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315, 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520, 12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552, 8867, 12299, 11585, 10426, 8867, 6967, 4799, 2446, 4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247 }; SHIFT_TEMPS if (fdct->divisors[qtblno] == NULL) { fdct->divisors[qtblno] = (DCTELEM *) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, DCTSIZE2 * SIZEOF(DCTELEM)); } dtbl = fdct->divisors[qtblno]; for (i = 0; i < DCTSIZE2; i++) { dtbl[i] = (DCTELEM) DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[jpeg_zigzag_order[i]], (INT32) aanscales[i]), CONST_BITS-3); } } break; #endif #ifdef DCT_FLOAT_SUPPORTED case JDCT_FLOAT: { /* For float AA&N IDCT method, divisors are equal to quantization * coefficients scaled by scalefactor[row]*scalefactor[col], where * scalefactor[0] = 1 * scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7 * We apply a further scale factor of 8. * What's actually stored is 1/divisor so that the inner loop can * use a multiplication rather than a division. */ FAST_FLOAT * fdtbl; int row, col; static const double aanscalefactor[DCTSIZE] = { 1.0, 1.387039845, 1.306562965, 1.175875602, 1.0, 0.785694958, 0.541196100, 0.275899379 }; if (fdct->float_divisors[qtblno] == NULL) { fdct->float_divisors[qtblno] = (FAST_FLOAT *) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, DCTSIZE2 * SIZEOF(FAST_FLOAT)); } fdtbl = fdct->float_divisors[qtblno]; i = 0; for (row = 0; row < DCTSIZE; row++) { for (col = 0; col < DCTSIZE; col++) { fdtbl[i] = (FAST_FLOAT) (1.0 / (((double) qtbl->quantval[jpeg_zigzag_order[i]] * aanscalefactor[row] * aanscalefactor[col] * 8.0))); i++; } } } break; #endif default: ERREXIT(cinfo, JERR_NOT_COMPILED); break; } } }
static av_always_inline void row_fdct(DCTELEM * data) { int_fast32_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; int_fast32_t tmp10, tmp11, tmp12, tmp13; int_fast32_t z1, z2, z3, z4, z5; DCTELEM *dataptr; int ctr; /* Pass 1: process rows. */ /* Note results are scaled up by sqrt(8) compared to a true DCT; */ /* furthermore, we scale the results by 2**PASS1_BITS. */ dataptr = data; for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { tmp0 = dataptr[0] + dataptr[7]; tmp7 = dataptr[0] - dataptr[7]; tmp1 = dataptr[1] + dataptr[6]; tmp6 = dataptr[1] - dataptr[6]; tmp2 = dataptr[2] + dataptr[5]; tmp5 = dataptr[2] - dataptr[5]; tmp3 = dataptr[3] + dataptr[4]; tmp4 = dataptr[3] - dataptr[4]; /* Even part per LL&M figure 1 --- note that published figure is faulty; * rotator "sqrt(2)*c1" should be "sqrt(2)*c6". */ tmp10 = tmp0 + tmp3; tmp13 = tmp0 - tmp3; tmp11 = tmp1 + tmp2; tmp12 = tmp1 - tmp2; dataptr[0] = (DCTELEM) ((tmp10 + tmp11) << PASS1_BITS); dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS); z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865), CONST_BITS-PASS1_BITS); dataptr[6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065), CONST_BITS-PASS1_BITS); /* Odd part per figure 8 --- note paper omits factor of sqrt(2). * cK represents cos(K*pi/16). * i0..i3 in the paper are tmp4..tmp7 here. */ z1 = tmp4 + tmp7; z2 = tmp5 + tmp6; z3 = tmp4 + tmp6; z4 = tmp5 + tmp7; z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */ tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */ tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */ tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */ tmp7 = MULTIPLY(tmp7, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */ z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */ z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */ z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */ z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */ z3 += z5; z4 += z5; dataptr[7] = (DCTELEM) DESCALE(tmp4 + z1 + z3, CONST_BITS-PASS1_BITS); dataptr[5] = (DCTELEM) DESCALE(tmp5 + z2 + z4, CONST_BITS-PASS1_BITS); dataptr[3] = (DCTELEM) DESCALE(tmp6 + z2 + z3, CONST_BITS-PASS1_BITS); dataptr[1] = (DCTELEM) DESCALE(tmp7 + z1 + z4, CONST_BITS-PASS1_BITS); dataptr += DCTSIZE; /* advance pointer to next row */ } }
void idct_sh4(DCTELEM *block) { DEFREG; int i; float tblock[8*8],*fblock; int ofs1,ofs2,ofs3; #if defined(__SH4__) #error "FIXME!! change to single float" #endif /* row */ /* even part */ load_matrix(even_table); fblock = tblock+4; i = 8; do { fr0 = block[0]; fr1 = block[2]; fr2 = block[4]; fr3 = block[6]; block+=8; ftrv(); *--fblock = fr3; *--fblock = fr2; *--fblock = fr1; *--fblock = fr0; fblock+=8+4; } while(--i); block-=8*8; fblock-=8*8+4; load_matrix(odd_table); i = 8; // ofs1 = sizeof(float)*1; // ofs2 = sizeof(float)*2; // ofs3 = sizeof(float)*3; do { float t0,t1,t2,t3; fr0 = block[1]; fr1 = block[3]; fr2 = block[5]; fr3 = block[7]; block+=8; ftrv(); t0 = *fblock++; t1 = *fblock++; t2 = *fblock++; t3 = *fblock++; fblock+=4; *--fblock = t0 - fr0; *--fblock = t1 - fr1; *--fblock = t2 - fr2; *--fblock = t3 - fr3; *--fblock = t3 + fr3; *--fblock = t2 + fr2; *--fblock = t1 + fr1; *--fblock = t0 + fr0; fblock+=8; } while(--i); block-=8*8; fblock-=8*8; /* col */ /* even part */ load_matrix(even_table); ofs1 = sizeof(float)*2*8; ofs2 = sizeof(float)*4*8; ofs3 = sizeof(float)*6*8; i = 8; #define OA(fblock,ofs) *(float*)((char*)fblock + ofs) do { fr0 = OA(fblock, 0); fr1 = OA(fblock,ofs1); fr2 = OA(fblock,ofs2); fr3 = OA(fblock,ofs3); ftrv(); OA(fblock,0 ) = fr0; OA(fblock,ofs1) = fr1; OA(fblock,ofs2) = fr2; OA(fblock,ofs3) = fr3; fblock++; } while(--i); fblock-=8; load_matrix(odd_table); i=8; do { float t0,t1,t2,t3; t0 = OA(fblock, 0); /* [8*0] */ t1 = OA(fblock,ofs1); /* [8*2] */ t2 = OA(fblock,ofs2); /* [8*4] */ t3 = OA(fblock,ofs3); /* [8*6] */ fblock+=8; fr0 = OA(fblock, 0); /* [8*1] */ fr1 = OA(fblock,ofs1); /* [8*3] */ fr2 = OA(fblock,ofs2); /* [8*5] */ fr3 = OA(fblock,ofs3); /* [8*7] */ fblock+=-8+1; ftrv(); block[8*0] = DESCALE(t0 + fr0,3); block[8*7] = DESCALE(t0 - fr0,3); block[8*1] = DESCALE(t1 + fr1,3); block[8*6] = DESCALE(t1 - fr1,3); block[8*2] = DESCALE(t2 + fr2,3); block[8*5] = DESCALE(t2 - fr2,3); block[8*3] = DESCALE(t3 + fr3,3); block[8*4] = DESCALE(t3 - fr3,3); block++; } while(--i); #if defined(__SH4__) #error "FIXME!! change to double" #endif }
void j_rev_dct(DCTBLOCK data) { INT32 tmp0, tmp1, tmp2, tmp3; INT32 tmp10, tmp11, tmp12, tmp13; INT32 z1, z2, z3, z4, z5; register DCTELEM *dataptr; int rowctr; /* Pass 1: process rows. */ /* Note results are scaled up by sqrt(8) compared to a true IDCT; */ /* furthermore, we scale the results by 2**PASS1_BITS. */ dataptr = data; for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) { /* Even part: reverse the even part of the forward DCT. */ /* The rotator is sqrt(2)*c(-6). */ z2 = (INT32) dataptr[2]; z3 = (INT32) dataptr[6]; z1 = MULTIPLY(z2 + z3, FIX_0_541196100); tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065); tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865); tmp0 = ((INT32) dataptr[0] + (INT32) dataptr[4]) << CONST_BITS; tmp1 = ((INT32) dataptr[0] - (INT32) dataptr[4]) << CONST_BITS; tmp10 = tmp0 + tmp3; tmp13 = tmp0 - tmp3; tmp11 = tmp1 + tmp2; tmp12 = tmp1 - tmp2; /* Odd part per figure 8; the matrix is unitary and hence its * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. */ tmp0 = (INT32) dataptr[7]; tmp1 = (INT32) dataptr[5]; tmp2 = (INT32) dataptr[3]; tmp3 = (INT32) dataptr[1]; z1 = tmp0 + tmp3; z2 = tmp1 + tmp2; z3 = tmp0 + tmp2; z4 = tmp1 + tmp3; z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */ tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */ tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */ tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */ tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */ z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */ z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */ z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */ z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */ z3 += z5; z4 += z5; tmp0 += z1 + z3; tmp1 += z2 + z4; tmp2 += z2 + z3; tmp3 += z1 + z4; /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ dataptr[0] = (DCTELEM) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS); dataptr[7] = (DCTELEM) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS); dataptr[1] = (DCTELEM) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS); dataptr[6] = (DCTELEM) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS); dataptr[2] = (DCTELEM) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS); dataptr[5] = (DCTELEM) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS); dataptr[3] = (DCTELEM) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS); dataptr[4] = (DCTELEM) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS); dataptr += DCTSIZE; /* advance pointer to next row */ } /* Pass 2: process columns. */ /* Note that we must descale the results by a factor of 8 == 2**3, */ /* and also undo the PASS1_BITS scaling. */ dataptr = data; for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) { /* Even part: reverse the even part of the forward DCT. */ /* The rotator is sqrt(2)*c(-6). */ z2 = (INT32) dataptr[DCTSIZE*2]; z3 = (INT32) dataptr[DCTSIZE*6]; z1 = MULTIPLY(z2 + z3, FIX_0_541196100); tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065); tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865); tmp0 = ((INT32) dataptr[DCTSIZE*0] + (INT32) dataptr[DCTSIZE*4]) << CONST_BITS; tmp1 = ((INT32) dataptr[DCTSIZE*0] - (INT32) dataptr[DCTSIZE*4]) << CONST_BITS; tmp10 = tmp0 + tmp3; tmp13 = tmp0 - tmp3; tmp11 = tmp1 + tmp2; tmp12 = tmp1 - tmp2; /* Odd part per figure 8; the matrix is unitary and hence its * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. */ tmp0 = (INT32) dataptr[DCTSIZE*7]; tmp1 = (INT32) dataptr[DCTSIZE*5]; tmp2 = (INT32) dataptr[DCTSIZE*3]; tmp3 = (INT32) dataptr[DCTSIZE*1]; z1 = tmp0 + tmp3; z2 = tmp1 + tmp2; z3 = tmp0 + tmp2; z4 = tmp1 + tmp3; z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */ tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */ tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */ tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */ tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */ z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */ z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */ z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */ z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */ z3 += z5; z4 += z5; tmp0 += z1 + z3; tmp1 += z2 + z4; tmp2 += z2 + z3; tmp3 += z1 + z4; /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+1); dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+1); dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+1); dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+1); dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+1); dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+1); dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+1); dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+1); dataptr++; /* advance pointer to next column */ } }
void ff_j_rev_dct4(DCTBLOCK data) { int32_t tmp0, tmp1, tmp2, tmp3; int32_t tmp10, tmp11, tmp12, tmp13; int32_t z1; int32_t d0, d2, d4, d6; register DCTELEM *dataptr; int rowctr; /* Pass 1: process rows. */ /* Note results are scaled up by sqrt(8) compared to a true IDCT; */ /* furthermore, we scale the results by 2**PASS1_BITS. */ data[0] += 4; dataptr = data; for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) { /* Due to quantization, we will usually find that many of the input * coefficients are zero, especially the AC terms. We can exploit this * by short-circuiting the IDCT calculation for any row in which all * the AC terms are zero. In that case each output is equal to the * DC coefficient (with scale factor as needed). * With typical images and quantization tables, half or more of the * row DCT calculations can be simplified this way. */ register int *idataptr = (int*)dataptr; d0 = dataptr[0]; d2 = dataptr[1]; d4 = dataptr[2]; d6 = dataptr[3]; if ((d2 | d4 | d6) == 0) { /* AC terms all zero */ if (d0) { /* Compute a 32 bit value to assign. */ DCTELEM dcval = (DCTELEM) (d0 << PASS1_BITS); register int v = (dcval & 0xffff) | ((dcval << 16) & 0xffff0000); idataptr[0] = v; idataptr[1] = v; } dataptr += DCTSTRIDE; /* advance pointer to next row */ continue; } /* Even part: reverse the even part of the forward DCT. */ /* The rotator is sqrt(2)*c(-6). */ if (d6) { if (d2) { /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */ z1 = MULTIPLY(d2 + d6, FIX_0_541196100); tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); tmp0 = (d0 + d4) << CONST_BITS; tmp1 = (d0 - d4) << CONST_BITS; tmp10 = tmp0 + tmp3; tmp13 = tmp0 - tmp3; tmp11 = tmp1 + tmp2; tmp12 = tmp1 - tmp2; } else { /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */ tmp2 = MULTIPLY(-d6, FIX_1_306562965); tmp3 = MULTIPLY(d6, FIX_0_541196100); tmp0 = (d0 + d4) << CONST_BITS; tmp1 = (d0 - d4) << CONST_BITS; tmp10 = tmp0 + tmp3; tmp13 = tmp0 - tmp3; tmp11 = tmp1 + tmp2; tmp12 = tmp1 - tmp2; } } else { if (d2) { /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */ tmp2 = MULTIPLY(d2, FIX_0_541196100); tmp3 = MULTIPLY(d2, FIX_1_306562965); tmp0 = (d0 + d4) << CONST_BITS; tmp1 = (d0 - d4) << CONST_BITS; tmp10 = tmp0 + tmp3; tmp13 = tmp0 - tmp3; tmp11 = tmp1 + tmp2; tmp12 = tmp1 - tmp2; } else { /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */ tmp10 = tmp13 = (d0 + d4) << CONST_BITS; tmp11 = tmp12 = (d0 - d4) << CONST_BITS; } } /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ dataptr[0] = (DCTELEM) DESCALE(tmp10, CONST_BITS-PASS1_BITS); dataptr[1] = (DCTELEM) DESCALE(tmp11, CONST_BITS-PASS1_BITS); dataptr[2] = (DCTELEM) DESCALE(tmp12, CONST_BITS-PASS1_BITS); dataptr[3] = (DCTELEM) DESCALE(tmp13, CONST_BITS-PASS1_BITS); dataptr += DCTSTRIDE; /* advance pointer to next row */ } /* Pass 2: process columns. */ /* Note that we must descale the results by a factor of 8 == 2**3, */ /* and also undo the PASS1_BITS scaling. */ dataptr = data; for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) { /* Columns of zeroes can be exploited in the same way as we did with rows. * However, the row calculation has created many nonzero AC terms, so the * simplification applies less often (typically 5% to 10% of the time). * On machines with very fast multiplication, it's possible that the * test takes more time than it's worth. In that case this section * may be commented out. */ d0 = dataptr[DCTSTRIDE*0]; d2 = dataptr[DCTSTRIDE*1]; d4 = dataptr[DCTSTRIDE*2]; d6 = dataptr[DCTSTRIDE*3]; /* Even part: reverse the even part of the forward DCT. */ /* The rotator is sqrt(2)*c(-6). */ if (d6) { if (d2) { /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */ z1 = MULTIPLY(d2 + d6, FIX_0_541196100); tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); tmp0 = (d0 + d4) << CONST_BITS; tmp1 = (d0 - d4) << CONST_BITS; tmp10 = tmp0 + tmp3; tmp13 = tmp0 - tmp3; tmp11 = tmp1 + tmp2; tmp12 = tmp1 - tmp2; } else { /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */ tmp2 = MULTIPLY(-d6, FIX_1_306562965); tmp3 = MULTIPLY(d6, FIX_0_541196100); tmp0 = (d0 + d4) << CONST_BITS; tmp1 = (d0 - d4) << CONST_BITS; tmp10 = tmp0 + tmp3; tmp13 = tmp0 - tmp3; tmp11 = tmp1 + tmp2; tmp12 = tmp1 - tmp2; } } else { if (d2) { /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */ tmp2 = MULTIPLY(d2, FIX_0_541196100); tmp3 = MULTIPLY(d2, FIX_1_306562965); tmp0 = (d0 + d4) << CONST_BITS; tmp1 = (d0 - d4) << CONST_BITS; tmp10 = tmp0 + tmp3; tmp13 = tmp0 - tmp3; tmp11 = tmp1 + tmp2; tmp12 = tmp1 - tmp2; } else { /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */ tmp10 = tmp13 = (d0 + d4) << CONST_BITS; tmp11 = tmp12 = (d0 - d4) << CONST_BITS; } } /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ dataptr[DCTSTRIDE*0] = tmp10 >> (CONST_BITS+PASS1_BITS+3); dataptr[DCTSTRIDE*1] = tmp11 >> (CONST_BITS+PASS1_BITS+3); dataptr[DCTSTRIDE*2] = tmp12 >> (CONST_BITS+PASS1_BITS+3); dataptr[DCTSTRIDE*3] = tmp13 >> (CONST_BITS+PASS1_BITS+3); dataptr++; /* advance pointer to next column */ } }
void ff_idct_sh4(int16_t *block) { DEFREG; int i; float tblock[8*8],*fblock; int ofs1,ofs2,ofs3; int fpscr; fp_single_enter(fpscr); /* row */ /* even part */ load_matrix(even_table); fblock = tblock+4; i = 8; do { fr0 = block[0]; fr1 = block[2]; fr2 = block[4]; fr3 = block[6]; block+=8; ftrv(); *--fblock = fr3; *--fblock = fr2; *--fblock = fr1; *--fblock = fr0; fblock+=8+4; } while(--i); block-=8*8; fblock-=8*8+4; load_matrix(odd_table); i = 8; do { float t0,t1,t2,t3; fr0 = block[1]; fr1 = block[3]; fr2 = block[5]; fr3 = block[7]; block+=8; ftrv(); t0 = *fblock++; t1 = *fblock++; t2 = *fblock++; t3 = *fblock++; fblock+=4; *--fblock = t0 - fr0; *--fblock = t1 - fr1; *--fblock = t2 - fr2; *--fblock = t3 - fr3; *--fblock = t3 + fr3; *--fblock = t2 + fr2; *--fblock = t1 + fr1; *--fblock = t0 + fr0; fblock+=8; } while(--i); block-=8*8; fblock-=8*8; /* col */ /* even part */ load_matrix(even_table); ofs1 = sizeof(float)*2*8; ofs2 = sizeof(float)*4*8; ofs3 = sizeof(float)*6*8; i = 8; #define OA(fblock,ofs) *(float*)((char*)fblock + ofs) do { fr0 = OA(fblock, 0); fr1 = OA(fblock,ofs1); fr2 = OA(fblock,ofs2); fr3 = OA(fblock,ofs3); ftrv(); OA(fblock,0 ) = fr0; OA(fblock,ofs1) = fr1; OA(fblock,ofs2) = fr2; OA(fblock,ofs3) = fr3; fblock++; } while(--i); fblock-=8; load_matrix(odd_table); i=8; do { float t0,t1,t2,t3; t0 = OA(fblock, 0); /* [8*0] */ t1 = OA(fblock,ofs1); /* [8*2] */ t2 = OA(fblock,ofs2); /* [8*4] */ t3 = OA(fblock,ofs3); /* [8*6] */ fblock+=8; fr0 = OA(fblock, 0); /* [8*1] */ fr1 = OA(fblock,ofs1); /* [8*3] */ fr2 = OA(fblock,ofs2); /* [8*5] */ fr3 = OA(fblock,ofs3); /* [8*7] */ fblock+=-8+1; ftrv(); block[8*0] = DESCALE(t0 + fr0,3); block[8*7] = DESCALE(t0 - fr0,3); block[8*1] = DESCALE(t1 + fr1,3); block[8*6] = DESCALE(t1 - fr1,3); block[8*2] = DESCALE(t2 + fr2,3); block[8*5] = DESCALE(t2 - fr2,3); block[8*3] = DESCALE(t3 + fr3,3); block[8*4] = DESCALE(t3 - fr3,3); block++; } while(--i); fp_single_leave(fpscr); }
GLOBAL void jpeg_fdct_islow( void ) { #ifdef PROFILING /* Profiling variables. Remove for actual WCET analyses. */ int iters_ctr1 = 0, min_ctr1 = 100000, max_ctr1 = 0; int iters_ctr2 = 0, min_ctr2 = 100000, max_ctr2 = 0; #endif INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; INT32 tmp10, tmp11, tmp12, tmp13; INT32 z1, z2, z3, z4, z5; DCTELEM *dataptr; int ctr; SHIFT_TEMPS /* Pass 1: process rows. */ /* Note results are scaled up by sqrt(8) compared to a true DCT; */ /* furthermore, we scale the results by 2**PASS1_BITS. */ dataptr = data; #ifdef PROFILING iters_ctr1 = 0; #endif _Pragma("loopbound min 8 max 8") for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { #ifdef PROFILING iters_ctr1++; #endif tmp0 = dataptr[0] + dataptr[7]; tmp7 = dataptr[0] - dataptr[7]; tmp1 = dataptr[1] + dataptr[6]; tmp6 = dataptr[1] - dataptr[6]; tmp2 = dataptr[2] + dataptr[5]; tmp5 = dataptr[2] - dataptr[5]; tmp3 = dataptr[3] + dataptr[4]; tmp4 = dataptr[3] - dataptr[4]; tmp10 = tmp0 + tmp3; tmp13 = tmp0 - tmp3; tmp11 = tmp1 + tmp2; tmp12 = tmp1 - tmp2; dataptr[0] = (DCTELEM) ((tmp10 + tmp11) << PASS1_BITS); dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS); z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865), CONST_BITS-PASS1_BITS); dataptr[6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065), CONST_BITS-PASS1_BITS); z1 = tmp4 + tmp7; z2 = tmp5 + tmp6; z3 = tmp4 + tmp6; z4 = tmp5 + tmp7; z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */ tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */ tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */ tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */ tmp7 = MULTIPLY(tmp7, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */ z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */ z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */ z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */ z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */ z3 += z5; z4 += z5; dataptr[7] = (DCTELEM) DESCALE(tmp4 + z1 + z3, CONST_BITS-PASS1_BITS); dataptr[5] = (DCTELEM) DESCALE(tmp5 + z2 + z4, CONST_BITS-PASS1_BITS); dataptr[3] = (DCTELEM) DESCALE(tmp6 + z2 + z3, CONST_BITS-PASS1_BITS); dataptr[1] = (DCTELEM) DESCALE(tmp7 + z1 + z4, CONST_BITS-PASS1_BITS); dataptr += DCTSIZE; /* advance pointer to next row */ } #ifdef PROFILING if ( iters_ctr1 < min_ctr1 ) min_ctr1 = iters_ctr1; if ( iters_ctr1 > max_ctr1 ) max_ctr1 = iters_ctr1; #endif dataptr = data; #ifdef PROFILING iters_ctr2 = 0; #endif _Pragma("loopbound min 8 max 8") for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { #ifdef PROFILING iters_ctr2++; #endif tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7]; tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7]; tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6]; tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6]; tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5]; tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5]; tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4]; tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4]; tmp10 = tmp0 + tmp3; tmp13 = tmp0 - tmp3; tmp11 = tmp1 + tmp2; tmp12 = tmp1 - tmp2; dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS); dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS); z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865), CONST_BITS+PASS1_BITS); dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065), CONST_BITS+PASS1_BITS); z1 = tmp4 + tmp7; z2 = tmp5 + tmp6; z3 = tmp4 + tmp6; z4 = tmp5 + tmp7; z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */ tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */ tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */ tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */ tmp7 = MULTIPLY(tmp7, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */ z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */ z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */ z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */ z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */ z3 += z5; z4 += z5; dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp4 + z1 + z3, CONST_BITS+PASS1_BITS); dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp5 + z2 + z4, CONST_BITS+PASS1_BITS); dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp6 + z2 + z3, CONST_BITS+PASS1_BITS); dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp7 + z1 + z4, CONST_BITS+PASS1_BITS); dataptr++; /* advance pointer to next column */ } #ifdef PROFILING if ( iters_ctr2 < min_ctr2 ) min_ctr2 = iters_ctr2; if ( iters_ctr2 > max_ctr2 ) max_ctr2 = iters_ctr2; #endif #ifdef PROFILING printf( "ctr1-loop: [%d, %d]\n", min_ctr1, max_ctr1 ); printf( "ctr2-loop: [%d, %d]\n", min_ctr2, max_ctr2 ); #endif }
static CvStatus CV_STDCALL icvImgToObs_DCT_32f_C1R( float * img, int imgStep, CvSize roi, float *obs, CvSize dctSize, CvSize obsSize, CvSize delta ) { /* dct transform matrices: horizontal and vertical */ work_t tab_x[MAX_DCT_SIZE * MAX_DCT_SIZE / 2 + 2]; work_t tab_y[MAX_DCT_SIZE * MAX_DCT_SIZE / 2 + 2]; /* temporary buffers for dct */ work_t temp0[MAX_DCT_SIZE * 4]; work_t temp1[MAX_DCT_SIZE * 4]; work_t *buffer = 0; work_t *buf_limit; double s; int y; int Nx, Ny; int n1 = dctSize.height, m1 = n1 / 2; int n2 = dctSize.width, m2 = n2 / 2; if( !img || !obs ) return CV_NULLPTR_ERR; if( roi.width <= 0 || roi.height <= 0 ) return CV_BADSIZE_ERR; if( delta.width <= 0 || delta.height <= 0 ) return CV_BADRANGE_ERR; if( obsSize.width <= 0 || dctSize.width < obsSize.width || obsSize.height <= 0 || dctSize.height < obsSize.height ) return CV_BADRANGE_ERR; if( dctSize.width > MAX_DCT_SIZE || dctSize.height > MAX_DCT_SIZE ) return CV_BADRANGE_ERR; Nx = (roi.width - dctSize.width + delta.width) / delta.width; Ny = (roi.height - dctSize.height + delta.height) / delta.height; if( Nx <= 0 || Ny <= 0 ) return CV_BADRANGE_ERR; buffer = (work_t *)cvAlloc( roi.width * obsSize.height * sizeof( buffer[0] )); if( !buffer ) return CV_OUTOFMEM_ERR; icvCalcDCTMatrix( tab_x, dctSize.width ); icvCalcDCTMatrix( tab_y, dctSize.height ); buf_limit = buffer + obsSize.height * roi.width; imgStep /= sizeof(img[0]); for( y = 0; y < Ny; y++, img += delta.height * imgStep ) { int x, i, j, k; work_t k0 = 0; /* do transfroms for each column. Calc only first obsSize.height DCT coefficients */ for( x = 0; x < roi.width; x++ ) { float is = 0; work_t *buf = buffer + x; work_t *tab = tab_y + 2; if( n1 & 1 ) { is = img[x + m1 * imgStep]; k0 = ((work_t) is) * tab[-1]; } /* first coefficient */ for( j = 0; j < m1; j++ ) { float t0 = img[x + j * imgStep]; float t1 = img[x + (n1 - 1 - j) * imgStep]; float t2 = t0 + t1; t0 -= t1; temp0[j] = (work_t) t2; is += t2; temp1[j] = (work_t) t0; } buf[0] = DESCALE( is * tab[-2], PASS1_SHIFT ); if( (buf += roi.width) >= buf_limit ) continue; /* other coefficients */ for( ;; ) { s = 0; for( k = 0; k < m1; k++ ) s += temp1[k] * tab[k]; buf[0] = DESCALE( s, PASS1_SHIFT ); if( (buf += roi.width) >= buf_limit ) break; tab += m1; s = 0; if( n1 & 1 ) { k0 = -k0; s = k0; } for( k = 0; k < m1; k++ ) s += temp0[k] * tab[k]; buf[0] = DESCALE( s, PASS1_SHIFT ); tab += m1; if( (buf += roi.width) >= buf_limit ) break; } } k0 = 0; /* do transforms for rows. */ for( x = 0; x + dctSize.width <= roi.width; x += delta.width ) { for( i = 0; i < obsSize.height; i++ ) { work_t *buf = buffer + x + roi.width * i; work_t *tab = tab_x + 2; float *obs_limit = obs + obsSize.width; s = 0; if( n2 & 1 ) { s = buf[m2]; k0 = (work_t) (s * tab[-1]); } /* first coefficient */ for( j = 0; j < m2; j++ ) { work_t t0 = buf[j]; work_t t1 = buf[n2 - 1 - j]; work_t t2 = t0 + t1; t0 -= t1; temp0[j] = (work_t) t2; s += t2; temp1[j] = (work_t) t0; } *obs++ = (float) DESCALE( s * tab[-2], PASS2_SHIFT ); if( obs == obs_limit ) continue; /* other coefficients */ for( ;; ) { s = 0; for( k = 0; k < m2; k++ ) s += temp1[k] * tab[k]; obs[0] = (float) DESCALE( s, PASS2_SHIFT ); if( ++obs == obs_limit ) break; tab += m2; s = 0; if( n2 & 1 ) { k0 = -k0; s = k0; } for( k = 0; k < m2; k++ ) s += temp0[k] * tab[k]; obs[0] = (float) DESCALE( s, PASS2_SHIFT ); tab += m2; if( ++obs == obs_limit ) break; } } } } cvFree( &buffer ); return CV_NO_ERR; }
METHODDEF void start_pass (j_decompress_ptr cinfo) { my_idct_ptr idct = (my_idct_ptr) cinfo->idct; int ci, i; jpeg_component_info *compptr; int method = 0; inverse_DCT_method_ptr method_ptr = NULL; JQUANT_TBL * qtbl; for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; ci++, compptr++) { /* Select the proper IDCT routine for this component's scaling */ switch (compptr->DCT_scaled_size) { #ifdef IDCT_SCALING_SUPPORTED case 1: method_ptr = jpeg_idct_1x1; method = JDCT_ISLOW; /* jidctred uses islow-style table */ break; case 2: method_ptr = jpeg_idct_2x2; method = JDCT_ISLOW; /* jidctred uses islow-style table */ break; case 4: method_ptr = jpeg_idct_4x4; method = JDCT_ISLOW; /* jidctred uses islow-style table */ break; #endif case DCTSIZE: switch (cinfo->dct_method) { #ifdef DCT_ISLOW_SUPPORTED case JDCT_ISLOW: method_ptr = jpeg_idct_islow; method = JDCT_ISLOW; break; #endif #ifdef DCT_IFAST_SUPPORTED case JDCT_IFAST: method_ptr = jpeg_idct_ifast; method = JDCT_IFAST; break; #endif #ifdef DCT_FLOAT_SUPPORTED case JDCT_FLOAT: method_ptr = jpeg_idct_float; method = JDCT_FLOAT; break; #endif default: ERREXIT(cinfo, JERR_NOT_COMPILED); break; } break; default: ERREXIT1(cinfo, JERR_BAD_DCTSIZE, compptr->DCT_scaled_size); break; } idct->pub.inverse_DCT[ci] = method_ptr; /* Create multiplier table from quant table. * However, we can skip this if the component is uninteresting * or if we already built the table. Also, if no quant table * has yet been saved for the component, we leave the * multiplier table all-zero; we'll be reading zeroes from the * coefficient controller's buffer anyway. */ if (! compptr->component_needed || idct->cur_method[ci] == method) continue; qtbl = compptr->quant_table; if (qtbl == NULL) /* happens if no data yet for component */ continue; idct->cur_method[ci] = method; switch (method) { #ifdef PROVIDE_ISLOW_TABLES case JDCT_ISLOW: { /* For LL&M IDCT method, multipliers are equal to raw quantization * coefficients, but are stored in natural order as ints. */ ISLOW_MULT_TYPE * ismtbl = (ISLOW_MULT_TYPE *) compptr->dct_table; for (i = 0; i < DCTSIZE2; i++) { ismtbl[i] = (ISLOW_MULT_TYPE) qtbl->quantval[jpeg_zigzag_order[i]]; } } break; #endif #ifdef DCT_IFAST_SUPPORTED case JDCT_IFAST: { /* For AA&N IDCT method, multipliers are equal to quantization * coefficients scaled by scalefactor[row]*scalefactor[col], where * scalefactor[0] = 1 * scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7 * For integer operation, the multiplier table is to be scaled by * IFAST_SCALE_BITS. The multipliers are stored in natural order. */ IFAST_MULT_TYPE * ifmtbl = (IFAST_MULT_TYPE *) compptr->dct_table; #define CONST_BITS 14 static const INT16 aanscales[DCTSIZE2] = { /* precomputed values scaled up by 14 bits */ 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520, 22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270, 21407, 29692, 27969, 25172, 21407, 16819, 11585, 5906, 19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315, 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520, 12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552, 8867, 12299, 11585, 10426, 8867, 6967, 4799, 2446, 4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247 }; SHIFT_TEMPS for (i = 0; i < DCTSIZE2; i++) { ifmtbl[i] = (IFAST_MULT_TYPE) DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[jpeg_zigzag_order[i]], (INT32) aanscales[i]), CONST_BITS-IFAST_SCALE_BITS); } } break; #endif #ifdef DCT_FLOAT_SUPPORTED case JDCT_FLOAT: { /* For float AA&N IDCT method, multipliers are equal to quantization * coefficients scaled by scalefactor[row]*scalefactor[col], where * scalefactor[0] = 1 * scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7 * The multipliers are stored in natural order. */ FLOAT_MULT_TYPE * fmtbl = (FLOAT_MULT_TYPE *) compptr->dct_table; int row, col; static const double aanscalefactor[DCTSIZE] = { 1.0, 1.387039845, 1.306562965, 1.175875602, 1.0, 0.785694958, 0.541196100, 0.275899379 }; i = 0; for (row = 0; row < DCTSIZE; row++) { for (col = 0; col < DCTSIZE; col++) { fmtbl[i] = (FLOAT_MULT_TYPE) ((double) qtbl->quantval[jpeg_zigzag_order[i]] * aanscalefactor[row] * aanscalefactor[col]); i++; } } } break; #endif default: ERREXIT(cinfo, JERR_NOT_COMPILED); break; } } }
METHODDEF void start_input_pass (j_decompress_ptr cinfo) { my_idct_ptr idct = (my_idct_ptr) cinfo->idct; int ci, qtblno, i; jpeg_component_info *compptr; JQUANT_TBL * qtbl; for (ci = 0; ci < cinfo->comps_in_scan; ci++) { compptr = cinfo->cur_comp_info[ci]; qtblno = compptr->quant_tbl_no; /* Make sure specified quantization table is present */ if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS || cinfo->quant_tbl_ptrs[qtblno] == NULL) ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno); qtbl = cinfo->quant_tbl_ptrs[qtblno]; /* Create multiplier table from quant table, unless we already did so. */ if (compptr->dct_table != NULL) continue; switch (idct->real_method[compptr->component_index]) { #ifdef PROVIDE_ISLOW_TABLES case JDCT_ISLOW: { /* For LL&M IDCT method, multipliers are equal to raw quantization * coefficients, but are stored in natural order as ints. */ ISLOW_MULT_TYPE * ismtbl; compptr->dct_table = (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, DCTSIZE2 * SIZEOF(ISLOW_MULT_TYPE)); ismtbl = (ISLOW_MULT_TYPE *) compptr->dct_table; for (i = 0; i < DCTSIZE2; i++) { ismtbl[i] = (ISLOW_MULT_TYPE) qtbl->quantval[ZIG[i]]; } } break; #endif #ifdef DCT_IFAST_SUPPORTED case JDCT_IFAST: { /* For AA&N IDCT method, multipliers are equal to quantization * coefficients scaled by scalefactor[row]*scalefactor[col], where * scalefactor[0] = 1 * scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7 * For integer operation, the multiplier table is to be scaled by * IFAST_SCALE_BITS. The multipliers are stored in natural order. */ IFAST_MULT_TYPE * ifmtbl; #define CONST_BITS 14 static const INT16 aanscales[DCTSIZE2] = { /* precomputed values scaled up by 14 bits */ 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520, 22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270, 21407, 29692, 27969, 25172, 21407, 16819, 11585, 5906, 19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315, 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520, 12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552, 8867, 12299, 11585, 10426, 8867, 6967, 4799, 2446, 4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247 }; SHIFT_TEMPS compptr->dct_table = (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, DCTSIZE2 * SIZEOF(IFAST_MULT_TYPE)); ifmtbl = (IFAST_MULT_TYPE *) compptr->dct_table; for (i = 0; i < DCTSIZE2; i++) { ifmtbl[i] = (IFAST_MULT_TYPE) DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[ZIG[i]], (INT32) aanscales[i]), CONST_BITS-IFAST_SCALE_BITS); } } break; #endif #ifdef DCT_FLOAT_SUPPORTED case JDCT_FLOAT: { /* For float AA&N IDCT method, multipliers are equal to quantization * coefficients scaled by scalefactor[row]*scalefactor[col], where * scalefactor[0] = 1 * scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7 * The multipliers are stored in natural order. */ FLOAT_MULT_TYPE * fmtbl; int row, col; static const double aanscalefactor[DCTSIZE] = { 1.0, 1.387039845, 1.306562965, 1.175875602, 1.0, 0.785694958, 0.541196100, 0.275899379 }; compptr->dct_table = (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, DCTSIZE2 * SIZEOF(FLOAT_MULT_TYPE)); fmtbl = (FLOAT_MULT_TYPE *) compptr->dct_table; i = 0; for (row = 0; row < DCTSIZE; row++) { for (col = 0; col < DCTSIZE; col++) { fmtbl[i] = (FLOAT_MULT_TYPE) ((double) qtbl->quantval[ZIG[i]] * aanscalefactor[row] * aanscalefactor[col]); i++; } } } break; #endif default: ERREXIT(cinfo, JERR_NOT_COMPILED); break; } } }
jpeg_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr, JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col) { INT32 tmp0, tmp1, tmp2, tmp3; INT32 tmp10, tmp11, tmp12, tmp13; INT32 z1, z2, z3, z4, z5; JCOEFPTR inptr; ISLOW_MULT_TYPE * quantptr; int * wsptr; JSAMPROW outptr; JSAMPLE *range_limit = IDCT_range_limit(cinfo); int ctr; int workspace[DCTSIZE2]; /* buffers data between passes */ SHIFT_TEMPS /* Pass 1: process columns from input, store into work array. */ /* Note results are scaled up by sqrt(8) compared to a true IDCT; */ /* furthermore, we scale the results by 2**PASS1_BITS. */ inptr = coef_block; quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; wsptr = workspace; for (ctr = DCTSIZE; ctr > 0; ctr--) { /* Due to quantization, we will usually find that many of the input * coefficients are zero, especially the AC terms. We can exploit this * by short-circuiting the IDCT calculation for any column in which all * the AC terms are zero. In that case each output is equal to the * DC coefficient (with scale factor as needed). * With typical images and quantization tables, half or more of the * column DCT calculations can be simplified this way. */ if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 && inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 && inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 && inptr[DCTSIZE*7] == 0) { /* AC terms all zero */ int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS; wsptr[DCTSIZE*0] = dcval; wsptr[DCTSIZE*1] = dcval; wsptr[DCTSIZE*2] = dcval; wsptr[DCTSIZE*3] = dcval; wsptr[DCTSIZE*4] = dcval; wsptr[DCTSIZE*5] = dcval; wsptr[DCTSIZE*6] = dcval; wsptr[DCTSIZE*7] = dcval; inptr++; /* advance pointers to next column */ quantptr++; wsptr++; continue; } /* Even part: reverse the even part of the forward DCT. */ /* The rotator is sqrt(2)*c(-6). */ z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); z1 = MULTIPLY(z2 + z3, FIX_0_541196100); tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065); tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865); z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); tmp0 = (z2 + z3) << CONST_BITS; tmp1 = (z2 - z3) << CONST_BITS; tmp10 = tmp0 + tmp3; tmp13 = tmp0 - tmp3; tmp11 = tmp1 + tmp2; tmp12 = tmp1 - tmp2; /* Odd part per figure 8; the matrix is unitary and hence its * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. */ tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); z1 = tmp0 + tmp3; z2 = tmp1 + tmp2; z3 = tmp0 + tmp2; z4 = tmp1 + tmp3; z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */ tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */ tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */ tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */ tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */ z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */ z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */ z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */ z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */ z3 += z5; z4 += z5; tmp0 += z1 + z3; tmp1 += z2 + z4; tmp2 += z2 + z3; tmp3 += z1 + z4; /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ wsptr[DCTSIZE*0] = (int) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS); wsptr[DCTSIZE*7] = (int) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS); wsptr[DCTSIZE*1] = (int) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS); wsptr[DCTSIZE*6] = (int) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS); wsptr[DCTSIZE*2] = (int) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS); wsptr[DCTSIZE*5] = (int) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS); wsptr[DCTSIZE*3] = (int) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS); wsptr[DCTSIZE*4] = (int) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS); inptr++; /* advance pointers to next column */ quantptr++; wsptr++; } /* Pass 2: process rows from work array, store into output array. */ /* Note that we must descale the results by a factor of 8 == 2**3, */ /* and also undo the PASS1_BITS scaling. */ wsptr = workspace; for (ctr = 0; ctr < DCTSIZE; ctr++) { outptr = output_buf[ctr] + output_col; /* Rows of zeroes can be exploited in the same way as we did with columns. * However, the column calculation has created many nonzero AC terms, so * the simplification applies less often (typically 5% to 10% of the time). * On machines with very fast multiplication, it's possible that the * test takes more time than it's worth. In that case this section * may be commented out. */ #ifndef NO_ZERO_ROW_TEST if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 && wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) { /* AC terms all zero */ JSAMPLE dcval = range_limit[(int) DESCALE((INT32) wsptr[0], PASS1_BITS+3) & RANGE_MASK]; outptr[0] = dcval; outptr[1] = dcval; outptr[2] = dcval; outptr[3] = dcval; outptr[4] = dcval; outptr[5] = dcval; outptr[6] = dcval; outptr[7] = dcval; wsptr += DCTSIZE; /* advance pointer to next row */ continue; } #endif /* Even part: reverse the even part of the forward DCT. */ /* The rotator is sqrt(2)*c(-6). */ z2 = (INT32) wsptr[2]; z3 = (INT32) wsptr[6]; z1 = MULTIPLY(z2 + z3, FIX_0_541196100); tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065); tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865); tmp0 = ((INT32) wsptr[0] + (INT32) wsptr[4]) << CONST_BITS; tmp1 = ((INT32) wsptr[0] - (INT32) wsptr[4]) << CONST_BITS; tmp10 = tmp0 + tmp3; tmp13 = tmp0 - tmp3; tmp11 = tmp1 + tmp2; tmp12 = tmp1 - tmp2; /* Odd part per figure 8; the matrix is unitary and hence its * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. */ tmp0 = (INT32) wsptr[7]; tmp1 = (INT32) wsptr[5]; tmp2 = (INT32) wsptr[3]; tmp3 = (INT32) wsptr[1]; z1 = tmp0 + tmp3; z2 = tmp1 + tmp2; z3 = tmp0 + tmp2; z4 = tmp1 + tmp3; z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */ tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */ tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */ tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */ tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */ z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */ z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */ z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */ z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */ z3 += z5; z4 += z5; tmp0 += z1 + z3; tmp1 += z2 + z4; tmp2 += z2 + z3; tmp3 += z1 + z4; /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ outptr[0] = range_limit[(int) DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3) & RANGE_MASK]; outptr[7] = range_limit[(int) DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3) & RANGE_MASK]; outptr[1] = range_limit[(int) DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3) & RANGE_MASK]; outptr[6] = range_limit[(int) DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3) & RANGE_MASK]; outptr[2] = range_limit[(int) DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3) & RANGE_MASK]; outptr[5] = range_limit[(int) DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3) & RANGE_MASK]; outptr[3] = range_limit[(int) DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3) & RANGE_MASK]; outptr[4] = range_limit[(int) DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3) & RANGE_MASK]; wsptr += DCTSIZE; /* advance pointer to next row */ } }
GLOBAL void jpeg_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr, JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col) { INT32 tmp0, tmp2, tmp10, tmp12; INT32 z1, z2, z3, z4; JCOEFPTR inptr; ISLOW_MULT_TYPE * quantptr; int * wsptr; JSAMPROW outptr; JSAMPLE *range_limit = IDCT_range_limit(cinfo); int ctr; int workspace[DCTSIZE*4]; /* buffers data between passes */ SHIFT_TEMPS /* Pass 1: process columns from input, store into work array. */ inptr = coef_block; quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; wsptr = workspace; for (ctr = DCTSIZE; ctr > 0; inptr++, quantptr++, wsptr++, ctr--) { /* Don't bother to process column 4, because second pass won't use it */ if (ctr == DCTSIZE-4) continue; if ((inptr[DCTSIZE*1] | inptr[DCTSIZE*2] | inptr[DCTSIZE*3] | inptr[DCTSIZE*5] | inptr[DCTSIZE*6] | inptr[DCTSIZE*7]) == 0) { /* AC terms all zero; we need not examine term 4 for 4x4 output */ int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS; wsptr[DCTSIZE*0] = dcval; wsptr[DCTSIZE*1] = dcval; wsptr[DCTSIZE*2] = dcval; wsptr[DCTSIZE*3] = dcval; continue; } /* Even part */ tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); tmp0 <<= (CONST_BITS+1); z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); tmp2 = MULTIPLY(z2, FIX_1_847759065) + MULTIPLY(z3, - FIX_0_765366865); tmp10 = tmp0 + tmp2; tmp12 = tmp0 - tmp2; /* Odd part */ z1 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); z2 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); z4 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); tmp0 = MULTIPLY(z1, - FIX_0_211164243) /* sqrt(2) * (c3-c1) */ + MULTIPLY(z2, FIX_1_451774981) /* sqrt(2) * (c3+c7) */ + MULTIPLY(z3, - FIX_2_172734803) /* sqrt(2) * (-c1-c5) */ + MULTIPLY(z4, FIX_1_061594337); /* sqrt(2) * (c5+c7) */ tmp2 = MULTIPLY(z1, - FIX_0_509795579) /* sqrt(2) * (c7-c5) */ + MULTIPLY(z2, - FIX_0_601344887) /* sqrt(2) * (c5-c1) */ + MULTIPLY(z3, FIX_0_899976223) /* sqrt(2) * (c3-c7) */ + MULTIPLY(z4, FIX_2_562915447); /* sqrt(2) * (c1+c3) */ /* Final output stage */ wsptr[DCTSIZE*0] = (int) DESCALE(tmp10 + tmp2, CONST_BITS-PASS1_BITS+1); wsptr[DCTSIZE*3] = (int) DESCALE(tmp10 - tmp2, CONST_BITS-PASS1_BITS+1); wsptr[DCTSIZE*1] = (int) DESCALE(tmp12 + tmp0, CONST_BITS-PASS1_BITS+1); wsptr[DCTSIZE*2] = (int) DESCALE(tmp12 - tmp0, CONST_BITS-PASS1_BITS+1); } /* Pass 2: process 4 rows from work array, store into output array. */ wsptr = workspace; for (ctr = 0; ctr < 4; ctr++) { outptr = output_buf[ctr] + output_col; /* It's not clear whether a zero row test is worthwhile here ... */ #ifndef NO_ZERO_ROW_TEST if ((wsptr[1] | wsptr[2] | wsptr[3] | wsptr[5] | wsptr[6] | wsptr[7]) == 0) { /* AC terms all zero */ JSAMPLE dcval = range_limit[(int) DESCALE((INT32) wsptr[0], PASS1_BITS+3) & RANGE_MASK]; outptr[0] = dcval; outptr[1] = dcval; outptr[2] = dcval; outptr[3] = dcval; wsptr += DCTSIZE; /* advance pointer to next row */ continue; } #endif /* Even part */ tmp0 = ((INT32) wsptr[0]) << (CONST_BITS+1); tmp2 = MULTIPLY((INT32) wsptr[2], FIX_1_847759065) + MULTIPLY((INT32) wsptr[6], - FIX_0_765366865); tmp10 = tmp0 + tmp2; tmp12 = tmp0 - tmp2; /* Odd part */ z1 = (INT32) wsptr[7]; z2 = (INT32) wsptr[5]; z3 = (INT32) wsptr[3]; z4 = (INT32) wsptr[1]; tmp0 = MULTIPLY(z1, - FIX_0_211164243) /* sqrt(2) * (c3-c1) */ + MULTIPLY(z2, FIX_1_451774981) /* sqrt(2) * (c3+c7) */ + MULTIPLY(z3, - FIX_2_172734803) /* sqrt(2) * (-c1-c5) */ + MULTIPLY(z4, FIX_1_061594337); /* sqrt(2) * (c5+c7) */ tmp2 = MULTIPLY(z1, - FIX_0_509795579) /* sqrt(2) * (c7-c5) */ + MULTIPLY(z2, - FIX_0_601344887) /* sqrt(2) * (c5-c1) */ + MULTIPLY(z3, FIX_0_899976223) /* sqrt(2) * (c3-c7) */ + MULTIPLY(z4, FIX_2_562915447); /* sqrt(2) * (c1+c3) */ /* Final output stage */ outptr[0] = range_limit[(int) DESCALE(tmp10 + tmp2, CONST_BITS+PASS1_BITS+3+1) & RANGE_MASK]; outptr[3] = range_limit[(int) DESCALE(tmp10 - tmp2, CONST_BITS+PASS1_BITS+3+1) & RANGE_MASK]; outptr[1] = range_limit[(int) DESCALE(tmp12 + tmp0, CONST_BITS+PASS1_BITS+3+1) & RANGE_MASK]; outptr[2] = range_limit[(int) DESCALE(tmp12 - tmp0, CONST_BITS+PASS1_BITS+3+1) & RANGE_MASK]; wsptr += DCTSIZE; /* advance pointer to next row */ } }
void idct_sh4(DCTELEM *block) { DEFREG; int i; float tblock[8*8],*fblock; /* row */ /* even part */ load_matrix(even_table); fblock = tblock; i = 8; do { fr0 = block[0]; fr1 = block[2]; fr2 = block[4]; fr3 = block[6]; block+=8; ftrv(); fblock[0] = fr0; fblock[2] = fr1; fblock[4] = fr2; fblock[6] = fr3; fblock+=8; } while(--i); block-=8*8; fblock-=8*8; load_matrix(odd_table); i = 8; do { float t0,t1,t2,t3; fr0 = block[1]; fr1 = block[3]; fr2 = block[5]; fr3 = block[7]; block+=8; ftrv(); t0 = fblock[0]; t1 = fblock[2]; t2 = fblock[4]; t3 = fblock[6]; fblock[0] = t0 + fr0; fblock[7] = t0 - fr0; fblock[1] = t1 + fr1; fblock[6] = t1 - fr1; fblock[2] = t2 + fr2; fblock[5] = t2 - fr2; fblock[3] = t3 + fr3; fblock[4] = t3 - fr3; fblock+=8; } while(--i); block-=8*8; fblock-=8*8; /* col */ /* even part */ load_matrix(even_table); i = 8; do { fr0 = fblock[8*0]; fr1 = fblock[8*2]; fr2 = fblock[8*4]; fr3 = fblock[8*6]; ftrv(); fblock[8*0] = fr0; fblock[8*2] = fr1; fblock[8*4] = fr2; fblock[8*6] = fr3; fblock++; } while(--i); fblock-=8; load_matrix(odd_table); i=8; do { float t0,t1,t2,t3; fr0 = fblock[8*1]; fr1 = fblock[8*3]; fr2 = fblock[8*5]; fr3 = fblock[8*7]; ftrv(); t0 = fblock[8*0]; t1 = fblock[8*2]; t2 = fblock[8*4]; t3 = fblock[8*6]; fblock++; block[8*0] = DESCALE(t0 + fr0,3); block[8*7] = DESCALE(t0 - fr0,3); block[8*1] = DESCALE(t1 + fr1,3); block[8*6] = DESCALE(t1 - fr1,3); block[8*2] = DESCALE(t2 + fr2,3); block[8*5] = DESCALE(t2 - fr2,3); block[8*3] = DESCALE(t3 + fr3,3); block[8*4] = DESCALE(t3 - fr3,3); block++; } while(--i); }
GLOBAL void jpeg_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr, JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col) { INT32 tmp0, tmp10, z1; JCOEFPTR inptr; ISLOW_MULT_TYPE * quantptr; int * wsptr; JSAMPROW outptr; JSAMPLE *range_limit = IDCT_range_limit(cinfo); int ctr; int workspace[DCTSIZE*2]; /* buffers data between passes */ SHIFT_TEMPS /* Pass 1: process columns from input, store into work array. */ inptr = coef_block; quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; wsptr = workspace; for (ctr = DCTSIZE; ctr > 0; inptr++, quantptr++, wsptr++, ctr--) { /* Don't bother to process columns 2,4,6 */ if (ctr == DCTSIZE-2 || ctr == DCTSIZE-4 || ctr == DCTSIZE-6) continue; if ((inptr[DCTSIZE*1] | inptr[DCTSIZE*3] | inptr[DCTSIZE*5] | inptr[DCTSIZE*7]) == 0) { /* AC terms all zero; we need not examine terms 2,4,6 for 2x2 output */ int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS; wsptr[DCTSIZE*0] = dcval; wsptr[DCTSIZE*1] = dcval; continue; } /* Even part */ z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); tmp10 = z1 << (CONST_BITS+2); /* Odd part */ z1 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); tmp0 = MULTIPLY(z1, - FIX_0_720959822); /* sqrt(2) * (c7-c5+c3-c1) */ z1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); tmp0 += MULTIPLY(z1, FIX_0_850430095); /* sqrt(2) * (-c1+c3+c5+c7) */ z1 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); tmp0 += MULTIPLY(z1, - FIX_1_272758580); /* sqrt(2) * (-c1+c3-c5-c7) */ z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); tmp0 += MULTIPLY(z1, FIX_3_624509785); /* sqrt(2) * (c1+c3+c5+c7) */ /* Final output stage */ wsptr[DCTSIZE*0] = (int) DESCALE(tmp10 + tmp0, CONST_BITS-PASS1_BITS+2); wsptr[DCTSIZE*1] = (int) DESCALE(tmp10 - tmp0, CONST_BITS-PASS1_BITS+2); } /* Pass 2: process 2 rows from work array, store into output array. */ wsptr = workspace; for (ctr = 0; ctr < 2; ctr++) { outptr = output_buf[ctr] + output_col; /* It's not clear whether a zero row test is worthwhile here ... */ #ifndef NO_ZERO_ROW_TEST if ((wsptr[1] | wsptr[3] | wsptr[5] | wsptr[7]) == 0) { /* AC terms all zero */ JSAMPLE dcval = range_limit[(int) DESCALE((INT32) wsptr[0], PASS1_BITS+3) & RANGE_MASK]; outptr[0] = dcval; outptr[1] = dcval; wsptr += DCTSIZE; /* advance pointer to next row */ continue; } #endif /* Even part */ tmp10 = ((INT32) wsptr[0]) << (CONST_BITS+2); /* Odd part */ tmp0 = MULTIPLY((INT32) wsptr[7], - FIX_0_720959822) /* sqrt(2) * (c7-c5+c3-c1) */ + MULTIPLY((INT32) wsptr[5], FIX_0_850430095) /* sqrt(2) * (-c1+c3+c5+c7) */ + MULTIPLY((INT32) wsptr[3], - FIX_1_272758580) /* sqrt(2) * (-c1+c3-c5-c7) */ + MULTIPLY((INT32) wsptr[1], FIX_3_624509785); /* sqrt(2) * (c1+c3+c5+c7) */ /* Final output stage */ outptr[0] = range_limit[(int) DESCALE(tmp10 + tmp0, CONST_BITS+PASS1_BITS+3+2) & RANGE_MASK]; outptr[1] = range_limit[(int) DESCALE(tmp10 - tmp0, CONST_BITS+PASS1_BITS+3+2) & RANGE_MASK]; wsptr += DCTSIZE; /* advance pointer to next row */ } }
ff_jpeg_fdct_islow (DCTELEM * data) { int_fast32_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; int_fast32_t tmp10, tmp11, tmp12, tmp13; int_fast32_t z1, z2, z3, z4, z5; DCTELEM *dataptr; int ctr; row_fdct(data); /* Pass 2: process columns. * We remove the PASS1_BITS scaling, but leave the results scaled up * by an overall factor of 8. */ dataptr = data; for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7]; tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7]; tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6]; tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6]; tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5]; tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5]; tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4]; tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4]; /* Even part per LL&M figure 1 --- note that published figure is faulty; * rotator "sqrt(2)*c1" should be "sqrt(2)*c6". */ tmp10 = tmp0 + tmp3; tmp13 = tmp0 - tmp3; tmp11 = tmp1 + tmp2; tmp12 = tmp1 - tmp2; dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS); dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS); z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865), CONST_BITS+PASS1_BITS); dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065), CONST_BITS+PASS1_BITS); /* Odd part per figure 8 --- note paper omits factor of sqrt(2). * cK represents cos(K*pi/16). * i0..i3 in the paper are tmp4..tmp7 here. */ z1 = tmp4 + tmp7; z2 = tmp5 + tmp6; z3 = tmp4 + tmp6; z4 = tmp5 + tmp7; z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */ tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */ tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */ tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */ tmp7 = MULTIPLY(tmp7, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */ z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */ z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */ z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */ z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */ z3 += z5; z4 += z5; dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp4 + z1 + z3, CONST_BITS+PASS1_BITS); dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp5 + z2 + z4, CONST_BITS+PASS1_BITS); dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp6 + z2 + z3, CONST_BITS+PASS1_BITS); dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp7 + z1 + z4, CONST_BITS+PASS1_BITS); dataptr++; /* advance pointer to next column */ } }
jpeg_fdct_islow (DCTELEM * data, boolean jpeg8) { INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; INT32 tmp10, tmp11, tmp12, tmp13; INT32 z1, z2, z3, z4, z5; DCTELEM *dataptr; int ctr, pass1_bits; /* set pass1_bits */ if (jpeg8) pass1_bits = 2; else pass1_bits = 1; SHIFT_TEMPS /* Pass 1: process rows. */ /* Note results are scaled up by sqrt(8) compared to a true DCT; */ /* furthermore, we scale the results by 2**PASS1_BITS. */ dataptr = data; for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { tmp0 = dataptr[0] + dataptr[7]; tmp7 = dataptr[0] - dataptr[7]; tmp1 = dataptr[1] + dataptr[6]; tmp6 = dataptr[1] - dataptr[6]; tmp2 = dataptr[2] + dataptr[5]; tmp5 = dataptr[2] - dataptr[5]; tmp3 = dataptr[3] + dataptr[4]; tmp4 = dataptr[3] - dataptr[4]; /* Even part per LL&M figure 1 --- note that published figure is faulty; * rotator "sqrt(2)*c1" should be "sqrt(2)*c6". */ tmp10 = tmp0 + tmp3; tmp13 = tmp0 - tmp3; tmp11 = tmp1 + tmp2; tmp12 = tmp1 - tmp2; dataptr[0] = (DCTELEM) ((tmp10 + tmp11) << pass1_bits); dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << pass1_bits); if (jpeg8){ z1 = MULTIPLY16C16(tmp12 + tmp13, FIX_0_541196100); dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY16C16(tmp13, FIX_0_765366865), CONST_BITS-pass1_bits); dataptr[6] = (DCTELEM) DESCALE(z1 + MULTIPLY16C16(tmp12, - FIX_1_847759065), CONST_BITS-pass1_bits); } else { z1 = (tmp12 + tmp13) * FIX_0_541196100; dataptr[2] = (DCTELEM) DESCALE(z1 + ( tmp13 * FIX_0_765366865), CONST_BITS-pass1_bits); dataptr[6] = (DCTELEM) DESCALE(z1 + ( tmp12 * ( - FIX_1_847759065)), CONST_BITS-pass1_bits); } /* Odd part per figure 8 --- note paper omits factor of sqrt(2). * cK represents cos(K*pi/16). * i0..i3 in the paper are tmp4..tmp7 here. */ z1 = tmp4 + tmp7; z2 = tmp5 + tmp6; z3 = tmp4 + tmp6; z4 = tmp5 + tmp7; if (jpeg8) { z5 = MULTIPLY16C16(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */ tmp4 = MULTIPLY16C16(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */ tmp5 = MULTIPLY16C16(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */ tmp6 = MULTIPLY16C16(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */ tmp7 = MULTIPLY16C16(tmp7, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */ z1 = MULTIPLY16C16(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */ z2 = MULTIPLY16C16(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */ z3 = MULTIPLY16C16(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */ z4 = MULTIPLY16C16(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */ } else { z5 = (z3 + z4) * FIX_1_175875602; /* sqrt(2) * c3 */ tmp4 = tmp4 * FIX_0_298631336; /* sqrt(2) * (-c1+c3+c5-c7) */ tmp5 = tmp5 * FIX_2_053119869; /* sqrt(2) * ( c1+c3-c5+c7) */ tmp6 = tmp6 * FIX_3_072711026; /* sqrt(2) * ( c1+c3+c5-c7) */ tmp7 = tmp7 * FIX_1_501321110; /* sqrt(2) * ( c1+c3-c5-c7) */ z1 = z1 * (- FIX_0_899976223); /* sqrt(2) * (c7-c3) */ z2 = z2 * (- FIX_2_562915447); /* sqrt(2) * (-c1-c3) */ z3 = z3 * (- FIX_1_961570560); /* sqrt(2) * (-c3-c5) */ z4 = z4 * (- FIX_0_390180644); /* sqrt(2) * (c5-c3) */ } z3 += z5; z4 += z5; dataptr[7] = (DCTELEM) DESCALE(tmp4 + z1 + z3, CONST_BITS-pass1_bits); dataptr[5] = (DCTELEM) DESCALE(tmp5 + z2 + z4, CONST_BITS-pass1_bits); dataptr[3] = (DCTELEM) DESCALE(tmp6 + z2 + z3, CONST_BITS-pass1_bits); dataptr[1] = (DCTELEM) DESCALE(tmp7 + z1 + z4, CONST_BITS-pass1_bits); dataptr += DCTSIZE; /* advance pointer to next row */ } /* Pass 2: process columns. * We remove the PASS1_BITS scaling, but leave the results scaled up * by an overall factor of 8. */ dataptr = data; for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7]; tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7]; tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6]; tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6]; tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5]; tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5]; tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4]; tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4]; /* Even part per LL&M figure 1 --- note that published figure is faulty; * rotator "sqrt(2)*c1" should be "sqrt(2)*c6". */ tmp10 = tmp0 + tmp3; tmp13 = tmp0 - tmp3; tmp11 = tmp1 + tmp2; tmp12 = tmp1 - tmp2; dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp11, pass1_bits); dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp10 - tmp11, pass1_bits); if (jpeg8) { z1 = MULTIPLY16C16(tmp12 + tmp13, FIX_0_541196100); dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + MULTIPLY16C16(tmp13, FIX_0_765366865), CONST_BITS+pass1_bits); dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + MULTIPLY16C16(tmp12, - FIX_1_847759065), CONST_BITS+pass1_bits); } else { z1 = (tmp12 + tmp13) * FIX_0_541196100; dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + (tmp13 * FIX_0_765366865), CONST_BITS+pass1_bits); dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + (tmp12 * (- FIX_1_847759065)), CONST_BITS+pass1_bits); } /* Odd part per figure 8 --- note paper omits factor of sqrt(2). * cK represents cos(K*pi/16). * i0..i3 in the paper are tmp4..tmp7 here. */ z1 = tmp4 + tmp7; z2 = tmp5 + tmp6; z3 = tmp4 + tmp6; z4 = tmp5 + tmp7; if (jpeg8) { z5 = MULTIPLY16C16(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */ tmp4 = MULTIPLY16C16(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */ tmp5 = MULTIPLY16C16(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */ tmp6 = MULTIPLY16C16(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */ tmp7 = MULTIPLY16C16(tmp7, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */ z1 = MULTIPLY16C16(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */ z2 = MULTIPLY16C16(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */ z3 = MULTIPLY16C16(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */ z4 = MULTIPLY16C16(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */ } else { z5 = (z3 + z4) * FIX_1_175875602; /* sqrt(2) * c3 */ tmp4 = tmp4 * FIX_0_298631336; /* sqrt(2) * (-c1+c3+c5-c7) */ tmp5 = tmp5 * FIX_2_053119869; /* sqrt(2) * ( c1+c3-c5+c7) */ tmp6 = tmp6 * FIX_3_072711026; /* sqrt(2) * ( c1+c3+c5-c7) */ tmp7 = tmp7 * FIX_1_501321110; /* sqrt(2) * ( c1+c3-c5-c7) */ z1 = z1 * ( - FIX_0_899976223); /* sqrt(2) * (c7-c3) */ z2 = z2 * ( - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */ z3 = z3 * ( - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */ z4 = z4 * ( - FIX_0_390180644); /* sqrt(2) * (c5-c3) */ } z3 += z5; z4 += z5; dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp4 + z1 + z3, CONST_BITS+pass1_bits); dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp5 + z2 + z4, CONST_BITS+pass1_bits); dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp6 + z2 + z3, CONST_BITS+pass1_bits); dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp7 + z1 + z4, CONST_BITS+pass1_bits); dataptr++; /* advance pointer to next column */ } }
jpeg_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr, JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col) { FAST_FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; FAST_FLOAT tmp10, tmp11, tmp12, tmp13; FAST_FLOAT z5, z10, z11, z12, z13; JCOEFPTR inptr; FLOAT_MULT_TYPE * quantptr; FAST_FLOAT * wsptr; JSAMPROW outptr; JSAMPLE *range_limit = IDCT_range_limit(cinfo); int ctr; FAST_FLOAT workspace[DCTSIZE2]; /* buffers data between passes */ SHIFT_TEMPS /* Pass 1: process columns from input, store into work array. */ inptr = coef_block; quantptr = (FLOAT_MULT_TYPE *) compptr->dct_table; wsptr = workspace; for (ctr = DCTSIZE; ctr > 0; ctr--) { /* Due to quantization, we will usually find that many of the input * coefficients are zero, especially the AC terms. We can exploit this * by short-circuiting the IDCT calculation for any column in which all * the AC terms are zero. In that case each output is equal to the * DC coefficient (with scale factor as needed). * With typical images and quantization tables, half or more of the * column DCT calculations can be simplified this way. */ if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 && inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 && inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 && inptr[DCTSIZE*7] == 0) { /* AC terms all zero */ FAST_FLOAT dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); wsptr[DCTSIZE*0] = dcval; wsptr[DCTSIZE*1] = dcval; wsptr[DCTSIZE*2] = dcval; wsptr[DCTSIZE*3] = dcval; wsptr[DCTSIZE*4] = dcval; wsptr[DCTSIZE*5] = dcval; wsptr[DCTSIZE*6] = dcval; wsptr[DCTSIZE*7] = dcval; inptr++; /* advance pointers to next column */ quantptr++; wsptr++; continue; } /* Even part */ tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); tmp1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); tmp3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); tmp10 = tmp0 + tmp2; /* phase 3 */ tmp11 = tmp0 - tmp2; tmp13 = tmp1 + tmp3; /* phases 5-3 */ tmp12 = (tmp1 - tmp3) * ((FAST_FLOAT) 1.414213562) - tmp13; /* 2*c4 */ tmp0 = tmp10 + tmp13; /* phase 2 */ tmp3 = tmp10 - tmp13; tmp1 = tmp11 + tmp12; tmp2 = tmp11 - tmp12; /* Odd part */ tmp4 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); tmp5 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); tmp6 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); tmp7 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); z13 = tmp6 + tmp5; /* phase 6 */ z10 = tmp6 - tmp5; z11 = tmp4 + tmp7; z12 = tmp4 - tmp7; tmp7 = z11 + z13; /* phase 5 */ tmp11 = (z11 - z13) * ((FAST_FLOAT) 1.414213562); /* 2*c4 */ z5 = (z10 + z12) * ((FAST_FLOAT) 1.847759065); /* 2*c2 */ tmp10 = ((FAST_FLOAT) 1.082392200) * z12 - z5; /* 2*(c2-c6) */ tmp12 = ((FAST_FLOAT) -2.613125930) * z10 + z5; /* -2*(c2+c6) */ tmp6 = tmp12 - tmp7; /* phase 2 */ tmp5 = tmp11 - tmp6; tmp4 = tmp10 + tmp5; wsptr[DCTSIZE*0] = tmp0 + tmp7; wsptr[DCTSIZE*7] = tmp0 - tmp7; wsptr[DCTSIZE*1] = tmp1 + tmp6; wsptr[DCTSIZE*6] = tmp1 - tmp6; wsptr[DCTSIZE*2] = tmp2 + tmp5; wsptr[DCTSIZE*5] = tmp2 - tmp5; wsptr[DCTSIZE*4] = tmp3 + tmp4; wsptr[DCTSIZE*3] = tmp3 - tmp4; inptr++; /* advance pointers to next column */ quantptr++; wsptr++; } /* Pass 2: process rows from work array, store into output array. */ /* Note that we must descale the results by a factor of 8 == 2**3. */ wsptr = workspace; for (ctr = 0; ctr < DCTSIZE; ctr++) { outptr = output_buf[ctr] + output_col; /* Rows of zeroes can be exploited in the same way as we did with columns. * However, the column calculation has created many nonzero AC terms, so * the simplification applies less often (typically 5% to 10% of the time). * And testing floats for zero is relatively expensive, so we don't bother. */ /* Even part */ tmp10 = wsptr[0] + wsptr[4]; tmp11 = wsptr[0] - wsptr[4]; tmp13 = wsptr[2] + wsptr[6]; tmp12 = (wsptr[2] - wsptr[6]) * ((FAST_FLOAT) 1.414213562) - tmp13; tmp0 = tmp10 + tmp13; tmp3 = tmp10 - tmp13; tmp1 = tmp11 + tmp12; tmp2 = tmp11 - tmp12; /* Odd part */ z13 = wsptr[5] + wsptr[3]; z10 = wsptr[5] - wsptr[3]; z11 = wsptr[1] + wsptr[7]; z12 = wsptr[1] - wsptr[7]; tmp7 = z11 + z13; tmp11 = (z11 - z13) * ((FAST_FLOAT) 1.414213562); z5 = (z10 + z12) * ((FAST_FLOAT) 1.847759065); /* 2*c2 */ tmp10 = ((FAST_FLOAT) 1.082392200) * z12 - z5; /* 2*(c2-c6) */ tmp12 = ((FAST_FLOAT) -2.613125930) * z10 + z5; /* -2*(c2+c6) */ tmp6 = tmp12 - tmp7; tmp5 = tmp11 - tmp6; tmp4 = tmp10 + tmp5; /* Final output stage: scale down by a factor of 8 and range-limit */ outptr[0] = range_limit[(int) DESCALE((IJG_INT32) (tmp0 + tmp7), 3) & RANGE_MASK]; outptr[7] = range_limit[(int) DESCALE((IJG_INT32) (tmp0 - tmp7), 3) & RANGE_MASK]; outptr[1] = range_limit[(int) DESCALE((IJG_INT32) (tmp1 + tmp6), 3) & RANGE_MASK]; outptr[6] = range_limit[(int) DESCALE((IJG_INT32) (tmp1 - tmp6), 3) & RANGE_MASK]; outptr[2] = range_limit[(int) DESCALE((IJG_INT32) (tmp2 + tmp5), 3) & RANGE_MASK]; outptr[5] = range_limit[(int) DESCALE((IJG_INT32) (tmp2 - tmp5), 3) & RANGE_MASK]; outptr[4] = range_limit[(int) DESCALE((IJG_INT32) (tmp3 + tmp4), 3) & RANGE_MASK]; outptr[3] = range_limit[(int) DESCALE((IJG_INT32) (tmp3 - tmp4), 3) & RANGE_MASK]; wsptr += DCTSIZE; /* advance pointer to next row */ } }
static void fdct(const unsigned short* in, short* out) { INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; INT32 tmp10, tmp11, tmp12, tmp13; INT32 z1, z2, z3, z4, z5; int ctr; // SHIFT_TEMPS int data[DCTSIZE * DCTSIZE]; int i, j; int* dataptr = data; for( i = 0; i < DCTSIZE; i++ ) { for( j = 0; j < DCTSIZE; j++ ) { int temp; temp = in[i*DCTSIZE + j]; dataptr[i*DCTSIZE + j] = temp; } } /* Pass 1: process rows. */ /* Note results are scaled up by sqrt(8) compared to a true DCT; */ /* furthermore, we scale the results by 2**PASS1_BITS. */ dataptr = data; for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { tmp0 = dataptr[0] + dataptr[7]; tmp7 = dataptr[0] - dataptr[7]; tmp1 = dataptr[1] + dataptr[6]; tmp6 = dataptr[1] - dataptr[6]; tmp2 = dataptr[2] + dataptr[5]; tmp5 = dataptr[2] - dataptr[5]; tmp3 = dataptr[3] + dataptr[4]; tmp4 = dataptr[3] - dataptr[4]; /* Even part per LL&M figure 1 --- note that published figure is faulty; * rotator "sqrt(2)*c1" should be "sqrt(2)*c6". */ tmp10 = tmp0 + tmp3; tmp13 = tmp0 - tmp3; tmp11 = tmp1 + tmp2; tmp12 = tmp1 - tmp2; dataptr[0] = (DCTELEM) ((tmp10 + tmp11) << PASS1_BITS); dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS); z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865), CONST_BITS-PASS1_BITS); dataptr[6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065), CONST_BITS-PASS1_BITS); /* Odd part per figure 8 --- note paper omits factor of sqrt(2). * cK represents cos(K*pi/16). * i0..i3 in the paper are tmp4..tmp7 here. */ z1 = tmp4 + tmp7; z2 = tmp5 + tmp6; z3 = tmp4 + tmp6; z4 = tmp5 + tmp7; z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */ tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */ tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */ tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */ tmp7 = MULTIPLY(tmp7, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */ z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */ z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */ z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */ z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */ z3 += z5; z4 += z5; dataptr[7] = (DCTELEM) DESCALE(tmp4 + z1 + z3, CONST_BITS-PASS1_BITS); dataptr[5] = (DCTELEM) DESCALE(tmp5 + z2 + z4, CONST_BITS-PASS1_BITS); dataptr[3] = (DCTELEM) DESCALE(tmp6 + z2 + z3, CONST_BITS-PASS1_BITS); dataptr[1] = (DCTELEM) DESCALE(tmp7 + z1 + z4, CONST_BITS-PASS1_BITS); dataptr += DCTSIZE; /* advance pointer to next row */ } /* Pass 2: process columns. * We remove the PASS1_BITS scaling, but leave the results scaled up * by an overall factor of 8. */ dataptr = data; for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7]; tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7]; tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6]; tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6]; tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5]; tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5]; tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4]; tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4]; /* Even part per LL&M figure 1 --- note that published figure is faulty; * rotator "sqrt(2)*c1" should be "sqrt(2)*c6". */ tmp10 = tmp0 + tmp3; tmp13 = tmp0 - tmp3; tmp11 = tmp1 + tmp2; tmp12 = tmp1 - tmp2; dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS); dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS); z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865), CONST_BITS+PASS1_BITS); dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065), CONST_BITS+PASS1_BITS); /* Odd part per figure 8 --- note paper omits factor of sqrt(2). * cK represents cos(K*pi/16). * i0..i3 in the paper are tmp4..tmp7 here. */ z1 = tmp4 + tmp7; z2 = tmp5 + tmp6; z3 = tmp4 + tmp6; z4 = tmp5 + tmp7; z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */ tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */ tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */ tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */ tmp7 = MULTIPLY(tmp7, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */ z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */ z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */ z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */ z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */ z3 += z5; z4 += z5; dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp4 + z1 + z3, CONST_BITS+PASS1_BITS); dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp5 + z2 + z4, CONST_BITS+PASS1_BITS); dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp6 + z2 + z3, CONST_BITS+PASS1_BITS); dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp7 + z1 + z4, CONST_BITS+PASS1_BITS); dataptr++; /* advance pointer to next column */ } for( i = 0; i < DCTSIZE; i++ ) for( j = 0; j < DCTSIZE; j++ ) out[i*DCTSIZE + j] = data[i*DCTSIZE + j] >> 3; }
void FeDrone_inverse_dct8 ( int32_t *block ) { int32_t tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13, z1, z2, z3, z4, z5, workspace[DCT8_SIZE2]; int32_t *ws_ptr = workspace; int32_t *out_ptr = block, *in_ptr = block, idx; for (idx = DCT8_SIZE; idx > 0; idx--, in_ptr++, ws_ptr++) { if (in_ptr[DCT8_SIZE * 1] == 0 && in_ptr[DCT8_SIZE * 2] == 0 && in_ptr[DCT8_SIZE * 3] == 0 && in_ptr[DCT8_SIZE * 4] == 0 && in_ptr[DCT8_SIZE * 5] == 0 && in_ptr[DCT8_SIZE * 6] == 0 && in_ptr[DCT8_SIZE * 7] == 0) { int32_t dcval = in_ptr[DCT8_SIZE * 0] << PASS1_BITS; ws_ptr[DCT8_SIZE * 0] = dcval; ws_ptr[DCT8_SIZE * 1] = dcval; ws_ptr[DCT8_SIZE * 2] = dcval; ws_ptr[DCT8_SIZE * 3] = dcval; ws_ptr[DCT8_SIZE * 4] = dcval; ws_ptr[DCT8_SIZE * 5] = dcval; ws_ptr[DCT8_SIZE * 6] = dcval; ws_ptr[DCT8_SIZE * 7] = dcval; continue; } z2 = in_ptr[DCT8_SIZE * 2]; z3 = in_ptr[DCT8_SIZE * 6]; z1 = MULTIPLY(z2 + z3, FIX_0_541196100); tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065); tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865); z2 = in_ptr[DCT8_SIZE * 0]; z3 = in_ptr[DCT8_SIZE * 4]; tmp0 = (z2 + z3) << CONST_BITS; tmp1 = (z2 - z3) << CONST_BITS; tmp10 = tmp0 + tmp3; tmp13 = tmp0 - tmp3; tmp11 = tmp1 + tmp2; tmp12 = tmp1 - tmp2; tmp0 = in_ptr[DCT8_SIZE * 7]; tmp1 = in_ptr[DCT8_SIZE * 5]; tmp2 = in_ptr[DCT8_SIZE * 3]; tmp3 = in_ptr[DCT8_SIZE * 1]; z1 = tmp0 + tmp3; z2 = tmp1 + tmp2; z3 = tmp0 + tmp2; z4 = tmp1 + tmp3; z5 = MULTIPLY(z3 + z4, FIX_1_175875602); tmp0 = MULTIPLY(tmp0, FIX_0_298631336); tmp1 = MULTIPLY(tmp1, FIX_2_053119869); tmp2 = MULTIPLY(tmp2, FIX_3_072711026); tmp3 = MULTIPLY(tmp3, FIX_1_501321110); z1 = MULTIPLY( z1, -FIX_0_899976223); z2 = MULTIPLY( z2, -FIX_2_562915447); z3 = MULTIPLY( z3, -FIX_1_961570560); z4 = MULTIPLY( z4, -FIX_0_390180644); z3 += z5; z4 += z5; tmp0 += z1 + z3; tmp1 += z2 + z4; tmp2 += z2 + z3; tmp3 += z1 + z4; ws_ptr[DCT8_SIZE * 0] = DESCALE(tmp10 + tmp3, CONST_BITS - PASS1_BITS); ws_ptr[DCT8_SIZE * 7] = DESCALE(tmp10 - tmp3, CONST_BITS - PASS1_BITS); ws_ptr[DCT8_SIZE * 1] = DESCALE(tmp11 + tmp2, CONST_BITS - PASS1_BITS); ws_ptr[DCT8_SIZE * 6] = DESCALE(tmp11 - tmp2, CONST_BITS - PASS1_BITS); ws_ptr[DCT8_SIZE * 2] = DESCALE(tmp12 + tmp1, CONST_BITS - PASS1_BITS); ws_ptr[DCT8_SIZE * 5] = DESCALE(tmp12 - tmp1, CONST_BITS - PASS1_BITS); ws_ptr[DCT8_SIZE * 3] = DESCALE(tmp13 + tmp0, CONST_BITS - PASS1_BITS); ws_ptr[DCT8_SIZE * 4] = DESCALE(tmp13 - tmp0, CONST_BITS - PASS1_BITS); } for (ws_ptr = workspace, idx = 0; idx < DCT8_SIZE; idx++, ws_ptr += DCT8_SIZE, out_ptr += DCT8_SIZE) { z2 = ws_ptr[2]; z3 = ws_ptr[6]; z1 = MULTIPLY(z2 + z3, FIX_0_541196100); tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065); tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865); tmp0 = (ws_ptr[0] + ws_ptr[4]) << CONST_BITS; tmp1 = (ws_ptr[0] - ws_ptr[4]) << CONST_BITS; tmp10 = tmp0 + tmp3; tmp13 = tmp0 - tmp3; tmp11 = tmp1 + tmp2; tmp12 = tmp1 - tmp2; tmp0 = ws_ptr[7]; tmp1 = ws_ptr[5]; tmp2 = ws_ptr[3]; tmp3 = ws_ptr[1]; z1 = tmp0 + tmp3; z2 = tmp1 + tmp2; z3 = tmp0 + tmp2; z4 = tmp1 + tmp3; z5 = MULTIPLY(z3 + z4, FIX_1_175875602); tmp0 = MULTIPLY(tmp0, FIX_0_298631336); tmp1 = MULTIPLY(tmp1, FIX_2_053119869); tmp2 = MULTIPLY(tmp2, FIX_3_072711026); tmp3 = MULTIPLY(tmp3, FIX_1_501321110); z1 = MULTIPLY(z1, - FIX_0_899976223); z2 = MULTIPLY(z2, - FIX_2_562915447); z3 = MULTIPLY(z3, - FIX_1_961570560); z4 = MULTIPLY(z4, - FIX_0_390180644); z3 += z5; z4 += z5; tmp0 += z1 + z3; tmp1 += z2 + z4; tmp2 += z2 + z3; tmp3 += z1 + z4; out_ptr[0] = (tmp10 + tmp3) >> (CONST_BITS + PASS1_BITS + 3); out_ptr[7] = (tmp10 - tmp3) >> (CONST_BITS + PASS1_BITS + 3); out_ptr[1] = (tmp11 + tmp2) >> (CONST_BITS + PASS1_BITS + 3); out_ptr[6] = (tmp11 - tmp2) >> (CONST_BITS + PASS1_BITS + 3); out_ptr[2] = (tmp12 + tmp1) >> (CONST_BITS + PASS1_BITS + 3); out_ptr[5] = (tmp12 - tmp1) >> (CONST_BITS + PASS1_BITS + 3); out_ptr[3] = (tmp13 + tmp0) >> (CONST_BITS + PASS1_BITS + 3); out_ptr[4] = (tmp13 - tmp0) >> (CONST_BITS + PASS1_BITS + 3); } }
static void idct(const short* in, unsigned short* out) { INT32 tmp0, tmp1, tmp2, tmp3; INT32 tmp10, tmp11, tmp12, tmp13; INT32 z1, z2, z3, z4, z5; int* wsptr; int* outptr; const short* inptr; int ctr; int workspace[DCTSIZE2]; /* buffers data between passes */ int data[DCTSIZE2]; // SHIFT_TEMPS /* Pass 1: process columns from input, store into work array. */ /* Note results are scaled up by sqrt(8) compared to a true IDCT; */ /* furthermore, we scale the results by 2**PASS1_BITS. */ inptr = in; wsptr = workspace; for (ctr = DCTSIZE; ctr > 0; ctr--) { /* Due to quantization, we will usually find that many of the input * coefficients are zero, especially the AC terms. We can exploit this * by short-circuiting the IDCT calculation for any column in which all * the AC terms are zero. In that case each output is equal to the * DC coefficient (with scale factor as needed). * With typical images and quantization tables, half or more of the * column DCT calculations can be simplified this way. */ if( inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 && inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 && inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 && inptr[DCTSIZE*7] == 0 ) { /* AC terms all zero */ int dcval = inptr[DCTSIZE*0] << PASS1_BITS; wsptr[DCTSIZE*0] = dcval; wsptr[DCTSIZE*1] = dcval; wsptr[DCTSIZE*2] = dcval; wsptr[DCTSIZE*3] = dcval; wsptr[DCTSIZE*4] = dcval; wsptr[DCTSIZE*5] = dcval; wsptr[DCTSIZE*6] = dcval; wsptr[DCTSIZE*7] = dcval; inptr++; /* advance pointers to next column */ wsptr++; continue; } /* Even part: reverse the even part of the forward DCT. */ /* The rotator is sqrt(2)*c(-6). */ z2 = inptr[DCTSIZE*2]; z3 = inptr[DCTSIZE*6]; z1 = MULTIPLY(z2 + z3, FIX_0_541196100); tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065); tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865); z2 = inptr[DCTSIZE*0]; z3 = inptr[DCTSIZE*4]; tmp0 = (z2 + z3) << CONST_BITS; tmp1 = (z2 - z3) << CONST_BITS; tmp10 = tmp0 + tmp3; tmp13 = tmp0 - tmp3; tmp11 = tmp1 + tmp2; tmp12 = tmp1 - tmp2; /* Odd part per figure 8; the matrix is unitary and hence its * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. */ tmp0 = inptr[DCTSIZE*7]; tmp1 = inptr[DCTSIZE*5]; tmp2 = inptr[DCTSIZE*3]; tmp3 = inptr[DCTSIZE*1]; z1 = tmp0 + tmp3; z2 = tmp1 + tmp2; z3 = tmp0 + tmp2; z4 = tmp1 + tmp3; z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */ tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */ tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */ tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */ tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */ z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */ z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */ z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */ z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */ z3 += z5; z4 += z5; tmp0 += z1 + z3; tmp1 += z2 + z4; tmp2 += z2 + z3; tmp3 += z1 + z4; /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ wsptr[DCTSIZE*0] = (int) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS); wsptr[DCTSIZE*7] = (int) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS); wsptr[DCTSIZE*1] = (int) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS); wsptr[DCTSIZE*6] = (int) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS); wsptr[DCTSIZE*2] = (int) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS); wsptr[DCTSIZE*5] = (int) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS); wsptr[DCTSIZE*3] = (int) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS); wsptr[DCTSIZE*4] = (int) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS); inptr++; /* advance pointers to next column */ wsptr++; } /* Pass 2: process rows from work array, store into output array. */ /* Note that we must descale the results by a factor of 8 == 2**3, */ /* and also undo the PASS1_BITS scaling. */ wsptr = workspace; outptr = data; for (ctr = 0; ctr < DCTSIZE; ctr++) { /* Even part: reverse the even part of the forward DCT. */ /* The rotator is sqrt(2)*c(-6). */ z2 = (INT32) wsptr[2]; z3 = (INT32) wsptr[6]; z1 = MULTIPLY(z2 + z3, FIX_0_541196100); tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065); tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865); tmp0 = ((INT32) wsptr[0] + (INT32) wsptr[4]) << CONST_BITS; tmp1 = ((INT32) wsptr[0] - (INT32) wsptr[4]) << CONST_BITS; tmp10 = tmp0 + tmp3; tmp13 = tmp0 - tmp3; tmp11 = tmp1 + tmp2; tmp12 = tmp1 - tmp2; /* Odd part per figure 8; the matrix is unitary and hence its * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. */ tmp0 = (INT32) wsptr[7]; tmp1 = (INT32) wsptr[5]; tmp2 = (INT32) wsptr[3]; tmp3 = (INT32) wsptr[1]; z1 = tmp0 + tmp3; z2 = tmp1 + tmp2; z3 = tmp0 + tmp2; z4 = tmp1 + tmp3; z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */ tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */ tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */ tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */ tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */ z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */ z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */ z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */ z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */ z3 += z5; z4 += z5; tmp0 += z1 + z3; tmp1 += z2 + z4; tmp2 += z2 + z3; tmp3 += z1 + z4; /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ outptr[0] = (tmp10 + tmp3) >> ( CONST_BITS+PASS1_BITS+3 ); outptr[7] = (tmp10 - tmp3) >> ( CONST_BITS+PASS1_BITS+3 ); outptr[1] = (tmp11 + tmp2) >> ( CONST_BITS+PASS1_BITS+3 ); outptr[6] = (tmp11 - tmp2) >> ( CONST_BITS+PASS1_BITS+3 ); outptr[2] = (tmp12 + tmp1) >> ( CONST_BITS+PASS1_BITS+3 ); outptr[5] = (tmp12 - tmp1) >> ( CONST_BITS+PASS1_BITS+3 ); outptr[3] = (tmp13 + tmp0) >> ( CONST_BITS+PASS1_BITS+3 ); outptr[4] = (tmp13 - tmp0) >> ( CONST_BITS+PASS1_BITS+3 ); wsptr += DCTSIZE; /* advance pointer to next row */ outptr += DCTSIZE; } for(ctr = 0; ctr < DCTSIZE2; ctr++) out[ctr] = data[ctr]; }