/*
 * Process one block: forward its picture metadata from inp1 to out and
 * run dct3() from inp1->content into out->content.
 *
 * out   destination block; every field copied below plus content is written
 * inp1  source block; only read
 */
void dct_func(smaller_block *out, smaller_block *inp1)
{
    /*
     * The picture information travels with every block so that no separate
     * channel is needed to carry it.
     */
    out->block_id = inp1->block_id;
    out->width    = inp1->width;
    out->height   = inp1->height;
    out->is_cb    = inp1->is_cb;
    out->is_cr    = inp1->is_cr;

#ifdef VERBOSE
    printf("dct block #%d\n", inp1->block_id);
#endif

    /* Only the transform itself lies inside the ForSyDe-marked region. */
#pragma ForSyDe begin dct_func
    dct3(inp1->content, out->content);
#pragma ForSyDe end
}
int jpeg_encode(unsigned char* in_buf, unsigned char* out_buf) { unsigned char block[BLOCK_SIZE * BLOCK_SIZE]; short block_o[BLOCK_SIZE * BLOCK_SIZE]; init_buf(out_buf); huffman_start(IMAGE_HEIGHT, IMAGE_WIDTH, Y_IMAGE, quality); unsigned y, x; for (y = 0; y < IMAGE_HEIGHT; y += 8) { for (x = 0; x < IMAGE_WIDTH; x += 8) { get_block(x, y, 8, 8, in_buf, block); dct3(block, block_o); block_o[0] -= 1024; huffman_encode(HUFFMAN_CTX_Y, block_o); } } huffman_stop(); return get_size(); }
/*
 *
 * \brief Real-valued forward modulation of the time domain data in
 *        timeIn. The real part of the subband samples is written to
 *        rSubband, which is then transformed in place by dct3().
 *
 *        With L = NO_ANALYSIS_CHANNELS and M = L/2, the samples are
 *        folded around timeIn[3*M]:
 *          rSubband[0] = timeIn[3*M]
 *          rSubband[k] = timeIn[3*M - k] + timeIn[3*M + k]   for 1 <= k < M
 *          rSubband[k] = timeIn[3*M - k] - timeIn[k - M]     for M <= k < L
 *
 */
static void
sbrForwardModulationLP (const float *timeIn,
                        float *rSubband,
                        HANDLE_SBR_QMF_FILTER_BANK qmfBank
                        )
{
  int k, nChan, half;
  const float *centre;

  COUNT_sub_start("sbrForwardModulationLP");

  MOVE(1);
  nChan = NO_ANALYSIS_CHANNELS;

  MULT(1);
  half = nChan/2;

  /* All folding below is expressed relative to timeIn[3 * half]. */
  centre = timeIn + 3 * half;

  PTR_INIT(1);  /* pointers for rSubband[] */

  MULT(1); MOVE(1);
  rSubband[0] = centre[0];

  PTR_INIT(2);  /* pointers for centre[-k], centre[k] */
  LOOP(1);
  for (k = 1; k < half; k++) {
    ADD(1); STORE(1);
    rSubband[k] = centre[-k] + centre[k];
  }

  LOOP(1);
  for (k = half; k < nChan; k++) {
    ADD(1); STORE(1);
    rSubband[k] = centre[-k] - timeIn[k - half];
  }

  FUNC(3);
  dct3 (rSubband, nChan, qmfBank);

  COUNT_sub_end();
}
/*
 * Perform dct type 3, in place, on data[0 .. L-1].
 *
 * data     samples to transform; overwritten with the result
 * L        transform length
 * qmfBank  filter bank handle; not used here except to be passed on to
 *          the recursive dct3() / dct4() calls
 *
 * For L > 2 the routine de-interleaves the input (even-indexed samples
 * into the lower half, odd-indexed samples into the upper half, both in
 * ascending order), transforms the lower half with dct3() and the upper
 * half with dct4(), each of length L/2, and finally combines the two
 * halves with add/subtract butterflies. Otherwise the two-point case is
 * computed directly.
 *
 * Constraints visible in the code (none of them is checked):
 *   - temp[] has 16 entries and L/4 entries are used, so L/4 must not
 *     exceed 16;
 *   - the final branch reads data[1], so L must be at least 2.
 *
 * NOTE(review): MOVE/MULT/ADD/BRANCH/PTR_INIT/LOOP/FUNC/STORE are defined
 * elsewhere; presumably operation-count instrumentation - confirm.
 */
static void
dct3 (float *data,
      int L,
      HANDLE_SBR_QMF_FILTER_BANK qmfBank
      )
{
  int i, M, N;
  float s1, s2, s3, s4;
  float temp[16];   /* scratch for the odd-indexed samples of the lower half */
  const float sqrtHalf = 0.70710678118655f;

  MOVE(1);

  MULT(1);
  M = L / 2;   /* length of each half */

  MULT(1);
  N = L / 4;   /* length of each quarter */

  ADD(1); BRANCH(1);
  if (L > 2) {

    /* Save data[1], data[3], ..., data[2*N-1] before the lower half is
       overwritten by the even-indexed samples. */
    PTR_INIT(2);  /* pointers for data[2 * i + 1], temp[i] */
    LOOP(1);
    for(i = 0; i < N; i++) {
      MOVE(1);
      temp[i] = data[2*i+1];
    }

    /* Pack the even-indexed samples into data[0 .. M-1]. The copy runs
       forwards and always reads ahead of where it writes (2*i > i). */
    PTR_INIT(2);  /* pointers for data[2 * i], data[i] */
    LOOP(1);
    for(i = 1; i < M; i++) {
      MOVE(1);
      data[i] = data[2*i];
    }

    /* Pack the odd-indexed samples of the upper half into
       data[L-N .. L-1], working downwards from the top; data[L-1] is
       already in place, hence the start at i = 1. */
    PTR_INIT(2);  /* pointers for data[L-1-i], data[L-1 - 2*i] */
    LOOP(1);
    for(i = 1; i < N; i++) {
      MOVE(1);
      data[L-1 - i] = data[L-1 - 2*i];
    }

    /* Restore the saved odd-indexed samples into data[M .. M+N-1]. */
    PTR_INIT(2);  /* pointers for data[i+M], temp[i] */
    LOOP(1);
    for(i = 0; i < N; i++) {
      MOVE(1);
      data[i + M] = temp[i];
    }

    /* Half-length transforms: dct3 on the even part, dct4 on the odd part. */
    FUNC(3);
    dct3(data, M, qmfBank);
    FUNC(3);
    dct4(data + M, M, qmfBank);

    /* Butterfly stage. All four inputs are loaded into s1..s4 before any
       of the four outputs is stored, because the stores overwrite the
       same four locations. */
    PTR_INIT(4);  /* pointers for data[i], data[i + M], data[M-1 - i], data[L-1 - i] */
    LOOP(1);
    for(i = 0; i < N; i++) {
      MOVE(4);
      s1 = data[i];
      s2 = data[i + M];
      s3 = data[M-1 - i];
      s4 = data[L-1 - i];

      ADD(4); STORE(4);
      data[i] = (s1 + s2);
      data[L-1 - i] = (s1 - s2);
      data[M-1 - i] = (s3 + s4);
      data[i + M] = (s3 - s4);
    }
  }
  else {
    /* Two-point case. data[1] is written first, while data[0] still
       holds its original value. */
    MULT(1);
    s1 = data[1] * sqrtHalf;

    ADD(2); STORE(2);
    data[1] = (data[0] - s1);
    data[0] = (data[0] + s1);
  }
}
int main (int argc, char *argv[]) { CBitmap bmp; #ifdef INVERSE FILE *fileY = fopen("dump.y.bin", "wb"); FILE *fileCb = fopen("dump.cb.bin", "wb"); FILE *fileCr = fopen("dump.cr.bin", "wb"); #endif //unsigned i; if (argc < 3) { fprintf(stderr, "Usage: %s file-in.bmp file-out.jpg\n", argv[0]); return -1; } if (!bmp.Load(argv[1])) { fprintf(stderr, "Error: cannot open %s\n", argv[1]); return -1; } if (bmp.GetBitCount() != 24) { fprintf(stderr, "Error BitCount != 24\n"); return -1; } /*recalc_qtab(512, qtable_paint_lum, 0); recalc_qtab(1024, qtable_paint_lum, 1); recalc_qtab(512, qtable_paint_chrom, 0); recalc_qtab(1024, qtable_paint_chrom, 1);*/ BGR RGB16x16[16][16]; CACHE_ALIGN conv Y8x8[2][2][8][8]; // four 8x8 blocks - 16x16 CACHE_ALIGN conv Cb8x8[8][8]; CACHE_ALIGN conv Cr8x8[8][8]; //dct_fill_tab(); // for IDCT if ((file_jpg = open(argv[2], O_CREAT|O_TRUNC|O_WRONLY|O_BINARY, S_IWRITE)) < 0) { fprintf(stderr, "Error: cannot create %s (%s)\n", argv[2], strerror(errno)); return -1; } uint64_t tm = __rdtsc(); // Process image by 16x16 blocks, (16x16 because of chroma subsampling) // The resulting image will be truncated on the right/down side // if its width/height is not multiple of 16. // The data is written into <file_jpg> file by write_jpeg() function // which Huffman encoder uses to flush its output, so this file // should be opened before the call of huffman_start(). 
huffman_start(bmp.GetHeight() & -16, bmp.GetWidth() & -16); for (unsigned y = 0; y < bmp.GetHeight()-15; y += 16) { for (unsigned x = 0; x < bmp.GetWidth()-15; x += 16) { if (!bmp.GetBlock(x, y, 16, 16, (BGR*)RGB16x16)) { printf("Error: getBlock(%d,%d)\n", x, y); break; } /* // geting four 8x8 Y-blocks for (unsigned i = 0; i < 2; i++) for (unsigned j = 0; j < 2; j++) { for (unsigned r = 0; r < 8; r++) for (unsigned c = 0; c < 8; c++) { const unsigned rr = (i<<3) + r; const unsigned cc = (j<<3) + c; const color R = RGB16x16[rr][cc].Red; const color G = RGB16x16[rr][cc].Green; const color B = RGB16x16[rr][cc].Blue; // converting RGB into Y (luminance) Y8x8[i][j][r][c] = RGB2Y(R, G, B)-128; } } // getting subsampled Cb and Cr subsample(RGB16x16, Cb8x8, Cr8x8); */ // getting subsampled Cb and Cr subsample2(RGB16x16, Y8x8, Cb8x8, Cr8x8); uint64_t tmj = __rdtsc(); // 1 Y-compression dct3(Y8x8[0][0], Y8x8[0][0]); //quantization_lum(Y8x8[0][0]); huffman_encode(HUFFMAN_CTX_Y, (conv*)Y8x8[0][0]); // 2 Y-compression dct3(Y8x8[0][1], Y8x8[0][1]); //quantization_lum(Y8x8[0][1]); huffman_encode(HUFFMAN_CTX_Y, (conv*)Y8x8[0][1]); // 3 Y-compression dct3(Y8x8[1][0], Y8x8[1][0]); //quantization_lum(Y8x8[1][0]); huffman_encode(HUFFMAN_CTX_Y, (conv*)Y8x8[1][0]); // 4 Y-compression dct3(Y8x8[1][1], Y8x8[1][1]); //quantization_lum(Y8x8[1][1]); huffman_encode(HUFFMAN_CTX_Y, (conv*)Y8x8[1][1]); // Cb-compression dct3(Cb8x8, Cb8x8); //quantization_chrom(Cb8x8); huffman_encode(HUFFMAN_CTX_Cb, (conv*)Cb8x8); // Cr-compression dct3(Cr8x8, Cr8x8); //quantization_chrom(Cr8x8); huffman_encode(HUFFMAN_CTX_Cr, (conv*)Cr8x8); jpgclk += __rdtsc() - tmj; #ifdef INVERSE quantization_lum(Y8x8[0][0]); quantization_lum(Y8x8[0][1]); quantization_lum(Y8x8[1][0]); quantization_lum(Y8x8[1][1]); quantization_chrom(Cb8x8); quantization_chrom(Cr8x8); dump((conv*)Y8x8[0][0], fileY); dump((conv*)Y8x8[0][1], fileY); dump((conv*)Y8x8[1][0], fileY); dump((conv*)Y8x8[1][1], fileY); dump((conv*)Cb8x8, fileCb); 
dump((conv*)Cr8x8, fileCr); // inverse DCTs - getting pixels back iquantization_lum(Y8x8[0][0]); idct3(Y8x8[0][0], Y8x8[0][0]); //correct_color(Y8x8[0][0]); iquantization_lum(Y8x8[0][1]); idct3(Y8x8[0][1], Y8x8[0][1]); //correct_color(Y8x8[0][1]); iquantization_lum(Y8x8[1][0]); idct3(Y8x8[1][0], Y8x8[1][0]); //correct_color(Y8x8[1][0]); iquantization_lum(Y8x8[1][1]); idct3(Y8x8[1][1], Y8x8[1][1]); //correct_color(Y8x8[1][1]); iquantization_chrom(Cb8x8); idct3(Cb8x8, Cb8x8); //correct_color(Cb8x8); iquantization_chrom(Cr8x8); idct3(Cr8x8, Cr8x8); //correct_color(Cr8x8); for (unsigned i = 0; i < 2; i++) for (unsigned j = 0; j < 2; j++) { for (unsigned r = 0; r < 8; r += 2) for (unsigned c = 0; c < 8; c += 2) { const unsigned rr = (i<<3) + r; const unsigned cc = (j<<3) + c; // convert pixels back into RGB const conv Cb = Cb8x8[rr>>1][cc>>1] + 128; const conv Cr = Cr8x8[rr>>1][cc>>1] + 128; conv Y; Y = Y8x8[i][j][r][c] + 128; RGB16x16[rr][cc].Red = YCbCr2R(Y, Cb, Cr); RGB16x16[rr][cc].Green = YCbCr2G(Y, Cb, Cr); RGB16x16[rr][cc].Blue = YCbCr2B(Y, Cb, Cr); Y = Y8x8[i][j][r][c+1] + 128; RGB16x16[rr][cc+1].Red = YCbCr2R(Y, Cb, Cr); RGB16x16[rr][cc+1].Green = YCbCr2G(Y, Cb, Cr); RGB16x16[rr][cc+1].Blue = YCbCr2B(Y, Cb, Cr); Y = Y8x8[i][j][r+1][c] + 128; RGB16x16[rr+1][cc].Red = YCbCr2R(Y, Cb, Cr); RGB16x16[rr+1][cc].Green = YCbCr2G(Y, Cb, Cr); RGB16x16[rr+1][cc].Blue = YCbCr2B(Y, Cb, Cr); Y = Y8x8[i][j][r+1][c+1] + 128; RGB16x16[rr+1][cc+1].Red = YCbCr2R(Y, Cb, Cr); RGB16x16[rr+1][cc+1].Green = YCbCr2G(Y, Cb, Cr); RGB16x16[rr+1][cc+1].Blue = YCbCr2B(Y, Cb, Cr); } } // save pixels if (!bmp.SetBlock(x, y, 16, 16, (BGR*)RGB16x16)) { printf("Error: SetBlock(%d,%d)\n", x, y); } #endif } } huffman_stop(); tm = __rdtsc() - tm; close(file_jpg); printf(" DCT MIPS:\t\t%f\n", dctclk/1.e6); printf("JPEG MIPS:\t\t%f\n", jpgclk/1.e6); printf("IDCT MIPS(SSE2):\t%f\n", idctclk/1.e6); printf(" ALL MIPS:\t\t%f\n", tm/1.e6); #ifdef INVERSE bmp.Save("testz.bmp"); fclose(fileY); 
fclose(fileCb); fclose(fileCr); #endif return 0; }