/////////////////////////////////////////////////////////////////////////////// // calculateHuffmanCode turns freqTable into an array with a single entry that // is the root of the huffman tree. The return value is a SymbolEncoder, // which is an array of huffman codes index by symbol value. bool calculateHuffmanCode(FrequencyTable& freqTable, SymbolEncoder& SE) { bool bOk = true; freqTable.sort( ); size_t n = freqTable.numberOfSymbols(); if(n>0) { // Construct a Huffman tree. This code is based on the algorithm given in // Managing Gigabytes by Ian Witten et al, 2nd edition, page 34. // Note that this implementation uses a simple count instead of probability. for(size_t i=0; i<n-1; ++i) { // Set m1 and m2 to the two subSets of least probability. HuffmanNode* m1 = freqTable[0]; HuffmanNode* m2 = freqTable[1]; // Replace m1 and m2 with a Set {m1, m2} whose probability // is the sum of that of m1 and m2. HuffmanNode* newNode = new HuffmanNode(m1, // zero tree m2); // one tree freqTable.setNodeAtBegin(newNode); // Put new set into the correct count position in freqTable. freqTable.sort(); } // Build the SymbolEncoder array from the tree. SE.buildSymbolEncoder(freqTable[0]); } else { fprintf(stdout, "Error: cannot compute code. Frequency table is empty.\n"); bOk = false; } return bOk; }
bool huffmanProcess(Stream& in, Stream& out, HuffmanDirection direction, bool verbose) { bool bOk = true; if(direction == HuffmanEncode) { FrequencyTable freqTable; SymbolEncoder SE; if(verbose) fprintf(stdout, "Encode\n"); { // Get the frequency of each symbol in the input file. freqTable.updateFreqTable(in); if(verbose) fprintf(stdout, "Input Size(%ld bytes)\n", freqTable.totalCount()); if(verbose) fprintf(stdout, "Nb symbols: %ld\n", freqTable.numberOfSymbols()); // Build an optimal table from the symbolCount. bOk = calculateHuffmanCode(freqTable, SE); double bitSize = 0.0; double entropy = 0.0; if(bOk) { computeAverageBitSize(SE, freqTable.totalCount(), bitSize, entropy, verbose); // Scan the file again and, using the table // previously built, encode it into the output file. in.rewind( ); } if(verbose) fprintf(stdout, "Writing code table (%lu symbols, %lu worlds)\n", freqTable.size(), freqTable.totalCount()); bOk = bOk ? SE.writeCodeTable(out, freqTable.totalCount()) : false; if(bOk) { if(verbose) fprintf(stdout, "Encoding\n"); bOk = SE.encode(in, out, verbose); } // Print info fprintf(stdout, "Encoded: nbSymbol: %ld, bitSize: %.2g, entropy: %.2g, nbBytes: %ld\n", freqTable.numberOfSymbols(), bitSize, entropy, freqTable.totalCount()); } } else { if(verbose) fprintf(stdout, "Decode File"); unsigned int totalNbBytes = 0; // Read the Huffman code table. fprintf(stdout, "# Reading code table"); std::auto_ptr<const HuffmanNode> root(readCodeTable(in, totalNbBytes)); if(!root.get()) { fprintf(stdout, "Error: cannot read code table\n"); bOk = false; } else { if(verbose) fprintf(stdout, "# Decoding (size: %u bytes)", totalNbBytes); // Decode the file. const HuffmanNode* p = root.get(); unsigned int remainingNbBytes = totalNbBytes; unsigned int totalCountOfSymbols = 0; while(remainingNbBytes>0 && in.isOpen()) { const Byte byte = in.readByte(); Byte mask = 1; while(remainingNbBytes>0 && mask) { p = (byte & mask) ? p->one() : p->zero(); mask <<= 1; assert(p); if(p->isLeaf( )) { out.writeSymbol(p->symbol()); totalCountOfSymbols++; p = root.get(); --remainingNbBytes; } } } // this is not needed for the decode - Just for the info section SymbolEncoder SE; SE.buildSymbolEncoder(root.get()); double bitSize = 0.0; double entropy = 0.0; computeAverageBitSize(SE, totalCountOfSymbols, bitSize, entropy, verbose); fprintf(stdout, "Decoded: nbSymbol: %ld, bitSize: %.2g, entropy: %.2g, nbBytes: %u\n", SE.nbSymbol(), bitSize, entropy, totalNbBytes); } } if(verbose) fprintf(stdout, "done (%d).\n", bOk); return bOk; }