/*
 * Serializes the code table stored in se by pushing it through
 * write_cache(pc, ...). The emitted layout is:
 *   - 4 bytes: number of non-null entries in se, network byte order
 *   - 4 bytes: symbol_count, network byte order
 *   - for every non-null entry, in ascending symbol order:
 *       1 byte symbol, 1 byte code bit length,
 *       numbytes_from_numbits(numbits) code bytes.
 *
 * Returns 0 when every write succeeded, or 1 as soon as a write_cache
 * call returns non-zero.
 */
static int
write_code_table_to_memory(buf_cache *pc,
                           SymbolEncoder *se,
                           uint32_t symbol_count)
{
    uint32_t idx;
    uint32_t used = 0;
    uint32_t wire;

    /* Count the populated slots of the encoder. */
    for(idx = 0; idx < MAX_SYMBOLS; ++idx)
    {
        if(!(*se)[idx])
            continue;
        ++used;
    }

    /* Header field 1: entry count, converted to network byte order. */
    wire = htonl(used);
    if(write_cache(pc, &wire, sizeof(wire)))
        return 1;

    /* Header field 2: how many bytes the encoded payload represents. */
    wire = htonl(symbol_count);
    if(write_cache(pc, &wire, sizeof(wire)))
        return 1;

    /* One record per populated slot. */
    for(idx = 0; idx < MAX_SYMBOLS; ++idx)
    {
        huffman_code *code = (*se)[idx];
        unsigned char octet;
        unsigned int code_bytes;

        if(!code)
            continue;

        /* The symbol is the slot index, stored in a single byte. */
        octet = (unsigned char)idx;
        if(write_cache(pc, &octet, sizeof(octet)))
            return 1;

        /* The code length in bits, also stored in a single byte. */
        octet = (unsigned char)code->numbits;
        if(write_cache(pc, &octet, sizeof(octet)))
            return 1;

        /* The code bits themselves, rounded up to whole bytes. */
        code_bytes = numbytes_from_numbits(code->numbits);
        if(write_cache(pc, code->bits, code_bytes))
            return 1;
    }

    return 0;
}
/* * Write the huffman code table. The format is: * 4 byte code count in network byte order. * 4 byte number of bytes encoded * (if you decode the data, you should get this number of bytes) * code1 * ... * codeN, where N is the count read at the begginning of the file. * Each codeI has the following format: * 1 byte symbol, 1 byte code bit length, code bytes. * Each entry has numbytes_from_numbits code bytes. * The last byte of each code may have extra bits, if the number of * bits in the code is not a multiple of 8. */ static int write_code_table(FILE* out, SymbolEncoder *se, uint32_t symbol_count) { uint32_t i, count = 0; /* Determine the number of entries in se. */ for(i = 0; i < MAX_SYMBOLS; ++i) { if((*se)[i]) ++count; } /* Write the number of entries in network byte order. */ i = htonl(count); if(fwrite(&i, sizeof(i), 1, out) != 1) return 1; /* Write the number of bytes that will be encoded. */ symbol_count = htonl(symbol_count); if(fwrite(&symbol_count, sizeof(symbol_count), 1, out) != 1) return 1; /* Write the entries. */ for(i = 0; i < MAX_SYMBOLS; ++i) { huffman_code *p = (*se)[i]; if(p) { unsigned int numbytes; /* Write the 1 byte symbol. */ fputc((unsigned char)i, out); /* Write the 1 byte code bit length. */ fputc(p->numbits, out); /* Write the code bytes. */ numbytes = numbytes_from_numbits(p->numbits); if(fwrite(p->bits, 1, numbytes, out) != numbytes) return 1; } } return 0; }
/*
 * Reverses, in place, the order of the first numbits bits of the bit
 * string stored in bits: output bit k becomes input bit
 * (numbits - 1 - k), as read through get_bit. The reversed string is
 * assembled in a zeroed alloca() scratch buffer of
 * numbytes_from_numbits(numbits) bytes and then copied back over bits.
 */
static void
reverse_bits(unsigned char* bits, unsigned long numbits)
{
    unsigned long byte_count = numbytes_from_numbits(numbits);
    unsigned char *scratch = (unsigned char*)alloca(byte_count);
    unsigned long dst;

    memset(scratch, 0, byte_count);

    for(dst = 0; dst < numbits; ++dst)
    {
        /* Source bit mirrored around the middle of the bit string. */
        unsigned long src = numbits - dst - 1;
        unsigned int shift = dst % 8;

        /* Destination byte is dst / 8; bit position inside it is dst % 8. */
        scratch[dst / 8] |= (get_bit(bits, src) << shift);
    }

    memcpy(bits, scratch, byte_count);
}
/*
 * Builds a Huffman tree from a code table held in the memory buffer
 * bufin (bufinlen bytes), reading through memread with *pindex as the
 * cursor argument (presumably the current read offset, advanced by
 * memread -- confirm against memread).
 *
 * Table layout consumed here:
 *   4 byte entry count (network byte order)
 *   4 byte number of data bytes the encoding represents (network byte
 *     order), stored into *pDataBytes
 *   per entry: 1 byte symbol, 1 byte code bit length, then
 *     numbytes_from_numbits(numbits) code bytes.
 *
 * Returns the root of the new tree, or NULL if a read fails or the
 * buffer for an entry's code bytes cannot be allocated. On failure the
 * partially built tree is released with free_huffman_tree.
 */
static huffman_node*
read_code_table_from_memory(const unsigned char* bufin,
                            unsigned int bufinlen,
                            unsigned int *pindex,
                            uint32_t *pDataBytes)
{
    huffman_node *root = new_nonleaf_node(0, NULL, NULL);
    uint32_t count;

    /* Read the number of entries.
       (it is stored in network byte order). */
    if(memread(bufin, bufinlen, pindex, &count, sizeof(count)))
    {
        free_huffman_tree(root);
        return NULL;
    }

    count = ntohl(count);

    /* Read the number of data bytes this encoding represents. */
    if(memread(bufin, bufinlen, pindex, pDataBytes, sizeof(*pDataBytes)))
    {
        free_huffman_tree(root);
        return NULL;
    }

    *pDataBytes = ntohl(*pDataBytes);

    /* Read the entries. */
    while(count-- > 0)
    {
        unsigned int curbit;
        unsigned char symbol;
        unsigned char numbits;
        unsigned char numbytes;
        unsigned char *bytes;
        huffman_node *p = root;

        if(memread(bufin, bufinlen, pindex, &symbol, sizeof(symbol)))
        {
            free_huffman_tree(root);
            return NULL;
        }

        if(memread(bufin, bufinlen, pindex, &numbits, sizeof(numbits)))
        {
            free_huffman_tree(root);
            return NULL;
        }

        numbytes = (unsigned char)numbytes_from_numbits(numbits);
        bytes = (unsigned char*)malloc(numbytes);

        /* malloc(0) may legitimately return NULL, so only treat NULL as
           an allocation failure when bytes were actually requested. */
        if(bytes == NULL && numbytes != 0)
        {
            free_huffman_tree(root);
            return NULL;
        }

        if(memread(bufin, bufinlen, pindex, bytes, numbytes))
        {
            free(bytes);
            free_huffman_tree(root);
            return NULL;
        }

        /*
         * Add the entry to the Huffman tree. The value
         * of the current bit is used to switch between
         * zero and one child nodes in the tree. New nodes
         * are added as needed in the tree; the node created
         * for the last bit of the code is the leaf for symbol.
         *
         * NOTE(review): a malformed table in which one code is a
         * prefix of another would descend through an existing leaf
         * here; whether that is safe depends on huffman_node's
         * layout, which is not visible in this block.
         */
        for(curbit = 0; curbit < numbits; ++curbit)
        {
            if(get_bit(bytes, curbit))
            {
                if(p->one == NULL)
                {
                    p->one = curbit == (unsigned char)(numbits - 1)
                        ? new_leaf_node(symbol)
                        : new_nonleaf_node(0, NULL, NULL);
                    p->one->parent = p;
                }
                p = p->one;
            }
            else
            {
                if(p->zero == NULL)
                {
                    p->zero = curbit == (unsigned char)(numbits - 1)
                        ? new_leaf_node(symbol)
                        : new_nonleaf_node(0, NULL, NULL);
                    p->zero->parent = p;
                }
                p = p->zero;
            }
        }

        free(bytes);
    }

    return root;
}
/*
 * read_code_table builds a Huffman tree from the code
 * in the in file. This function returns NULL on error
 * (short read, or failure to allocate the buffer for an
 * entry's code bytes).
 * The returned value should be freed with free_huffman_tree.
 *
 * Layout consumed from in:
 *   4 byte entry count (network byte order)
 *   4 byte number of data bytes the encoding represents (network
 *     byte order), stored into *pDataBytes
 *   per entry: 1 byte symbol, 1 byte code bit length, then
 *     numbytes_from_numbits(numbits) code bytes.
 */
static huffman_node*
read_code_table(FILE* in, unsigned int *pDataBytes)
{
    huffman_node *root = new_nonleaf_node(0, NULL, NULL);
    /* Both header fields are exactly 4 bytes in the file, so they are
       read through uint32_t rather than through unsigned int. */
    uint32_t count;
    uint32_t dataBytes;

    /* Read the number of entries.
       (it is stored in network byte order). */
    if(fread(&count, sizeof(count), 1, in) != 1)
    {
        free_huffman_tree(root);
        return NULL;
    }

    count = ntohl(count);

    /* Read the number of data bytes this encoding represents. */
    if(fread(&dataBytes, sizeof(dataBytes), 1, in) != 1)
    {
        free_huffman_tree(root);
        return NULL;
    }

    *pDataBytes = ntohl(dataBytes);

    /* Read the entries. */
    while(count-- > 0)
    {
        int c;
        unsigned int curbit;
        unsigned char symbol;
        unsigned char numbits;
        unsigned char numbytes;
        unsigned char *bytes;
        huffman_node *p = root;

        if((c = fgetc(in)) == EOF)
        {
            free_huffman_tree(root);
            return NULL;
        }
        symbol = (unsigned char)c;

        if((c = fgetc(in)) == EOF)
        {
            free_huffman_tree(root);
            return NULL;
        }

        numbits = (unsigned char)c;
        numbytes = (unsigned char)numbytes_from_numbits(numbits);
        bytes = (unsigned char*)malloc(numbytes);

        /* malloc(0) may legitimately return NULL, so only treat NULL as
           an allocation failure when bytes were actually requested. */
        if(bytes == NULL && numbytes != 0)
        {
            free_huffman_tree(root);
            return NULL;
        }

        if(fread(bytes, 1, numbytes, in) != numbytes)
        {
            free(bytes);
            free_huffman_tree(root);
            return NULL;
        }

        /*
         * Add the entry to the Huffman tree. The value
         * of the current bit is used to switch between
         * zero and one child nodes in the tree. New nodes
         * are added as needed in the tree; the node created
         * for the last bit of the code is the leaf for symbol.
         */
        for(curbit = 0; curbit < numbits; ++curbit)
        {
            if(get_bit(bytes, curbit))
            {
                if(p->one == NULL)
                {
                    p->one = curbit == (unsigned char)(numbits - 1)
                        ? new_leaf_node(symbol)
                        : new_nonleaf_node(0, NULL, NULL);
                    p->one->parent = p;
                }
                p = p->one;
            }
            else
            {
                if(p->zero == NULL)
                {
                    p->zero = curbit == (unsigned char)(numbits - 1)
                        ? new_leaf_node(symbol)
                        : new_nonleaf_node(0, NULL, NULL);
                    p->zero->parent = p;
                }
                p = p->zero;
            }
        }

        free(bytes);
    }

    return root;
}
/////////////////////////////////////////////////////////////////////////////// // Builds a Huffman tree from the data in the in file. // Returns nullptr on error. // The returned value should be freed with free_huffman_tree. HuffmanNode* readCodeTable(Stream& in, uint32_t& dataBytes) { if(!readHeader(in)) { fprintf(stdout, "Error: wrong file signature.\n"); return nullptr; } HuffmanNode* root = new HuffmanNode(NON_LEAF); bool bOk = true; uint32_t nbEntries = in.read( ); dataBytes = in.read( ); while(nbEntries-- > 0) { // [ symbol ] [ number of bits ] [ count of this symbol (# instances ] HuffmanNode* p = root; const Symbol symbol = in.readSymbol(); const Byte numbits = in.readByte(); const uint32_t count = in.read(); std::string s; if(in.inError()) { bOk = false; break; } // note that the bytes are padded. For exameple, // if a symbol takes 11bits, code will read 2 Bytes Byte numbytes = (Byte)numbytes_from_numbits(numbits); Byte* bytes = (Byte*)malloc(numbytes); // read the actual bytes if(in.read(bytes, numbytes)) { // Adds the entry to the Huffman tree. The value of the current bit // is used switch between zero and one child nodes in the tree. // New nodes are added as needed in the tree. for(unsigned int curbit=0; curbit<numbits; ++curbit) { unsigned char bit = get_bit(bytes, curbit); p = p->getSubNode(bit, curbit, symbol, count, numbits); s += bit ? "1" : "0"; } //fprintf(stdout, "symbol: %03d, nbBits: %02d, %03d, nbEntries:%d %s %p %p\n", (int)symbol, (int)numbits, (int)numbytes, (int)nbEntries, s.c_str(), // root->one(), root->zero()); } else { bOk = false; free(bytes); break; } free(bytes); } if(!bOk) { fprintf(stdout, "Cannot read table\n"); delete root; root = nullptr; } return root; }
// Returns the number of bytes that numbytes_from_numbits reports for this
// code's bit length (m_nbBits).
size_t HuffmanCode::nbByte( )const
{
    const size_t byteCount = numbytes_from_numbits(m_nbBits);
    return byteCount;
}