/*
 * calculate_huffman_codes builds a Huffman tree out of the symbol
 * frequency array pSF and derives the per-symbol codes from it.
 *
 * pSF is modified in place: it is sorted with SFComp and its entries
 * are repeatedly merged until (*pSF)[0] is the root of the Huffman
 * tree. If pSF contains no symbols at all, no tree is built and
 * (*pSF)[0] stays NULL.
 *
 * The return value is a heap-allocated, zero-initialized SymbolEncoder
 * (an array of huffman codes indexed by symbol value) that has been
 * filled in by build_symbol_encoder. NULL is returned if the
 * SymbolEncoder cannot be allocated.
 */
static SymbolEncoder*
calculate_huffman_codes(SymbolFrequencies * pSF)
{
	unsigned int i = 0;
	unsigned int n = 0;
	huffman_node *m1 = NULL, *m2 = NULL;
	huffman_node *merged = NULL;
	SymbolEncoder *pSE = NULL;

#if 0
	printf("BEFORE SORT\n");
	print_freqs(pSF);
#endif

	/* Sort the symbol frequency array by ascending frequency. */
	qsort((*pSF), MAX_SYMBOLS, sizeof((*pSF)[0]), SFComp);

#if 0
	printf("AFTER SORT\n");
	print_freqs(pSF);
#endif

	/* Get the number of symbols: count the leading non-NULL entries. */
	for(n = 0; n < MAX_SYMBOLS && (*pSF)[n]; ++n)
		;

	/*
	 * Construct a Huffman tree. This code is based
	 * on the algorithm given in Managing Gigabytes
	 * by Ian Witten et al, 2nd edition, page 34.
	 * Note that this implementation uses a simple
	 * count instead of probability.
	 *
	 * The condition is written as "i + 1 < n" rather than
	 * "i < n - 1" because n is unsigned: with n == 0 the
	 * subtraction would wrap around and the loop would
	 * dereference NULL entries.
	 */
	for(i = 0; i + 1 < n; ++i)
	{
		/* Set m1 and m2 to the two subsets of least probability. */
		m1 = (*pSF)[0];
		m2 = (*pSF)[1];

		/* Replace m1 and m2 with a set {m1, m2} whose probability
		 * is the sum of that of m1 and m2. */
		merged = new_nonleaf_node(m1->count + m2->count, m1, m2);
		m1->parent = merged;
		m2->parent = merged;
		(*pSF)[0] = merged;
		(*pSF)[1] = NULL;

		/* Put the new set into the correct count position in pSF. */
		qsort((*pSF), n, sizeof((*pSF)[0]), SFComp);
	}

	/* Build the SymbolEncoder array from the tree. */
	pSE = (SymbolEncoder*)calloc(1, sizeof(*pSE));
	if(pSE == NULL)
		return NULL;

	/* With no symbols there is no tree to walk; the encoder stays empty. */
	if(n > 0)
		build_symbol_encoder((*pSF)[0], pSE);

	return pSE;
}
static huffman_node* read_code_table_from_memory(const unsigned char* bufin, unsigned int bufinlen, unsigned int *pindex, uint32_t *pDataBytes) { huffman_node *root = new_nonleaf_node(0, NULL, NULL); uint32_t count; /* Read the number of entries. (it is stored in network byte order). */ if(memread(bufin, bufinlen, pindex, &count, sizeof(count))) { free_huffman_tree(root); return NULL; } count = ntohl(count); /* Read the number of data bytes this encoding represents. */ if(memread(bufin, bufinlen, pindex, pDataBytes, sizeof(*pDataBytes))) { free_huffman_tree(root); return NULL; } *pDataBytes = ntohl(*pDataBytes); /* Read the entries. */ while(count-- > 0) { unsigned int curbit; unsigned char symbol; unsigned char numbits; unsigned char numbytes; unsigned char *bytes; huffman_node *p = root; if(memread(bufin, bufinlen, pindex, &symbol, sizeof(symbol))) { free_huffman_tree(root); return NULL; } if(memread(bufin, bufinlen, pindex, &numbits, sizeof(numbits))) { free_huffman_tree(root); return NULL; } numbytes = (unsigned char)numbytes_from_numbits(numbits); bytes = (unsigned char*)malloc(numbytes); if(memread(bufin, bufinlen, pindex, bytes, numbytes)) { free(bytes); free_huffman_tree(root); return NULL; } /* * Add the entry to the Huffman tree. The value * of the current bit is used switch between * zero and one child nodes in the tree. New nodes * are added as needed in the tree. */ for(curbit = 0; curbit < numbits; ++curbit) { if(get_bit(bytes, curbit)) { if(p->one == NULL) { p->one = curbit == (unsigned char)(numbits - 1) ? new_leaf_node(symbol) : new_nonleaf_node(0, NULL, NULL); p->one->parent = p; } p = p->one; } else { if(p->zero == NULL) { p->zero = curbit == (unsigned char)(numbits - 1) ? new_leaf_node(symbol) : new_nonleaf_node(0, NULL, NULL); p->zero->parent = p; } p = p->zero; } } free(bytes); } return root; }
/* * read_code_table builds a Huffman tree from the code * in the in file. This function returns NULL on error. * The returned value should be freed with free_huffman_tree. */ static huffman_node* read_code_table(FILE* in, unsigned int *pDataBytes) { huffman_node *root = new_nonleaf_node(0, NULL, NULL); unsigned int count; /* Read the number of entries. (it is stored in network byte order). */ if(fread(&count, sizeof(count), 1, in) != 1) { free_huffman_tree(root); return NULL; } count = ntohl(count); /* Read the number of data bytes this encoding represents. */ if(fread(pDataBytes, sizeof(*pDataBytes), 1, in) != 1) { free_huffman_tree(root); return NULL; } *pDataBytes = ntohl(*pDataBytes); /* Read the entries. */ while(count-- > 0) { int c; unsigned int curbit; unsigned char symbol; unsigned char numbits; unsigned char numbytes; unsigned char *bytes; huffman_node *p = root; if((c = fgetc(in)) == EOF) { free_huffman_tree(root); return NULL; } symbol = (unsigned char)c; if((c = fgetc(in)) == EOF) { free_huffman_tree(root); return NULL; } numbits = (unsigned char)c; numbytes = (unsigned char)numbytes_from_numbits(numbits); bytes = (unsigned char*)malloc(numbytes); if(fread(bytes, 1, numbytes, in) != numbytes) { free(bytes); free_huffman_tree(root); return NULL; } /* * Add the entry to the Huffman tree. The value * of the current bit is used switch between * zero and one child nodes in the tree. New nodes * are added as needed in the tree. */ for(curbit = 0; curbit < numbits; ++curbit) { if(get_bit(bytes, curbit)) { if(p->one == NULL) { p->one = curbit == (unsigned char)(numbits - 1) ? new_leaf_node(symbol) : new_nonleaf_node(0, NULL, NULL); p->one->parent = p; } p = p->one; } else { if(p->zero == NULL) { p->zero = curbit == (unsigned char)(numbits - 1) ? new_leaf_node(symbol) : new_nonleaf_node(0, NULL, NULL); p->zero->parent = p; } p = p->zero; } } free(bytes); } return root; }