/**
 * Build the decoder's Huffman tree from its data histogram.
 *
 * The histogram entries decoder->datahisto[0 .. numcodes-1] are summed, then
 * a binary search over a "total weight" value is run: each probe calls
 * huffman_build_tree() with the candidate weight and compares its return
 * value against decoder->maxbits. The search stops on a probe that fits
 * within maxbits and either used the raw histogram total as its weight or
 * left the search bounds at most 1 apart.
 *
 * @param decoder  decoder whose histogram is read and whose tree is built
 * @return whatever huffman_assign_canonical_codes() reports for the decoder
 */
enum huffman_error huffman_compute_tree_from_histo(struct huffman_decoder* decoder)
{
	uint32_t total = 0;
	uint32_t low = 0;
	uint32_t high;
	int code;

	/* total number of data items recorded in the histogram */
	for (code = 0; code < decoder->numcodes; code++)
		total += decoder->datahisto[code];

	/* search the range [0, 2 * total] for a usable weight */
	high = total * 2;
	for (;;)
	{
		/* probe the midpoint of the current bounds */
		uint32_t weight = (high + low) / 2;
		int maxbits = huffman_build_tree(decoder, total, weight);

		/* result too large: pull the upper bound down and probe again */
		if (maxbits > decoder->maxbits)
		{
			high = weight;
			continue;
		}

		/* result fits: raise the lower bound */
		low = weight;

		/* stop when the raw weights worked or the bounds have converged */
		if (weight == total || (high - low) <= 1)
			break;
	}

	/* hand out canonical codes based on the code lengths just computed */
	return huffman_assign_canonical_codes(decoder);
}
END_TEST START_TEST(test_table_encode) { TreeNode *t = huffman_build_tree("books/holmes.txt");//holmes ck_assert_int_eq(tree_size(t), 161); EncodeTable *etab = table_build(t); ck_assert_msg(etab != NULL, "Encode table should not be NULL."); char* e_encode = table_bit_encode(etab, 'e'); ck_assert_msg(e_encode != NULL, "Problem finding the encoding for 'e'."); ck_assert_int_eq(e_encode[0], 0); ck_assert_int_eq(e_encode[1], 0); ck_assert_int_eq(e_encode[2], 0); ck_assert_int_eq(e_encode[3], 0); ck_assert_int_eq(e_encode[4], -1); free(e_encode); char* space_encode = table_bit_encode(etab, ' '); ck_assert_msg(space_encode != NULL, "Problem finding the encoding for ' '."); ck_assert_int_eq(space_encode[0], 1); ck_assert_int_eq(space_encode[1], 0); ck_assert_int_eq(space_encode[2], -1); free(space_encode); free(t); free(etab); }
/**
 * Entry point: builds a Huffman tree from the file named by the single
 * command-line argument, builds the encode table from that tree, and prints
 * the table.
 *
 * @param argc  must be exactly 2 (program name plus input file)
 * @param argv  argv[1] is the input file passed to huffman_build_tree()
 * @return 0 on success; the process exits with status 1 on a usage error or
 *         when the tree or the table cannot be built
 */
int main (int argc, char *argv[])
{
    if (argc != 2) {
        usage();
        exit(1);
    }

    char *infile = argv[1];

    TreeNode *tree = huffman_build_tree(infile);
    if (tree == NULL) {
        /* Trailing newline keeps the message from running into usage(). */
        printf("Could not build the tree!\n");
        usage();
        exit(1);
    }

    EncodeTable *etab = table_build(tree);
    if (etab == NULL) {
        printf("Could not build the table!\n");
        /* The tree was built successfully, so release it before bailing out. */
        tree_free(tree);
        usage();
        exit(1);
    }

    table_print(etab);

    table_free(etab);
    tree_free(tree);
    return 0;
}
END_TEST START_TEST(test_table_free) { TreeNode *t = huffman_build_tree("books/holmes.txt");//holmes ck_assert_int_eq(tree_size(t), 161); EncodeTable *etab = table_build(t); ck_assert_msg(etab != NULL, "Encode table should not be NULL."); free(t); free(etab); }
END_TEST ////////////////////////////////////////////////////////////////////// ///////////// huffman unit tests ////////////////////////////////////////////////////////////////////// START_TEST(test_huffman_build_tree) { TreeNode *t; t = huffman_build_tree("books/aladdin.txt"); ck_assert_msg(t != NULL, "Tree should not be NULL."); ck_assert_int_eq(tree_size(t), 115); free(t); t = huffman_build_tree("books/holmes.txt"); ck_assert_msg(t != NULL, "Tree should not be NULL."); ck_assert_int_eq(tree_size(t), 161); free(t); t = huffman_build_tree("books/iliad.txt"); ck_assert_msg(t != NULL, "Tree should not be NULL."); ck_assert_int_eq(tree_size(t), 131); free(t); t = huffman_build_tree("books/KJV.txt"); ck_assert_msg(t != NULL, "Tree should not be NULL."); ck_assert_int_eq(tree_size(t), 147); free(t); t = huffman_build_tree("books/newton.txt"); ck_assert_msg(t != NULL, "Tree should not be NULL."); ck_assert_int_eq(tree_size(t), 187); free(t); t = huffman_build_tree("books/odyssy.txt"); ck_assert_msg(t != NULL, "Tree should not be NULL."); ck_assert_int_eq(tree_size(t), 161); free(t); t = huffman_build_tree("books/poems.txt"); ck_assert_msg(t != NULL, "Tree should not be NULL."); ck_assert_int_eq(tree_size(t), 185); free(t); t = huffman_build_tree("books/shakespeare.txt"); ck_assert_msg(t != NULL, "Tree should not be NULL."); ck_assert_int_eq(tree_size(t), 159); free(t); }
END_TEST START_TEST(test_huffman_find) { TreeNode *t = huffman_build_tree("books/holmes.txt");//holmes ck_assert_int_eq(tree_size(t), 161); EncodeTable *etab = table_build(t); ck_assert_msg(etab != NULL, "Encode table should not be NULL."); char* e_encode = table_bit_encode(etab, 'e'); ck_assert_msg(e_encode != NULL, "Problem finding the encoding for 'e'."); char c = huffman_find(t, e_encode); ck_assert_int_eq(c, 'e'); char* b_encode = table_bit_encode(etab, 'b'); c = huffman_find(t, b_encode); ck_assert_int_eq(c, 'b'); }
/** * Program entrypoint. */ int main(int argc, char **argv) { int char_count; int string_count; int encoded_size; unsigned char charmap[256]; int frequencies[256]; huffman_node_t *leaf_nodes[256]; huffman_node_t *code_nodes[256]; huffman_node_t *root; int symbol_count; string_list_t *strings; FILE *input; FILE *table_output; FILE *data_output; int append_byte = -1; int ignore_case = 0; const char *input_filename = 0; const char *charmap_filename = 0; const char *table_output_filename = 0; const char *data_output_filename = 0; const char *table_label = ""; const char *node_label_prefix = ""; const char *string_table_label = ""; const char *string_label_prefix = ""; int generate_string_table = 0; int verbose = 0; /* Process arguments. */ { char *p; while ((p = *(++argv))) { if (!strncmp("--", p, 2)) { const char *opt = &p[2]; if (!strncmp("character-map=", opt, 14)) { charmap_filename = &opt[14]; } else if (!strncmp("table-output=", opt, 13)) { table_output_filename = &opt[13]; } else if (!strncmp("data-output=", opt, 12)) { data_output_filename = &opt[12]; } else if (!strncmp("table-label=", opt, 12)) { table_label = &opt[12]; } else if (!strncmp("node-label-prefix=", opt, 18)) { node_label_prefix = &opt[18]; } else if (!strncmp("string-label-prefix=", opt, 20)) { string_label_prefix = &opt[20]; generate_string_table = 1; } else if (!strcmp("generate-string-table", opt)) { generate_string_table = 1; } else if (!strncmp("string-table-label=", opt, 19)) { string_table_label = &opt[19]; } else if (!strncmp("append-byte=", opt, 12)) { append_byte = strtol(&opt[12], 0, 0); if ((append_byte < 0) || (append_byte >= 256)) { fprintf(stderr, "huffpuff: --append-byte: value must be in range 0..255\n"); return(-1); } } else if (!strcmp("ignore-case", opt)) { ignore_case = 1; } else if (!strcmp("verbose", opt)) { verbose = 1; } else if (!strcmp("help", opt)) { help(); } else if (!strcmp("usage", opt)) { usage(); } else if (!strcmp("version", opt)) { version(); } else { 
fprintf(stderr, "huffpuff: unrecognized option `%s'\n" "Try `huffpuff --help' or `huffpuff --usage' for more information.\n", p); return(-1); } } else { input_filename = p; } } } /* Set default character mapping f(c)=c */ { int i; for (i=0; i<256; i++) charmap[i] = (unsigned char)i; } if (charmap_filename) { if (verbose) fprintf(stdout, "reading character map\n"); if (!charmap_parse(charmap_filename, charmap)) { fprintf(stderr, "error: failed to parse character map `%s'\n", charmap_filename); return(-1); } } if (input_filename) { input = fopen(input_filename, "rt"); if (!input) { fprintf(stderr, "error: failed to open `%s' for reading\n", input_filename); return(-1); } } else { input = stdin; } /* Read strings to encode. */ if (verbose) fprintf(stdout, "reading strings\n"); strings = read_strings(input, ignore_case, frequencies, &char_count, &string_count); fclose(input); /* Create Huffman leaf nodes. */ if (verbose) fprintf(stdout, "creating Huffman leaf nodes\n"); symbol_count = 0; { int i; if (append_byte != -1) frequencies[append_byte] += string_count; for (i=0; i<256; i++) { if (frequencies[i] > 0) { huffman_node_t *node; node = huffman_create_node( /*symbol=*/i, /*weight=*/frequencies[i], /*left=*/NULL, /*right=*/NULL); leaf_nodes[symbol_count++] = node; code_nodes[i] = node; } else { code_nodes[i] = 0; } } } if (verbose) fprintf(stdout, " number of symbols: %d\n", symbol_count); /* Build the Huffman tree. */ if (verbose) fprintf(stdout, "Building the Huffman tree\n"); root = huffman_build_tree(leaf_nodes, symbol_count); /* Huffman-encode strings. 
*/ if (verbose) fprintf(stdout, "encoding strings\n"); encoded_size = encode_strings(strings, code_nodes, append_byte); /* Sanity check */ if (verbose) fprintf(stdout, "verifying output integrity\n"); if (!verify_data_integrity(strings, root)) { assert(0); /* Cleanup */ huffman_delete_node(root); destroy_string_list(strings); return(-1); } /* Prepare output */ if (!table_output_filename) { table_output_filename = "huffpuff.tab.asm"; } table_output = fopen(table_output_filename, "wt"); if (!table_output) { fprintf(stderr, "error: failed to open `%s' for writing\n", table_output_filename); /* Cleanup */ huffman_delete_node(root); destroy_string_list(strings); return(-1); } if (!data_output_filename) { data_output_filename = "huffpuff.dat.asm"; } data_output = fopen(data_output_filename, "wt"); if (!data_output) { fprintf(stderr, "error: failed to open `%s' for writing\n", data_output_filename); /* Cleanup */ huffman_delete_node(root); destroy_string_list(strings); return(-1); } fprintf(data_output, "; Huffman-encoded string data automatically generated by huffpuff.\n"); /* Print the Huffman codes in code length order. */ if (verbose) fprintf(stdout, "writing Huffman decoder table\n"); fprintf(table_output, "; Huffman decoder table automatically generated by huffpuff.\n"); if (table_label && strlen(table_label)) fprintf(table_output, "%s:\n", table_label); write_huffman_codes(table_output, root, charmap, node_label_prefix); fclose(table_output); if (generate_string_table) { /* Print string pointer table */ int i; string_list_t *lst; if (verbose) fprintf(stdout, "writing string pointer table\n"); if (string_table_label && strlen(string_table_label)) fprintf(data_output, "%s:\n", string_table_label); for (i = 0, lst = strings; lst != 0; lst = lst->next, ++i) { fprintf(data_output, ".dw %sString%d\n", string_label_prefix, i); } } /* Write the Huffman-encoded strings. 
*/ if (verbose) fprintf(stdout, "writing encoded string data\n"); write_huffman_strings(data_output, strings, string_label_prefix); fclose(data_output); if (verbose) fprintf(stdout, "compressed size: %d%%\n", (encoded_size*100) / char_count); /* Cleanup */ huffman_delete_node(root); destroy_string_list(strings); return 0; }