Example #1
0
/*
 * Build the decoder's Huffman tree from its data histogram.
 *
 * The histogram entries decoder->datahisto[0 .. numcodes-1] are summed, then
 * a bisection over a weight value is run: each step rebuilds the tree with
 * the midpoint weight and compares the value returned by huffman_build_tree()
 * against decoder->maxbits. Once the search settles, canonical codes are
 * assigned and that call's result is returned.
 */
enum huffman_error huffman_compute_tree_from_histo(struct huffman_decoder* decoder)
{
	uint32_t total = 0;
	uint32_t low = 0;
	uint32_t high;
	int code;

	/* total number of data items recorded in the histogram */
	for (code = 0; code < decoder->numcodes; code++)
		total += decoder->datahisto[code];

	/* bisect between 0 and twice the total */
	high = total * 2;
	for (;;)
	{
		/* rebuild the tree at the midpoint of the current interval */
		uint32_t mid = (high + low) / 2;
		int bits = huffman_build_tree(decoder, total, mid);

		/* result exceeds the limit: shrink the interval from above and retry */
		if (bits > decoder->maxbits)
		{
			high = mid;
			continue;
		}

		/* result fits: raise the lower bound to this weight */
		low = mid;

		/* stop when the raw total worked or the interval has collapsed */
		if (mid == total || (high - low) <= 1)
			break;
	}

	/* derive canonical codes from the code lengths of the last tree built */
	return huffman_assign_canonical_codes(decoder);
}
Example #2
0
END_TEST

/*
 * Checks table_bit_encode() for two characters of books/holmes.txt.
 *
 * Each encoding is compared element by element against the expected bit
 * values, followed by a -1 entry (apparently the end-of-code marker).
 */
START_TEST(test_table_encode)
{
  TreeNode *t = huffman_build_tree("books/holmes.txt");//holmes
  /* Fail cleanly instead of handing a NULL tree to tree_size(). */
  ck_assert_msg(t != NULL, "Tree should not be NULL.");
  ck_assert_int_eq(tree_size(t), 161);

  EncodeTable *etab = table_build(t);
  ck_assert_msg(etab != NULL, "Encode table should not be NULL.");

  /* 'e' is expected to encode as 0,0,0,0 then -1. */
  char* e_encode = table_bit_encode(etab, 'e');
  ck_assert_msg(e_encode != NULL, "Problem finding the encoding for 'e'.");
  ck_assert_int_eq(e_encode[0], 0);
  ck_assert_int_eq(e_encode[1], 0);
  ck_assert_int_eq(e_encode[2], 0);
  ck_assert_int_eq(e_encode[3], 0);
  ck_assert_int_eq(e_encode[4], -1);
  free(e_encode);

  /* ' ' is expected to encode as 1,0 then -1. */
  char* space_encode = table_bit_encode(etab, ' ');
  ck_assert_msg(space_encode != NULL, "Problem finding the encoding for ' '.");
  ck_assert_int_eq(space_encode[0], 1);
  ck_assert_int_eq(space_encode[1], 0);
  ck_assert_int_eq(space_encode[2], -1);
  free(space_encode);

  /* Release through the module destructors rather than a bare free(),
     which would only release the top-level allocation. */
  table_free(etab);
  tree_free(t);
}
Example #3
0
/*
 * Builds a Huffman tree from the file named on the command line, derives the
 * encode table from it and prints that table.
 *
 * Exits with status 1 (after printing usage) when the argument count is not
 * exactly one, or when either the tree or the table cannot be built.
 */
int main (int argc, char *argv[]) {
  TreeNode *root;
  EncodeTable *table;

  /* Exactly one argument is accepted: the input file. */
  if (argc != 2) {
    usage();
    exit(1);
  }

  root = huffman_build_tree(argv[1]);
  if (!root) {
    fputs("Could not build the tree!", stdout);
    usage();
    exit(1);
  }

  table = table_build(root);
  if (!table) {
    fputs("Could not build the table!", stdout);
    usage();
    exit(1);
  }

  table_print(table);

  /* Table first, then the tree it was built from. */
  table_free(table);
  tree_free(root);

  return 0;
}
Example #4
0
END_TEST

/*
 * Builds the tree and encode table for books/holmes.txt and releases both
 * through table_free()/tree_free(), so the destructor under test is actually
 * exercised (a bare free() would bypass it).
 */
START_TEST(test_table_free)
{
  TreeNode *t = huffman_build_tree("books/holmes.txt");//holmes
  /* Fail cleanly instead of handing a NULL tree to tree_size(). */
  ck_assert_msg(t != NULL, "Tree should not be NULL.");
  ck_assert_int_eq(tree_size(t), 161);

  EncodeTable *etab = table_build(t);
  ck_assert_msg(etab != NULL, "Encode table should not be NULL.");

  table_free(etab);
  tree_free(t);
}
Example #5
0
END_TEST

//////////////////////////////////////////////////////////////////////
///////////// huffman unit tests
//////////////////////////////////////////////////////////////////////

/*
 * Builds a Huffman tree for each sample book and checks that the tree is
 * non-NULL and has the expected size.
 *
 * The cases stay unrolled so a failing assertion reports the line of the
 * specific book. Each tree is released with tree_free() rather than free(),
 * which would only release the root allocation.
 */
START_TEST(test_huffman_build_tree)
{
  TreeNode *t;
  t = huffman_build_tree("books/aladdin.txt");
  ck_assert_msg(t != NULL, "Tree should not be NULL.");
  ck_assert_int_eq(tree_size(t), 115);
  tree_free(t);

  t = huffman_build_tree("books/holmes.txt");
  ck_assert_msg(t != NULL, "Tree should not be NULL.");
  ck_assert_int_eq(tree_size(t), 161);
  tree_free(t);

  t = huffman_build_tree("books/iliad.txt");
  ck_assert_msg(t != NULL, "Tree should not be NULL.");
  ck_assert_int_eq(tree_size(t), 131);
  tree_free(t);

  t = huffman_build_tree("books/KJV.txt");
  ck_assert_msg(t != NULL, "Tree should not be NULL.");
  ck_assert_int_eq(tree_size(t), 147);
  tree_free(t);

  t = huffman_build_tree("books/newton.txt");
  ck_assert_msg(t != NULL, "Tree should not be NULL.");
  ck_assert_int_eq(tree_size(t), 187);
  tree_free(t);

  t = huffman_build_tree("books/odyssy.txt");
  ck_assert_msg(t != NULL, "Tree should not be NULL.");
  ck_assert_int_eq(tree_size(t), 161);
  tree_free(t);

  t = huffman_build_tree("books/poems.txt");
  ck_assert_msg(t != NULL, "Tree should not be NULL.");
  ck_assert_int_eq(tree_size(t), 185);
  tree_free(t);

  t = huffman_build_tree("books/shakespeare.txt");
  ck_assert_msg(t != NULL, "Tree should not be NULL.");
  ck_assert_int_eq(tree_size(t), 159);
  tree_free(t);
}
Example #6
0
END_TEST

/*
 * Round-trip check: the encoding that table_bit_encode() produces for a
 * character must be mapped back to that character by huffman_find().
 */
START_TEST(test_huffman_find)
{
  TreeNode *t = huffman_build_tree("books/holmes.txt");//holmes
  /* Fail cleanly instead of handing a NULL tree to tree_size(). */
  ck_assert_msg(t != NULL, "Tree should not be NULL.");
  ck_assert_int_eq(tree_size(t), 161);

  EncodeTable *etab = table_build(t);
  ck_assert_msg(etab != NULL, "Encode table should not be NULL.");

  char* e_encode = table_bit_encode(etab, 'e');
  ck_assert_msg(e_encode != NULL, "Problem finding the encoding for 'e'.");

  char c = huffman_find(t, e_encode);
  ck_assert_int_eq(c, 'e');
  free(e_encode);

  char* b_encode = table_bit_encode(etab, 'b');
  /* Guard the lookup: a missing encoding must fail the test, not crash it. */
  ck_assert_msg(b_encode != NULL, "Problem finding the encoding for 'b'.");
  c = huffman_find(t, b_encode);
  ck_assert_int_eq(c, 'b');
  free(b_encode);

  /* Release through the module destructors rather than leaking them. */
  table_free(etab);
  tree_free(t);
}
Example #7
0
/**
 * Program entrypoint.
 *
 * Reads strings from the input file (or stdin when no file is given),
 * builds a Huffman tree over their symbols, encodes the strings, and writes
 * a decoder table file and an encoded-data file.
 *
 * Recognized options (all prefixed with "--"):
 *   character-map=FILE, table-output=FILE, data-output=FILE,
 *   table-label=LABEL, node-label-prefix=PREFIX, string-label-prefix=PREFIX
 *   (also enables the string table), generate-string-table,
 *   string-table-label=LABEL, append-byte=N (0..255), ignore-case, verbose,
 *   help, usage, version.
 * Any argument not starting with "--" is taken as the input filename; the
 * last one given wins.
 *
 * @return 0 on success, -1 on any error.
 */
int main(int argc, char **argv)
{
    int char_count;
    int string_count;
    int encoded_size;
    unsigned char charmap[256];
    int frequencies[256];
    huffman_node_t *leaf_nodes[256];
    huffman_node_t *code_nodes[256];
    huffman_node_t *root;
    int symbol_count;
    string_list_t *strings;
    FILE *input;
    FILE *table_output;
    FILE *data_output;
    int append_byte = -1;
    int ignore_case = 0;
    const char *input_filename = 0;
    const char *charmap_filename = 0;
    const char *table_output_filename = 0;
    const char *data_output_filename = 0;
    const char *table_label = "";
    const char *node_label_prefix = "";
    const char *string_table_label = "";
    const char *string_label_prefix = "";
    int generate_string_table = 0;
    int verbose = 0;

    /* Process arguments. */
    {
        char *p;
        while ((p = *(++argv))) {
            if (!strncmp("--", p, 2)) {
                const char *opt = &p[2];
                if (!strncmp("character-map=", opt, 14)) {
                    charmap_filename = &opt[14];
                } else if (!strncmp("table-output=", opt, 13)) {
                    table_output_filename = &opt[13];
                } else if (!strncmp("data-output=", opt, 12)) {
                    data_output_filename = &opt[12];
                } else if (!strncmp("table-label=", opt, 12)) {
                    table_label = &opt[12];
                } else if (!strncmp("node-label-prefix=", opt, 18)) {
                    node_label_prefix = &opt[18];
                } else if (!strncmp("string-label-prefix=", opt, 20)) {
                    string_label_prefix = &opt[20];
                    generate_string_table = 1;
                } else if (!strcmp("generate-string-table", opt)) {
                    generate_string_table = 1;
                } else if (!strncmp("string-table-label=", opt, 19)) {
                    string_table_label = &opt[19];
                } else if (!strncmp("append-byte=", opt, 12)) {
                    /* Range-check the full long value before narrowing, so a
                       huge input cannot wrap around into the valid range. */
                    long value = strtol(&opt[12], 0, 0);
                    if ((value < 0) || (value >= 256)) {
                        fprintf(stderr, "huffpuff: --append-byte: value must be in range 0..255\n");
                        return(-1);
                    }
                    append_byte = (int)value;
                } else if (!strcmp("ignore-case", opt)) {
                    ignore_case = 1;
                } else if (!strcmp("verbose", opt)) {
                    verbose = 1;
                } else if (!strcmp("help", opt)) {
                    /* NOTE(review): help()/usage()/version() presumably
                       terminate the program; otherwise parsing continues. */
                    help();
                } else if (!strcmp("usage", opt)) {
                    usage();
                } else if (!strcmp("version", opt)) {
                    version();
                } else {
                    fprintf(stderr, "huffpuff: unrecognized option `%s'\n"
                            "Try `huffpuff --help' or `huffpuff --usage' for more information.\n", p);
                    return(-1);
                }
            } else {
                input_filename = p;
            }
        }
    }

    /* Set default character mapping f(c)=c */
    {
        int i;
        for (i=0; i<256; i++)
            charmap[i] = (unsigned char)i;
    }

    if (charmap_filename) {
        if (verbose)
            fprintf(stdout, "reading character map\n");
        if (!charmap_parse(charmap_filename, charmap)) {
            fprintf(stderr, "error: failed to parse character map `%s'\n",
                    charmap_filename);
            return(-1);
        }
    }

    if (input_filename) {
        input = fopen(input_filename, "rt");
        if (!input) {
            fprintf(stderr, "error: failed to open `%s' for reading\n",
                    input_filename);
            return(-1);
        }
    } else {
        input = stdin;
    }

    /* Read strings to encode. */
    if (verbose)
        fprintf(stdout, "reading strings\n");
    /* NOTE(review): frequencies, char_count and string_count are used below
       without being set here, so read_strings() presumably fills them. */
    strings = read_strings(input, ignore_case, frequencies, &char_count, &string_count);
    fclose(input);

    /* Create Huffman leaf nodes. */
    if (verbose)
        fprintf(stdout, "creating Huffman leaf nodes\n");
    symbol_count = 0;
    {
        int i;
        /* The appended byte occurs once per string, so count it too. */
        if (append_byte != -1)
            frequencies[append_byte] += string_count;
        for (i=0; i<256; i++) {
            if (frequencies[i] > 0) {
                huffman_node_t *node;
                node = huffman_create_node(
                    /*symbol=*/i, /*weight=*/frequencies[i],
                    /*left=*/NULL, /*right=*/NULL);
                /* leaf_nodes is densely packed; code_nodes is indexed by symbol. */
                leaf_nodes[symbol_count++] = node;
                code_nodes[i] = node;
            } else {
                code_nodes[i] = 0;
            }
        }
    }
    if (verbose)
        fprintf(stdout, "  number of symbols: %d\n", symbol_count);

    /* Build the Huffman tree. */
    if (verbose)
        fprintf(stdout, "Building the Huffman tree\n");
    root = huffman_build_tree(leaf_nodes, symbol_count);

    /* Huffman-encode strings. */
    if (verbose)
        fprintf(stdout, "encoding strings\n");
    encoded_size = encode_strings(strings, code_nodes, append_byte);

    /* Sanity check */
    if (verbose)
        fprintf(stdout, "verifying output integrity\n");
    if (!verify_data_integrity(strings, root)) {
        /* Aborts in builds with assertions enabled; otherwise falls
           through to the cleanup below. */
        assert(0);
        /* Cleanup */
        huffman_delete_node(root);
        destroy_string_list(strings);
        return(-1);
    }

    /* Prepare output */
    if (!table_output_filename) {
        table_output_filename = "huffpuff.tab.asm";
    }
    table_output = fopen(table_output_filename, "wt");
    if (!table_output) {
        fprintf(stderr, "error: failed to open `%s' for writing\n",
                table_output_filename);
        /* Cleanup */
        huffman_delete_node(root);
        destroy_string_list(strings);
        return(-1);
    }

    if (!data_output_filename) {
        data_output_filename = "huffpuff.dat.asm";
    }
    data_output = fopen(data_output_filename, "wt");
    if (!data_output) {
        fprintf(stderr, "error: failed to open `%s' for writing\n",
                data_output_filename);
        /* Cleanup; the table file is already open at this point and must
           be closed as well. */
        fclose(table_output);
        huffman_delete_node(root);
        destroy_string_list(strings);
        return(-1);
    }
    fprintf(data_output, "; Huffman-encoded string data automatically generated by huffpuff.\n");

    /* Print the Huffman codes in code length order. */
    if (verbose)
        fprintf(stdout, "writing Huffman decoder table\n");
    fprintf(table_output, "; Huffman decoder table automatically generated by huffpuff.\n");
    if (table_label && strlen(table_label))
        fprintf(table_output, "%s:\n", table_label);
    write_huffman_codes(table_output, root, charmap, node_label_prefix);

    fclose(table_output);

    if (generate_string_table) {
        /* Print string pointer table */
        int i;
        string_list_t *lst;
        if (verbose)
            fprintf(stdout, "writing string pointer table\n");
        if (string_table_label && strlen(string_table_label))
            fprintf(data_output, "%s:\n", string_table_label);
        for (i = 0, lst = strings; lst != 0; lst = lst->next, ++i) {
            fprintf(data_output, ".dw %sString%d\n",
                    string_label_prefix, i);
        }
    }

    /* Write the Huffman-encoded strings. */
    if (verbose)
        fprintf(stdout, "writing encoded string data\n");
    write_huffman_strings(data_output, strings, string_label_prefix);

    fclose(data_output);

    if (verbose) {
        /* Empty input has char_count == 0; report 0% instead of dividing by
           zero. The product is widened to avoid int overflow on large inputs. */
        int ratio = 0;
        if (char_count > 0)
            ratio = (int)(((long long)encoded_size * 100) / char_count);
        fprintf(stdout, "compressed size: %d%%\n", ratio);
    }

    /* Cleanup */
    huffman_delete_node(root);
    destroy_string_list(strings);

    return 0;
}