Example #1
0
/*
 * calculate_huffman_codes builds a Huffman tree in place inside pSF
 * and derives the per-symbol codes from it.
 *
 * pSF points to an array of MAX_SYMBOLS huffman_node pointers; unused
 * slots are NULL. The array is reordered with SFComp, and on success
 * (*pSF)[0] is the root of the Huffman tree (NULL when the array held
 * no symbols at all).
 *
 * The return value is a newly allocated, zero-initialised
 * SymbolEncoder, which is an array of Huffman codes indexed by symbol
 * value; ownership passes to the caller. NULL is returned when an
 * allocation fails; in that case every node created so far is still
 * reachable from pSF, so the caller can release it.
 */
static SymbolEncoder*
calculate_huffman_codes(SymbolFrequencies * pSF)
{
    unsigned int i = 0;
    unsigned int n = 0;
    huffman_node *m1 = NULL, *m2 = NULL;
    huffman_node *merged = NULL;
    SymbolEncoder *pSE = NULL;

    /* Sort the symbol frequency array by ascending frequency. */
    qsort((*pSF), MAX_SYMBOLS, sizeof((*pSF)[0]), SFComp);

    /*
     * Get the number of symbols. This relies on SFComp having moved
     * every NULL slot behind the non-NULL ones.
     */
    for(n = 0; n < MAX_SYMBOLS && (*pSF)[n]; ++n)
        ;

    /*
     * Construct a Huffman tree. This code is based
     * on the algorithm given in Managing Gigabytes
     * by Ian Witten et al, 2nd edition, page 34.
     * Note that this implementation uses a simple
     * count instead of probability.
     *
     * The bound is written as i + 1 < n because n is unsigned:
     * n - 1 would wrap around to UINT_MAX when there are no symbols
     * and the loop would then dereference NULL entries.
     */
    for(i = 0; i + 1 < n; ++i)
    {
        /* Set m1 and m2 to the two subsets of least probability. */
        m1 = (*pSF)[0];
        m2 = (*pSF)[1];

        /* Build the set {m1, m2} whose probability is the sum of
         * that of m1 and m2. */
        merged = new_nonleaf_node(m1->count + m2->count, m1, m2);
        if(merged == NULL)
        {
            /* pSF is untouched for this round, so m1 and m2 are
             * still owned by the array. */
            return NULL;
        }

        /* Replace m1 and m2 with the merged set. */
        m1->parent = merged;
        m2->parent = merged;
        (*pSF)[0] = merged;
        (*pSF)[1] = NULL;

        /* Put the merged set into the correct count position in pSF. */
        qsort((*pSF), n, sizeof((*pSF)[0]), SFComp);
    }

    /* Build the SymbolEncoder array from the tree. */
    pSE = (SymbolEncoder*)calloc(1, sizeof(*pSE));
    if(pSE == NULL)
        return NULL;

    /* With no symbols there is no tree; hand back the all-zero encoder. */
    if((*pSF)[0] != NULL)
        build_symbol_encoder((*pSF)[0], pSE);

    return pSE;
}
Example #2
0
static huffman_node*
read_code_table_from_memory(const unsigned char* bufin,
                            unsigned int bufinlen,
                            unsigned int *pindex,
                            uint32_t *pDataBytes)
{
    huffman_node *root = new_nonleaf_node(0, NULL, NULL);
    uint32_t count;

    /* Read the number of entries.
       (it is stored in network byte order). */
    if(memread(bufin, bufinlen, pindex, &count, sizeof(count)))
    {
        free_huffman_tree(root);
        return NULL;
    }

    count = ntohl(count);

    /* Read the number of data bytes this encoding represents. */
    if(memread(bufin, bufinlen, pindex, pDataBytes, sizeof(*pDataBytes)))
    {
        free_huffman_tree(root);
        return NULL;
    }

    *pDataBytes = ntohl(*pDataBytes);

    /* Read the entries. */
    while(count-- > 0)
    {
        unsigned int curbit;
        unsigned char symbol;
        unsigned char numbits;
        unsigned char numbytes;
        unsigned char *bytes;
        huffman_node *p = root;

        if(memread(bufin, bufinlen, pindex, &symbol, sizeof(symbol)))
        {
            free_huffman_tree(root);
            return NULL;
        }

        if(memread(bufin, bufinlen, pindex, &numbits, sizeof(numbits)))
        {
            free_huffman_tree(root);
            return NULL;
        }

        numbytes = (unsigned char)numbytes_from_numbits(numbits);
        bytes = (unsigned char*)malloc(numbytes);
        if(memread(bufin, bufinlen, pindex, bytes, numbytes))
        {
            free(bytes);
            free_huffman_tree(root);
            return NULL;
        }

        /*
         * Add the entry to the Huffman tree. The value
         * of the current bit is used switch between
         * zero and one child nodes in the tree. New nodes
         * are added as needed in the tree.
         */
        for(curbit = 0; curbit < numbits; ++curbit)
        {
            if(get_bit(bytes, curbit))
            {
                if(p->one == NULL)
                {
                    p->one = curbit == (unsigned char)(numbits - 1)
                             ? new_leaf_node(symbol)
                             : new_nonleaf_node(0, NULL, NULL);
                    p->one->parent = p;
                }
                p = p->one;
            }
            else
            {
                if(p->zero == NULL)
                {
                    p->zero = curbit == (unsigned char)(numbits - 1)
                              ? new_leaf_node(symbol)
                              : new_nonleaf_node(0, NULL, NULL);
                    p->zero->parent = p;
                }
                p = p->zero;
            }
        }

        free(bytes);
    }

    return root;
}
Example #3
0
/*
 * read_code_table builds a Huffman tree from the code
 * in the in file. This function returns NULL on error.
 * The returned value should be freed with free_huffman_tree.
 */
static huffman_node*
read_code_table(FILE* in, unsigned int *pDataBytes)
{
	huffman_node *root = new_nonleaf_node(0, NULL, NULL);
	unsigned int count;
	
	/* Read the number of entries.
	   (it is stored in network byte order). */
	if(fread(&count, sizeof(count), 1, in) != 1)
	{
		free_huffman_tree(root);
		return NULL;
	}

	count = ntohl(count);

	/* Read the number of data bytes this encoding represents. */
	if(fread(pDataBytes, sizeof(*pDataBytes), 1, in) != 1)
	{
		free_huffman_tree(root);
		return NULL;
	}

	*pDataBytes = ntohl(*pDataBytes);


	/* Read the entries. */
	while(count-- > 0)
	{
		int c;
		unsigned int curbit;
		unsigned char symbol;
		unsigned char numbits;
		unsigned char numbytes;
		unsigned char *bytes;
		huffman_node *p = root;
		
		if((c = fgetc(in)) == EOF)
		{
			free_huffman_tree(root);
			return NULL;
		}
		symbol = (unsigned char)c;
		
		if((c = fgetc(in)) == EOF)
		{
			free_huffman_tree(root);
			return NULL;
		}
		
		numbits = (unsigned char)c;
		numbytes = (unsigned char)numbytes_from_numbits(numbits);
		bytes = (unsigned char*)malloc(numbytes);
		if(fread(bytes, 1, numbytes, in) != numbytes)
		{
			free(bytes);
			free_huffman_tree(root);
			return NULL;
		}

		/*
		 * Add the entry to the Huffman tree. The value
		 * of the current bit is used switch between
		 * zero and one child nodes in the tree. New nodes
		 * are added as needed in the tree.
		 */
		for(curbit = 0; curbit < numbits; ++curbit)
		{
			if(get_bit(bytes, curbit))
			{
				if(p->one == NULL)
				{
					p->one = curbit == (unsigned char)(numbits - 1)
						? new_leaf_node(symbol)
						: new_nonleaf_node(0, NULL, NULL);
					p->one->parent = p;
				}
				p = p->one;
			}
			else
			{
				if(p->zero == NULL)
				{
					p->zero = curbit == (unsigned char)(numbits - 1)
						? new_leaf_node(symbol)
						: new_nonleaf_node(0, NULL, NULL);
					p->zero->parent = p;
				}
				p = p->zero;
			}
		}
		
		free(bytes);
	}

	return root;
}