/* ----------
 * pglz_compress -
 *
 *		Compresses source into dest using strategy.
 *
 *		source/slen describe the raw input.  The compressed stream is
 *		written directly behind the PGLZ_Header at the start of dest.
 *		A NULL strategy selects PGLZ_strategy_default.
 *
 *		Returns true on success, after storing the total compressed
 *		size in dest.  Returns false when the strategy forbids
 *		compression (match_size_good <= 0), when the input is shorter
 *		than min_input_size, or when the output does not stay below the
 *		size limit derived from the strategy.  On a false return dest
 *		may already have been partially written (rawsize and some
 *		output bytes), so the caller must not interpret its contents.
 * ----------
 */
bool
pglz_compress(const char *source, int32 slen, PGLZ_Header *dest,
			  const PGLZ_Strategy *strategy)
{
	unsigned char *bp = ((unsigned char *) dest) + sizeof(PGLZ_Header);
	unsigned char *bstart = bp;
	int			hist_next = 0;
	bool		hist_recycle = false;
	const char *dp = source;
	const char *dend = source + slen;
	unsigned char ctrl_dummy = 0;
	unsigned char *ctrlp = &ctrl_dummy;
	unsigned char ctrlb = 0;
	unsigned char ctrl = 0;
	int32		match_len;
	int32		match_off;
	int32		good_match;
	int32		good_drop;
	int32		result_size;
	int32		result_max;
	int32		need_rate;

	/*
	 * Our fallback strategy is the default.
	 */
	if (strategy == NULL)
		strategy = PGLZ_strategy_default;

	/*
	 * If the strategy forbids compression (at all or if source chunk too
	 * small), fail.
	 */
	if (strategy->match_size_good <= 0 ||
		slen < strategy->min_input_size)
		return false;

	/*
	 * Save the original source size in the header.
	 */
	dest->rawsize = slen;

	/*
	 * Limit the match size to the maximum implementation allowed value
	 */
	if ((good_match = strategy->match_size_good) > PGLZ_MAX_MATCH)
		good_match = PGLZ_MAX_MATCH;
	if (good_match < 17)
		good_match = 17;

	/*
	 * Clamp the drop percentage into 0..100.
	 */
	if ((good_drop = strategy->match_size_drop) < 0)
		good_drop = 0;
	if (good_drop > 100)
		good_drop = 100;

	/*
	 * Initialize the history lists to empty.  We do not need to zero the
	 * hist_entries[] array; its entries are initialized as they are used.
	 */
	memset((void *) hist_start, 0, sizeof(hist_start));

	/*
	 * Compute the maximum result size allowed by the strategy. If the input
	 * size exceeds force_input_size, the max result size is the input size
	 * itself. Otherwise, it is the input size minus the minimum wanted
	 * compression rate.
	 */
	if (slen >= strategy->force_input_size)
		result_max = slen;
	else
	{
		need_rate = strategy->min_comp_rate;
		if (need_rate < 0)
			need_rate = 0;
		else if (need_rate > 99)
			need_rate = 99;

		/*
		 * Compute (slen * need_rate) / 100 without ever forming the product
		 * slen * need_rate, which would overflow int32 for inputs larger
		 * than roughly INT_MAX / 99.  Splitting slen into its multiple-of-100
		 * part and the remainder gives exactly the same quotient.
		 */
		result_max = slen - ((slen / 100) * need_rate +
							 ((slen % 100) * need_rate) / 100);
	}

	/*
	 * Compress the source directly into the output buffer.
	 */
	while (dp < dend)
	{
		/*
		 * If we already exceeded the maximum result size, fail.
		 *
		 * We check once per loop; since the loop body could emit as many as 4
		 * bytes (a control byte and 3-byte tag), PGLZ_MAX_OUTPUT() had better
		 * allow 4 slop bytes.
		 */
		if (bp - bstart >= result_max)
			return false;

		/*
		 * Try to find a match in the history
		 */
		if (pglz_find_match(hist_start, dp, dend, &match_len,
							&match_off, good_match, good_drop))
		{
			/*
			 * Create the tag and add history entries for all matched
			 * characters.
			 */
			pglz_out_tag(ctrlp, ctrlb, ctrl, bp, match_len, match_off);
			while (match_len--)
			{
				pglz_hist_add(hist_start, hist_entries,
							  hist_next, hist_recycle,
							  dp, dend);
				dp++;			/* Do not do this ++ in the line above!		*/
				/* The macro would do it four times - Jan.	*/
			}
		}
		else
		{
			/*
			 * No match found. Copy one literal byte.
			 */
			pglz_out_literal(ctrlp, ctrlb, ctrl, bp, *dp);
			pglz_hist_add(hist_start, hist_entries,
						  hist_next, hist_recycle,
						  dp, dend);
			dp++;				/* Do not do this ++ in the line above!		*/
			/* The macro would do it four times - Jan.	*/
		}
	}

	/*
	 * Write out the last control byte and check that we haven't overrun the
	 * output size allowed by the strategy.
	 */
	*ctrlp = ctrlb;
	result_size = bp - bstart;
	if (result_size >= result_max)
		return false;

	/*
	 * Success - need only fill in the actual length of the compressed datum.
	 */
	SET_VARSIZE_COMPRESSED(dest, result_size + sizeof(PGLZ_Header));

	return true;
}
/* ----------
 * pglz_compress -
 *
 *		Compresses source into dest using strategy.
 *
 *		The compressed stream is written directly behind the
 *		PGLZ_Header at the start of dest.  A NULL strategy selects
 *		PGLZ_strategy_default.
 *
 *		Returns true on success, after storing the total compressed
 *		size in dest.  Returns false when the strategy forbids
 *		compression, when slen lies outside the strategy's
 *		min_input_size..max_input_size range, when the output would
 *		not stay below the required size limit, or when
 *		first_success_by output bytes were produced without a single
 *		history match.  dest may be partially written on failure.
 * ----------
 */
bool
pglz_compress(const char *source, int32 slen, PGLZ_Header *dest,
              const PGLZ_Strategy *strategy)
{
    /* Output cursor and the position where the compressed payload begins. */
    unsigned char *bp = ((unsigned char *) dest) + sizeof(PGLZ_Header);
    unsigned char *out_start = bp;

    /* Input cursor and one-past-the-end of the input. */
    const char *dp = source;
    const char *dend = source + slen;

    /*
     * History bookkeeping.  Numbering starts at 1 -- presumably because the
     * zero-filled hist_start table treats 0 as "no entry"; confirm against
     * pglz_hist_add.
     */
    int         hist_next = 1;
    bool        hist_recycle = false;

    /* Control-byte state shared with the output macros. */
    unsigned char ctrl_dummy = 0;
    unsigned char *ctrlp = &ctrl_dummy;
    unsigned char ctrlb = 0;
    unsigned char ctrl = 0;

    bool        found_match = false;
    int32       match_len;
    int32       match_off;
    int32       good_match;
    int32       good_drop;
    int32       min_rate;
    int32       keep_pct;
    int32       size_limit;
    int32       out_len;
    int         hash_size;
    int         mask;

    /* No strategy supplied: use the default one. */
    if (strategy == NULL)
        strategy = PGLZ_strategy_default;

    /* The strategy may switch compression off entirely ... */
    if (strategy->match_size_good <= 0)
        return false;

    /* ... or restrict it to a range of input sizes. */
    if (slen < strategy->min_input_size || slen > strategy->max_input_size)
        return false;

    /* Record the uncompressed length in the header. */
    dest->rawsize = slen;

    /* Clamp the tuning parameters to the ranges the implementation handles. */
    good_match = strategy->match_size_good;
    good_match = (good_match > PGLZ_MAX_MATCH) ? PGLZ_MAX_MATCH :
        (good_match < 17) ? 17 : good_match;

    good_drop = strategy->match_size_drop;
    good_drop = (good_drop < 0) ? 0 :
        (good_drop > 100) ? 100 : good_drop;

    min_rate = strategy->min_comp_rate;
    min_rate = (min_rate < 0) ? 0 :
        (min_rate > 99) ? 99 : min_rate;

    /*
     * Largest acceptable output: the input size reduced by the required
     * compression rate.  This must never exceed slen, or the output buffer
     * could be overrun.  For small inputs the product is computed exactly;
     * for large ones we divide first (slightly approximate) so that the
     * multiplication cannot overflow.
     */
    keep_pct = 100 - min_rate;
    if (slen <= (INT_MAX / 100))
        size_limit = (slen * keep_pct) / 100;
    else
        size_limit = (slen / 100) * keep_pct;

    /*
     * Choose the hash table size from the input length.  Bigger tables mean
     * fewer collisions but cost more to clear, which dominates for short
     * inputs.  The size has to be a power of two so that size - 1 is a mask.
     */
    if (slen >= 1024)
        hash_size = 8192;
    else if (slen >= 512)
        hash_size = 4096;
    else if (slen >= 256)
        hash_size = 2048;
    else if (slen >= 128)
        hash_size = 1024;
    else
        hash_size = 512;
    mask = hash_size - 1;

    /*
     * Empty the part of the history table that will be used.  hist_entries[]
     * needs no clearing; its entries are initialized as they are used.
     */
    memset(hist_start, 0, hash_size * sizeof(int16));

    /* Main loop: consume the input, emitting tags and literals into dest. */
    while (dp < dend)
    {
        /*
         * Give up once the output has reached the size limit, or once
         * first_success_by bytes were written without any match at all
         * (a quick exit for incompressible data such as pre-compressed
         * input).
         *
         * This is tested once per iteration; one iteration can emit up to
         * 4 bytes (control byte plus 3-byte tag), so PGLZ_MAX_OUTPUT() had
         * better allow 4 slop bytes.
         */
        if (bp - out_start >= size_limit ||
            (!found_match && bp - out_start >= strategy->first_success_by))
            return false;

        if (pglz_find_match(hist_start, dp, dend, &match_len,
                            &match_off, good_match, good_drop, mask))
        {
            found_match = true;

            /* Emit the back-reference, then index every byte it covers. */
            pglz_out_tag(ctrlp, ctrlb, ctrl, bp, match_len, match_off);
            for (; match_len != 0; match_len--)
            {
                pglz_hist_add(hist_start, hist_entries,
                              hist_next, hist_recycle,
                              dp, dend, mask);
                /* Keep the increment separate: the macro would repeat it. */
                dp++;
            }
        }
        else
        {
            /* Nothing usable in the history: emit a single literal byte. */
            pglz_out_literal(ctrlp, ctrlb, ctrl, bp, *dp);
            pglz_hist_add(hist_start, hist_entries,
                          hist_next, hist_recycle,
                          dp, dend, mask);
            /* Keep the increment separate: the macro would repeat it. */
            dp++;
        }
    }

    /* Flush the pending control byte and re-check the size limit. */
    *ctrlp = ctrlb;
    out_len = bp - out_start;
    if (out_len >= size_limit)
        return false;

    /* Success: record the total length of the compressed datum. */
    SET_VARSIZE_COMPRESSED(dest, out_len + sizeof(PGLZ_Header));

    return true;
}
Example #3
0
/* ----------
 * pglz_compress -
 *
 *		Compresses source into dest using strategy.
 *
 *		The payload is written directly behind the PGLZ_Header at the
 *		start of dest; dest->rawsize always receives slen.  A NULL
 *		strategy selects PGLZ_strategy_default.
 *
 *		This variant never fails: if the strategy disables compression
 *		(match_size_good == 0), the input is shorter than
 *		min_input_size, or the compressed output does not stay below
 *		the size limit derived from the strategy, the source bytes are
 *		copied verbatim behind the header instead.
 *
 *		Returns the total size stored in dest->varsize (header plus
 *		payload).
 * ----------
 */
int
pglz_compress(char *source, int32 slen, PGLZ_Header *dest, PGLZ_Strategy *strategy)
{
	unsigned char *bp = ((unsigned char *) dest) + sizeof(PGLZ_Header);
	unsigned char *bstart = bp;
	int			hist_next = 0;
	bool		hist_recycle = false;
	char	   *dp = source;
	char	   *dend = source + slen;
	unsigned char ctrl_dummy = 0;
	unsigned char *ctrlp = &ctrl_dummy;
	unsigned char ctrlb = 0;
	unsigned char ctrl = 0;
	int32		match_len;
	int32		match_off;
	int32		good_match;
	int32		good_drop;
	int32		do_compress = 1;
	int32		result_size = -1;
	int32		result_max;
	int32		need_rate;

	/*
	 * Our fallback strategy is the default.
	 */
	if (strategy == NULL)
		strategy = PGLZ_strategy_default;

	/*
	 * Save the original source size in the header.
	 */
	dest->rawsize = slen;

	/*
	 * If the strategy forbids compression (at all or if source chunk too
	 * small), copy input to output without compression.
	 */
	if (strategy->match_size_good == 0 ||
		slen < strategy->min_input_size)
	{
		memcpy(bstart, source, slen);
		return (dest->varsize = slen + sizeof(PGLZ_Header));
	}

	/*
	 * Limit the match size to the maximum implementation allowed value
	 */
	if ((good_match = strategy->match_size_good) > PGLZ_MAX_MATCH)
		good_match = PGLZ_MAX_MATCH;
	if (good_match < 17)
		good_match = 17;

	/*
	 * Clamp the drop percentage into 0..100.
	 */
	if ((good_drop = strategy->match_size_drop) < 0)
		good_drop = 0;
	if (good_drop > 100)
		good_drop = 100;

	/*
	 * Initialize the history lists to empty.  We do not need to zero the
	 * hist_entries[] array; its entries are initialized as they are used.
	 */
	memset((void *) hist_start, 0, sizeof(hist_start));

	/*
	 * Compute the maximum result size allowed by the strategy. If the input
	 * size exceeds force_input_size, the max result size is the input size
	 * itself. Otherwise, it is the input size minus the minimum wanted
	 * compression rate.
	 */
	if (slen >= strategy->force_input_size)
		result_max = slen;
	else
	{
		need_rate = strategy->min_comp_rate;
		if (need_rate < 0)
			need_rate = 0;
		else if (need_rate > 99)
			need_rate = 99;

		/*
		 * Compute (slen * need_rate) / 100 without ever forming the product
		 * slen * need_rate, which would overflow int32 for inputs larger
		 * than roughly INT_MAX / 99.  Splitting slen into its multiple-of-100
		 * part and the remainder gives exactly the same quotient.
		 */
		result_max = slen - ((slen / 100) * need_rate +
							 ((slen % 100) * need_rate) / 100);
	}

	/*
	 * Compress the source directly into the output buffer.
	 */
	while (dp < dend)
	{
		/*
		 * If we already reached the maximum result size, clear the
		 * compression flag and stop.  This is tested once per iteration.
		 */
		if (bp - bstart >= result_max)
		{
			do_compress = 0;
			break;
		}

		/*
		 * Try to find a match in the history
		 */
		if (pglz_find_match(hist_start, dp, dend, &match_len,
							&match_off, good_match, good_drop))
		{
			/*
			 * Create the tag and add history entries for all matched
			 * characters.
			 */
			pglz_out_tag(ctrlp, ctrlb, ctrl, bp, match_len, match_off);
			while (match_len--)
			{
				pglz_hist_add(hist_start, hist_entries,
							  hist_next, hist_recycle,
							  dp, dend);
				dp++;			/* Do not do this ++ in the line above!		*/
				/* The macro would do it four times - Jan.	*/
			}
		}
		else
		{
			/*
			 * No match found. Copy one literal byte.
			 */
			pglz_out_literal(ctrlp, ctrlb, ctrl, bp, *dp);
			pglz_hist_add(hist_start, hist_entries,
						  hist_next, hist_recycle,
						  dp, dend);
			dp++;				/* Do not do this ++ in the line above!		*/
			/* The macro would do it four times - Jan.	*/
		}
	}

	/*
	 * If we are still in compressing mode, write out the last control byte
	 * and determine if the compression gained the rate requested by the
	 * strategy.
	 */
	if (do_compress)
	{
		*ctrlp = ctrlb;

		result_size = bp - bstart;
		if (result_size >= result_max)
			do_compress = 0;
	}

	/*
	 * Done - if we successfully compressed and matched the strategy's
	 * constraints, return the compressed result. Otherwise copy the original
	 * source over it and return the original length.
	 */
	if (do_compress)
	{
		dest->varsize = result_size + sizeof(PGLZ_Header);
		return VARATT_SIZE(dest);
	}
	else
	{
		memcpy(((char *) dest) + sizeof(PGLZ_Header), source, slen);
		dest->varsize = slen + sizeof(PGLZ_Header);
		return VARATT_SIZE(dest);
	}
}