/* ----------
 * pglz_compress -
 *
 *		Compresses source into dest using strategy.
 *
 *		source, slen	input bytes and their count
 *		dest			output area; the compressed bytes are written
 *						starting sizeof(PGLZ_Header) bytes into it
 *		strategy		tuning parameters, or NULL to use
 *						PGLZ_strategy_default
 *
 *		Returns true when the input was compressed to fewer than the
 *		strategy's maximum number of bytes; dest's size field has then been
 *		set with SET_VARSIZE_COMPRESSED.  Returns false when the strategy
 *		forbids compression of this input or the output grew to the allowed
 *		maximum; in the latter case dest->rawsize and part of the output
 *		area have already been written.
 * ----------
 */
bool
pglz_compress(const char *source, int32 slen, PGLZ_Header *dest,
			  const PGLZ_Strategy *strategy)
{
	unsigned char *bp = ((unsigned char *) dest) + sizeof(PGLZ_Header);
	unsigned char *bstart = bp;
	int			hist_next = 0;
	bool		hist_recycle = false;
	const char *dp = source;
	const char *dend = source + slen;
	/* ctrlp starts out aimed at ctrl_dummy rather than at the output buffer */
	unsigned char ctrl_dummy = 0;
	unsigned char *ctrlp = &ctrl_dummy;
	unsigned char ctrlb = 0;
	unsigned char ctrl = 0;
	int32		match_len;
	int32		match_off;
	int32		good_match;
	int32		good_drop;
	int32		result_size;
	int32		result_max;
	int32		need_rate;

	/*
	 * Our fallback strategy is the default.
	 */
	if (strategy == NULL)
		strategy = PGLZ_strategy_default;

	/*
	 * If the strategy forbids compression (at all or if source chunk too
	 * small), fail.
	 */
	if (strategy->match_size_good <= 0 ||
		slen < strategy->min_input_size)
		return false;

	/*
	 * Save the original source size in the header.
	 */
	dest->rawsize = slen;

	/*
	 * Limit the match size to the maximum implementation allowed value,
	 * and force it to be at least 17.
	 */
	good_match = strategy->match_size_good;
	if (good_match > PGLZ_MAX_MATCH)
		good_match = PGLZ_MAX_MATCH;
	if (good_match < 17)
		good_match = 17;

	/*
	 * Clamp the drop percentage into 0..100.
	 */
	good_drop = strategy->match_size_drop;
	if (good_drop < 0)
		good_drop = 0;
	if (good_drop > 100)
		good_drop = 100;

	/*
	 * Initialize the history lists to empty.  We do not need to zero the
	 * hist_entries[] array; its entries are initialized as they are used.
	 */
	memset(hist_start, 0, sizeof(hist_start));

	/*
	 * Compute the maximum result size allowed by the strategy.  If the input
	 * size exceeds force_input_size, the max result size is the input size
	 * itself.  Otherwise, it is the input size minus the minimum wanted
	 * compression rate.
	 */
	if (slen >= strategy->force_input_size)
		result_max = slen;
	else
	{
		need_rate = strategy->min_comp_rate;
		if (need_rate < 0)
			need_rate = 0;
		else if (need_rate > 99)
			need_rate = 99;

		/*
		 * This equals (slen * need_rate) / 100, but the product slen *
		 * need_rate is never formed, so a large slen cannot overflow int32.
		 * Writing slen = 100 * q + r, the quotient is q * need_rate +
		 * (r * need_rate) / 100, and both partial products stay in range.
		 */
		result_max = slen - ((slen / 100) * need_rate +
							 ((slen % 100) * need_rate) / 100);
	}

	/*
	 * Compress the source directly into the output buffer.
	 */
	while (dp < dend)
	{
		/*
		 * If we already exceeded the maximum result size, fail.
		 *
		 * We check once per loop; since the loop body could emit as many as 4
		 * bytes (a control byte and 3-byte tag), PGLZ_MAX_OUTPUT() had better
		 * allow 4 slop bytes.
		 */
		if (bp - bstart >= result_max)
			return false;

		/*
		 * Try to find a match in the history
		 */
		if (pglz_find_match(hist_start, dp, dend, &match_len,
							&match_off, good_match, good_drop))
		{
			/*
			 * Create the tag and add history entries for all matched
			 * characters.
			 */
			pglz_out_tag(ctrlp, ctrlb, ctrl, bp, match_len, match_off);
			while (match_len--)
			{
				pglz_hist_add(hist_start, hist_entries,
							  hist_next, hist_recycle,
							  dp, dend);
				dp++;			/* Do not do this ++ in the line above! */
				/* The macro would do it four times - Jan.  */
			}
		}
		else
		{
			/*
			 * No match found. Copy one literal byte.
			 */
			pglz_out_literal(ctrlp, ctrlb, ctrl, bp, *dp);
			pglz_hist_add(hist_start, hist_entries,
						  hist_next, hist_recycle,
						  dp, dend);
			dp++;				/* Do not do this ++ in the line above! */
			/* The macro would do it four times - Jan.  */
		}
	}

	/*
	 * Write out the last control byte and check that we haven't overrun the
	 * output size allowed by the strategy.
	 */
	*ctrlp = ctrlb;
	result_size = bp - bstart;
	if (result_size >= result_max)
		return false;

	/*
	 * Success - need only fill in the actual length of the compressed datum.
	 */
	SET_VARSIZE_COMPRESSED(dest, result_size + sizeof(PGLZ_Header));

	return true;
}
/* ----------
 * pglz_compress -
 *
 *		Compresses source into dest using strategy.
 *
 *		Returns false if the strategy rules out compressing this input, if
 *		the output reaches the strategy's size limit, or if
 *		first_success_by output bytes were produced before any match was
 *		found.  Returns true otherwise, after storing the compressed length
 *		in dest with SET_VARSIZE_COMPRESSED.  A NULL strategy selects
 *		PGLZ_strategy_default.
 * ----------
 */
bool
pglz_compress(const char *source, int32 slen, PGLZ_Header *dest,
			  const PGLZ_Strategy *strategy)
{
	unsigned char *bstart = ((unsigned char *) dest) + sizeof(PGLZ_Header);
	unsigned char *bp = bstart;
	const char *dp = source;
	const char *dend = source + slen;
	int			hist_next = 1;
	bool		hist_recycle = false;
	bool		matched_once = false;
	unsigned char ctrl_dummy = 0;
	unsigned char *ctrlp = &ctrl_dummy;
	unsigned char ctrlb = 0;
	unsigned char ctrl = 0;
	int32		match_len;
	int32		match_off;
	int32		good_match;
	int32		good_drop;
	int32		min_rate;
	int32		out_limit;
	int32		result_size;
	int			table_size;
	int			mask;

	/* No strategy given: use the default one. */
	if (strategy == NULL)
		strategy = PGLZ_strategy_default;

	/* Compression switched off entirely by the strategy? */
	if (strategy->match_size_good <= 0)
		return false;

	/* Input length outside the range the strategy accepts? */
	if (slen < strategy->min_input_size || slen > strategy->max_input_size)
		return false;

	/* Record the uncompressed length in the header. */
	dest->rawsize = slen;

	/*
	 * Bring the tuning parameters into their supported ranges.  In each
	 * case the upper/lower test order matches the if / else-if chains this
	 * replaces.
	 */
	good_match = strategy->match_size_good;
	good_match = (good_match > PGLZ_MAX_MATCH) ? PGLZ_MAX_MATCH :
		((good_match < 17) ? 17 : good_match);

	good_drop = strategy->match_size_drop;
	good_drop = (good_drop < 0) ? 0 :
		((good_drop > 100) ? 100 : good_drop);

	min_rate = strategy->min_comp_rate;
	min_rate = (min_rate < 0) ? 0 :
		((min_rate > 99) ? 99 : min_rate);

	/*
	 * The output may occupy at most (100 - min_rate) percent of the input.
	 * That is never more than slen, so the caller's buffer is not overrun.
	 * For inputs too large to multiply by 100 safely, divide first and
	 * accept the slightly smaller, approximate limit.
	 */
	if (slen <= (INT_MAX / 100))
		out_limit = (slen * (100 - min_rate)) / 100;
	else
		out_limit = (slen / 100) * (100 - min_rate);

	/*
	 * Pick the hash table size from the input length.  A bigger table has
	 * fewer collisions but costs more to clear, and for short inputs that
	 * clearing cost dominates.  Every size here is a power of two so that
	 * size - 1 can serve as a bit mask.
	 */
	if (slen >= 1024)
		table_size = 8192;
	else if (slen >= 512)
		table_size = 4096;
	else if (slen >= 256)
		table_size = 2048;
	else if (slen >= 128)
		table_size = 1024;
	else
		table_size = 512;
	mask = table_size - 1;

	/*
	 * Empty the part of the history start table that will be used.
	 * hist_entries[] needs no clearing; entries are set up as they are
	 * taken into use.
	 */
	memset(hist_start, 0, table_size * sizeof(int16));

	/*
	 * Main loop: each pass consumes either one literal byte or one whole
	 * match from the input.
	 */
	while (dp < dend)
	{
		/*
		 * Give up once the output has reached the limit.  This is tested
		 * only once per pass, and one pass can emit up to 4 bytes (a
		 * control byte plus a 3-byte tag), so PGLZ_MAX_OUTPUT() had better
		 * allow 4 slop bytes.
		 */
		if (bp - bstart >= out_limit)
			return false;

		/*
		 * Also give up if first_success_by bytes have been emitted and not
		 * a single match has turned up; this bails out quickly on
		 * incompressible (e.g. already compressed) input.
		 */
		if (!matched_once && bp - bstart >= strategy->first_success_by)
			return false;

		if (!pglz_find_match(hist_start, dp, dend, &match_len,
							 &match_off, good_match, good_drop, mask))
		{
			/* Nothing in the history matches: emit one literal byte. */
			pglz_out_literal(ctrlp, ctrlb, ctrl, bp, *dp);
			pglz_hist_add(hist_start, hist_entries,
						  hist_next, hist_recycle,
						  dp, dend, mask);
			dp++;				/* keep the ++ out of the macro arguments: */
			/* the macro would evaluate it several times */
			continue;
		}

		/*
		 * A match was found: emit its tag, then enter every matched input
		 * byte into the history.
		 */
		pglz_out_tag(ctrlp, ctrlb, ctrl, bp, match_len, match_off);
		while (match_len--)
		{
			pglz_hist_add(hist_start, hist_entries,
						  hist_next, hist_recycle,
						  dp, dend, mask);
			dp++;				/* keep the ++ out of the macro arguments: */
			/* the macro would evaluate it several times */
		}
		matched_once = true;
	}

	/*
	 * Store the pending control byte, then make sure the finished output is
	 * still below the limit.
	 */
	*ctrlp = ctrlb;
	result_size = bp - bstart;
	if (result_size >= out_limit)
		return false;

	/* Success: record the total length of the compressed datum. */
	SET_VARSIZE_COMPRESSED(dest, result_size + sizeof(PGLZ_Header));

	return true;
}
/* ----------
 * pglz_compress -
 *
 *		Compresses source into dest using strategy.
 *
 *		source, slen	input bytes and their count
 *		dest			output area; data is written starting
 *						sizeof(PGLZ_Header) bytes into it
 *		strategy		tuning parameters, or NULL to use
 *						PGLZ_strategy_default
 *
 *		This variant never fails.  If the strategy forbids compression, or
 *		the compressed output does not stay below the size the strategy
 *		allows, the source bytes are copied into dest unchanged.  In every
 *		case dest->rawsize is set to slen, dest->varsize is set to the
 *		stored size including the header, and that size is returned.
 * ----------
 */
int
pglz_compress(char *source, int32 slen, PGLZ_Header *dest,
			  PGLZ_Strategy *strategy)
{
	unsigned char *bp = ((unsigned char *) dest) + sizeof(PGLZ_Header);
	unsigned char *bstart = bp;
	int			hist_next = 0;
	bool		hist_recycle = false;
	char	   *dp = source;
	char	   *dend = source + slen;
	/* ctrlp starts out aimed at ctrl_dummy rather than at the output buffer */
	unsigned char ctrl_dummy = 0;
	unsigned char *ctrlp = &ctrl_dummy;
	unsigned char ctrlb = 0;
	unsigned char ctrl = 0;
	int32		match_len;
	int32		match_off;
	int32		good_match;
	int32		good_drop;
	int32		do_compress = 1;
	int32		result_size = -1;
	int32		result_max;
	int32		need_rate;

	/*
	 * Our fallback strategy is the default.
	 */
	if (strategy == NULL)
		strategy = PGLZ_strategy_default;

	/*
	 * Save the original source size in the header.
	 */
	dest->rawsize = slen;

	/*
	 * If the strategy forbids compression (at all or if source chunk too
	 * small), copy input to output without compression.
	 */
	if (strategy->match_size_good == 0 ||
		slen < strategy->min_input_size)
	{
		memcpy(bstart, source, slen);
		return (dest->varsize = slen + sizeof(PGLZ_Header));
	}

	/*
	 * Limit the match size to the maximum implementation allowed value,
	 * and force it to be at least 17.
	 */
	good_match = strategy->match_size_good;
	if (good_match > PGLZ_MAX_MATCH)
		good_match = PGLZ_MAX_MATCH;
	if (good_match < 17)
		good_match = 17;

	/*
	 * Clamp the drop percentage into 0..100.
	 */
	good_drop = strategy->match_size_drop;
	if (good_drop < 0)
		good_drop = 0;
	if (good_drop > 100)
		good_drop = 100;

	/*
	 * Initialize the history lists to empty.  We do not need to zero the
	 * hist_entries[] array; its entries are initialized as they are used.
	 */
	memset(hist_start, 0, sizeof(hist_start));

	/*
	 * Compute the maximum result size allowed by the strategy.  If the input
	 * size exceeds force_input_size, the max result size is the input size
	 * itself.  Otherwise, it is the input size minus the minimum wanted
	 * compression rate.
	 */
	if (slen >= strategy->force_input_size)
		result_max = slen;
	else
	{
		need_rate = strategy->min_comp_rate;
		if (need_rate < 0)
			need_rate = 0;
		else if (need_rate > 99)
			need_rate = 99;

		/*
		 * This equals (slen * need_rate) / 100, but the product slen *
		 * need_rate is never formed, so a large slen cannot overflow int32.
		 * Writing slen = 100 * q + r, the quotient is q * need_rate +
		 * (r * need_rate) / 100, and both partial products stay in range.
		 */
		result_max = slen - ((slen / 100) * need_rate +
							 ((slen % 100) * need_rate) / 100);
	}

	/*
	 * Compress the source directly into the output buffer.
	 */
	while (dp < dend)
	{
		/*
		 * If we already reached the maximum result size, clear the
		 * compression flag and stop.  The test runs once per iteration.
		 */
		if (bp - bstart >= result_max)
		{
			do_compress = 0;
			break;
		}

		/*
		 * Try to find a match in the history
		 */
		if (pglz_find_match(hist_start, dp, dend, &match_len,
							&match_off, good_match, good_drop))
		{
			/*
			 * Create the tag and add history entries for all matched
			 * characters.
			 */
			pglz_out_tag(ctrlp, ctrlb, ctrl, bp, match_len, match_off);
			while (match_len--)
			{
				pglz_hist_add(hist_start, hist_entries,
							  hist_next, hist_recycle,
							  dp, dend);
				dp++;			/* Do not do this ++ in the line above! */
				/* The macro would do it four times - Jan.  */
			}
		}
		else
		{
			/*
			 * No match found. Copy one literal byte.
			 */
			pglz_out_literal(ctrlp, ctrlb, ctrl, bp, *dp);
			pglz_hist_add(hist_start, hist_entries,
						  hist_next, hist_recycle,
						  dp, dend);
			dp++;				/* Do not do this ++ in the line above! */
			/* The macro would do it four times - Jan.  */
		}
	}

	/*
	 * If we are still in compressing mode, write out the last control byte
	 * and determine if the compression gained the rate requested by the
	 * strategy.
	 */
	if (do_compress)
	{
		*ctrlp = ctrlb;
		result_size = bp - bstart;
		if (result_size >= result_max)
			do_compress = 0;
	}

	/*
	 * Done - if we successfully compressed and matched the strategy's
	 * constraints, return the compressed result.  Otherwise copy the original
	 * source over it and return the original length.
	 */
	if (do_compress)
	{
		dest->varsize = result_size + sizeof(PGLZ_Header);
		return VARATT_SIZE(dest);
	}
	else
	{
		memcpy(((char *) dest) + sizeof(PGLZ_Header), source, slen);
		dest->varsize = slen + sizeof(PGLZ_Header);
		return VARATT_SIZE(dest);
	}
}