/*
 * Build the list of matching hunks for the region a[a1..a2) / b[b1..b2).
 *
 * Each round asks longest_match() for the best match in the current region
 * (its return value is used as the match length, and the two out-parameters
 * as the match start in a and b), recurses on the part to the left of that
 * match, appends a freshly allocated hunk for the match itself after `l`,
 * and then continues with the part to the right of the match.
 *
 * Returns the last hunk of the list (which is `l` itself when nothing
 * matched), or NULL if a hunk could not be allocated.  Nodes that were
 * already linked in are not released here on failure.
 */
static struct hunk *recurse(struct line *a, struct line *b, struct pos *pos,
                            int a1, int a2, int b1, int b2, struct hunk *l)
{
	int a_start, b_start, len;

	for (;;) {
		/* locate the longest match inside the current region */
		len = longest_match(a, b, pos, a1, a2, b1, b2, &a_start, &b_start);
		if (len == 0)
			return l;

		/* first handle everything to the left of the match */
		l = recurse(a, b, pos, a1, a_start, b1, b_start, l);
		if (l == NULL)
			return NULL;

		/* then append a hunk describing the match itself */
		l->next = (struct hunk *)malloc(sizeof(struct hunk));
		if (l->next == NULL)
			return NULL;

		l = l->next;
		l->a1 = a_start;
		l->a2 = a_start + len;
		l->b1 = b_start;
		l->b2 = b_start + len;
		l->next = NULL;

		/*
		 * The right-hand side is handled by looping rather than by a
		 * second recursive call: just narrow the region and go again.
		 */
		a1 = a_start + len;
		b1 = b_start + len;
	}
}
/*
 * Compress the identifier `id` (idlen bytes) with a small back-reference
 * scheme and return a newly malloc'ed, NUL-terminated buffer.
 *
 * Output format, as produced below:
 *   - a literal byte is copied through unchanged;
 *   - a back-reference is two bytes: (0x80 | length) followed by
 *     (distance + 33), where length is in 3..127.
 *
 * Matches of length 2 or less are emitted as literals, since a
 * back-reference already costs two bytes.  (A one-byte short-match form
 * using a 0xC0 prefix was experimented with in the past and is not emitted.)
 *
 * id    - input bytes; only read.
 * idlen - number of input bytes.
 * plen  - receives the number of bytes written (excluding the trailing NUL).
 *
 * Returns the compressed buffer, which the caller must free(), or NULL with
 * *plen set to 0 if the buffer cannot be allocated.
 */
char *id_compress_reduced(char *id, int idlen, size_t *plen)
{
	int count = 0;
	char *p = (char *)malloc(idlen + 1);

	if (p == NULL) {
		*plen = 0;
		return NULL;
	}

	for (int i = 0; i < idlen; i++) {
		int matchoff;
		int matchlen;
		int j = 0;

		/*
		 * Only search the last (255-33) bytes before position i, so
		 * that the distance plus 33 still fits in a single byte.
		 */
		if (i > (255-33))
			j = i - (255-33);

		if (longest_match(id + j, i - j, id + i, idlen - i, &matchoff, &matchlen)) {
			int off;

			/* matchoff is relative to id + j; make it relative to id */
			matchoff += j;
			off = i - matchoff;
			assert(off >= matchlen);

			if (matchlen > 2) {
				if (matchlen >= 128)
					matchlen = 127; /* longest representable match */
				p[count + 0] = (char)(0x80 | matchlen);
				p[count + 1] = (char)(off + 33);
				count += 2;
				/* the loop's i++ accounts for the final matched byte */
				i += matchlen - 1;
				continue;
			}
		}

		/* no usable match: emit the byte as a literal */
		p[count] = id[i];
		count++;
	}

	p[count] = 0;
	assert(count <= idlen);
	*plen = count;
	return p;
}
/*
 * Demo driver: populates the structure via insert(), runs a fixed series of
 * longest/prefix/greedy match queries, traverses it, then erases "bro" from
 * `tree` and queries that prefix once more.
 */
int main() {
    static const char *const longest_queries[] = { "binder", "bracelet", "apple" };
    static const char *const prefix_queries[]  = { "aff", "bi", "a" };
    static const char *const greedy_queries[]  = { "avoid", "bring", "attack" };

    insert();

    for (unsigned n = 0; n < sizeof longest_queries / sizeof longest_queries[0]; ++n) {
        longest_match(longest_queries[n]);
    }

    for (unsigned n = 0; n < sizeof prefix_queries / sizeof prefix_queries[0]; ++n) {
        prefix_match(prefix_queries[n]);
    }

    for (unsigned n = 0; n < sizeof greedy_queries / sizeof greedy_queries[0]; ++n) {
        greedy_match(greedy_queries[n]);
    }

    traverse();

    /* remove one key, then query it again */
    tree.erase("bro");
    prefix_match("bro");

    return EXIT_SUCCESS;
}
/*
 * Record the matching hunks for the region a[a1..a2) / b[b1..b2) into the
 * hunk list `l`.
 *
 * Each round asks longest_match() for the best match in the current region
 * (its return value is used as the match length, and the two out-parameters
 * as the match start in a and b), recurses on the part to the left of the
 * match, writes the match into the slot at l->head and advances l->head,
 * and then carries on with the part to the right of the match.
 *
 * No allocation happens here; the entries behind l->head must already exist.
 */
static void recurse(struct line *a, struct line *b, struct pos *pos,
                    int a1, int a2, int b1, int b2, struct hunklist *l)
{
	int a_start, b_start, len;

	for (;;) {
		/* locate the longest match inside the current region */
		len = longest_match(a, b, pos, a1, a2, b1, b2, &a_start, &b_start);
		if (len == 0)
			return;

		/* everything to the left of the match comes first */
		recurse(a, b, pos, a1, a_start, b1, b_start, l);

		/* store the match itself in the next free slot */
		l->head->a1 = a_start;
		l->head->a2 = a_start + len;
		l->head->b1 = b_start;
		l->head->b2 = b_start + len;
		l->head++;

		/* the right-hand side is handled by the next loop round */
		a1 = a_start + len;
		b1 = b_start + len;
	}
}
/* ===========================================================================
 * Compress as much as possible from the input stream using a one-step
 * look-ahead: for every position a candidate match (current_match) is
 * found, then the position right after it is probed for the following
 * match (next_match), which is carried over to the next loop round.
 *
 * s     - deflate state; strstart, lookahead, prev_length and insert are
 *         updated here.
 * flush - flush mode; with Z_NO_FLUSH the function returns need_more as soon
 *         as fewer than MIN_LOOKAHEAD bytes of lookahead are available.
 *
 * Returns need_more, finish_done or block_done from the explicit return
 * statements below.
 * NOTE(review): FLUSH_BLOCK is a macro defined outside this view; in stock
 * zlib it can also return from the enclosing function - confirm.
 */
block_state deflate_medium(deflate_state *s, int flush)
{
    struct match current_match, next_match;

    memset(&current_match, 0, sizeof(struct match));
    memset(&next_match, 0, sizeof(struct match));

    for (;;) {
        IPos hash_head = 0;      /* head of the hash chain */
        int bflush;              /* set if current block must be flushed */

        /* Make sure that we always have enough lookahead, except
         * at the end of the input file. We need MAX_MATCH bytes
         * for the next match, plus MIN_MATCH bytes to insert the
         * string following the next current_match.
         */
        if (s->lookahead < MIN_LOOKAHEAD) {
            fill_window(s);
            if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) {
                return need_more;
            }
            if (s->lookahead == 0) break; /* flush the current block */
            /* the window was refilled: drop any match carried over */
            next_match.match_length = 0;
        }
        s->prev_length = 2;

        /* Insert the string window[strstart .. strstart+2] in the
         * dictionary, and set hash_head to the head of the hash chain:
         */

        /* If we already have a future match from a previous round, just use that */
        if (next_match.match_length > 0) {
            current_match = next_match;
            next_match.match_length = 0;
        } else {
            hash_head = 0;
            if (s->lookahead >= MIN_MATCH) {
                hash_head = insert_string(s, s->strstart);
            }

            /* set up the initial match to be a 1 byte literal */
            current_match.match_start = 0;
            current_match.match_length = 1;
            current_match.strstart = s->strstart;
            current_match.orgstart = current_match.strstart;

            /* Find the longest match, discarding those <= prev_length.
             * At this point we have always match_length < MIN_MATCH
             */
            if (hash_head != 0 && s->strstart - hash_head <= MAX_DIST2) {
                /* To simplify the code, we prevent matches with the string
                 * of window index 0 (in particular we have to avoid a match
                 * of the string with itself at the start of the input file).
                 */
                current_match.match_length = longest_match (s, hash_head);
                current_match.match_start = s->match_start;
                if (current_match.match_length < MIN_MATCH)
                    current_match.match_length = 1;
                if (current_match.match_start >= current_match.strstart) {
                    /* this can happen due to some restarts */
                    current_match.match_length = 1;
                }
            }
        }

        insert_match(s, current_match);

        /* now, look ahead one */
        if (s->lookahead > MIN_LOOKAHEAD) {
            /* temporarily move the cursor just past the current match */
            s->strstart = current_match.strstart + current_match.match_length;
            hash_head = insert_string(s, s->strstart);

            /* set up the initial match to be a 1 byte literal */
            next_match.match_start = 0;
            next_match.match_length = 1;
            next_match.strstart = s->strstart;
            next_match.orgstart = next_match.strstart;

            /* Find the longest match, discarding those <= prev_length.
             * At this point we have always match_length < MIN_MATCH
             */
            if (hash_head != 0 && s->strstart - hash_head <= MAX_DIST2) {
                /* To simplify the code, we prevent matches with the string
                 * of window index 0 (in particular we have to avoid a match
                 * of the string with itself at the start of the input file).
                 */
                next_match.match_length = longest_match (s, hash_head);
                next_match.match_start = s->match_start;
                if (next_match.match_start >= next_match.strstart)
                    /* this can happen due to some restarts */
                    next_match.match_length = 1;
                if (next_match.match_length < MIN_MATCH)
                    next_match.match_length = 1;
                else
                    fizzle_matches(s, &current_match, &next_match);
            }

            /* short matches with a very long distance are rarely a good idea encoding wise */
            if (next_match.match_length == 3 &&
                (next_match.strstart - next_match.match_start) > 12000)
                next_match.match_length = 1;

            /* restore the cursor to the start of the current match */
            s->strstart = current_match.strstart;
        } else {
            next_match.match_length = 0;
        }

        /* now emit the current match */
        bflush = emit_match(s, current_match, hash_head);

        /* move the "cursor" forward */
        s->strstart += current_match.match_length;

        if (bflush) FLUSH_BLOCK(s, 0);
    }

    s->insert = s->strstart < MIN_MATCH-1 ? s->strstart : MIN_MATCH-1;
    if (flush == Z_FINISH) {
        FLUSH_BLOCK(s, 1);
        return finish_done;
    }
    if (s->last_lit)
        FLUSH_BLOCK(s, 0);
    return block_done;
}
/* ===========================================================================
 * Same as deflate_medium, but achieves better compression. We use a lazy
 * evaluation for matches: a match is finally adopted only if there is
 * no better match at the next window position.
 *
 * s     - deflate state; match_length, prev_length, prev_match, strstart,
 *         lookahead, match_available and insert are updated here.
 * flush - flush mode; with Z_NO_FLUSH the function returns need_more as soon
 *         as fewer than MIN_LOOKAHEAD bytes of lookahead are available.
 *
 * Returns need_more, finish_done or block_done from the explicit return
 * statements below.
 * NOTE(review): FLUSH_BLOCK is a macro defined outside this view; in stock
 * zlib it can also return from the enclosing function - confirm.
 */
block_state deflate_slow(deflate_state *s, int flush)
{
    IPos hash_head;          /* head of hash chain */
    int bflush;              /* set if current block must be flushed */

    /* Process the input block. */
    for (;;) {
        /* Make sure that we always have enough lookahead, except
         * at the end of the input file. We need MAX_MATCH bytes
         * for the next match, plus MIN_MATCH bytes to insert the
         * string following the next match.
         */
        if (s->lookahead < MIN_LOOKAHEAD) {
            fill_window(s);
            if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) {
                return need_more;
            }
            if (s->lookahead == 0) break; /* flush the current block */
        }

        /* Insert the string window[strstart .. strstart+2] in the
         * dictionary, and set hash_head to the head of the hash chain:
         */
        hash_head = NIL;
        if (s->lookahead >= MIN_MATCH) {
            hash_head = insert_string(s, s->strstart);
        }

        /* Find the longest match, discarding those <= prev_length.
         */
        s->prev_length = s->match_length, s->prev_match = s->match_start;
        s->match_length = MIN_MATCH-1;

        if (hash_head != NIL && s->prev_length < s->max_lazy_match &&
            s->strstart - hash_head <= MAX_DIST(s)) {
            /* To simplify the code, we prevent matches with the string
             * of window index 0 (in particular we have to avoid a match
             * of the string with itself at the start of the input file).
             */
            s->match_length = longest_match(s, hash_head);
            /* longest_match() sets match_start */

            if (s->match_length <= 5 && (s->strategy == Z_FILTERED
#if TOO_FAR <= 32767
                || (s->match_length == MIN_MATCH &&
                    s->strstart - s->match_start > TOO_FAR)
#endif
                )) {

                /* If prev_match is also MIN_MATCH, match_start is garbage
                 * but we will ignore the current match anyway.
                 */
                s->match_length = MIN_MATCH-1;
            }
        }

        /* If there was a match at the previous step and the current
         * match is not better, output the previous match:
         */
        if (s->prev_length >= MIN_MATCH && s->match_length <= s->prev_length) {
            uInt max_insert = s->strstart + s->lookahead - MIN_MATCH;
            /* Do not insert strings in hash table beyond this. */

            check_match(s, s->strstart-1, s->prev_match, s->prev_length);

            _tr_tally_dist(s, s->strstart -1 - s->prev_match,
                           s->prev_length - MIN_MATCH, bflush);

            /* Insert in hash table all strings up to the end of the match.
             * strstart-1 and strstart are already inserted. If there is not
             * enough lookahead, the last two strings are not inserted in
             * the hash table.
             */
            s->lookahead -= s->prev_length-1;

#ifdef NOT_TWEAK_COMPILER
            s->prev_length -= 2;
            do {
                if (++s->strstart <= max_insert) {
                    insert_string(s, s->strstart);
                }
            } while (--s->prev_length != 0);
            s->match_available = 0;
            s->match_length = MIN_MATCH-1;
            s->strstart++;
#else
            {
                /* same bookkeeping as the loop above, with the hash
                 * insertions handed to bulk_insert_str() in one call
                 */
                uInt mov_fwd = s->prev_length - 2;
                uInt insert_cnt = mov_fwd;
                if (unlikely(insert_cnt > max_insert - s->strstart))
                    insert_cnt = max_insert - s->strstart;

                bulk_insert_str(s, s->strstart + 1, insert_cnt);
                s->prev_length = 0;
                s->match_available = 0;
                s->match_length = MIN_MATCH-1;
                s->strstart += mov_fwd + 1;
            }
#endif /*NOT_TWEAK_COMPILER*/

            if (bflush) FLUSH_BLOCK(s, 0);

        } else if (s->match_available) {
            /* If there was no match at the previous position, output a
             * single literal. If there was a match but the current match
             * is longer, truncate the previous match to a single literal.
             */
            Tracevv((stderr, "%c", s->window[s->strstart-1]));
            _tr_tally_lit(s, s->window[s->strstart-1], bflush);
            if (bflush) {
                FLUSH_BLOCK_ONLY(s, 0);
            }
            s->strstart++;
            s->lookahead--;
            if (s->strm->avail_out == 0) return need_more;
        } else {
            /* There is no previous match to compare with, wait for
             * the next step to decide.
             */
            s->match_available = 1;
            s->strstart++;
            s->lookahead--;
        }
    }

    Assert(flush != Z_NO_FLUSH, "no flush?");
    if (s->match_available) {
        /* emit the literal that was still pending when input ran out */
        Tracevv((stderr, "%c", s->window[s->strstart-1]));
        _tr_tally_lit(s, s->window[s->strstart-1], bflush);
        s->match_available = 0;
    }
    s->insert = s->strstart < MIN_MATCH-1 ? s->strstart : MIN_MATCH-1;
    if (flush == Z_FINISH) {
        FLUSH_BLOCK(s, 1);
        return finish_done;
    }
    if (s->last_lit)
        FLUSH_BLOCK(s, 0);
    return block_done;
}
best_size = longest_match_shifter(a, b, a_start, a_end, b_start, b_end, 0, a_offset, b_offset); best_size = longest_match_shifter(b, a, b_start, b_end, a_start, a_end, best_size, b_offset, a_offset); return best_size; } /* make_template() Creates a template from two strings with a given tolerance. */ void make_template(char* template, int tolerance, char* a, char* b, int a_start, int a_end, int b_start, int b_end) { int a_offset, b_offset; unsigned int best_size; best_size = longest_match(a, b, a_start, a_end, b_start, b_end, &a_offset, &b_offset); if (best_size == 0) { strcat(template, MARKER); } if (a_offset > a_start && b_offset > b_start) { // There's leftover stuff on the left side of BOTH strings. make_template(template, tolerance, a, b, a_start, a_offset, b_start, b_offset); } else if (a_offset > a_start || b_offset > b_start) { // There's leftover stuff on the left side of ONLY ONE of the strings. strcat(template, MARKER); } if (best_size > tolerance) { strncat(template, a+a_offset, best_size);