Example #1
0
static struct hunk *recurse(struct line *a, struct line *b, struct pos *pos,
			    int a1, int a2, int b1, int b2, struct hunk *l)
{
	int i, j, k;

	while (1) {
		/* find the longest match in this chunk */
		k = longest_match(a, b, pos, a1, a2, b1, b2, &i, &j);
		if (!k)
			return l;

		/* and recurse on the remaining chunks on either side */
		l = recurse(a, b, pos, a1, i, b1, j, l);
		if (!l)
			return NULL;

		l->next = (struct hunk *)malloc(sizeof(struct hunk));
		if (!l->next)
			return NULL;

		l = l->next;
		l->a1 = i;
		l->a2 = i + k;
		l->b1 = j;
		l->b2 = j + k;
		l->next = NULL;

		/* tail-recursion didn't happen, so do equivalent iteration */
		a1 = i + k;
		b1 = j + k;
	}
}
Example #2
0
char *id_compress_reduced(char *id, int idlen, size_t *plen)
{
    int count = 0;
    char *p = (char *)malloc(idlen + 1);
    for (int i = 0; i < idlen; i++)
    {
        int matchoff;
        int matchlen;

        int j = 0;
        if (i > (255-33))
            j = i - (255-33);

        if (longest_match(id + j, i - j, id + i, idlen - i, &matchoff, &matchlen))
        {   int off;

            matchoff += j;
            off = i - matchoff;
            //printf("matchoff = %3d, matchlen = %2d, off = %d\n", matchoff, matchlen, off);
            assert(off >= matchlen);

            /*
            if (off <= 8 && matchlen <= 8)
            {
                p[count] = 0xC0 | ((off - 1) << 3) | (matchlen - 1);
                count++;
                i += matchlen - 1;
                continue;
            }
            else
            */
            if (matchlen > 2)
            {
                if (matchlen >= 128)
                    matchlen = 127;    // longest representable match
                p[count + 0] = 0x80 | matchlen;
                p[count + 1] = off + 33;
//                p[count + 2] = 0x80 | off;
                count += 2;
                i += matchlen - 1;
                continue;
            }
            /*
            //2 match has the same compression space, just do raw copy instead?
            else if (matchlen == 2) {
                p[count++] = id[i++];
                p[count++] = id[i++];
                continue;
            }
            */
        }
        p[count] = id[i];
        count++;
    }
    p[count] = 0;
    //printf("old size = %d, new size = %d\n", idlen, count);
    assert(count <= idlen);
    *plen = count;
    return p;
}
Example #3
0
int main()
{
    insert();

    longest_match("binder");
    longest_match("bracelet");
    longest_match("apple");

    prefix_match("aff");
    prefix_match("bi");
    prefix_match("a");

    greedy_match("avoid");
    greedy_match("bring");
    greedy_match("attack");

    traverse();

    tree.erase("bro");
    prefix_match("bro");

    return EXIT_SUCCESS;
}
Example #4
0
static void recurse(struct line *a, struct line *b, struct pos *pos,
		    int a1, int a2, int b1, int b2, struct hunklist *l) {
	int i, j, k;

	/* find the longest match in this chunk */
	k = longest_match(a, b, pos, a1, a2, b1, b2, &i, &j);
	if(!k)
		return;
	/* and recurse on the remaining chunks on either side */
	recurse(a, b, pos, a1, i, b1, j, l);
	l->head->a1 = i;
	l->head->a2 = i + k;
	l->head->b1 = j;
	l->head->b2 = j + k;
	l->head++;
	recurse(a, b, pos, i + k, a2, j + k, b2, l);
}
Example #5
0
block_state deflate_medium(deflate_state *s, int flush)
{
    struct match current_match, next_match;
    
    memset(&current_match, 0, sizeof(struct match));
    memset(&next_match, 0, sizeof(struct match));

    for (;;) {
        IPos hash_head = 0;   /* head of the hash chain */
        int bflush;           /* set if current block must be flushed */
        
        /* Make sure that we always have enough lookahead, except
         * at the end of the input file. We need MAX_MATCH bytes
         * for the next match, plus MIN_MATCH bytes to insert the
         * string following the next current_match.
         */
        if (s->lookahead < MIN_LOOKAHEAD) {
            fill_window(s);
            if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) {
                return need_more;
            }
            if (s->lookahead == 0) break; /* flush the current block */
            next_match.match_length = 0;
        }
        s->prev_length = 2;

        /* Insert the string window[strstart .. strstart+2] in the
         * dictionary, and set hash_head to the head of the hash chain:
         */
        
        /* If we already have a future match from a previous round, just use that */
        if (next_match.match_length > 0) {
            current_match = next_match;
            next_match.match_length = 0;

        } else {
            hash_head = 0;
            if (s->lookahead >= MIN_MATCH) {
                hash_head = insert_string(s, s->strstart);
            }
        
            /* set up the initial match to be a 1 byte literal */
            current_match.match_start = 0;
            current_match.match_length = 1;
            current_match.strstart = s->strstart;
            current_match.orgstart = current_match.strstart;
        
            /* Find the longest match, discarding those <= prev_length.
             * At this point we have always match_length < MIN_MATCH
             */
             
            if (hash_head != 0 && s->strstart - hash_head <= MAX_DIST2) {
                /* To simplify the code, we prevent matches with the string
                 * of window index 0 (in particular we have to avoid a match
                 * of the string with itself at the start of the input file).
                 */
                current_match.match_length = longest_match (s, hash_head);
                current_match.match_start = s->match_start;
                if (current_match.match_length < MIN_MATCH)
                    current_match.match_length = 1;
                if (current_match.match_start >= current_match.strstart) { 
                    /* this can happen due to some restarts */
                    current_match.match_length = 1;
                }
            }
        }
        
        insert_match(s, current_match);

        /* now, look ahead one */
        if (s->lookahead > MIN_LOOKAHEAD) {
            s->strstart = current_match.strstart + current_match.match_length;
            hash_head = insert_string(s, s->strstart);
        
            /* set up the initial match to be a 1 byte literal */
            next_match.match_start = 0;
            next_match.match_length = 1;
            next_match.strstart = s->strstart;
            next_match.orgstart = next_match.strstart;

            /* Find the longest match, discarding those <= prev_length.
             * At this point we have always match_length < MIN_MATCH
             */
            if (hash_head != 0 && s->strstart - hash_head <= MAX_DIST2) {
                /* To simplify the code, we prevent matches with the string
                 * of window index 0 (in particular we have to avoid a match
                 * of the string with itself at the start of the input file).
                 */
                next_match.match_length = longest_match (s, hash_head);
                next_match.match_start = s->match_start;
                if (next_match.match_start >= next_match.strstart)
                    /* this can happen due to some restarts */
                    next_match.match_length = 1;
                if (next_match.match_length < MIN_MATCH)
                    next_match.match_length = 1;
                else
                    fizzle_matches(s, &current_match, &next_match);
            }
            
            /* short matches with a very long distance are rarely a good idea encoding wise */
            if (next_match.match_length == 3 &&
            (next_match.strstart - next_match.match_start) > 12000)
                    next_match.match_length = 1;
            s->strstart = current_match.strstart;
        
        } else {
            next_match.match_length = 0;
        }
        
        /* now emit the current match */
        bflush = emit_match(s, current_match, hash_head);
        
        /* move the "cursor" forward */
        s->strstart += current_match.match_length;        
        
        if (bflush)
            FLUSH_BLOCK(s, 0);
    }
    s->insert = s->strstart < MIN_MATCH-1 ? s->strstart : MIN_MATCH-1;
    if (flush == Z_FINISH) {
        FLUSH_BLOCK(s, 1);
        return finish_done;
    }
    if (s->last_lit)
        FLUSH_BLOCK(s, 0);
    return block_done;
}
Example #6
0
/* ===========================================================================
 * Same as deflate_medium, but achieves better compression. We use a lazy
 * evaluation for matches: a match is finally adopted only if there is
 * no better match at the next window position.
 */
block_state deflate_slow(deflate_state *s, int flush) {
    IPos hash_head;          /* head of hash chain */
    int bflush;              /* set if current block must be flushed */

    /* Process the input block. */
    for (;;) {
        /* Make sure that we always have enough lookahead, except
         * at the end of the input file. We need MAX_MATCH bytes
         * for the next match, plus MIN_MATCH bytes to insert the
         * string following the next match.
         */
        if (s->lookahead < MIN_LOOKAHEAD) {
            fill_window(s);
            if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) {
                return need_more;
            }
            if (s->lookahead == 0)
                break; /* flush the current block */
        }

        /* Insert the string window[strstart .. strstart+2] in the
         * dictionary, and set hash_head to the head of the hash chain:
         */
        hash_head = NIL;
        if (s->lookahead >= MIN_MATCH) {
            hash_head = insert_string(s, s->strstart);
        }

        /* Find the longest match, discarding those <= prev_length.
         */
        s->prev_length = s->match_length, s->prev_match = s->match_start;
        s->match_length = MIN_MATCH-1;

        if (hash_head != NIL && s->prev_length < s->max_lazy_match && s->strstart - hash_head <= MAX_DIST(s)) {
            /* To simplify the code, we prevent matches with the string
             * of window index 0 (in particular we have to avoid a match
             * of the string with itself at the start of the input file).
             */
            s->match_length = longest_match(s, hash_head);
            /* longest_match() sets match_start */

            if (s->match_length <= 5 && (s->strategy == Z_FILTERED
#if TOO_FAR <= 32767
                || (s->match_length == MIN_MATCH && s->strstart - s->match_start > TOO_FAR)
#endif
                )) {

                /* If prev_match is also MIN_MATCH, match_start is garbage
                 * but we will ignore the current match anyway.
                 */
                s->match_length = MIN_MATCH-1;
            }
        }
        /* If there was a match at the previous step and the current
         * match is not better, output the previous match:
         */
        if (s->prev_length >= MIN_MATCH && s->match_length <= s->prev_length) {
            uInt max_insert = s->strstart + s->lookahead - MIN_MATCH;
            /* Do not insert strings in hash table beyond this. */

            check_match(s, s->strstart-1, s->prev_match, s->prev_length);

            _tr_tally_dist(s, s->strstart -1 - s->prev_match, s->prev_length - MIN_MATCH, bflush);

            /* Insert in hash table all strings up to the end of the match.
             * strstart-1 and strstart are already inserted. If there is not
             * enough lookahead, the last two strings are not inserted in
             * the hash table.
             */
            s->lookahead -= s->prev_length-1;

#ifdef NOT_TWEAK_COMPILER
            s->prev_length -= 2;
            do {
                if (++s->strstart <= max_insert) {
                    insert_string(s, s->strstart);
                }
            } while (--s->prev_length != 0);
            s->match_available = 0;
            s->match_length = MIN_MATCH-1;
            s->strstart++;
#else
            {
                uInt mov_fwd = s->prev_length - 2;
                uInt insert_cnt = mov_fwd;
                if (unlikely(insert_cnt > max_insert - s->strstart))
                    insert_cnt = max_insert - s->strstart;

                bulk_insert_str(s, s->strstart + 1, insert_cnt);
                s->prev_length = 0;
                s->match_available = 0;
                s->match_length = MIN_MATCH-1;
                s->strstart += mov_fwd + 1;
            }
#endif /*NOT_TWEAK_COMPILER*/

            if (bflush) FLUSH_BLOCK(s, 0);

        } else if (s->match_available) {
            /* If there was no match at the previous position, output a
             * single literal. If there was a match but the current match
             * is longer, truncate the previous match to a single literal.
             */
            Tracevv((stderr, "%c", s->window[s->strstart-1]));
            _tr_tally_lit(s, s->window[s->strstart-1], bflush);
            if (bflush) {
                FLUSH_BLOCK_ONLY(s, 0);
            }
            s->strstart++;
            s->lookahead--;
            if (s->strm->avail_out == 0)
                return need_more;
        } else {
            /* There is no previous match to compare with, wait for
             * the next step to decide.
             */
            s->match_available = 1;
            s->strstart++;
            s->lookahead--;
        }
    }
    Assert(flush != Z_NO_FLUSH, "no flush?");
    if (s->match_available) {
        Tracevv((stderr, "%c", s->window[s->strstart-1]));
        _tr_tally_lit(s, s->window[s->strstart-1], bflush);
        s->match_available = 0;
    }
    s->insert = s->strstart < MIN_MATCH-1 ? s->strstart : MIN_MATCH-1;
    if (flush == Z_FINISH) {
        FLUSH_BLOCK(s, 1);
        return finish_done;
    }
    if (s->last_lit)
        FLUSH_BLOCK(s, 0);
    return block_done;
}
Example #7
0
    best_size = longest_match_shifter(a, b, a_start, a_end, b_start, b_end, 0, a_offset, b_offset);
    best_size = longest_match_shifter(b, a, b_start, b_end, a_start, a_end, best_size, b_offset, a_offset);
    return best_size;
}

/*
 make_template()

 Creates a template from two strings with a given tolerance.
*/

void make_template(char* template, int tolerance, char* a, char* b, int a_start, int a_end, int b_start, int b_end) {
    int a_offset, b_offset;
    unsigned int best_size;

    best_size = longest_match(a, b, a_start, a_end, b_start, b_end, &a_offset, &b_offset);
    if (best_size == 0) {
        strcat(template, MARKER);
    }
    if (a_offset > a_start && b_offset > b_start) {
        // There's leftover stuff on the left side of BOTH strings.
        make_template(template, tolerance, a, b, a_start, a_offset, b_start, b_offset);
    }
    else if (a_offset > a_start || b_offset > b_start) {
        // There's leftover stuff on the left side of ONLY ONE of the strings.
        strcat(template, MARKER);
    }

    if (best_size > tolerance) {
        strncat(template, a+a_offset, best_size);