Beispiel #1
0
/*
 * search_trie - match `word` against the trie starting at `curr` and, once
 * the whole word has been matched, print the words below that node.
 *
 * curr:   node to start matching at; a NULL node ends the search silently.
 * word:   NUL-terminated part of the search key that is still unmatched.
 * prefix: caller-owned, NUL-terminated buffer. The matched part of each
 *         visited node's string is appended to it before descending, and
 *         it is handed to print_all_words() on a full match.
 *         NOTE(review): the append is not bounded by the buffer's capacity
 *         (it is not known here), so the caller must size it for the
 *         longest path -- confirm.
 *
 * The next unmatched character selects the child to descend into. A
 * character that is not a letter a-z (after lowercasing) has no child slot,
 * so the search stops there instead of indexing outside curr->child[].
 */
void
search_trie(trie_t *curr, char *word, char *prefix)
{
	int	indx;
	int	match_len;
	int	word_len;
	char	next_ch;

	if (!curr) {
		return;
	}

	word_len = (int)strlen(word);

	match_len = find_longest_match(curr, word, word + word_len - 1);
	printf("longest match in %s for %s is %d\n", curr->str, word, match_len);

	if (match_len == word_len) {
		/* The whole key is consumed: everything below curr matches. */
		print_all_words(curr, prefix);
		return;
	}

	if (match_len < word_len) {
		next_ch = word[match_len];
		/* <ctype.h> functions require an unsigned char value. */
		indx = tolower((unsigned char)next_ch) - 'a';
		if (indx < 0 || indx > 'z' - 'a') {
			/*
			 * Not a letter: no child can exist for it.
			 * NOTE(review): assumes child[] has one slot per
			 * letter a-z -- confirm against trie_t.
			 */
			return;
		}
		strncat(prefix, curr->str, match_len);
		search_trie(curr->child[indx], word + match_len, prefix);
	}
}
/*
 * Returns the list of matching blocks between sequences `a` and `b`.
 *
 * The result is cached in `matching_blocks`: a non-empty cache is returned
 * as is. Otherwise the longest match of the full range is found, then the
 * ranges to its left and right are searched in turn (using an explicit
 * stack instead of recursion). The collected blocks are sorted with
 * matchLessThan, runs of directly adjacent blocks are merged into one, and
 * a zero-length Match(a.count(), b.count(), 0) is appended as terminator.
 */
QList<Match> SequenceMatcher::get_matching_blocks()
{
    if (!matching_blocks.isEmpty())
        return matching_blocks;

    const int sizeA = a.count();
    const int sizeB = b.count();

    // Sub-ranges still waiting to be searched; processed last-in first-out.
    QList<Offsets> pending;
    pending.append(Offsets(0, sizeA, 0, sizeB));

    while (!pending.isEmpty()) {
        const Offsets range = pending.takeLast();
        const Match found = find_longest_match(range.a_low, range.a_high,
                                               range.b_low, range.b_high);
        const int posA = found.i;
        const int posB = found.j;
        const int len = found.size;
        if (!len)
            continue;   // nothing in common inside this range

        matching_blocks.append(found);

        // Queue the part before the match, then the part after it.
        if (range.a_low < posA && range.b_low < posB)
            pending.append(Offsets(range.a_low, posA, range.b_low, posB));
        if (posA + len < range.a_high && posB + len < range.b_high)
            pending.append(Offsets(posA + len, range.a_high,
                                   posB + len, range.b_high));
    }
    qSort(matching_blocks.begin(), matching_blocks.end(), matchLessThan);

    // Collapse blocks that directly continue the previous one in both
    // sequences into a single, longer block.
    int runA = 0;
    int runB = 0;
    int runLen = 0;
    QList<Match> merged;
    for (int n = 0; n < matching_blocks.count(); ++n) {
        const Match &block = matching_blocks.at(n);
        const int nextA = block.i;
        const int nextB = block.j;
        const int nextLen = block.size;

        if (runA + runLen == nextA && runB + runLen == nextB) {
            runLen += nextLen;
            continue;
        }

        if (runLen)
            merged.append(Match(runA, runB, runLen));
        runA = nextA;
        runB = nextB;
        runLen = nextLen;
    }
    if (runLen)
        merged.append(Match(runA, runB, runLen));

    // Zero-length terminator positioned at the ends of both sequences.
    merged.append(Match(sizeA, sizeB, 0));

    matching_blocks = merged;
    return matching_blocks;
}
Beispiel #3
0
/*
 * Encoder step: search for the longest match for the input at the current
 * scan position (hse->match_scan_index).
 *
 * Returns HSES_SAVE_BACKLOG once the scan index has reached the end of the
 * usable input (the input size, less the lookahead size unless the encoder
 * is finishing). Otherwise runs find_longest_match() over [start, end] and
 * returns HSES_YIELD_TAG_BIT:
 *   - no match: the scan index is advanced, FLAG_HAS_LITERAL is set and
 *     hse->match_length is zeroed;
 *   - match:    hse->match_pos and hse->match_length record it.
 */
static HSE_state ICACHE_FLASH_ATTR st_step_search(heatshrink_encoder *hse) {
    uint16_t buf_size = get_input_buffer_size(hse);
    uint16_t lookahead = get_lookahead_size(hse);
    uint16_t scan_idx = hse->match_scan_index;
    LOG("## step_search, scan @ +%d (%d/%d), input size %d\n",
        scan_idx, hse->input_size + scan_idx, 2*buf_size, hse->input_size);

    /* While more input may arrive, hold back one lookahead's worth. */
    bool finishing = is_finishing(hse);
    uint16_t reserve = finishing ? 0 : lookahead;
    if (scan_idx >= hse->input_size - reserve) {
        /* Nothing left to scan in this buffer: move it to the backlog
         * and wait for more input. */
        LOG("-- end of search @ %d, saving backlog\n", scan_idx);
        return HSES_SAVE_BACKLOG;
    }

    uint16_t input_offset = get_input_offset(hse);
    uint16_t end = input_offset + scan_idx;

    /* Default: no backlog, so only the available input is scanned. */
    uint16_t start = input_offset;
    bool filled = backlog_is_filled(hse);
    if (filled || backlog_is_partial(hse)) {
        /* Scan back over the last buf_size bytes before `end`. */
        start = end - buf_size + 1;
        /* A partial backlog is clamped to the data that is available. */
        if (!filled && start < lookahead) { start = lookahead; }
    }

    /* A match can be no longer than the input remaining after scan_idx. */
    uint16_t max_possible = (hse->input_size - scan_idx < lookahead)
        ? (hse->input_size - scan_idx)
        : lookahead;

    uint16_t match_length = 0;
    uint16_t match_pos = find_longest_match(hse,
        start, end, max_possible, &match_length);

    if (match_pos == MATCH_NOT_FOUND) {
        LOG("ss Match not found\n");
        hse->match_scan_index++;
        hse->flags |= FLAG_HAS_LITERAL;
        hse->match_length = 0;
        return HSES_YIELD_TAG_BIT;
    }

    LOG("ss Found match of %d bytes at %d\n", match_length, match_pos);
    hse->match_pos = match_pos;
    hse->match_length = match_length;
    ASSERT(match_pos < 1 << hse->window_sz2 /*window_length*/);
    return HSES_YIELD_TAG_BIT;
}
/*
 * Encoder step: search for the longest match for the input at the current
 * scan position (hse->match_scan_index).
 *
 * When the scan index has passed the end of the usable input (the input
 * size, less 1 when finishing or less the lookahead size otherwise), the
 * search is over: HSES_FLUSH_BITS is returned when finishing, else
 * HSES_SAVE_BACKLOG. Otherwise find_longest_match() is run over the
 * window [end - window size, end] and HSES_YIELD_TAG_BIT is returned:
 *   - no match: the scan index is advanced and hse->match_length zeroed;
 *   - match:    hse->match_pos and hse->match_length record it.
 */
static HSE_state st_step_search(heatshrink_encoder *hse) {
    uint16_t buf_size = get_input_buffer_size(hse);
    uint16_t lookahead = get_lookahead_size(hse);
    uint16_t scan_idx = hse->match_scan_index;
    LOG("## step_search, scan @ +%d (%d/%d), input size %d\n",
        scan_idx, hse->input_size + scan_idx, 2*buf_size, hse->input_size);

    bool finishing = is_finishing(hse);
    uint16_t reserve = finishing ? 1 : lookahead;
    if (scan_idx > hse->input_size - reserve) {
        /* Nothing left to scan in this buffer: either flush (when
         * finishing) or move it to the backlog and wait for more. */
        LOG("-- end of search @ %d\n", scan_idx);
        if (finishing) {
            return HSES_FLUSH_BITS;
        }
        return HSES_SAVE_BACKLOG;
    }

    /* Search window: the buf_size bytes leading up to the scan position. */
    uint16_t input_offset = get_input_offset(hse);
    uint16_t end = input_offset + scan_idx;
    uint16_t start = end - buf_size;

    /* A match can be no longer than the input remaining after scan_idx. */
    uint16_t max_possible = (hse->input_size - scan_idx < lookahead)
        ? (hse->input_size - scan_idx)
        : lookahead;

    uint16_t match_length = 0;
    uint16_t match_pos = find_longest_match(hse,
        start, end, max_possible, &match_length);

    if (match_pos == MATCH_NOT_FOUND) {
        LOG("ss Match not found\n");
        hse->match_scan_index++;
        hse->match_length = 0;
        return HSES_YIELD_TAG_BIT;
    }

    LOG("ss Found match of %d bytes at %d\n", match_length, match_pos);
    hse->match_pos = match_pos;
    hse->match_length = match_length;
    ASSERT(match_pos < 1 << HEATSHRINK_ENCODER_WINDOW_BITS(hse) /*window_length*/);
    return HSES_YIELD_TAG_BIT;
}
Beispiel #5
0
/*
 * insert_word - insert the characters start..end into the compressed trie
 * rooted at `root`.
 *
 * root:  node to insert at; its string is matched against the word first.
 * start: first character of the word; leading spaces are skipped.
 * end:   last character of the word (inclusive -- it is part of the word,
 *        not a terminator).
 *
 * The word is compared with the node's string:
 *   - if only part of the node's string matches, the unmatched tail of the
 *     node is moved into a new child and the node is shortened;
 *   - if the word is then fully consumed, the node is marked LEAF;
 *   - otherwise the rest of the word is stored in (or inserted below) the
 *     child selected by its next character.
 * A word whose selecting character does not map to a child slot is counted
 * in g_stats.total_ignored and dropped.
 */
void
insert_word(trie_t *root, char *start, char *end)
{
	trie_t	*curr;
	int	indx;
	int	word_len;
	int	match_len;
	char	next_ch;

	if (!root || !start) {
		return;
	}

	/* Skip leading blanks without looking past `end`. */
	while (start <= end && *start == ' ') {
		start++;
	}
	/*
	 * `end` is inclusive, so start == end is still a one-character word.
	 * Rejecting it would silently lose words whenever the recursion
	 * below is left with a single remaining character.
	 */
	if (start > end) {
		return;
	}

	curr = root;
	word_len = (int)(end - start) + 1;

	match_len = find_longest_match(curr, start, end);
	/*
	 * %.*s prints exactly the word, so the caller's buffer does not have
	 * to be patched with a temporary terminator.
	 */
	printf("longest match in %s for %.*s is %d\n", curr->str, word_len,
	    start, match_len);

	/* A node with an empty string has nothing to split or shorten. */
	if (curr->len != 0) {
		if (match_len < curr->len) {
			/*
			 * Only a prefix of this node matched: push the
			 * unmatched tail of its string down into a child.
			 */
			next_ch = curr->str[match_len];
			/* <ctype.h> functions need an unsigned char value. */
			indx = tolower((unsigned char)next_ch) - 'a';
			if (indx < 0 || indx > MAX_ALPHA) {
				g_stats.total_ignored++;
				return;
			}
			/*
			 * NOTE(review): this overwrites whatever was in
			 * child[indx] and leaves curr's other children on
			 * curr rather than moving them under the new node --
			 * looks wrong for a node that already has children;
			 * confirm against create_new_node()/trie_t.
			 */
			curr->child[indx] = create_new_node(&curr->str[match_len],
			    curr->len - match_len);
			if (curr->type == LEAF) {
				g_stats.total_leaf--;
			}
			curr->type = NODE;
		}

		curr->str[match_len] = '\0';
		curr->len = match_len;

		if (match_len == word_len) {
			/*
			 * The word ends exactly here. Count the leaf only
			 * when the node was not one already, so inserting
			 * the same word twice does not inflate the total.
			 */
			if (curr->type != LEAF) {
				curr->type = LEAF;
				g_stats.total_leaf++;
			}
			return;
		}
	}

	/* Part of the word is left over: continue in the matching child. */
	next_ch = *(start + match_len);
	indx = tolower((unsigned char)next_ch) - 'a';
	/*
	 * NOTE(review): the upper bound follows the file's existing check;
	 * if child[] has MAX_ALPHA slots this should be >= -- confirm.
	 */
	if (indx < 0 || indx > MAX_ALPHA) {
		g_stats.total_ignored++;
		return;
	}

	if (curr->child[indx] == NULL) {
		curr->child[indx] = create_new_node(start + match_len,
		    word_len - match_len);
	} else {
		insert_word(curr->child[indx], start + match_len, end);
	}
}