/*
 * Follow `word` down the trie starting at `curr` and, once the whole word
 * has been consumed, hand the reached node to print_all_words().
 *
 * curr   - node to start from; NULL ends the search silently.
 * word   - NUL-terminated remainder of the word being looked up.
 * prefix - NUL-terminated, writable buffer; the matched part of each
 *          visited node's string is appended to it before descending.
 *          NOTE(review): the buffer size is not known here, so the caller
 *          must guarantee room for the appended text - confirm at call sites.
 *
 * The search stops without printing when the next character of `word` is
 * not an ASCII letter, because such a character has no child slot.
 */
void search_trie(trie_t *curr, char *word, char *prefix)
{
    int indx;
    int match_len = 0;
    int word_len;
    int lowered;

    if (!curr || !word || !prefix) {
        return;
    }

    word_len = (int)strlen(word);

    /* An empty word would make `word + word_len - 1` point before the
     * buffer, so only ask for a match when there is something to match. */
    if (word_len > 0) {
        match_len = find_longest_match(curr, word, word + word_len - 1);
    }

    printf("longest match in %s for %s is %d\n", curr->str, word, match_len);

    if (match_len == word_len) {
        /* Whole word consumed: everything below this node is a candidate. */
        print_all_words(curr, prefix);
        return;
    }

    if (match_len < word_len) {
        /* tolower() is only defined for unsigned-char values (or EOF). */
        lowered = tolower((unsigned char)word[match_len]);

        /* Child slots are indexed by `letter - 'a'`; anything else would
         * index outside curr->child[].
         * Assumes one slot per lowercase ASCII letter - TODO confirm
         * against the trie_t definition. */
        if (lowered < 'a' || lowered > 'z') {
            return;
        }
        indx = lowered - 'a';

        strncat(prefix, curr->str, match_len);
        search_trie(curr->child[indx], word + match_len, prefix);
    }
}
// Returns the list of matching blocks between sequences `a` and `b`.
//
// The result is cached in `matching_blocks`: a non-empty cache is returned
// as-is. Otherwise the list is built by repeatedly calling
// find_longest_match() on a region and then on the parts to the left and to
// the right of the match found there. The blocks are then sorted with
// matchLessThan, runs of directly adjacent blocks are merged into one, and a
// terminating zero-length block Match(LengthA, LengthB, 0) is appended.
QList<Match> SequenceMatcher::get_matching_blocks()
{
    if (!matching_blocks.isEmpty())
        return matching_blocks;

    const int LengthA = a.count();
    const int LengthB = b.count();

    // Regions still to be examined; used as a LIFO work list.
    QList<Offsets> pending;
    pending.append(Offsets(0, LengthA, 0, LengthB));

    while (!pending.isEmpty()) {
        const Offsets region = pending.takeLast();
        const int lo_a = region.a_low;
        const int hi_a = region.a_high;
        const int lo_b = region.b_low;
        const int hi_b = region.b_high;

        const Match found = find_longest_match(lo_a, hi_a, lo_b, hi_b);
        const int at_a = found.i;
        const int at_b = found.j;
        const int length = found.size;

        // Nothing matched in this region, so there is nothing to split.
        if (!length)
            continue;

        matching_blocks.append(found);

        // Part of the region before the match.
        if (lo_a < at_a && lo_b < at_b)
            pending.append(Offsets(lo_a, at_a, lo_b, at_b));

        // Part of the region after the match.
        if (at_a + length < hi_a && at_b + length < hi_b)
            pending.append(Offsets(at_a + length, hi_a, at_b + length, hi_b));
    }

    qSort(matching_blocks.begin(), matching_blocks.end(), matchLessThan);

    // Collapse blocks that continue each other in both sequences.
    QList<Match> merged;
    int run_a = 0;
    int run_b = 0;
    int run_len = 0;
    for (int n = 0; n < matching_blocks.count(); ++n) {
        const Match &block = matching_blocks.at(n);
        const int next_a = block.i;
        const int next_b = block.j;
        const int next_len = block.size;

        const bool continues_run =
            (run_a + run_len == next_a) && (run_b + run_len == next_b);
        if (continues_run) {
            run_len += next_len;
            continue;
        }

        // The current run ended; emit it (unless empty) and start a new one.
        if (run_len)
            merged.append(Match(run_a, run_b, run_len));
        run_a = next_a;
        run_b = next_b;
        run_len = next_len;
    }
    if (run_len)
        merged.append(Match(run_a, run_b, run_len));

    // Terminating zero-length block.
    merged.append(Match(LengthA, LengthB, 0));

    matching_blocks = merged;
    return matching_blocks;
}
/*
 * Encoder state step: search the window for a match at the current scan
 * position (hse->match_scan_index).
 *
 * Returns HSES_SAVE_BACKLOG when the scan index has reached the end of the
 * searchable input (the lookahead is held back unless the encoder is
 * finishing). Otherwise calls find_longest_match() and returns
 * HSES_YIELD_TAG_BIT:
 *   - no match: advances match_scan_index, sets FLAG_HAS_LITERAL and
 *     clears match_length;
 *   - match: records match_pos and match_length in the encoder.
 */
static HSE_state ICACHE_FLASH_ATTR st_step_search(heatshrink_encoder *hse) {
    uint16_t buf_size = get_input_buffer_size(hse);
    uint16_t lookahead = get_lookahead_size(hse);
    uint16_t scan_idx = hse->match_scan_index;
    LOG("## step_search, scan @ +%d (%d/%d), input size %d\n",
        scan_idx, hse->input_size + scan_idx, 2*buf_size, hse->input_size);

    bool finishing = is_finishing(hse);
    if (scan_idx >= hse->input_size - (finishing ? 0 : lookahead)) {
        /* Current search buffer is exhausted, copy it into the
         * backlog and await more input. */
        LOG("-- end of search @ %d, saving backlog\n", scan_idx);
        return HSES_SAVE_BACKLOG;
    }

    uint16_t input_offset = get_input_offset(hse);
    uint16_t end = input_offset + scan_idx;

    /* Start of the trailing window that ends at `end`; truncated to
     * 16 bits exactly as when it is stored in `start` below. */
    uint16_t window_start = end - buf_size + 1;

    uint16_t start;
    if (backlog_is_filled(hse)) {
        /* last WINDOW_LENGTH bytes */
        start = window_start;
    } else if (backlog_is_partial(hse)) {
        /* clamp to available data */
        start = (window_start < lookahead) ? lookahead : window_start;
    } else {
        /* only scan available input */
        start = input_offset;
    }

    /* A match can be at most one lookahead long, and never longer than
     * the input remaining after the scan index. */
    uint16_t max_possible;
    if (hse->input_size - scan_idx < lookahead) {
        max_possible = hse->input_size - scan_idx;
    } else {
        max_possible = lookahead;
    }

    uint16_t match_length = 0;
    uint16_t match_pos = find_longest_match(hse, start, end,
                                            max_possible, &match_length);

    if (match_pos != MATCH_NOT_FOUND) {
        LOG("ss Found match of %d bytes at %d\n", match_length, match_pos);
        hse->match_pos = match_pos;
        hse->match_length = match_length;
        ASSERT(match_pos < 1 << hse->window_sz2 /*window_length*/);
    } else {
        LOG("ss Match not found\n");
        hse->match_scan_index++;
        hse->flags |= FLAG_HAS_LITERAL;
        hse->match_length = 0;
    }

    /* Both outcomes move on to emitting the tag bit. */
    return HSES_YIELD_TAG_BIT;
}
/*
 * Encoder state step: search the window for a match at the current scan
 * position (hse->match_scan_index).
 *
 * When the scan index has passed the searchable input, returns
 * HSES_FLUSH_BITS if the encoder is finishing and HSES_SAVE_BACKLOG
 * otherwise. Otherwise calls find_longest_match() over the window ending
 * at the scan position and returns HSES_YIELD_TAG_BIT:
 *   - no match: advances match_scan_index and clears match_length;
 *   - match: records match_pos and match_length in the encoder.
 */
static HSE_state st_step_search(heatshrink_encoder *hse) {
    uint16_t buf_size = get_input_buffer_size(hse);
    uint16_t lookahead = get_lookahead_size(hse);
    uint16_t scan_idx = hse->match_scan_index;
    LOG("## step_search, scan @ +%d (%d/%d), input size %d\n",
        scan_idx, hse->input_size + scan_idx, 2*buf_size, hse->input_size);

    bool finishing = is_finishing(hse);
    if (scan_idx > hse->input_size - (finishing ? 1 : lookahead)) {
        /* Current search buffer is exhausted, copy it into the
         * backlog and await more input. */
        LOG("-- end of search @ %d\n", scan_idx);
        if (finishing) {
            return HSES_FLUSH_BITS;
        }
        return HSES_SAVE_BACKLOG;
    }

    uint16_t input_offset = get_input_offset(hse);
    uint16_t end = input_offset + scan_idx;
    uint16_t start = end - buf_size;

    /* A match can be at most one lookahead long, and never longer than
     * the input remaining after the scan index. */
    uint16_t max_possible;
    if (hse->input_size - scan_idx < lookahead) {
        max_possible = hse->input_size - scan_idx;
    } else {
        max_possible = lookahead;
    }

    uint16_t match_length = 0;
    uint16_t match_pos = find_longest_match(hse, start, end,
                                            max_possible, &match_length);

    if (match_pos != MATCH_NOT_FOUND) {
        LOG("ss Found match of %d bytes at %d\n", match_length, match_pos);
        hse->match_pos = match_pos;
        hse->match_length = match_length;
        ASSERT(match_pos < 1 << HEATSHRINK_ENCODER_WINDOW_BITS(hse) /*window_length*/);
    } else {
        LOG("ss Match not found\n");
        hse->match_scan_index++;
        hse->match_length = 0;
    }

    /* Both outcomes move on to emitting the tag bit. */
    return HSES_YIELD_TAG_BIT;
}
void insert_word(trie_t *root, char *start, char *end) { trie_t *curr; int indx; int word_len; int match_len; char next_ch; char temp_ch; if (!start) { return; } while (*start == ' ' && start++ < end); if (start >= end) { return; } curr = root; word_len = end - start + 1; match_len = find_longest_match(curr, start, end); temp_ch = start[word_len]; start[word_len] = '\0'; printf("longest match in %s for %s is %d\n", curr->str, start, match_len); start[word_len] = temp_ch; /* * split based on the length being matched * match len has to be less than/equal to the curr node string * if remaining in the current node, then * we need to create a new node, copy the remaining str * null terminate * if remaining in the current word, then, * we need to create a new node, copy the remaining str */ if (curr->len == 0) { next_ch = *(start + match_len); indx = tolower(next_ch) - 'a'; if (indx > MAX_ALPHA) { g_stats.total_ignored++; return; } if (curr->child[indx] == NULL) { curr->child[indx] = create_new_node(start + match_len, word_len - match_len); } else { insert_word(curr->child[indx], start + match_len, end); } return; } if (match_len < curr->len) { next_ch = curr->str[match_len]; indx = tolower(next_ch) - 'a'; if (indx > MAX_ALPHA) { g_stats.total_ignored++; return; } curr->child[indx] = create_new_node(&curr->str[match_len], curr->len - match_len); if (curr->type == LEAF) { g_stats.total_leaf--; } curr->type = NODE; } curr->str[match_len] = '\0'; curr->len = match_len; if (match_len == word_len) { curr->type = LEAF; g_stats.total_leaf++; } else { next_ch = *(start + match_len); indx = tolower(next_ch) - 'a'; if (indx > MAX_ALPHA) { g_stats.total_ignored++; return; } if (curr->child[indx] == NULL) { curr->child[indx] = create_new_node(start + match_len, word_len - match_len); } else { insert_word(curr->child[indx], start + match_len, end); } } }