/*
 * Logs every entry of a match list at the given log level.
 *
 * level: log level handed to LOG for every line written.
 * mp:    first node of the list; a NULL list is reported as " (NULL)".
 *
 * Nodes whose offset is not positive are walked over without being logged.
 */
void dump_matches(int level, matchp mp)
{
    matchp node;

    if (mp == NULL)
    {
        LOG(level, (" (NULL)\n"));
        return;
    }

    /* Follow the next links until the end of the chain. */
    for (node = mp; node != NULL; node = node->next)
    {
        if (node->offset > 0)
        {
            LOG(level, (" offset %d, len %d\n", node->offset, node->len));
        }
    }
}
/*
 * Logs every entry of a match list at LOG_DEBUG level.
 *
 * mp: first node of the list; a NULL list is reported as " (NULL)".
 *
 * Nodes whose offset is not positive are walked over without being logged.
 */
void dump_matches(matchp mp)
{
    matchp node;

    if (mp == NULL)
    {
        LOG(LOG_DEBUG, (" (NULL)\n"));
        return;
    }

    /* Follow the next links until the end of the chain. */
    for (node = mp; node != NULL; node = node->next)
    {
        if (node->offset > 0)
        {
            LOG(LOG_DEBUG, (" offset %d, len %d\n", node->offset, node->len));
        }
    }
}
/**
 * Called for the token that marks the end of the text.
 *
 * Emits optional diagnostics (the token itself at debug level, occurrence
 * and match dumps at spam level), records the token's byte position as the
 * end position and flushes the pending candidates.
 *
 * @param token the end token; its bytepos is stored in _endpos.
 */
void Matcher::handle_end(Token& token)
{
    if (LOG_WOULD_LOG(debug))
    {
        // Convert the token to UTF-8 for logging; a missing token text is
        // copied with length 0.
        char utf8token[1024];
        Fast_UnicodeUtil::utf8ncopy(utf8token, token.token, 1024,
                                    (token.token == NULL ? 0 : token.curlen));
        LOG(debug, "handle_end(%s)", utf8token);
    }

    if (LOG_WOULD_LOG(spam))
    {
        dump_occurrences(100);
        LOG(spam, "Topmost 10 matches found:");
        dump_matches(10, false);
    }

    JL(JD_MDUMP, log_matches(20));

    // Remember where the text ends.
    _endpos = token.bytepos;

    // Flush here for now, since the nonterminal lists are not traversed
    // for each keyword.
    flush_candidates();
}
/*
 * Builds the list of matches for the byte at `index` and returns its head.
 *
 * The list always contains the literal entry (len 1, offset 0). Each
 * candidate position taken from ctx->info[index]->single (skipping the
 * first node of that chain) is then tested, and a new entry is added
 * whenever it yields a longer match than the best one found so far.
 *
 * This function must be called with the indexes in reverse order.
 *
 * ctx:   match context (IN/OUT).
 * index: position in ctx->buf to compute matches for (IN).
 */
const_matchp matches_calc(match_ctx ctx,        /* IN/OUT */
                          int index)            /* IN */
{
    const unsigned char *data = ctx->buf;
    matchp head = NULL;
    matchp best;
    struct match_node *cand;

    LOG(LOG_DUMP, ("index %d, char '%c', rle %d, rle_r %d\n",
                   index, data[index], ctx->rle[index], ctx->rle_r[index]));

    /* The literal is always a valid way to encode this byte. */
    best = match_new(ctx, &head, 1, 0);

    /* Candidate positions start at the second node of the chain. */
    cand = ctx->info[index]->single;
    if (cand != NULL)
    {
        cand = cand->next;
    }

    for (; cand != NULL; cand = cand->next)
    {
        int prev_len;   /* length of the best real match so far, else 0 */
        int remaining;  /* bytes of prev_len still to be verified */
        int pos;
        int start;
        int dist;       /* distance from index to the candidate */

        /* Candidates beyond the maximum offset are out of reach. */
        if (cand->index > index + ctx->max_offset)
        {
            break;
        }

        LOG(LOG_DUMP, ("find lengths for index %d to index %d\n",
                       index, cand->index));

        /* The literal entry (offset 0) counts as length 0 here. */
        prev_len = best->offset > 0 ? best->len : 0;
        LOG(LOG_DUMP, ("0) comparing with current best [%d] off %d len %d\n",
                       index, best->offset, prev_len));

        dist = cand->index - index;
        remaining = prev_len;
        pos = index + 1 - remaining;

        /* Check that this candidate matches at least as far as the
         * previous best. The byte at index itself is not compared, hence
         * > 1 rather than > 0. After each successful comparison the
         * shorter of the two rle_r counts is skipped as well, which saves
         * comparisons inside runs. */
        while (remaining > 1 && data[pos] == data[pos + dist])
        {
            int run_here = ctx->rle_r[pos];
            int run_there = ctx->rle_r[pos + dist];
            int skip = run_here < run_there ? run_here : run_there;

            /* NOTE(review): the second position printed is pos + skip,
             * not pos + dist; this keeps the existing output - confirm
             * whether that is intended. */
            LOG(LOG_DUMP, ("1) compared sucesssfully [%d] %d %d\n",
                           index, pos, pos + skip));

            remaining -= 1 + skip;
            pos += 1 + skip;
        }
        if (remaining > 1)
        {
            /* Shorter than the previous best, skip this candidate. */
            continue;
        }

        if (dist < 17)
        {
            /* Record a one byte match for this short distance.
             * NOTE(review): the reason for the limit of 17 is not
             * visible here - confirm against the encoder. */
            best = match_new(ctx, &head, 1, dist);
        }

        /* The candidate is now known to be at least as long as the
         * previous best; try to extend it towards lower positions. */
        start = index - prev_len;
        pos = start;
        while (pos >= 0 && data[pos] == data[pos + dist])
        {
            LOG(LOG_DUMP, ("2) compared sucesssfully [%d] %d %d\n",
                           index, pos, pos + dist));
            --pos;
        }
        if (pos < start)
        {
            /* At least one more byte matched: record the longer match. */
            best = match_new(ctx, &head, index - pos, dist);
        }
        if (pos < 0)
        {
            /* The scan ran off the start of the buffer, so no later
             * candidate can produce a better match. */
            break;
        }
    }

    LOG(LOG_DEBUG, ("adding matches for index %d to cache\n", index));
    dump_matches(LOG_DEBUG, head);

    return head;
}