bool Matcher::add_occurrence(off_t pos, off_t tpos, size_t len) { QueryTerm* mexp = _match_iter.current(); LOG(spam, "Match: %s(%ld)", mexp->term(), tpos); // Add new occurrence to sequence of all occurrences key_occ_ptr k = new key_occ(mexp->term(), pos, tpos, len); if (!k) return false; _occ.push_back(k); if (!(_need_complete_cnt > 0)) { size_t nodeno; // From the head of the sequences, remove any candidates that are // "too old", eg. that is not complete within the winsize window // and also trig further processing of complete matches: for (nodeno = 0; nodeno < _nontermcnt; nodeno++) { match_sequence& ws = _wrk_set[nodeno]; for (match_sequence::iterator it = ws.begin(); it != ws.end();) { MatchCandidate* m = (*it); if ((k->startpos() - m->startpos()) < static_cast<int>(_winsize)) break; it = ws.erase(it); // This moves the iterator forward if (m->partial_ok()) update_match(m); else DerefCandidate(m); } } } // Then add a new candidate starting at the currently found keyword // for each subexpression that matches this keyword for (; mexp != NULL; mexp = _match_iter.next()) { QueryNode* pexp = mexp->_parent; assert(pexp); MatchCandidate* nm = NewCandidate(pexp); if (!nm || nm->elems() < 0) { LOG(error, "Matcher could not allocate memory for candidate - bailing out"); if (nm) DerefCandidate(nm); return false; } match_sequence& cs = _wrk_set[pexp->_node_idx]; if (cs.size() >= _max_match_candidates) { DerefCandidate(nm); LOG(debug, "The max number of match candidates (%zu) in the work set for query node idx '%u' has been reached. " "No more candidates are added", _max_match_candidates, pexp->_node_idx); } else { cs.push_back(nm); } update_wrk_set(cs, k, mexp); } return true; }
void Matcher::dump_statistics() { int i; int nterms = QueryTerms(); fprintf(stderr, "%20s %12s %12s\n", "Term", "Matches", "Exact"); for (i = 0; i < nterms; i++) { QueryTerm* q = _mo->Term(i); fprintf(stderr, "%20s %12d %12d\n", q->term(), q->total_match_cnt, q->exact_match_cnt); } }