void Matcher::update_wrk_set(match_sequence& ws, MatchElement* k, QueryExpr* mexp) { if (LOG_WOULD_LOG(spam)) { std::string s; k->dump(s); LOG(spam, "update_wrk_set(): match_sequence.size(%zu), element(%s)", ws.size(), s.c_str()); } // update this working set (start with the freshest) for (match_sequence::reverse_iterator rit = ws.rbegin(); rit != ws.rend();) { MatchCandidate* m = (*rit); MatchCandidate::accept_state as = m->accept(k, mexp); // If a candidate already has this keyword, then all earlier // candidates also has the keyword if (as == MatchCandidate::M_EXISTS) break; // Just accepted this candidate into another higher level if (as != MatchCandidate::M_OVERLAP) { MatchCandidate* mu = k->Complex(); RefCandidate(mu); } // we should allow a slighly larger winsize here because we have not found all matches yet. if ((as == MatchCandidate::M_EXPIRED) || ((k->startpos() - m->startpos()) >= static_cast<int>(_winsizeFallback))) { // remove from current pos and delete - can never be satisfied match_sequence::reverse_iterator new_rit(ws.erase((++rit).base())); rit = new_rit; DerefCandidate(m); } else { // If this one got complete, move it to the ranked set or trigger updates // of parent candidates if subquery match if (m->complete()) { // STL hackers' heaven - removing this element unconditionally from _wrk_set['k'] match_sequence::reverse_iterator new_rit(ws.erase((++rit).base())); rit = new_rit; if (m->matches_limit()) { if (_need_complete_cnt > 0) { _need_complete_cnt--; } update_match(m); } else { DerefCandidate(m); } } else { ++rit; } } } if (LOG_WOULD_LOG(spam)) { std::string s; k->dump(s); LOG(spam, "END update_wrk_set, '%s'", s.c_str()); } }
bool Matcher::add_occurrence(off_t pos, off_t tpos, size_t len) { QueryTerm* mexp = _match_iter.current(); LOG(spam, "Match: %s(%ld)", mexp->term(), tpos); // Add new occurrence to sequence of all occurrences key_occ_ptr k = new key_occ(mexp->term(), pos, tpos, len); if (!k) return false; _occ.push_back(k); if (!(_need_complete_cnt > 0)) { size_t nodeno; // From the head of the sequences, remove any candidates that are // "too old", eg. that is not complete within the winsize window // and also trig further processing of complete matches: for (nodeno = 0; nodeno < _nontermcnt; nodeno++) { match_sequence& ws = _wrk_set[nodeno]; for (match_sequence::iterator it = ws.begin(); it != ws.end();) { MatchCandidate* m = (*it); if ((k->startpos() - m->startpos()) < static_cast<int>(_winsize)) break; it = ws.erase(it); // This moves the iterator forward if (m->partial_ok()) update_match(m); else DerefCandidate(m); } } } // Then add a new candidate starting at the currently found keyword // for each subexpression that matches this keyword for (; mexp != NULL; mexp = _match_iter.next()) { QueryNode* pexp = mexp->_parent; assert(pexp); MatchCandidate* nm = NewCandidate(pexp); if (!nm || nm->elems() < 0) { LOG(error, "Matcher could not allocate memory for candidate - bailing out"); if (nm) DerefCandidate(nm); return false; } match_sequence& cs = _wrk_set[pexp->_node_idx]; if (cs.size() >= _max_match_candidates) { DerefCandidate(nm); LOG(debug, "The max number of match candidates (%zu) in the work set for query node idx '%u' has been reached. " "No more candidates are added", _max_match_candidates, pexp->_node_idx); } else { cs.push_back(nm); } update_wrk_set(cs, k, mexp); } return true; }
// Flush all remaining candidates upon context change or document end: void Matcher::flush_candidates() { int cands = 0; for (size_t i = 0; i < _nontermcnt; i++) { match_sequence& ws = _wrk_set[i]; for (match_sequence::iterator it = ws.begin(); it != ws.end(); ++it) { cands++; MatchCandidate* m = (*it); if (m->partial_ok()) update_match(m); else DerefCandidate(m); } ws.clear(); } LOG(debug, "Flushing done (%d candidates)", cands); }
/*
 * Updates coder->state via update_match(), writes `len` through length()
 * and `distance` through the range coder, then pushes `distance` onto the
 * front of coder->reps and bumps coder->match_price_count.
 *
 * The distance is written as a slot (bit tree selected by
 * get_dist_state(len)); slots at or above DIST_MODEL_START are followed by
 * the remaining low bits of the distance.
 */
static inline void
match(lzma_lzma1_encoder *coder, const uint32_t pos_state,
		const uint32_t distance, const uint32_t len)
{
	update_match(coder->state);

	length(&coder->rc, &coder->match_len_encoder, pos_state, len,
			coder->fast_mode);

	const uint32_t slot = get_dist_slot(distance);
	const uint32_t len_state = get_dist_state(len);
	rc_bittree(&coder->rc, coder->dist_slot[len_state],
			DIST_SLOT_BITS, slot);

	if (slot >= DIST_MODEL_START) {
		const uint32_t extra_bits = (slot >> 1) - 1;
		const uint32_t slot_base = (2 | (slot & 1)) << extra_bits;
		const uint32_t remainder = distance - slot_base;

		if (slot >= DIST_MODEL_END) {
			/* High bits go out directly, the low ALIGN_BITS
			 * through the dist_align bit tree. */
			rc_direct(&coder->rc, remainder >> ALIGN_BITS,
					extra_bits - ALIGN_BITS);
			rc_bittree_reverse(&coder->rc, coder->dist_align,
					ALIGN_BITS, remainder & ALIGN_MASK);
			++coder->align_price_count;
		} else {
			/* Careful: the offset slot_base - slot - 1 can be -1,
			 * but rc_bittree_reverse starts at probs[1], not
			 * probs[0]. The pointer expression is kept in this
			 * exact left-to-right order so the unsigned operands
			 * are never subtracted from each other first. */
			rc_bittree_reverse(&coder->rc,
					coder->dist_special + slot_base - slot - 1,
					extra_bits, remainder);
		}
	}

	/* Shift the rep history by one and store the new distance in front. */
	for (uint32_t i = 3; i > 0; --i)
		coder->reps[i] = coder->reps[i - 1];
	coder->reps[0] = distance;

	++coder->match_price_count;
}