示例#1
0
// Offer the element `k` (matched for query expression `mexp`) to every
// candidate in the working set `ws`, walking from the most recently added
// candidate towards the oldest one.
//
// For each candidate the result of MatchCandidate::accept() decides what
// happens next:
//  - M_EXISTS stops the walk,
//  - candidates that are expired or fall outside the fallback window are
//    erased from `ws` and handed to DerefCandidate(),
//  - candidates that report complete() are erased from `ws` and then either
//    passed on to update_match() or handed to DerefCandidate(),
//  - all other candidates stay in `ws`.
void Matcher::update_wrk_set(match_sequence& ws, MatchElement* k, QueryExpr* mexp)
{
    if (LOG_WOULD_LOG(spam)) {
        std::string dumped;
        k->dump(dumped);
        LOG(spam, "update_wrk_set(): match_sequence.size(%zu), element(%s)", ws.size(), dumped.c_str());
    }

    // Newest candidates sit at the back, so iterate in reverse.
    match_sequence::reverse_iterator pos = ws.rbegin();
    while (pos != ws.rend()) {
        MatchCandidate* cand = *pos;
        const MatchCandidate::accept_state verdict = cand->accept(k, mexp);

        // Once a candidate already holds this keyword, all earlier
        // candidates hold it as well - nothing more to do.
        if (verdict == MatchCandidate::M_EXISTS) {
            break;
        }

        // Anything but an overlap means the element was just accepted into
        // another, higher level: reference the element's complex candidate.
        if (verdict != MatchCandidate::M_OVERLAP) {
            MatchCandidate* owner = k->Complex();
            RefCandidate(owner);
        }

        // A slightly larger window (the fallback size) is allowed here
        // because not all matches have been found yet.
        const bool unsatisfiable = (verdict == MatchCandidate::M_EXPIRED)
            || ((k->startpos() - cand->startpos()) >= static_cast<int>(_winsizeFallback));

        // Still alive and still incomplete: leave it in place and move on.
        if (!unsatisfiable && !cand->complete()) {
            ++pos;
            continue;
        }

        // The candidate leaves the working set either way.  Erasing through
        // a reverse iterator: advance it, erase the element its base() now
        // refers to, and rebuild the reverse iterator from erase()'s result.
        match_sequence::reverse_iterator resumed(ws.erase((++pos).base()));
        pos = resumed;

        // Can never be satisfied, or complete but not within the limit.
        if (unsatisfiable || !cand->matches_limit()) {
            DerefCandidate(cand);
            continue;
        }

        // Complete and within the limit: hand it over to update_match().
        if (_need_complete_cnt > 0) {
            _need_complete_cnt--;
        }
        update_match(cand);
    }

    if (LOG_WOULD_LOG(spam)) {
        std::string dumped;
        k->dump(dumped);
        LOG(spam, "END update_wrk_set, '%s'", dumped.c_str());
    }
}
示例#2
0
// Record a new occurrence of the term currently selected by _match_iter and
// feed it into the working sets.
//
// Parameters:
//   pos, tpos, len - forwarded unchanged to the key_occ constructor
//                    (presumably byte position, token position and length
//                    of the occurrence - confirm against key_occ).
//
// Returns true when the occurrence was processed, false when the occurrence
// object or a new candidate could not be set up.
bool Matcher::add_occurrence(off_t pos, off_t tpos, size_t len)
{
    QueryTerm* mexp = _match_iter.current();

    // off_t is not guaranteed to have the same width as long, so cast
    // explicitly to a type that matches the conversion specifier.
    LOG(spam, "Match: %s(%lld)", mexp->term(), static_cast<long long>(tpos));

    // Add new occurrence to sequence of all occurrences
    key_occ_ptr k = new key_occ(mexp->term(), pos, tpos, len);
    // NOTE(review): a plain new-expression throws instead of returning null,
    // so this check only matters if key_occ uses a non-throwing allocator.
    if (!k) return false;

    _occ.push_back(k);

    // Expiry of old candidates is skipped for as long as _need_complete_cnt
    // is still positive.
    if (!(_need_complete_cnt > 0)) {
        size_t nodeno;
        // From the head of the sequences, remove any candidates that are
        // "too old", i.e. not complete within the winsize window,
        // and also trigger further processing of complete matches:
        for (nodeno = 0; nodeno < _nontermcnt; nodeno++) {
            match_sequence& ws = _wrk_set[nodeno];
            for (match_sequence::iterator it = ws.begin(); it != ws.end();) {
                MatchCandidate* m = (*it);
                // Stop at the first candidate that is still within the
                // window; the scan does not look past it.
                if ((k->startpos() - m->startpos()) < static_cast<int>(_winsize)) break;
                it = ws.erase(it); // This moves the iterator forward
                if (m->partial_ok())
                    update_match(m);
                else
                    DerefCandidate(m);
            }
        }
    }

    // Then add a new candidate starting at the currently found keyword
    // for each subexpression that matches this keyword
    for (; mexp != NULL; mexp = _match_iter.next())
    {
        QueryNode* pexp = mexp->_parent;
        assert(pexp);
        MatchCandidate* nm = NewCandidate(pexp);
        if (!nm || nm->elems() < 0) {
            LOG(error, "Matcher could not allocate memory for candidate - bailing out");
            if (nm) DerefCandidate(nm);
            return false;
        }
        match_sequence& cs = _wrk_set[pexp->_node_idx];
        if (cs.size() >= _max_match_candidates) {
            // Working set for this node is full: drop the new candidate, but
            // still let the existing candidates see the occurrence below.
            DerefCandidate(nm);
            LOG(debug, "The max number of match candidates (%zu) in the work set for query node idx '%u' has been reached. "
                "No more candidates are added", _max_match_candidates, pexp->_node_idx);
        } else {
            cs.push_back(nm);
        }
        update_wrk_set(cs, k, mexp);
    }
    return true;
}
示例#3
0
// Flush all remaining candidates upon context change or document end:
void Matcher::flush_candidates()
{
    int cands = 0;
    for (size_t i = 0; i < _nontermcnt; i++) {
        match_sequence& ws = _wrk_set[i];
        for (match_sequence::iterator it = ws.begin(); it != ws.end(); ++it) {
            cands++;
            MatchCandidate* m = (*it);
            if (m->partial_ok())
                update_match(m);
            else
                DerefCandidate(m);
        }
        ws.clear();
    }
    LOG(debug, "Flushing done (%d candidates)", cands);
}
示例#4
0
// Emit a match with the given distance and length through the range coder:
// first the length, then the distance slot, then any remaining distance
// bits. Finally the distance is pushed onto the front of the reps history
// and the match price counter is bumped.
static inline void
match(lzma_lzma1_encoder *coder, const uint32_t pos_state,
		const uint32_t distance, const uint32_t len)
{
	update_match(coder->state);

	length(&coder->rc, &coder->match_len_encoder, pos_state, len,
			coder->fast_mode);

	// The distance slot is coded with a bittree selected by the
	// length-dependent distance state.
	const uint32_t slot = get_dist_slot(distance);
	rc_bittree(&coder->rc, coder->dist_slot[get_dist_state(len)],
			DIST_SLOT_BITS, slot);

	// Slots from DIST_MODEL_START upwards carry extra bits after the slot.
	if (slot >= DIST_MODEL_START) {
		const uint32_t extra_bits = (slot >> 1) - 1;
		const uint32_t slot_base = (2 | (slot & 1)) << extra_bits;
		const uint32_t remainder = distance - slot_base;

		if (slot >= DIST_MODEL_END) {
			// High bits are sent directly, the low ALIGN_BITS bits
			// through the reverse bittree of the align probabilities.
			rc_direct(&coder->rc, remainder >> ALIGN_BITS,
					extra_bits - ALIGN_BITS);
			rc_bittree_reverse(
					&coder->rc, coder->dist_align,
					ALIGN_BITS, remainder & ALIGN_MASK);
			++coder->align_price_count;
		} else {
			// Careful here: slot_base - slot - 1 can be -1, but
			// rc_bittree_reverse starts at probs[1], not probs[0].
			rc_bittree_reverse(&coder->rc,
				coder->dist_special + slot_base - slot - 1,
				extra_bits, remainder);
		}
	}

	// Shift the repeat-distance history by one and put the new distance
	// at the front.
	for (int i = 3; i > 0; --i)
		coder->reps[i] = coder->reps[i - 1];

	coder->reps[0] = distance;
	++coder->match_price_count;
}