Exemple #1
0
// Register one keyword occurrence and update the per-node work sets of
// match candidates accordingly.
//
// pos, tpos and len are forwarded unchanged to the key_occ constructor
// together with the current term of _match_iter; tpos is also what gets
// logged next to the term.
//
// Returns false if the occurrence or a new candidate could not be created,
// true otherwise.
bool Matcher::add_occurrence(off_t pos, off_t tpos, size_t len)
{
    // The term the match iterator is currently positioned at.
    QueryTerm* mexp = _match_iter.current();

    LOG(spam, "Match: %s(%ld)", mexp->term(), tpos);

    // Add new occurrence to sequence of all occurrences
    key_occ_ptr k = new key_occ(mexp->term(), pos, tpos, len);
    // NOTE(review): a plain `new` reports failure by throwing, so this check
    // presumably only matters if key_occ uses a non-throwing allocator - confirm.
    if (!k) return false;

    _occ.push_back(k);

    // Pruning is skipped while _need_complete_cnt is positive.
    if (!(_need_complete_cnt > 0)) {
        size_t nodeno;
        // From the head of the sequences, remove any candidates that are
        // "too old", eg. that is not complete within the winsize window
        // and also trig further processing of complete matches:
        for (nodeno = 0; nodeno < _nontermcnt; nodeno++) {
            match_sequence& ws = _wrk_set[nodeno];
            for (match_sequence::iterator it = ws.begin(); it != ws.end();) {
                MatchCandidate* m = (*it);
                // Stop at the first candidate that still starts within
                // _winsize of the new occurrence; everything behind it in
                // the sequence is left untouched.
                if ((k->startpos() - m->startpos()) < static_cast<int>(_winsize)) break;
                it = ws.erase(it); // This moves the iterator forward
                // The candidate has left the work set: either hand it to
                // update_match() or release it.
                if (m->partial_ok())
                    update_match(m);
                else
                    DerefCandidate(m);
            }
        }
    }

    // Then add a new candidate starting at the currently found keyword
    // for each subexpression that matches this keyword
    for (; mexp != NULL; mexp = _match_iter.next())
    {
        QueryNode* pexp = mexp->_parent;
        assert(pexp);
        MatchCandidate* nm = NewCandidate(pexp);
        // A null candidate or a negative element count is treated as an
        // allocation failure.
        if (!nm || nm->elems() < 0) {
            LOG(error, "Matcher could not allocate memory for candidate - bailing out");
            if (nm) DerefCandidate(nm);
            return false;
        }
        match_sequence& cs = _wrk_set[pexp->_node_idx];
        // Cap the work set size: once the limit is reached the new candidate
        // is released instead of being queued.
        if (cs.size() >= _max_match_candidates) {
            DerefCandidate(nm);
            LOG(debug, "The max number of match candidates (%zu) in the work set for query node idx '%u' has been reached. "
                "No more candidates are added", _max_match_candidates, pexp->_node_idx);
        } else {
            cs.push_back(nm);
        }
        // Called in both cases, so the candidates already in the work set
        // are updated with the new occurrence even when the cap was hit.
        update_wrk_set(cs, k, mexp);
    }
    return true;
}
Exemple #2
0
void Matcher::dump_statistics()
{
    int i;
    int nterms = QueryTerms();

    fprintf(stderr, "%20s %12s %12s\n", "Term", "Matches", "Exact");
    for (i = 0; i < nterms; i++) {
        QueryTerm* q = _mo->Term(i);
        fprintf(stderr, "%20s %12d %12d\n", q->term(), q->total_match_cnt,
                q->exact_match_cnt);
    }
}
                // Interpolate this query model with the word distribution qModel.
                //
                // Every existing term count is replaced by its weight scaled by
                // origModCoeff / totalCount(); then words from qModel are added
                // with weight (1 - origModCoeff), walking the sorted word list
                // until it is exhausted, the accumulated probability reaches
                // prSumThresh, howManyWord words have been taken, or a word's
                // probability drops below prThresh. Each added word is echoed
                // to cout.
                void interpolateWith(const lemur::langmod::UnigramLM &qModel,
                            double origModCoeff,
                            int howManyWord,
                            double prSumThresh,
                            double prThresh) {
                    // Reuse the scratch vector when it already exists.
                    if (qm) {
                        qm->clear();
                    } else {
                        qm = new lemur::api::IndexedRealVector();
                    }

                    // Copy every (word, probability) pair out of qModel.
                    for (qModel.startIteration(); qModel.hasMore(); ) {
                        IndexedReal wordProb;
                        qModel.nextWordProb((TERMID_T &)wordProb.ind, wordProb.val);
                        qm->push_back(wordProb);
                    }
                    // presumably orders entries by descending probability - confirm
                    qm->Sort();

                    const double originalMass = totalCount();

                    // Scale down the original model; each term returned by
                    // nextTerm() is freed here.
                    for (startIteration(); hasMore(); ) {
                        QueryTerm *term = nextTerm();
                        setCount(term->id(), term->weight()*origModCoeff/originalMass);
                        delete term;
                    }

                    // Mix in the new model, listing the words that were used.
                    cout << "-------- FB terms --------" << endl;
                    double massUsed = 0;
                    int wordsUsed = 0;
                    for (IndexedRealVector::iterator fb = qm->begin();
                         fb != qm->end() && massUsed < prSumThresh &&
                             wordsUsed < howManyWord && fb->val >= prThresh;
                         ++fb, ++wordsUsed) {
                        incCount(fb->ind, fb->val*(1-origModCoeff));
                        cout << ind.term(fb->ind) << endl;
                        massUsed += fb->val;
                    }
                    cout << "--------------------------" << endl;

                    // The counts changed: reset and recompute the collection
                    // query likelihood and mark the cached KL value as stale.
                    colQLikelihood = 0;
                    colQueryLikelihood();
                    colKLComputed = false;
                }
Exemple #4
0
// Interpolate this query model with the word distribution qModel.
//
// qModel        source of (word, probability) pairs to mix in.
// origModCoeff  weight given to the original model; the new model gets
//               (1 - origModCoeff).
// howManyWord   maximum number of words taken from qModel.
// prSumThresh   stop once the accumulated probability of the taken words
//               reaches this value.
// prThresh      stop at the first word whose probability is below this value.
//
// After the counts are updated the collection query likelihood is recomputed
// and the cached KL value is marked stale.
void lemur::retrieval::QueryModel::interpolateWith(const lemur::langmod::UnigramLM &qModel,
                                                   double origModCoeff,
                                                   int howManyWord,
                                                   double prSumThresh,
                                                   double prThresh) {
    // Reuse the scratch vector when it already exists.
    if (!qm) {
        qm = new lemur::api::IndexedRealVector();
    } else {
        qm->clear();
    }

    // Copy every (word, probability) pair out of qModel.
    qModel.startIteration();
    while (qModel.hasMore()) {
        IndexedReal entry;
        qModel.nextWordProb((TERMID_T &)entry.ind,entry.val);
        qm->push_back(entry);

    }
    // presumably orders entries by descending probability - confirm
    qm->Sort();

    double countSum = totalCount();

    // discounting the original model
    // The count must be REPLACED by the normalised, scaled weight. Adding it
    // on top of the existing count (incCount) would leave the original mass
    // in place and the result would no longer be an interpolation.
    startIteration();
    while (hasMore()) {
        QueryTerm *qt = nextTerm();
        setCount(qt->id(), qt->weight()*origModCoeff/countSum);
        delete qt;
    }

    // now adding the new model
    double prSum = 0;
    int wdCount = 0;
    IndexedRealVector::iterator it;
    it = qm->begin();
    while (it != qm->end() && prSum < prSumThresh &&
           wdCount < howManyWord && (*it).val >=prThresh) {
        incCount((*it).ind, (*it).val*(1-origModCoeff));
        prSum += (*it).val;
        ++it;
        wdCount++;
    }

    //Sum w in Q qtf * log(qtcf/termcount);
    colQLikelihood = 0;
    colQueryLikelihood();
    colKLComputed = false;
}
Exemple #5
0
// Write the query clarity score to os.
//
// The first line is "=<number of terms> <clarity>", where clarity is
// SUM_w{P(w|Q)*log(P(w|Q)/P(w))} normalised by the sum of the term weights
// and converted to base 2. It is followed by one line per query term with the
// term text and that term's own (un-normalised) base-2 contribution.
void lemur::retrieval::QueryModel::clarity(ostream &os)
{
    int nTerms = 0;
    double probMass = 0;
    double klNats = 0;
    QueryTerm *qt;

    // Pass 1: accumulate the overall score.
    for (startIteration(); hasMore(); ) {
        qt = nextTerm();
        ++nTerms;
        // P(w) = cf(w)/|C|: collection frequency over collection size.
        const double collProb = static_cast<double>(ind.termCount(qt->id())) /
                                static_cast<double>(ind.termCount());
        // P(w|Q): the weight stored for the term in this model.
        const double queryProb = qt->weight();
        probMass += queryProb;
        klNats += queryProb * log(queryProb / collProb);
        delete qt;
    }

    // Normalise by the total weight (skipped when it is zero), then divide
    // by log(2) to express the natural-log result in bits.
    const double norm = probMass ? probMass : 1.0;
    os << "=" << nTerms << " " << (klNats / norm / log(2.0)) << endl;

    // Pass 2: per-term contribution, also in bits.
    for (startIteration(); hasMore(); ) {
        qt = nextTerm();
        const double collProb = static_cast<double>(ind.termCount(qt->id())) /
                                static_cast<double>(ind.termCount());
        const double queryProb = qt->weight();
        os << ind.term(qt->id()) << " "
           << (queryProb * log(queryProb / collProb)) / log(2.0) << endl;
        delete qt;
    }
}
Exemple #6
0
// Write the model to os: a first line holding a space followed by the number
// of terms, then one "<term> <weight>" line per term.
void lemur::retrieval::QueryModel::save(ostream &os)
{
    // Pass 1: only count the terms; each one returned by nextTerm() is freed.
    int nTerms = 0;
    for (startIteration(); hasMore(); ++nTerms) {
        delete nextTerm();
    }
    os << " " << nTerms << endl;

    // Pass 2: emit the term text and weight of every entry.
    for (startIteration(); hasMore(); ) {
        QueryTerm *entry = nextTerm();
        os << ind.term(entry->id()) << " "<< entry->weight() << endl;
        delete entry;
    }
}
Exemple #7
0
// Replace the query group shown by this view with `group`: the widgets and
// view objects of the current terms are torn down, the old group is deleted,
// and a fresh QueryTermView is created for every term of the new group.
void QueryGroupView::re_load_terms(QueryGroup* group)
{
    // First destroy the widgets of every existing term view...
    QueryTermView* cur = f_term_view_list;
    while (cur) {
        cur->destroy_widgets();
        cur = cur->next_term_view();
    }

    // ...then the view objects themselves.
    // NOTE(review): this loop only terminates if deleting a QueryTermView
    // updates f_term_view_list - presumably its destructor unlinks it; confirm.
    while (f_term_view_list != NULL)
        delete f_term_view_list;

    // Take over the new group, releasing the previous one.
    delete f_query_group;
    f_query_group = group;

    // Build one view per term, handing each the view created before it.
    QueryTermView* prev = 0;
    for (QueryTerm* t = group->term_list(); t != NULL; t = t->next())
        prev = new QueryTermView (t, this, prev, NULL);
}
Exemple #8
0
// Starting at the current list element, return the first query term that
// matches the text term held by this iterator (_term/_len), or NULL when the
// list is exhausted. On success _el is left pointing at the matching element.
QueryTerm* match_iterator::first()
{
    while (_el != NULL)
    {
        QueryTerm* cand = _el->GetItem();
        bool hit;

        if (cand->Exact() && _len > cand->len) {
            // An exact-match term never matches a longer text term.
            hit = false;
        } else if (cand->is_wildcard()) {
            // Wildcard terms are decided by the wildcard matcher alone.
            hit = !(fast::util::wildcard_match(_term, cand->ucs4_term()) == false);
        } else if (_len < cand->ucs4_len) {
            // The text term is too short to contain the query term.
            hit = false;
        } else {
            // Prefix terms accept any longer text term; other terms accept
            // at most _stemext extra characters, and none at all when the
            // query term is not longer than _stem_min.
            bool length_ok = true;
            if (!cand->is_prefix()) {
                size_t stem_extend = (cand->ucs4_len <= _stem_min ? 0 : _stemext);
                length_ok = !(_len > cand->ucs4_len + stem_extend);
            }
            // Finally the leading ucs4_len characters have to be equal.
            hit = length_ok &&
                  juniper::strncmp(_term, cand->ucs4_term(), cand->ucs4_len) == 0;
        }

        if (hit) return cand;
        _el = _el->GetNext();
    }
    return NULL;
}
Exemple #9
0
// Replace the contents of this model with the one read from `is`.
//
// Expected input: an integer term count followed by that many
// "<word> <probability>" pairs. Words unknown to the index (id 0) are
// skipped. Reading stops early if the stream fails or a pair is malformed;
// the pairs read up to that point are kept.
//
// Afterwards the collection query likelihood is recomputed and the cached KL
// value is marked stale.
void lemur::retrieval::QueryModel::load(istream &is)
{
    // clear existing counts
    startIteration();
    QueryTerm *qt;
    while (hasMore()) {
        qt = nextTerm();
        setCount(qt->id(),0);
        // Every other user of nextTerm() in this file deletes the returned
        // term; doing the same here stops one leaked term per entry.
        delete qt;
    }
    colQLikelihood = 0;

    // Stays 0 (load nothing) if the count cannot be read.
    int count = 0;
    is >> count;
    char wd[500];
    double pr;
    while (count-- >0) {
        // Bound the extraction to the buffer size; width() is reset after
        // each formatted read, so it has to be set on every iteration.
        is.width(sizeof(wd));
        // Never use wd/pr from a failed extraction.
        if (!(is >> wd >> pr)) break;
        TERMID_T id = ind.term(wd);
        if (id != 0) setCount(id, pr); // don't load OOV terms
    }
    colQueryLikelihood();
    colKLComputed = false;
}
Exemple #10
0
// Compute the query clarity score of this model in bits:
// SUM_w{P(w|Q)*log2(P(w|Q)/P(w))}, normalised by the sum of the term weights.
double lemur::retrieval::QueryModel::clarity() const
{
    int nTerms = 0;
    double probMass = 0;
    double klNats = 0;

    for (startIteration(); hasMore(); ) {
        QueryTerm *qt = nextTerm();
        ++nTerms;
        // P(w) = cf(w)/|C|: collection frequency over collection size.
        const double collProb = static_cast<double>(ind.termCount(qt->id())) /
                                static_cast<double>(ind.termCount());
        // P(w|Q): the weight stored for the term in this model.
        const double queryProb = qt->weight();
        probMass += queryProb;
        klNats += queryProb * log(queryProb / collProb);
        delete qt;
    }

    // Normalise by the total weight of the input model; a zero total is
    // replaced by 1.0 so the division is always defined.
    const double norm = probMass ? probMass : 1.0;
    klNats = klNats / norm;

    // The sum was taken with natural logs; dividing by log(2) converts it
    // to base 2.
    return klNats / log(2.0);
}
Exemple #11
0
// Build the view for a query group.
//
// Creates a restraint widget ("group_view") under `parent` with a form
// ("group_form") inside it, creates one QueryTermView per term of `group`,
// and then sizes the restraint widget against its parent. When the parent is
// a Motif drawing area, the enclosing shell is widened if necessary and a
// resize event handler is installed on the parent.
QueryGroupView::QueryGroupView (QueryGroup *group, Widget parent)
: f_query_group (group),
  f_term_view_list (NULL)
{
  f_restraint = WRestraint (WComposite (parent), "group_view", WAutoManage);
  f_form = WXmForm (f_restraint, "group_form");

  QueryTermView *view = NULL;
  QueryTerm *term;

  // One view per term; each new view is handed the view created before it.
  for (term= group->term_list(); term != NULL; term = term->next())
    view = new QueryTermView (term, this, view, NULL);

  f_form.Realize();  // This statement is extremely critical.  02/03/93 DJB 
  f_form.Manage();

  // Need special event handler for top level group to catch
  // resizes of window and resize the query view.
  // NOTE: It's just about time to move this to another method. 
  if (XtClass (XtParent (f_restraint)) == xmDrawingAreaWidgetClass)
    {
      // Let's see what the parent's size is now:
      ON_DEBUG(printf ("Drawing area is width: %d, height: %d\n",
		       WCore(f_restraint.Parent()).Width(),
		       WCore(f_restraint.Parent()).Height()));
      // We need to grow the drawing area if it is smaller than the
      // restraint widget, since we don't deal with horizontal scrolling.
      Dimension restraint_width = f_restraint.Width();
      Dimension da_width = WCore(f_restraint.Parent()).Width();
      
      if (da_width < restraint_width)
	{
	  // Can't resize the drawing area because the ^$&(*% Motif
	  // pane widget will not allow the horizontal resize.  So,
	  // instead calculate the size increase and do it on the shell. 
	  Dimension increase = restraint_width - da_width;
	  // Find the shell widget. 
	  Widget w = f_restraint.Parent();
	  while (!XtIsShell (w))
	    w = XtParent(w);
	  WTopLevelShell shell (w);

	  ON_DEBUG(printf ("** Resizing shell by = %d\n", increase));

	  // Change state if needed. 
	  Boolean allow_resize = shell.AllowShellResize();
	  if (!allow_resize)
	    shell.AllowShellResize (True);
	  // Change the width. 
	  // Both the minimum width and the current width grow by `increase`.
	  shell.MinWidth (shell.Width() + increase);
	  shell.Width (shell.Width() + increase);
	  // Restore state if needed. 
	  if (!allow_resize)
	    shell.AllowShellResize (False);
	}
      else if (da_width > restraint_width)
	{
	  // The drawing area is wider: stretch the restraint widget to fill it.
	  f_restraint.Width (WCore(f_restraint.Parent()).Width());
	}
      
      // Install QueryGroupView::resize on the parent for StructureNotify
      // events, with the restraint widget passed as the client data.
      XtAddEventHandler (XtParent (f_restraint), StructureNotifyMask, False,
			 (XtEventHandler) &QueryGroupView::resize,
			 (Widget) f_restraint);
      // Store the min width in UserData...
      f_restraint.UserData ((XtPointer)(size_t) f_restraint.Width());
    }
  // Make sure the restraint widget isn't too narrow.
  else
    {
      f_restraint.Width (WCore(f_restraint.Parent()).Width());
    }
}