コード例 #1
0
ファイル: RetMethod.cpp プロジェクト: Hosssein/RS
void lemur::retrieval::QueryModel::clarity(ostream &os)
{
    int count = 0;
    double sum=0, ln_Pr=0;
    startIteration();
    QueryTerm *qt;
    while (hasMore()) {
        qt = nextTerm();
        count++;
        // query-clarity = SUM_w{P(w|Q)*log(P(w|Q)/P(w))}
        // P(w)=cf(w)/|C|
        double pw = ((double)ind.termCount(qt->id())/(double)ind.termCount());
        // P(w|Q) is a prob computed by any model, e.g. relevance models
        double pwq = qt->weight();
        sum += pwq;
        ln_Pr += (pwq)*log(pwq/pw);
        delete qt;
    }
    // clarity should be computed with log_2, so divide by log(2).
    os << "=" << count << " " << (ln_Pr/(sum ? sum : 1.0)/log(2.0)) << endl;
    startIteration();
    while (hasMore()) {
        qt = nextTerm();
        // print clarity for each query term
        // clarity should be computed with log_2, so divide by log(2).
        os << ind.term(qt->id()) << " "
           << (qt->weight()*log(qt->weight()/
                                ((double)ind.termCount(qt->id())/
                                 (double)ind.termCount())))/log(2.0) << endl;
        delete qt;
    }
}
コード例 #2
0
                void interpolateWith(const lemur::langmod::UnigramLM &qModel,
                            double origModCoeff,
                            int howManyWord,
                            double prSumThresh,
                            double prThresh) {
                    if (!qm) {
                        qm = new lemur::api::IndexedRealVector();
                    } else {
                        qm->clear();
                    }

                    qModel.startIteration();
                    while (qModel.hasMore()) {
                        IndexedReal entry;
                        qModel.nextWordProb((TERMID_T &)entry.ind,entry.val);
                        qm->push_back(entry);

                    }
                    qm->Sort();

                    double countSum = totalCount();


                    startIteration();
                    while (hasMore()) {
                        QueryTerm *qt = nextTerm();
                        setCount(qt->id(), qt->weight()*origModCoeff/countSum);
                        delete qt;
                    }

                    cout << "-------- FB terms --------" << endl;
                    double prSum = 0;
                    int wdCount = 0;
                    IndexedRealVector::iterator it;
                    it = qm->begin();
                    while (it != qm->end() && prSum < prSumThresh &&
                                wdCount < howManyWord && (*it).val >=prThresh) {
                        incCount((*it).ind, (*it).val*(1-origModCoeff));
                        cout << ind.term(it->ind) << endl;
                        prSum += (*it).val;
                        it++;
                        wdCount++;
                    }
                    cout << "--------------------------" << endl;


                    colQLikelihood = 0;
                    colQueryLikelihood();
                    colKLComputed = false;
                }
コード例 #3
0
ファイル: RetMethod.cpp プロジェクト: Hosssein/RS
void lemur::retrieval::QueryModel::interpolateWith(const lemur::langmod::UnigramLM &qModel,
                                                   double origModCoeff,
                                                   int howManyWord,
                                                   double prSumThresh,
                                                   double prThresh) {
    if (!qm) {
        qm = new lemur::api::IndexedRealVector();
    } else {
        qm->clear();
    }

    qModel.startIteration();
    while (qModel.hasMore()) {
        IndexedReal entry;
        qModel.nextWordProb((TERMID_T &)entry.ind,entry.val);
        qm->push_back(entry);

    }
    qm->Sort();

    double countSum = totalCount();

    // discounting the original model
    startIteration();
    while (hasMore()) {
        QueryTerm *qt = nextTerm();
        incCount(qt->id(), qt->weight()*origModCoeff/countSum);
        delete qt;
    }

    // now adding the new model
    double prSum = 0;
    int wdCount = 0;
    IndexedRealVector::iterator it;
    it = qm->begin();
    while (it != qm->end() && prSum < prSumThresh &&
           wdCount < howManyWord && (*it).val >=prThresh) {
        incCount((*it).ind, (*it).val*(1-origModCoeff));
        prSum += (*it).val;
        it++;
        wdCount++;
    }

    //Sum w in Q qtf * log(qtcf/termcount);
    colQLikelihood = 0;
    colQueryLikelihood();
    colKLComputed = false;
}
コード例 #4
0
ファイル: RetMethod.cpp プロジェクト: Hosssein/RS
void lemur::retrieval::QueryModel::save(ostream &os)
{
    int count = 0;
    startIteration();
    QueryTerm *qt;
    while (hasMore()) {
        qt = nextTerm();
        count++;
        delete qt;
    }
    os << " " << count << endl;
    startIteration();
    while (hasMore()) {
        qt = nextTerm();
        os << ind.term(qt->id()) << " "<< qt->weight() << endl;
        delete qt;
    }
}
コード例 #5
0
ファイル: RetMethod.cpp プロジェクト: Hosssein/RS
void lemur::retrieval::QueryModel::load(istream &is)
{
    // clear existing counts
    startIteration();
    QueryTerm *qt;
    while (hasMore()) {
        qt = nextTerm();
        setCount(qt->id(),0);
    }
    colQLikelihood = 0;

    int count;
    is >> count;
    char wd[500];
    double pr;
    while (count-- >0) {
        is >> wd >> pr;
        TERMID_T id = ind.term(wd);
        if (id != 0) setCount(id, pr); // don't load OOV terms
    }
    colQueryLikelihood();
    colKLComputed = false;
}
コード例 #6
0
ファイル: RetMethod.cpp プロジェクト: Hosssein/RS
double lemur::retrieval::QueryModel::clarity() const
{
    int count = 0;
    double sum=0, ln_Pr=0;
    startIteration();
    QueryTerm *qt;
    while (hasMore()) {
        qt = nextTerm();
        count++;
        // query-clarity = SUM_w{P(w|Q)*log(P(w|Q)/P(w))}
        // P(w)=cf(w)/|C|
        double pw = ((double)ind.termCount(qt->id())/(double)ind.termCount());
        // P(w|Q) is a prob computed by any model, e.g. relevance models
        double pwq = qt->weight();
        sum += pwq;
        ln_Pr += (pwq)*log(pwq/pw);
        delete qt;
    }
    // normalize by sum of probabilities in the input model
    ln_Pr = ln_Pr/(sum ? sum : 1.0);
    // clarity should be computed with log_2, so divide by log(2).
    return (ln_Pr/log(2.0));
}