void lemur::retrieval::QueryModel::clarity(ostream &os) {
  // Print the query clarity score, then the per-term contributions.
  // Clarity is the KL divergence between the query model and the
  // collection model, reported in bits (log base 2):
  //   clarity(Q) = SUM_w P(w|Q) * log2( P(w|Q) / P(w) ),  P(w) = cf(w)/|C|
  int termCnt = 0;
  double probSum = 0;  // accumulated P(w|Q) mass, used to normalize
  double klSum = 0;    // accumulated KL contribution in natural log
  QueryTerm *qt;

  // First pass: accumulate the total clarity over all query terms.
  startIteration();
  while (hasMore()) {
    qt = nextTerm();
    termCnt++;
    // Collection (background) probability P(w) = cf(w)/|C|.
    double collProb = (double)ind.termCount(qt->id()) / (double)ind.termCount();
    // P(w|Q) is whatever probability the model assigned as the term weight
    // (e.g. produced by relevance models).
    double qryProb = qt->weight();
    probSum += qryProb;
    klSum += qryProb * log(qryProb / collProb);
    delete qt;
  }
  // Normalize by the probability mass (guarding against zero), and divide
  // by log(2) so the score is in bits.
  os << "=" << termCnt << " " << (klSum / (probSum ? probSum : 1.0) / log(2.0)) << endl;

  // Second pass: print each term's individual clarity contribution (bits).
  startIteration();
  while (hasMore()) {
    qt = nextTerm();
    double collProb = (double)ind.termCount(qt->id()) / (double)ind.termCount();
    os << ind.term(qt->id()) << " "
       << (qt->weight() * log(qt->weight() / collProb)) / log(2.0) << endl;
    delete qt;
  }
}
// NOTE(review): unlike the other definitions in this file, this overload is
// not scoped to lemur::retrieval::QueryModel, it prints "FB terms" debug
// output, and it uses setCount() where the class-qualified version uses
// incCount() — presumably a debugging/alternate variant. Confirm which
// behavior is intended before relying on it.
void interpolateWith(const lemur::langmod::UnigramLM &qModel,
                     double origModCoeff, int howManyWord,
                     double prSumThresh, double prThresh) {
  // (Re)build the scratch vector that will hold the feedback model's terms.
  if (!qm) {
    qm = new lemur::api::IndexedRealVector();
  } else {
    qm->clear();
  }
  // Copy every (termID, probability) pair out of the unigram LM.
  qModel.startIteration();
  while (qModel.hasMore()) {
    IndexedReal entry;
    qModel.nextWordProb((TERMID_T &)entry.ind, entry.val);
    qm->push_back(entry);
  }
  qm->Sort();  // sort so the highest-probability terms come first
  double countSum = totalCount();
  // Rescale the original query model by origModCoeff.
  // NOTE(review): counts are modified while the term iteration is in
  // progress — assumes setCount() is safe during iteration; verify.
  startIteration();
  while (hasMore()) {
    QueryTerm *qt = nextTerm();
    setCount(qt->id(), qt->weight()*origModCoeff/countSum);
    delete qt;
  }
  cout << "-------- FB terms --------" << endl;
  // Mix in the top feedback terms with weight (1 - origModCoeff), stopping
  // when any threshold trips: accumulated probability mass (prSumThresh),
  // number of words taken (howManyWord), or per-term floor (prThresh).
  double prSum = 0;
  int wdCount = 0;
  IndexedRealVector::iterator it;
  it = qm->begin();
  while (it != qm->end() && prSum < prSumThresh &&
         wdCount < howManyWord && (*it).val >= prThresh) {
    incCount((*it).ind, (*it).val*(1-origModCoeff));
    cout << ind.term(it->ind) << endl;  // debug: echo each feedback term
    prSum += (*it).val;
    it++;
    wdCount++;
  }
  cout << "--------------------------" << endl;
  // The model changed, so the cached collection query likelihood and the
  // cached KL value are stale: reset and recompute.
  colQLikelihood = 0;
  colQueryLikelihood();
  colKLComputed = false;
}
void lemur::retrieval::QueryModel::interpolateWith(const lemur::langmod::UnigramLM &qModel, double origModCoeff, int howManyWord, double prSumThresh, double prThresh) {
  // Interpolate this query model with a (feedback) unigram LM:
  //   new model = origModCoeff * original + (1 - origModCoeff) * top feedback terms
  if (qm) {
    qm->clear();
  } else {
    qm = new lemur::api::IndexedRealVector();
  }
  // Pull every (termID, probability) pair out of the feedback model.
  qModel.startIteration();
  while (qModel.hasMore()) {
    IndexedReal rec;
    qModel.nextWordProb((TERMID_T &)rec.ind, rec.val);
    qm->push_back(rec);
  }
  qm->Sort();  // highest-probability feedback terms first

  // Discount the original model by origModCoeff.
  double normalizer = totalCount();
  startIteration();
  while (hasMore()) {
    QueryTerm *origTerm = nextTerm();
    incCount(origTerm->id(), origTerm->weight() * origModCoeff / normalizer);
    delete origTerm;
  }

  // Now add the new (feedback) model: take terms in probability order until
  // one of the cutoffs trips — accumulated probability mass (prSumThresh),
  // number of words (howManyWord), or the per-term floor (prThresh).
  double massSoFar = 0;
  int termsTaken = 0;
  for (IndexedRealVector::iterator pos = qm->begin();
       pos != qm->end() && massSoFar < prSumThresh &&
       termsTaken < howManyWord && pos->val >= prThresh;
       ++pos, ++termsTaken) {
    incCount(pos->ind, pos->val * (1 - origModCoeff));
    massSoFar += pos->val;
  }

  // The model changed, so the cached collection query likelihood
  // (SUM_{w in Q} qtf * log(qtcf/termcount)) and KL value are stale:
  // reset and recompute.
  colQLikelihood = 0;
  colQueryLikelihood();
  colKLComputed = false;
}
void lemur::retrieval::QueryModel::save(ostream &os) {
  // Serialize the model: a header line with the term count, then one
  // "term weight" line per query term.
  int total = 0;
  QueryTerm *qt;

  // First sweep: just tally how many terms there are for the header.
  startIteration();
  while (hasMore()) {
    qt = nextTerm();
    total++;
    delete qt;
  }
  os << " " << total << endl;

  // Second sweep: write each term string and its weight.
  startIteration();
  while (hasMore()) {
    qt = nextTerm();
    os << ind.term(qt->id()) << " " << qt->weight() << endl;
    delete qt;
  }
}
double lemur::retrieval::QueryModel::clarity() const { int count = 0; double sum=0, ln_Pr=0; startIteration(); QueryTerm *qt; while (hasMore()) { qt = nextTerm(); count++; // query-clarity = SUM_w{P(w|Q)*log(P(w|Q)/P(w))} // P(w)=cf(w)/|C| double pw = ((double)ind.termCount(qt->id())/(double)ind.termCount()); // P(w|Q) is a prob computed by any model, e.g. relevance models double pwq = qt->weight(); sum += pwq; ln_Pr += (pwq)*log(pwq/pw); delete qt; } // normalize by sum of probabilities in the input model ln_Pr = ln_Pr/(sum ? sum : 1.0); // clarity should be computed with log_2, so divide by log(2). return (ln_Pr/log(2.0)); }