Beispiel #1
0
/// @brief Print the clarity score of a query and each term's share of it.
///
/// For the grams that are used, clarity is
///   SUM_w{ P(w|Q) * log( P(w|Q) / P(w) ) }
/// where P(w|Q) is the gram's weight (a probability produced by any model,
/// e.g. a relevance model) and P(w) = cf(w)/|C| is computed as
/// env.stemCount(w) / env.termCount().  stemCount is used because the
/// relevance model works on stemmed terms.  The sum is divided by the total
/// weight of the grams used (when that total is non-zero) and by log(2) to
/// express it in bits.
///
/// Output written to std::cout:
///   "# query: <query> = <number of grams used> <clarity>"
/// followed by one "<term> <contribution>" line per gram used.
///
/// @param query    Query text, echoed in the header line.
/// @param env      Environment queried for stemCount() and termCount().
/// @param grams    Grams to score; only terms[0] and weight are read.
/// @param numTerms Upper bound on the number of grams used.  The bound is
///                 applied as an unsigned value, so a negative argument
///                 ends up selecting every gram.
static void printClarity( const std::string& query,
                          indri::api::QueryEnvironment & env,
                          const std::vector<indri::query::RelevanceModel::Gram*>& grams, int numTerms ) {

  // Number of grams actually processed: min(numTerms, grams.size()), with
  // numTerms converted to unsigned exactly as the loop condition used to do.
  const size_t requested = static_cast<size_t>( numTerms );
  const size_t limit = requested < grams.size() ? requested : grams.size();

  // |C| and log(2) do not change between terms; fetch/compute them once.
  const double collectionSize = (double)env.termCount();
  const double ln2 = log(2.0);

  // Per-term contributions (in nats), cached so that the collection
  // statistics are looked up only once per term.
  std::vector<double> contributions;
  contributions.reserve( limit );

  double sum = 0, ln_Pr = 0;
  for( size_t j = 0; j < limit; j++ ) {
    const std::string& t = grams[j]->terms[0];
    // P(w) = cf(w)/|C|
    const double pw = (double)env.stemCount(t) / collectionSize;
    // P(w|Q)
    const double pwq = grams[j]->weight;
    // A zero-weight term contributes nothing (0 * log 0 is taken as 0);
    // evaluating it directly would yield NaN and spoil the whole sum.
    const double contribution = ( pwq != 0.0 ) ? pwq * log( pwq / pw ) : 0.0;
    sum += pwq;
    ln_Pr += contribution;
    contributions.push_back( contribution );
  }

  std::cout << "# query: " << query <<  " = " << limit << " "
            << (ln_Pr/(sum ? sum : 1.0)/ln2) << std::endl;
  for( size_t j = 0; j < limit; j++ ) {
    std::cout << grams[j]->terms[0] << " "
              << (contributions[j]/ln2) << std::endl;
  }
}
Beispiel #2
0
 // Ask `environment` for termCount(_term) and return that value, narrowed
 // to long, wrapped as an R object.
 SEXP collFreq(string _term) {
     return Rcpp::wrap(static_cast<long>(environment.termCount(_term)));
 }
Beispiel #3
0
 // Ask `environment` for its argument-less termCount() and return that
 // value, narrowed to long, wrapped as an R object.
 SEXP getTermCount() {
     return Rcpp::wrap(static_cast<long>(environment.termCount()));
 }