void matIR::QueryStats::init(const std::string& query, indri::api::QueryEnvironment& environment) { // Extract only the terms from the query and add to the vector indri::api::QueryParserWrapper *parser = indri::api::QueryParserFactory::get(query, "indri"); indri::lang::ScoredExtentNode* rootNode = parser->query(); indri::lang::RawScorerNodeExtractor extractor; rootNode->walk(extractor); std::vector<indri::lang::RawScorerNode*>& scorerNodes = extractor.getScorerNodes(); for (int i = 0; i < scorerNodes.size(); i++){ std::string qterm = environment.stemTerm(scorerNodes[i]->queryText()); queryString.push_back(qterm); if(environment.stemCount(qterm) == 0) continue; if( _queryTokens.find(qterm) == _queryTokens.end() ) _queryTokens.insert(make_pair( qterm, 1)); else _queryTokens[qterm] += 1; } // Initialize vectors _query_collectionFrequency.set_size(_queryTokens.size()); _query_documentFrequency.set_size(_queryTokens.size()); // Now obtain the statistics int i = 0; map<std::string, int>::const_iterator iter; for (iter=_queryTokens.begin(); iter != _queryTokens.end(); ++iter) { std::string stem = environment.stemTerm(iter->first); _query_collectionFrequency(i) = (double) environment.stemCount(stem); _query_documentFrequency(i) = (double) environment.documentStemCount(stem); ++i; } }
/**
 * Record the stemmed query terms of `query` in `resultData`.
 *
 * The query is parsed with the "indri" parser type and the scorer nodes of
 * its parse tree are collected with a RawScorerNodeExtractor. Each node's
 * query text is stemmed via `environment`; stems whose collection count is
 * zero are ignored. For the remaining stems, `resultData.queryStems` holds
 * the number of occurrences and `resultData.queryStemOrder` lists each
 * distinct stem once, in order of first appearance.
 *
 * @param environment  Query environment used for stemming and stem counts.
 * @param resultData   Result container whose queryStems / queryStemOrder
 *                     members are updated.
 * @param query        Query text handed to the Indri query parser.
 */
void updateQueryDetails(indri::api::QueryEnvironment& environment, Results& resultData, string query) {
    indri::api::QueryParserWrapper* queryParser = indri::api::QueryParserFactory::get(query, "indri");
    indri::lang::ScoredExtentNode* parseRoot = queryParser->query();

    indri::lang::RawScorerNodeExtractor nodeCollector;
    parseRoot->walk(nodeCollector);
    vector<indri::lang::RawScorerNode*>& rawNodes = nodeCollector.getScorerNodes();

    vector<indri::lang::RawScorerNode*>::const_iterator node;
    for (node = rawNodes.begin(); node != rawNodes.end(); ++node) {
        string stemmed = environment.stemTerm((*node)->queryText());

        // Skip stems that do not occur in the collection at all.
        if (environment.stemCount(stemmed) == 0) {
            continue;
        }

        // Already seen: only bump its occurrence count.
        if (resultData.queryStems.find(stemmed) != resultData.queryStems.end()) {
            resultData.queryStems[stemmed] += 1;
            continue;
        }

        // First occurrence: start the count and remember the arrival order.
        resultData.queryStems.insert(make_pair(stemmed, 1));
        resultData.queryStemOrder.push_back(stemmed);
    }
}
/**
 * Stem a single term with the query environment and hand the result back
 * as an R object.
 *
 * @param _term  Term to be stemmed.
 * @return The value produced by `environment.stemTerm`, converted with
 *         Rcpp::wrap.
 */
SEXP stemTerm(string _term) {
    const std::string stemmedTerm = environment.stemTerm(_term);
    return Rcpp::wrap(stemmedTerm);
}