Exemplo n.º 1
0
void matIR::QueryStats::init(const std::string& query, indri::api::QueryEnvironment& environment)
{

    // Extract only the terms from the query and add to the vector
    indri::api::QueryParserWrapper *parser = indri::api::QueryParserFactory::get(query, "indri");
    indri::lang::ScoredExtentNode* rootNode = parser->query();
    indri::lang::RawScorerNodeExtractor extractor;
    rootNode->walk(extractor);
    std::vector<indri::lang::RawScorerNode*>& scorerNodes = extractor.getScorerNodes();

    for (int i = 0; i < scorerNodes.size(); i++){
        std::string qterm = environment.stemTerm(scorerNodes[i]->queryText());
        queryString.push_back(qterm);
        if(environment.stemCount(qterm) == 0)
            continue;
        if( _queryTokens.find(qterm) == _queryTokens.end() )
            _queryTokens.insert(make_pair( qterm, 1));
        else
            _queryTokens[qterm] += 1;
    }

    // Initialize vectors


    _query_collectionFrequency.set_size(_queryTokens.size());
    _query_documentFrequency.set_size(_queryTokens.size());



    // Now obtain the statistics
    int i = 0;
    map<std::string, int>::const_iterator iter;
    for (iter=_queryTokens.begin(); iter != _queryTokens.end(); ++iter) {
        std::string stem = environment.stemTerm(iter->first);
        _query_collectionFrequency(i) = (double) environment.stemCount(stem);
        _query_documentFrequency(i) = (double) environment.documentStemCount(stem);
        ++i;

    }
}
Exemplo n.º 2
0
    // Record the stems of a query in resultData.
    //
    // The query is parsed with the "indri" parser and the text of every raw
    // scorer node is stemmed through the environment. Stems whose collection
    // count is zero are skipped. For every remaining stem the occurrence
    // count in resultData.queryStems is incremented; the first time a stem
    // is seen it is also appended to resultData.queryStemOrder.
    //
    // environment - query environment used for stemming and count lookups
    // resultData  - receives the stem counts and the first-seen stem order
    // query       - query text handed to the Indri query parser
    void updateQueryDetails(indri::api::QueryEnvironment& environment,
                            Results& resultData,
                            string query){

        indri::api::QueryParserWrapper* parser = indri::api::QueryParserFactory::get(query, "indri");
        indri::lang::ScoredExtentNode* rootNode = parser->query();
        indri::lang::RawScorerNodeExtractor extractor;
        rootNode->walk(extractor);
        vector<indri::lang::RawScorerNode*>& scorerNodes = extractor.getScorerNodes();

        for (std::size_t idx = 0; idx < scorerNodes.size(); ++idx) {
            const string qterm = environment.stemTerm(scorerNodes[idx]->queryText());
            if (environment.stemCount(qterm) == 0)
                continue;

            if (resultData.queryStems.find(qterm) == resultData.queryStems.end()) {
                // First occurrence: start the count and remember the order.
                resultData.queryStems.insert(make_pair(qterm, 1));
                resultData.queryStemOrder.push_back(qterm);
            } else {
                resultData.queryStems[qterm] += 1;
            }
        }

        // The factory returns a heap-allocated wrapper that the caller has to
        // release. NOTE(review): the parse-tree nodes appear to be owned by
        // the parser, which is why it is released only after the last node
        // access.
        delete parser;
    }
Exemplo n.º 3
0
 // Stem a single term through the query environment and return the stem
 // wrapped as an R object.
 SEXP stemTerm(string _term) {
     const string stemmed = environment.stemTerm(_term);
     return Rcpp::wrap(stemmed);
 }