Esempio n. 1
0
  // Runs the query, expanding it if necessary.  Will print output as well if verbose is on.
  void _runQuery( std::stringstream& output, const std::string& query,
                  const std::string &queryType, const std::vector<std::string> &workingSet, std::vector<std::string> relFBDocs ) {
    try {
      if( _printQuery ) output << "# query: " << query << std::endl;
      std::vector<lemur::api::DOCID_T> docids;;
      if (workingSet.size() > 0) 
        docids = _environment.documentIDsFromMetadata("docno", workingSet);

      if (relFBDocs.size() == 0) {
          if( _printSnippets ) {
            if (workingSet.size() > 0) 
              _annotation = _environment.runAnnotatedQuery( query, docids, _initialRequested, queryType ); 
            else
              _annotation = _environment.runAnnotatedQuery( query, _initialRequested );
            _results = _annotation->getResults();
          } else {
            if (workingSet.size() > 0)
              _results = _environment.runQuery( query, docids, _initialRequested, queryType );
            else
              _results = _environment.runQuery( query, _initialRequested, queryType );
          }
      }
      
      if( _expander ) {
        std::vector<indri::api::ScoredExtentResult> fbDocs;
        if (relFBDocs.size() > 0) {
          docids = _environment.documentIDsFromMetadata("docno", relFBDocs);
          for (size_t i = 0; i < docids.size(); i++) {
            indri::api::ScoredExtentResult r(0.0, docids[i]);
            fbDocs.push_back(r);
          }
        }
        std::string expandedQuery;
        if (relFBDocs.size() != 0)
          expandedQuery = _expander->expand( query, fbDocs );
        else
          expandedQuery = _expander->expand( query, _results );
        if( _printQuery ) output << "# expanded: " << expandedQuery << std::endl;
        if (workingSet.size() > 0) {
          docids = _environment.documentIDsFromMetadata("docno", workingSet);
          _results = _environment.runQuery( expandedQuery, docids, _requested, queryType );
        } else {
          _results = _environment.runQuery( expandedQuery, _requested, queryType );
        }
      }
    }
    catch( lemur::api::Exception& e )
    {
      _results.clear();
      LEMUR_RETHROW(e, "QueryThread::_runQuery Exception");
    }
  }
Esempio n. 2
0
    // Runs _query as an annotated query restricted to the documents named in docSet and
    // refreshes the per-query member state (results, scores, document ids, docnos, stats).
    //
    //   _qno    - query identifier, stored in qno
    //   _query  - query text, stored in query and passed to runAnnotatedQuery
    //   docSet  - "docno" metadata values of the documents to score; its size is used as
    //             the number of results requested
    //
    // Always returns Rcpp::wrap(true).
    //
    // NOTE(review): scores is filled in the order of the returned results, while extDocIDs
    // is looked up from documentIDs, which still holds the ids in docSet order. Confirm
    // that consumers do not assume the two vectors are index-aligned.
    SEXP generateResultsFromSet(string _qno, string _query, vector<string> docSet){
        // Remember which query this state belongs to.
        qno = _qno;
        query = _query;

        // Drop everything left over from the previous query.
        resultsData = resultsData_nullCopy;
        results.clear();
        _gramTable.clear();
        terms.clear();
        extDocIDs.clear();
        scores.clear();
        documentIDs.clear();

        // Resolve the docnos and score exactly that set of documents.
        documentLimit = docSet.size();
        documentIDs = environment.documentIDsFromMetadata("docno", docSet);
        qa = environment.runAnnotatedQuery(query, documentIDs, documentLimit);
        results = qa->getResults();
        _logtoposterior(results);

        // Collect one score per returned result.
        for (size_t rank = 0, total = results.size(); rank < total; ++rank){
            scores.push_back(results[rank].score);
        }
        extDocIDs = environment.documentMetadata(documentIDs, "docno");

        // Derived per-query statistics.
        updateQueryDetails(environment, resultsData, query);
        countGrams();
        buildStats();

        return Rcpp::wrap(true);
    }
Esempio n. 3
0
/**
 * Scores every extracted concept with findConceptScorePrf against the documents named in
 * topDocsNames, then min-max normalizes the scores.
 *
 * @param concatenatedGoodConcepts  (first, second) string pairs; `first` is passed to
 *                                  findConceptScorePrf as conceptSty and `second` as conceptStr
 * @param qId                       query identifier, forwarded to findConceptScorePrf
 * @param topDocsNames              "docno" metadata values, resolved to document ids through env
 * @param env                       query environment used for the docno lookup and forwarded on
 * @param queryReformulator         forwarded to findConceptScorePrf
 * @param resourceNames_            forwarded to findConceptScorePrf
 * @return multimap from normalized score (ascending key order) to the concept pair. Scores are
 *         (score - min) / (max - min); when all scores are equal (including the single-concept
 *         case) every concept gets 0.0 instead of the NaN that 0/0 would produce.
 */
multimap<double, pair<string, string> > indri::query::ConceptSelectorFuns::normConceptScorePrf(
                                                            vector<pair<string, string> > concatenatedGoodConcepts,
                                                            string qId,
                                                            vector<string> topDocsNames,
                                                            indri::api::QueryEnvironment & env,
                                                            indri::query::QueryReformulator * queryReformulator,
                                                            vector<string> resourceNames_)
{
    std::vector<lemur::api::DOCID_T> topDocIds = env.documentIDsFromMetadata("docno", topDocsNames);
    multimap<double, pair<string, string>, std::greater<double> > scoredConcepts_;
    for (const auto& concStyStrPair : concatenatedGoodConcepts) // for each extracted concept
    {
        string conceptSty = concStyStrPair.first;
        string conceptStr = concStyStrPair.second;
        double conceptScore = indri::query::ConceptSelectorFuns::findConceptScorePrf(conceptSty,
                                                                                        conceptStr,
                                                                                        qId,
                                                                                        topDocIds,
                                                                                        env,
                                                                                        queryReformulator,
                                                                                        resourceNames_);

        scoredConcepts_.insert(make_pair(conceptScore, make_pair(conceptSty, conceptStr)));
        cout << "indri::query::ConceptSelectorFuns::normConceptScorePrf: conceptScore = " << conceptStr << " -> " << conceptScore << endl;
    }

    // Seed the maximum with -infinity (not 0) so that it is correct when every score is negative.
    double max_sc = -std::numeric_limits<double>::infinity();
    double min_sc = std::numeric_limits<double>::infinity();
    for (const auto& sc : scoredConcepts_)
    {
        max_sc = max(max_sc, sc.first);
        min_sc = min(min_sc, sc.first);
    }
    cout << "indri::query::ConceptSelectorFuns::normConceptScorePrf: min_sc, max_sc: " << min_sc << ", " << max_sc << endl;

    // min-max normalize scores in scoredConcepts_
    const double range_sc = max_sc - min_sc;
    multimap<double, pair<string, string> > scoredConcepts_norm;
    for (auto itSc = scoredConcepts_.begin(); itSc != scoredConcepts_.end(); ++itSc)
    {
        // A zero range would give 0/0 = NaN, which must not be used as an ordered-map key.
        double conceptScore = (range_sc > 0) ? (itSc->first - min_sc) / range_sc : 0.0;
        scoredConcepts_norm.insert(make_pair(conceptScore, make_pair((itSc->second).first, (itSc->second).second)));
        cout << "indri::query::ConceptSelectorFuns::normConceptScorePrf: scoredConcepts_norm: scoredConcepts_ = " << itSc->first  << endl;
        cout << "indri::query::ConceptSelectorFuns::normConceptScorePrf: scoredConcepts_norm: conceptScore = " << conceptScore << " =  (" << itSc->first << " - " << min_sc << " )/( " << max_sc << " - " << min_sc << " )" << endl;
    }

    return scoredConcepts_norm;
}
Esempio n. 4
0
// Converts a whitespace-separated text file of "<docno> <score>" records into binary form.
//
//   outfile - destination file; written through a 1 MB SequentialWriteBuffer
//   infile  - path of the text file to read
//   env     - query environment used to map each docno to an internal document id
//
// For every record whose docno resolves to at least one document, the first matching document
// id is written as sizeof(UINT32) bytes followed by the score as sizeof(double) bytes. Docnos
// that do not resolve are skipped silently. Reading stops at end of file or at the first record
// that cannot be parsed; if infile cannot be opened, no records are written.
//
// NOTE(review): the id is held in an int but written with sizeof(UINT32) -- assumes int is
// 32 bits wide; confirm for the target platforms.
void convert_docnoscore_to_binary( indri::file::File& outfile, const std::string& infile, indri::api::QueryEnvironment& env ) {
  std::ifstream in;
  const std::string docnoName = "docno";

  // Automatic storage, so the buffer is released even if a lookup below throws.
  indri::file::SequentialWriteBuffer outb( outfile, 1024*1024 );
  in.open( infile.c_str(), std::ifstream::in );

  std::string docno;
  double score;

  // Testing the extraction itself (rather than eof()) ends the loop on any read failure and
  // still accepts a final record that is not followed by a newline.
  while( in >> docno >> score ) {
    std::cout << "looking up: " << docno << " " << score << std::endl;

    std::vector<std::string> docnoValues;
    docnoValues.push_back( docno );

    std::vector<lemur::api::DOCID_T> result = env.documentIDsFromMetadata( docnoName, docnoValues );

    if( result.empty() ) {
      //      LEMUR_THROW( LEMUR_IO_ERROR, "No document exists with docno: " + docno );
      continue; // allow entries that don't exist and ignore silently.
    }

    int document = result[0];
    std::cout << document << std::endl;

    outb.write( &document, sizeof(UINT32) );
    outb.write( &score, sizeof(double) );
  }

  outb.flush();
  in.close();
}