Пример #1
0
    SEXP generateResults(string _qno, string _query, int _documentLimit, bool stats) {

        resultsData = resultsData_nullCopy;
        documentIDs.clear();
        scores.clear();
        extDocIDs.clear();
        terms.clear();
        _gramTable.clear();
        results.clear();
        qno = _qno;
        query = _query;



        documentLimit = _documentLimit;
        qa = environment.runAnnotatedQuery(query, _documentLimit);


        results = qa->getResults();
        _logtoposterior(results);

        // Extract Documents
        for (size_t i = 0; i < results.size(); i++){
            documentIDs.push_back(results[i].document);
            scores.push_back(results[i].score);
        }
        extDocIDs = environment.documentMetadata(documentIDs, "docno");
        if(stats){
            updateQueryDetails(environment, resultsData, query);
            countGrams();
            buildStats();
        }

        return Rcpp::wrap(true);
    }
Пример #2
0
    SEXP runQuery(string _qno, string _query, int _documentLimit, string _runid="default"){
        indri::api::QueryAnnotation* qa;
        qa = environment.runAnnotatedQuery(_query, _documentLimit);

        std::vector<indri::api::ScoredExtentResult> results = qa->getResults();
        //_logtoposterior(results);

        // Extract Documents
        std::vector<lemur::api::DOCID_T> documentIDs;
        std::vector<double> scores;
        for (size_t i = 0; i < results.size(); i++){
            documentIDs.push_back(results[i].document);
            scores.push_back(results[i].score);
        }
        vector<string> res_qno;
        vector<string> res_q0;
        vector<string> res_runid;

        int documentLimit = _documentLimit;

        for(int i=0; i < documentLimit; i++){
            res_qno.push_back(qno);
            res_q0.push_back("Q0");
            res_runid.push_back(_runid);

        }
        std::vector<string> extDocIDs = environment.documentMetadata(documentIDs, "docno");
        return Rcpp::DataFrame::create( Named("topic")= _qno,
                Named("q0")= res_q0, Named("docID")=  wrap(extDocIDs),
                Named("rank")= seq( 1, documentLimit ),
                Named("score")= wrap(scores),
                Named("runID")= res_runid);
        }
Пример #3
0
  void _printResultRegion( std::stringstream& output, std::string queryIndex, int start, int end  ) {
    std::vector<std::string> documentNames;
    std::vector<indri::api::ParsedDocument*> documents;

    std::vector<indri::api::ScoredExtentResult> resultSubset;

    resultSubset.assign( _results.begin() + start, _results.begin() + end );


    // Fetch document data for printing
    if( _printDocuments || _printPassages || _printSnippets ) {
      // Need document text, so we'll fetch the whole document
      documents = _environment.documents( resultSubset );
      documentNames.clear();

      for( size_t i=0; i<resultSubset.size(); i++ ) {
        indri::api::ParsedDocument* doc = documents[i];
        std::string documentName;

        indri::utility::greedy_vector<indri::parse::MetadataPair>::iterator iter = std::find_if( documents[i]->metadata.begin(),
          documents[i]->metadata.end(),
          indri::parse::MetadataPair::key_equal( "docno" ) );

        if( iter != documents[i]->metadata.end() )
          documentName = (char*) iter->value;

        // store the document name in a separate vector so later code can find it
        documentNames.push_back( documentName );
      }
    } else {
      // We only want document names, so the documentMetadata call may be faster
      documentNames = _environment.documentMetadata( resultSubset, "docno" );
    }

    std::vector<std::string> pathNames;
    if ( _inexFormat ) {
      // retrieve path names
      pathNames = _environment.pathNames( resultSubset );
    }

    // Print results
    for( size_t i=0; i < resultSubset.size(); i++ ) {
      int rank = start+i+1;
      std::string queryNumber = queryIndex;

      if( _trecFormat ) {
        // TREC formatted output: queryNumber, Q0, documentName, rank, score, runID
        output << queryNumber << " "
                << "Q0 "
                << documentNames[i] << " "
                << rank << " "
                << resultSubset[ i ].score << " "
                << _runID << std::endl;
      } else if( _inexFormat ) {

  output << "    <result>" << std::endl
         << "      <file>" << documentNames[i] << "</file>" << std::endl
         << "      <path>" << pathNames[i] << "</path>" << std::endl
         << "      <rsv>" << resultSubset[i].score << "</rsv>"  << std::endl
         << "    </result>" << std::endl;
      }
      else {
        // score, documentName, firstWord, lastWord
        output << resultSubset[i].score << "\t"
                << documentNames[i] << "\t"
                << resultSubset[i].begin << "\t"
                << resultSubset[i].end << std::endl;
      }

      if( _printDocuments ) {
        output << documents[i]->text << std::endl;
      }

      if( _printPassages ) {
        int byteBegin = documents[i]->positions[ resultSubset[i].begin ].begin;
        int byteEnd = documents[i]->positions[ resultSubset[i].end-1 ].end;
        output.write( documents[i]->text + byteBegin, byteEnd - byteBegin );
        output << std::endl;
      }

      if( _printSnippets ) {
        indri::api::SnippetBuilder builder(false);
        output << builder.build( resultSubset[i].document, documents[i], _annotation ) << std::endl;
      }

      if( documents.size() )
        delete documents[i];
    }
  }
Пример #4
0
 SEXP getMetaData(string metaDataKey){
     vector<string> metaDataString = environment.documentMetadata(documentIDs, metaDataKey);
     CharacterVector c = wrap(metaDataString);
     c.attr("names") = extDocIDs;
     return c;
 }