/**
 * Runs an annotated Indri query and caches its outcome in this object's
 * members (results, documentIDs, scores, extDocIDs).
 *
 * @param _qno            query identifier, stored in the member `qno`
 * @param _query          query text, stored in the member `query` and executed
 * @param _documentLimit  maximum number of results requested from the environment
 * @param stats           when true, updateQueryDetails(), countGrams() and
 *                        buildStats() are run after the results are collected
 * @return Rcpp::wrap(true) in every case
 */
SEXP generateResults(string _qno, string _query, int _documentLimit, bool stats) {
  // Start from a clean slate: forget everything cached by a previous call.
  results.clear();
  _gramTable.clear();
  terms.clear();
  extDocIDs.clear();
  scores.clear();
  documentIDs.clear();
  resultsData = resultsData_nullCopy;

  // Remember the parameters of the query being executed.
  qno = _qno;
  query = _query;
  documentLimit = _documentLimit;

  // NOTE(review): the annotation previously held in `qa` is not released
  // here; confirm whether it is freed elsewhere in the class.
  qa = environment.runAnnotatedQuery(query, _documentLimit);
  results = qa->getResults();

  // Presumably converts the raw log scores to posteriors, judging by the
  // helper's name - its definition is not part of this block.
  _logtoposterior(results);

  // Split the result list into parallel vectors of internal ids and scores.
  const size_t hitCount = results.size();
  for (size_t hit = 0; hit != hitCount; ++hit) {
    documentIDs.push_back(results[hit].document);
    scores.push_back(results[hit].score);
  }

  // Look up the "docno" metadata field for each internal id.
  extDocIDs = environment.documentMetadata(documentIDs, "docno");

  if (!stats) {
    return Rcpp::wrap(true);
  }

  updateQueryDetails(environment, resultsData, query);
  countGrams();
  buildStats();
  return Rcpp::wrap(true);
}
/**
 * Runs a query and returns the ranking as a data frame with one row per
 * retrieved document, using the TREC run-file column layout.
 *
 * Columns: topic, q0 (always "Q0"), docID (the "docno" metadata field),
 * rank (1-based), score, runID.
 *
 * The number of rows is the number of results actually returned by the
 * environment, which may be smaller than `_documentLimit` (possibly zero).
 * All columns are built with that same length so the data frame is always
 * well formed.
 *
 * @param _qno            topic/query identifier written to the "topic" column
 * @param _query          query text to execute
 * @param _documentLimit  maximum number of results requested
 * @param _runid          run label written to the "runID" column
 * @return an Rcpp::DataFrame as described above
 */
SEXP runQuery(string _qno, string _query, int _documentLimit, string _runid="default"){
  indri::api::QueryAnnotation* qa = environment.runAnnotatedQuery(_query, _documentLimit);

  // Copy the results out, then release the annotation: the pointer returned
  // by runAnnotatedQuery is owned by the caller and was previously leaked.
  std::vector<indri::api::ScoredExtentResult> results = qa->getResults();
  delete qa;

  // The _logtoposterior(results) conversion stays disabled here, as before.

  // Every column is sized by what was actually retrieved, not by the
  // requested limit, so short or empty result lists stay consistent.
  const int rowCount = static_cast<int>(results.size());

  // Extract Documents
  std::vector<lemur::api::DOCID_T> documentIDs;
  std::vector<double> scores;
  documentIDs.reserve(results.size());
  scores.reserve(results.size());
  for (size_t i = 0; i < results.size(); i++){
    documentIDs.push_back(results[i].document);
    scores.push_back(results[i].score);
  }

  std::vector<string> extDocIDs = environment.documentMetadata(documentIDs, "docno");

  // Constant columns, repeated once per row. The topic column uses the
  // _qno argument (not the cached member qno of an earlier query).
  vector<string> res_qno(results.size(), _qno);
  vector<string> res_q0(results.size(), string("Q0"));
  vector<string> res_runid(results.size(), _runid);

  // 1-based ranks; built by hand because seq(1, 0) is not a valid range
  // when there are no results.
  Rcpp::IntegerVector ranks(rowCount);
  for (int r = 0; r < rowCount; r++){
    ranks[r] = r + 1;
  }

  return Rcpp::DataFrame::create(
      Named("topic")= res_qno,
      Named("q0")= res_q0,
      Named("docID")= wrap(extDocIDs),
      Named("rank")= ranks,
      Named("score")= wrap(scores),
      Named("runID")= res_runid);
}
void _printResultRegion( std::stringstream& output, std::string queryIndex, int start, int end ) { std::vector<std::string> documentNames; std::vector<indri::api::ParsedDocument*> documents; std::vector<indri::api::ScoredExtentResult> resultSubset; resultSubset.assign( _results.begin() + start, _results.begin() + end ); // Fetch document data for printing if( _printDocuments || _printPassages || _printSnippets ) { // Need document text, so we'll fetch the whole document documents = _environment.documents( resultSubset ); documentNames.clear(); for( size_t i=0; i<resultSubset.size(); i++ ) { indri::api::ParsedDocument* doc = documents[i]; std::string documentName; indri::utility::greedy_vector<indri::parse::MetadataPair>::iterator iter = std::find_if( documents[i]->metadata.begin(), documents[i]->metadata.end(), indri::parse::MetadataPair::key_equal( "docno" ) ); if( iter != documents[i]->metadata.end() ) documentName = (char*) iter->value; // store the document name in a separate vector so later code can find it documentNames.push_back( documentName ); } } else { // We only want document names, so the documentMetadata call may be faster documentNames = _environment.documentMetadata( resultSubset, "docno" ); } std::vector<std::string> pathNames; if ( _inexFormat ) { // retrieve path names pathNames = _environment.pathNames( resultSubset ); } // Print results for( size_t i=0; i < resultSubset.size(); i++ ) { int rank = start+i+1; std::string queryNumber = queryIndex; if( _trecFormat ) { // TREC formatted output: queryNumber, Q0, documentName, rank, score, runID output << queryNumber << " " << "Q0 " << documentNames[i] << " " << rank << " " << resultSubset[ i ].score << " " << _runID << std::endl; } else if( _inexFormat ) { output << " <result>" << std::endl << " <file>" << documentNames[i] << "</file>" << std::endl << " <path>" << pathNames[i] << "</path>" << std::endl << " <rsv>" << resultSubset[i].score << "</rsv>" << std::endl << " </result>" << std::endl; } else 
{ // score, documentName, firstWord, lastWord output << resultSubset[i].score << "\t" << documentNames[i] << "\t" << resultSubset[i].begin << "\t" << resultSubset[i].end << std::endl; } if( _printDocuments ) { output << documents[i]->text << std::endl; } if( _printPassages ) { int byteBegin = documents[i]->positions[ resultSubset[i].begin ].begin; int byteEnd = documents[i]->positions[ resultSubset[i].end-1 ].end; output.write( documents[i]->text + byteBegin, byteEnd - byteBegin ); output << std::endl; } if( _printSnippets ) { indri::api::SnippetBuilder builder(false); output << builder.build( resultSubset[i].document, documents[i], _annotation ) << std::endl; } if( documents.size() ) delete documents[i]; } }
/**
 * Fetches one metadata field for every document in the cached `documentIDs`.
 *
 * @param metaDataKey  name of the metadata field to look up
 * @return a character vector of the field values, with its "names"
 *         attribute set to the cached `extDocIDs`
 */
SEXP getMetaData(string metaDataKey){
  CharacterVector fieldValues = wrap(environment.documentMetadata(documentIDs, metaDataKey));
  fieldValues.attr("names") = extDocIDs;
  return fieldValues;
}