// Runs the query, expanding it if necessary. Will print output as well if verbose is on. void _runQuery( std::stringstream& output, const std::string& query, const std::string &queryType, const std::vector<std::string> &workingSet, std::vector<std::string> relFBDocs ) { try { if( _printQuery ) output << "# query: " << query << std::endl; std::vector<lemur::api::DOCID_T> docids;; if (workingSet.size() > 0) docids = _environment.documentIDsFromMetadata("docno", workingSet); if (relFBDocs.size() == 0) { if( _printSnippets ) { if (workingSet.size() > 0) _annotation = _environment.runAnnotatedQuery( query, docids, _initialRequested, queryType ); else _annotation = _environment.runAnnotatedQuery( query, _initialRequested ); _results = _annotation->getResults(); } else { if (workingSet.size() > 0) _results = _environment.runQuery( query, docids, _initialRequested, queryType ); else _results = _environment.runQuery( query, _initialRequested, queryType ); } } if( _expander ) { std::vector<indri::api::ScoredExtentResult> fbDocs; if (relFBDocs.size() > 0) { docids = _environment.documentIDsFromMetadata("docno", relFBDocs); for (size_t i = 0; i < docids.size(); i++) { indri::api::ScoredExtentResult r(0.0, docids[i]); fbDocs.push_back(r); } } std::string expandedQuery; if (relFBDocs.size() != 0) expandedQuery = _expander->expand( query, fbDocs ); else expandedQuery = _expander->expand( query, _results ); if( _printQuery ) output << "# expanded: " << expandedQuery << std::endl; if (workingSet.size() > 0) { docids = _environment.documentIDsFromMetadata("docno", workingSet); _results = _environment.runQuery( expandedQuery, docids, _requested, queryType ); } else { _results = _environment.runQuery( expandedQuery, _requested, queryType ); } } } catch( lemur::api::Exception& e ) { _results.clear(); LEMUR_RETHROW(e, "QueryThread::_runQuery Exception"); } }
// Runs `_query` against the documents named in `docSet` and rebuilds the
// cached per-query state of this object.
//
// _qno   - query identifier, stored in `qno`.
// _query - query text, stored in `query` and passed to the environment.
// docSet - external "docno" values; they are resolved to internal ids and
//          the annotated query is restricted to those documents, with the
//          result limit set to docSet.size().
//
// Always returns Rcpp::wrap(true).
//
// NOTE(review): `scores` is filled in the order of `results`, whereas
// `extDocIDs` is derived from `documentIDs` (the order returned by the docno
// lookup). Confirm that consumers do not assume the two are index-aligned.
// NOTE(review): `qa` is overwritten without being released here -- verify
// where the previous annotation is freed.
SEXP generateResultsFromSet(string _qno, string _query, vector<string> docSet){
  // Drop everything left over from the previous query.
  resultsData = resultsData_nullCopy;
  documentIDs.clear();
  scores.clear();
  extDocIDs.clear();
  terms.clear();
  _gramTable.clear();
  results.clear();

  // Remember the query being evaluated.
  qno = _qno;
  query = _query;

  // Retrieve over the supplied document set only.
  documentLimit = docSet.size();
  documentIDs = environment.documentIDsFromMetadata("docno", docSet);
  qa = environment.runAnnotatedQuery(query, documentIDs, documentLimit);
  results = qa->getResults();
  _logtoposterior(results);

  // Collect the score of every returned result, in result order.
  for (const auto& hit : results) {
    scores.push_back(hit.score);
  }

  extDocIDs = environment.documentMetadata(documentIDs, "docno");

  // Refresh the derived query details and statistics.
  updateQueryDetails(environment, resultsData, query);
  countGrams();
  buildStats();

  return Rcpp::wrap(true);
}
// Scores every extracted concept with findConceptScorePrf() against the given
// top documents and returns the concepts keyed by their min-max normalized
// score: (score - min) / (max - min).
//
// concatenatedGoodConcepts - (conceptSty, conceptStr) pairs to score.
// qId                      - query id, forwarded to findConceptScorePrf().
// topDocsNames             - external "docno" values of the top documents;
//                            resolved to internal ids through `env`.
// env, queryReformulator, resourceNames_ - forwarded to findConceptScorePrf().
//
// Returns a multimap (ascending by key) from normalized score to
// (conceptSty, conceptStr). An empty input yields an empty map.
//
// When all raw scores are equal (including the single-concept case) the
// min-max formula is 0/0; every concept is then given the normalized score
// 0.0 so that no NaN key is inserted into the map (NaN keys violate the
// ordering requirements of std::multimap).
//
// Progress is traced on stdout.
multimap<double, pair<string, string> > indri::query::ConceptSelectorFuns::normConceptScorePrf(
    vector<pair<string, string> > concatenatedGoodConcepts,
    string qId,
    vector<string> topDocsNames,
    indri::api::QueryEnvironment & env,
    indri::query::QueryReformulator * queryReformulator,
    vector<string> resourceNames_)
{
    std::vector<lemur::api::DOCID_T> topDocIds = env.documentIDsFromMetadata("docno", topDocsNames);

    multimap<double, pair<string, string>, std::greater<double> > scoredConcepts_;
    for (const auto& concStyStrPair : concatenatedGoodConcepts) // for each extracted concept
    {
        string conceptSty = concStyStrPair.first;
        string conceptStr = concStyStrPair.second;

        double conceptScore = indri::query::ConceptSelectorFuns::findConceptScorePrf(
            conceptSty, conceptStr, qId, topDocIds, env, queryReformulator, resourceNames_);
        scoredConcepts_.insert(make_pair(conceptScore, make_pair(conceptSty, conceptStr)));
        cout << "indri::query::ConceptSelectorFuns::normConceptScorePrf: conceptScore = " << conceptStr << " -> " << conceptScore << endl;
    }

    // Start the maximum at -infinity rather than 0 so that a set of
    // all-negative scores still yields its true maximum.
    double max_sc = -std::numeric_limits<double>::infinity();
    double min_sc = std::numeric_limits<double>::infinity();
    for (const auto& sc : scoredConcepts_)
    {
        max_sc = max(max_sc, sc.first);
        min_sc = min(min_sc, sc.first);
    }
    cout << "indri::query::ConceptSelectorFuns::normConceptScorePrf: min_sc, max_sc: " << min_sc << ", " << max_sc << endl;

    // A non-positive (or NaN) range means the formula cannot be evaluated.
    const double range = max_sc - min_sc;
    const bool rangeIsUsable = range > 0;

    // min-max normalize scores in scoredConcepts_
    multimap<double, pair<string, string> > scoredConcepts_norm;
    for (auto itSc = scoredConcepts_.begin(); itSc != scoredConcepts_.end(); ++itSc)
    {
        const double conceptScore = rangeIsUsable ? (itSc->first - min_sc) / range : 0.0;
        scoredConcepts_norm.insert(make_pair(conceptScore, make_pair((itSc->second).first, (itSc->second).second)));
        cout << "indri::query::ConceptSelectorFuns::normConceptScorePrf: scoredConcepts_norm: scoredConcepts_ = " << itSc->first << endl;
        cout << "indri::query::ConceptSelectorFuns::normConceptScorePrf: scoredConcepts_norm: conceptScore = " << conceptScore << " = (" << itSc->first << " - " << min_sc << " )/( " << max_sc << " - " << min_sc << " )" << endl;
    }

    return scoredConcepts_norm;
}
// Converts a text file of whitespace-separated "<docno> <score>" records into
// binary records written to `outfile`.
//
// outfile - destination file; for every docno that resolves to a document,
//           a UINT32 internal document id followed by a double score is
//           written in native byte order.
// infile  - path of the text file to read.
// env     - query environment used to map "docno" metadata values to
//           internal document ids.
//
// Records whose docno does not exist in the index are skipped silently.
// Reading stops at end of file or at the first record that cannot be parsed.
// If `infile` cannot be opened, a message is written to stderr and nothing is
// written to `outfile`.
//
// Each lookup and each resolved id is traced on stdout.
void convert_docnoscore_to_binary( indri::file::File& outfile, const std::string& infile, indri::api::QueryEnvironment& env ) {
  const std::string docnoName = "docno";

  std::ifstream in( infile.c_str(), std::ifstream::in );
  if( !in.is_open() ) {
    // The previous eof()-driven loop never terminated in this situation.
    std::cerr << "convert_docnoscore_to_binary: could not open " << infile << std::endl;
    return;
  }

  // Automatic storage so the buffer is released even if a lookup throws.
  indri::file::SequentialWriteBuffer outb( outfile, 1024*1024 );

  std::string docno;
  double score = 0.0;

  // Testing the extraction itself (rather than eof()) keeps a final record
  // that is not followed by a newline and ends the loop on malformed input.
  while( in >> docno >> score ) {
    std::cout << "looking up: " << docno << " " << score << std::endl;

    std::vector<std::string> docnoValues( 1, docno );
    std::vector<lemur::api::DOCID_T> result = env.documentIDsFromMetadata( docnoName, docnoValues );

    if( result.size() == 0 ) {
      // LEMUR_THROW( LEMUR_IO_ERROR, "No document exists with docno: " + docno );
      continue; // allow entries that don't exist and ignore silently.
    }

    // Hold the id in the exact type whose size is written to the file.
    const UINT32 document = static_cast<UINT32>( result[0] );
    std::cout << document << std::endl;

    outb.write( static_cast<const void*>( &document ), sizeof(document) );
    outb.write( static_cast<const void*>( &score ), sizeof(score) );
  }

  outb.flush();
}