// returns a new instance of FieldInfoList which represents field entities in a document index for a specific field, you must delete the instance later. @see FieldInfoList // Note that not all index types support fields - those that do should override this method. lemur::api::FieldInfoList *lemur::index::LemurIndriIndex::fieldInfoList(lemur::api::DOCID_T docID, int fieldID) const { // get the index for this document indri::collection::Repository::index_state indexes = _repository->indexes(); indri::index::Index* index = _indexWithDocument( indexes, docID ); // ensure we do have an index (i.e. if the docID was invalid...) if (!index) return NULL; // and the indri term-list const indri::index::TermList *tList=index->termList((int)docID); // ensure we have a term list! if (!tList) return NULL; // create a blank field info list object lemur::api::IndriFieldInfoList *retVal=new lemur::api::IndriFieldInfoList(); // loop through our fields and insert those that match the field ID const indri::utility::greedy_vector< indri::index::FieldExtent > fieldVec=tList->fields(); int numFields=fieldVec.size(); for (int i=0; i < numFields; i++) { indri::index::FieldExtent thisField=fieldVec[i]; if (thisField.id==fieldID) { retVal->add(thisField); } } delete tList; // and return return retVal; }
// Looks up the index that contains `documentID` and returns the document
// length that index reports. Returns 0 when no index contains the document.
lemur::api::COUNT_T lemur::index::LemurIndriIndex::docLength( lemur::api::DOCID_T documentID ) const {
  indri::collection::Repository::index_state state = _repository->indexes();
  indri::index::Index* owner = _indexWithDocument( state, documentID );

  // unknown document: nothing to measure
  if( !owner )
    return 0;

  // the length is read while the index's statistics lock is held
  indri::thread::ScopedLock guard( owner->statisticsLock() );
  return owner->documentLength( documentID );
}
// Returns the length of `documentID` as reported by the index that holds it,
// or 0 when the repository has no index containing that document.
int indri::server::LocalQueryServer::documentLength( lemur::api::DOCID_T documentID ) {
  indri::collection::Repository::index_state state = _repository.indexes();
  indri::index::Index* owner = _indexWithDocument( state, documentID );

  // no index claims this document
  if( owner == 0 )
    return 0;

  // read the length under the index's statistics lock
  indri::thread::ScopedLock guard( owner->statisticsLock() );
  return owner->documentLength( documentID );
}
// Builds a sequential term info list for `docID` by wrapping the document's
// indri term list in an indri::index::PositionList.
// Returns NULL when no index contains the document.
//
// NOTE(review): the TermList obtained here is not deleted in this function;
// presumably PositionList takes ownership of it - confirm.
// NOTE(review): the result of termList() is passed on without a null check;
// verify that PositionList tolerates a null term list.
lemur::api::TermInfoList* lemur::index::LemurIndriIndex::termInfoListSeq(lemur::api::DOCID_T docID) const {
  indri::collection::Repository::index_state state = _repository->indexes();
  indri::index::Index* owner = _indexWithDocument( state, docID );

  // unknown document
  if( !owner )
    return NULL;

  // the term list is fetched and wrapped while the statistics lock is held
  indri::thread::ScopedLock guard( owner->statisticsLock() );
  const indri::index::TermList* terms = owner->termList( docID );
  return new indri::index::PositionList( terms );
}
// For every requested (documentIDs[k], pathBegins[k], pathEnds[k]) triple,
// loads the document's field structure and stores, at position k of the
// response, the string DocumentStructure::path() gives for the leaf that
// findLeaf(begin, end) selects. Requests are processed in document-id order so
// that consecutive requests for the same document reuse the loaded structure.
//
//   documentIDs - documents to look up
//   pathBegins  - begin offsets, indexed in parallel with documentIDs
//   pathEnds    - end offsets, indexed in parallel with documentIDs
//   returns     - a newly allocated response holding one string per request,
//                 in the original request order; the string is empty when the
//                 document's index or term list could not be obtained
//
// NOTE(review): pathBegins and pathEnds are indexed without a size check;
// callers are assumed to pass vectors at least as long as documentIDs.
indri::server::QueryServerMetadataResponse* indri::server::LocalQueryServer::pathNames( const std::vector<lemur::api::DOCID_T>& documentIDs, const std::vector<int>& pathBegins, const std::vector<int>& pathEnds ) {
  indri::index::DocumentStructure docStruct;

  // Track which document docStruct currently describes. A separate flag is
  // used (rather than a sentinel id) so that no document id can be mistaken
  // for "already loaded" before anything has been loaded.
  lemur::api::DOCID_T lastDoc = 0;
  bool haveLastDoc = false;

  // pair each document id with its position in the request, then sort by id
  std::vector<std::pair<lemur::api::DOCID_T, int> > docSorted;
  docSorted.reserve( documentIDs.size() );
  for( size_t i=0; i<documentIDs.size(); i++ ) {
    docSorted.push_back( std::make_pair( documentIDs[i], static_cast<int>(i) ) );
  }
  std::sort( docSorted.begin(), docSorted.end() );

  // One snapshot for the whole call: docStruct keeps referring to an index
  // across iterations, so the state it came from is kept in scope throughout.
  // (presumably the index_state keeps its Index objects alive - confirm)
  indri::collection::Repository::index_state indexes = _repository.indexes();

  // results are written straight into their original request slot;
  // slots that cannot be resolved keep the default empty string
  std::vector<std::string> actual;
  actual.resize( documentIDs.size() );

  for( size_t i=0; i<docSorted.size(); i++ ) {
    const lemur::api::DOCID_T documentID = docSorted[i].first;
    const int slot = docSorted[i].second;

    bool docStructLoaded = haveLastDoc && ( documentID == lastDoc );

    if( !docStructLoaded ) {
      // the document may not belong to any index, and an index may have no
      // term list for it; either case leaves this request unresolved
      indri::index::Index* index = _indexWithDocument( indexes, documentID );
      const indri::index::TermList* termList = index ? index->termList( documentID ) : 0;

      if( termList != 0 ) {
        docStruct.setIndex( *index );
        docStruct.loadStructure( termList->fields() );
        delete termList;

        // remember the document id itself so the next request for the same
        // document skips the reload
        lastDoc = documentID;
        haveLastDoc = true;
        docStructLoaded = true;
      }
    }

    if( docStructLoaded ) {
      actual[slot] = docStruct.path( docStruct.findLeaf( pathBegins[slot], pathEnds[slot] ) );
    }
  }

  return new indri::server::LocalQueryServerMetadataResponse( actual );
}
// Builds one indri::api::DocumentVector per requested document, in request
// order, and returns them in a newly allocated response object.
// A single term-id -> string map is shared by all DocumentVector constructions
// in the call.
//
// NOTE(review): the pointer returned by _indexWithDocument is dereferenced
// without a null check; a document id that no index contains would fault
// here - confirm callers only pass valid ids.
indri::server::QueryServerVectorsResponse* indri::server::LocalQueryServer::documentVectors( const std::vector<lemur::api::DOCID_T>& documentIDs ) {
  const size_t docCount = documentIDs.size();

  indri::server::LocalQueryServerVectorsResponse* vectors =
    new indri::server::LocalQueryServerVectorsResponse( static_cast<int>( docCount ) );
  indri::collection::Repository::index_state state = _repository.indexes();
  std::map<int, std::string> termStrings;

  for( size_t d = 0; d < docCount; ++d ) {
    const lemur::api::DOCID_T docID = documentIDs[d];
    indri::index::Index* owner = _indexWithDocument( state, docID );

    // the statistics lock is held for the rest of this iteration
    indri::thread::ScopedLock guard( owner->statisticsLock() );

    const indri::index::TermList* terms = owner->termList( docID );
    indri::api::DocumentVector* docVector = new indri::api::DocumentVector( owner, terms, termStrings );

    // the term list is released as soon as the vector has been constructed
    delete terms;
    vectors->addVector( docVector );
  }

  return vectors;
}
// returns a new instance of FieldInfoList which represents all field entities in a document index, you must delete the instance later. @see FieldInfoList // Note that not all index types support fields - those that do should override this method. lemur::api::FieldInfoList *lemur::index::LemurIndriIndex::fieldInfoList(lemur::api::DOCID_T docID) const { // get the index for this document indri::collection::Repository::index_state indexes = _repository->indexes(); indri::index::Index* index = _indexWithDocument( indexes, docID ); // ensure we do have an index (i.e. if the docID was invalid...) if (!index) return NULL; // and the indri term-list const indri::index::TermList *tList=index->termList((int)docID); // ensure we have a term list! if (!tList) return NULL; // create our field info list object lemur::api::IndriFieldInfoList *retVal=new lemur::api::IndriFieldInfoList(tList->fields()); delete tList; // and return return retVal; }
// Builds a sequential term info list for `docID`: one LocatedTerm per token
// position whose term id is non-zero, with `loc` set to the token's position
// in the document's term sequence.
//
//   docID   - document to list
//   returns - a newly allocated lemur::index::InvFPTermList, or NULL when no
//             index contains the document or the index has no term list for it
lemur::api::TermInfoList* lemur::index::LemurIndriIndex::termInfoListSeq(lemur::api::DOCID_T docID) const {
  indri::collection::Repository::index_state indexes = _repository->indexes();
  indri::index::Index* index = _indexWithDocument( indexes, docID );

  // unknown document
  if( !index )
    return NULL;

  // the term list is read while the index's statistics lock is held
  indri::thread::ScopedLock lock( index->statisticsLock() );
  const indri::index::TermList* termList = index->termList( docID );

  // termList() can come back empty-handed; do not dereference a null list
  if( !termList )
    return NULL;

  // read the term ids in place instead of copying the whole vector;
  // the reference is not used after termList is deleted below
  const indri::utility::greedy_vector<lemur::api::TERMID_T>& termIDs = termList->terms();
  const int termCount = static_cast<int>( termIDs.size() );

  std::vector<lemur::index::LocatedTerm> locs;
  locs.reserve( termCount );

  // indri indexes stopwords as [OOV] (term id 0), so skip those
  for( int i = 0; i < termCount; i++ ) {
    if( termIDs[i] != 0 ) {
      lemur::index::LocatedTerm lt;
      lt.term = termIDs[i];
      lt.loc = i;
      locs.push_back( lt );
    }
  }

  lemur::api::TermInfoList* list = new lemur::index::InvFPTermList( docID, locs.size(), locs );
  delete termList;
  return list;
}