コード例 #1
0
// Builds a new FieldInfoList containing only those field extents of document
// docID whose id equals fieldID. Returns NULL when no index is found for the
// document or when that index yields no term list. The caller owns the
// returned object and must delete it. @see FieldInfoList
// Note that not all index types support fields - those that do should override this method.
lemur::api::FieldInfoList *lemur::index::LemurIndriIndex::fieldInfoList(lemur::api::DOCID_T docID, int fieldID) const {
  // locate the index responsible for this document
  indri::collection::Repository::index_state indexes = _repository->indexes();
  indri::index::Index* owningIndex = _indexWithDocument( indexes, docID );
  if (owningIndex == NULL) {
    // no index for this document (e.g. the docID was invalid)
    return NULL;
  }

  // fetch the indri term list; it has to be deleted before we leave
  const indri::index::TermList *docTerms = owningIndex->termList((int)docID);
  if (docTerms == NULL) {
    return NULL;
  }

  // start from an empty result and append every extent of the wanted field
  lemur::api::IndriFieldInfoList *matches = new lemur::api::IndriFieldInfoList();

  const indri::utility::greedy_vector< indri::index::FieldExtent > extents = docTerms->fields();
  const int extentCount = extents.size();
  int pos = 0;
  while (pos < extentCount) {
    indri::index::FieldExtent candidate = extents[pos];
    if (candidate.id == fieldID) {
      matches->add(candidate);
    }
    ++pos;
  }

  // the term list is no longer needed once the extents have been inspected
  delete docTerms;
  return matches;
}
コード例 #2
0
// Returns the length that the owning index reports for documentID, or 0 when
// _indexWithDocument finds no index for that document.
lemur::api::COUNT_T lemur::index::LemurIndriIndex::docLength( lemur::api::DOCID_T documentID ) const {
  indri::collection::Repository::index_state indexes = _repository->indexes();
  indri::index::Index* owner = _indexWithDocument( indexes, documentID );

  if( owner == NULL ) {
    return 0;
  }

  // the length is read while holding the index's statistics lock
  indri::thread::ScopedLock guard( owner->statisticsLock() );
  return owner->documentLength( documentID );
}
コード例 #3
0
ファイル: LocalQueryServer.cpp プロジェクト: blaze3j/DocHunt
// Returns the length that the owning index reports for documentID, or 0 when
// _indexWithDocument finds no index for that document.
int indri::server::LocalQueryServer::documentLength( lemur::api::DOCID_T documentID ) {
  indri::collection::Repository::index_state indexes = _repository.indexes();
  indri::index::Index* owner = _indexWithDocument( indexes, documentID );

  if( owner == NULL ) {
    return 0;
  }

  // the length is read while holding the index's statistics lock
  indri::thread::ScopedLock guard( owner->statisticsLock() );
  return owner->documentLength( documentID );
}
コード例 #4
0
// Returns a newly allocated PositionList wrapping the term list of document
// docID, or NULL when no index is found for the document.
// NOTE(review): the term list is handed to PositionList without a null check
// and is not deleted here - presumably PositionList takes ownership; confirm.
lemur::api::TermInfoList* lemur::index::LemurIndriIndex::termInfoListSeq(lemur::api::DOCID_T docID) const {
  indri::collection::Repository::index_state indexes = _repository->indexes();
  indri::index::Index* owner = _indexWithDocument( indexes, docID );
  if( !owner ) {
    return NULL;
  }

  // the term list is fetched and wrapped while holding the statistics lock
  indri::thread::ScopedLock guard( owner->statisticsLock() );
  const indri::index::TermList* docTerms = owner->termList( docID );
  lemur::api::TermInfoList *wrapped = new indri::index::PositionList(docTerms);
  return wrapped;
}
コード例 #5
0
ファイル: LocalQueryServer.cpp プロジェクト: blaze3j/DocHunt
// For every requested entry i, looks up the field structure of documentIDs[i]
// and returns the path of the leaf found for the extent
// [pathBegins[i], pathEnds[i]]. The paths in the response are in the same
// order as documentIDs. An entry whose document has no index or no term list
// gets an empty path.
//
// Requests are processed in document-ID order so that the structure of a
// document is loaded only once even when it is requested several times.
//
// The caller owns the returned response object.
indri::server::QueryServerMetadataResponse* indri::server::LocalQueryServer::pathNames( const std::vector<lemur::api::DOCID_T>& documentIDs, const std::vector<int>& pathBegins, const std::vector<int>& pathEnds ) {
  indri::index::DocumentStructure docStruct;
  std::vector<std::string> result;
  result.reserve( documentIDs.size() );

  // pair each document with its original position, then sort by document ID
  std::vector<std::pair<lemur::api::DOCID_T, int> > docSorted;
  docSorted.reserve( documentIDs.size() );
  for( size_t i=0; i<documentIDs.size(); i++ ) {
    docSorted.push_back( std::make_pair( documentIDs[i], static_cast<int>(i) ) );
  }
  std::sort( docSorted.begin(), docSorted.end() );

  // One snapshot of the index state for the whole call (as documentVectors
  // does): docStruct keeps referring to an index across iterations.
  indri::collection::Repository::index_state indexes = _repository.indexes();

  // Cache state: which document docStruct currently describes, and whether
  // loading it succeeded. A separate flag is used instead of a sentinel ID
  // so that no document ID value is treated specially.
  bool haveLastDoc = false;
  lemur::api::DOCID_T lastDoc = 0;
  bool docStructLoaded = false;

  for( size_t i=0; i<docSorted.size(); i++ ) {
    const lemur::api::DOCID_T documentID = docSorted[i].first;

    if ( !haveLastDoc || documentID != lastDoc ) {
      docStructLoaded = false;

      indri::index::Index * index = _indexWithDocument( indexes, documentID );
      if ( index != 0 ) {
        const indri::index::TermList * termList = index->termList( documentID );
        if ( termList != 0 ) {
          docStruct.setIndex( *index );
          docStruct.loadStructure( termList->fields() );
          delete termList;
          docStructLoaded = true;
        }
      }

      // remember the document itself (not the success flag) so repeated
      // requests for it reuse the outcome of this load attempt
      lastDoc = documentID;
      haveLastDoc = true;
    }

    std::string path = "";
    if ( docStructLoaded ) {
      const int requestIndex = docSorted[i].second;
      path = docStruct.path( docStruct.findLeaf( pathBegins[requestIndex],
                                                 pathEnds[requestIndex] ) );
    }
    result.push_back( path );
  }

  // undo the sort: put every path back at the position of its request
  std::vector<std::string> actual;
  actual.resize( documentIDs.size() );
  for( size_t i=0; i<docSorted.size(); i++ ) {
    actual[docSorted[i].second] = result[i];
  }

  return new indri::server::LocalQueryServerMetadataResponse( actual );
}
コード例 #6
0
ファイル: LocalQueryServer.cpp プロジェクト: blaze3j/DocHunt
// Builds one DocumentVector per entry of documentIDs, in request order, and
// returns them in a newly allocated response object owned by the caller.
// The termIDStringMap is shared by all vectors built during this call.
// NOTE(review): the index returned by _indexWithDocument and the term list
// are used without null checks - presumably callers only pass valid IDs; confirm.
indri::server::QueryServerVectorsResponse* indri::server::LocalQueryServer::documentVectors( const std::vector<lemur::api::DOCID_T>& documentIDs ) {
  typedef std::vector<lemur::api::DOCID_T>::const_iterator DocIterator;

  indri::server::LocalQueryServerVectorsResponse* response = new indri::server::LocalQueryServerVectorsResponse( (int)documentIDs.size() );
  indri::collection::Repository::index_state indexes = _repository.indexes();
  std::map<int, std::string> termIDStringMap;

  for( DocIterator docIt = documentIDs.begin(); docIt != documentIDs.end(); ++docIt ) {
    indri::index::Index* owner = _indexWithDocument( indexes, *docIt );

    // the term list is read and converted while holding the statistics lock
    indri::thread::ScopedLock guard( owner->statisticsLock() );

    const indri::index::TermList* docTerms = owner->termList( *docIt );
    indri::api::DocumentVector* vec = new indri::api::DocumentVector( owner, docTerms, termIDStringMap );
    delete docTerms;
    response->addVector( vec );
  }

  return response;
}
コード例 #7
0
// Builds a new FieldInfoList from all field extents of document docID.
// Returns NULL when no index is found for the document or when that index
// yields no term list. The caller owns the returned object and must delete
// it. @see FieldInfoList
// Note that not all index types support fields - those that do should override this method.
lemur::api::FieldInfoList *lemur::index::LemurIndriIndex::fieldInfoList(lemur::api::DOCID_T docID) const {
  // locate the index responsible for this document
  indri::collection::Repository::index_state indexes = _repository->indexes();
  indri::index::Index* owningIndex = _indexWithDocument( indexes, docID );
  if (owningIndex == NULL) {
    // no index for this document (e.g. the docID was invalid)
    return NULL;
  }

  // fetch the indri term list; it has to be deleted before we leave
  const indri::index::TermList *docTerms = owningIndex->termList((int)docID);
  if (docTerms == NULL) {
    return NULL;
  }

  // the result object is constructed directly from the document's fields
  lemur::api::IndriFieldInfoList *allFields = new lemur::api::IndriFieldInfoList(docTerms->fields());
  delete docTerms;
  return allFields;
}
コード例 #8
0
// Returns a newly allocated InvFPTermList holding the terms of document docID
// in sequence, each paired with its position in the document's term list.
// Entries with term ID 0 are left out. Returns NULL when no index is found
// for the document or when the index yields no term list. The caller owns
// the returned list.
lemur::api::TermInfoList* lemur::index::LemurIndriIndex::termInfoListSeq(lemur::api::DOCID_T docID) const { 
  indri::collection::Repository::index_state indexes = _repository->indexes();
  indri::index::Index* index = _indexWithDocument( indexes, docID );
  if( !index ) {
    return NULL;
  }

  indri::thread::ScopedLock lock( index->statisticsLock() );
  const indri::index::TermList* termList = index->termList( docID );
  if( !termList ) {
    // the fieldInfoList methods treat a missing term list as possible, so
    // guard here as well instead of dereferencing a null pointer
    return NULL;
  }

  // bind by const reference: the IDs are only read, no copy is required
  const indri::utility::greedy_vector<lemur::api::TERMID_T>& termIDs = termList->terms();
  const int termCount = static_cast<int>( termIDs.size() );

  std::vector<lemur::index::LocatedTerm> locs;
  locs.reserve( termCount );

  // indri indexes stopwords as [OOV], so skip those
  for( int i = 0; i < termCount; i++ ) {
    if (termIDs[i] != 0) {
      lemur::index::LocatedTerm lt;
      lt.term = termIDs[i];
      lt.loc = i;
      locs.push_back(lt);
    }
  }

  lemur::api::TermInfoList *list = new lemur::index::InvFPTermList(docID, locs.size(), locs);
  delete termList;
  return list;
}