Ejemplo n.º 1
0
//
// Rebuilds _extents with every extent of the inner node that is contained
// (per Extent::contains) in an extent of the outer node, by walking both
// extent lists in a single forward merge.
//
// documentID is not consulted by this method; the children's current
// extent lists are used as they are.
//
void indri::infnet::ExtentInsideNode::prepare( lemur::api::DOCID_T documentID ) {
  // reset the child / sibling pointer and all per-document state
  initpointer();
  _extents.clear();
  _lastExtent.begin = -1;
  _lastExtent.end = -1;

  // nothing can match unless both operands are present
  if( !( _inner && _outer ) )
    return;

  typedef indri::utility::greedy_vector<indri::index::Extent> ExtentVector;

  const ExtentVector& innerList = _inner->extents();
  const ExtentVector& outerList = _outer->extents();

  ExtentVector::const_iterator inner = innerList.begin();
  ExtentVector::const_iterator outer = outerList.begin();

  while( inner != innerList.end() && outer != outerList.end() ) {
    const bool inside = outer->contains( *inner );

    if( inside )
      _extents.push_back( *inner );

    // The inner cursor advances after a match, or when the current outer
    // extent starts after the inner one; otherwise the outer cursor moves.
    if( inside || outer->begin > inner->begin ) {
      ++inner;
    } else {
      ++outer;
    }
  }
}
Ejemplo n.º 2
0
//
// Rebuilds _extents as the result of folding _and over the extent lists of
// all children.
//
// With exactly two children the result is written straight into _extents.
// With more than two, two scratch vectors are used alternately so that each
// _and call reads from one and writes to the other.  With fewer than two
// children _extents is left empty.
//
// documentID is not consulted by this method.
//
void indri::infnet::ExtentAndNode::prepare( lemur::api::DOCID_T documentID ) {
  // initialize the child / sibling pointer
  initpointer();

  _extents.clear();

  const size_t childCount = _children.size();

  if( childCount == 2 ) {
    _and( _extents, _children[0]->extents(), _children[1]->extents() );
  } else if( childCount > 2 ) {
    indri::utility::greedy_vector<indri::index::Extent> first;
    indri::utility::greedy_vector<indri::index::Extent> second;

    // this part is a little complex because we try
    // to avoid copying extent vectors too much
    _and( first, _children[0]->extents(), _children[1]->extents() );

    // Consume the remaining children two at a time, so the running result
    // always ends up back in 'first'.  The bound must be i+1 < childCount:
    // the previous bound (i < childCount-2) stopped one pair early and
    // silently ignored the last two children whenever childCount was even.
    size_t i = 2;
    for( ; i + 1 < childCount; i += 2 ) {
      _and( second, first, _children[i]->extents() );
      _and( first, second, _children[i+1]->extents() );
    }

    if( i < childCount ) {
      // odd child count: exactly one child is left to fold in
      _and( _extents, first, _children[i]->extents() );
    } else {
      // even child count: every child has already been folded into 'first'
      _extents = first;
    }
  }
}
Ejemplo n.º 3
0
//
// Rebuilds _extents with the extents of _field whose numeric value equals
// _constant.  The i-th entry of _field->numbers() is paired with the i-th
// entry of _field->extents().
//
// documentID is not consulted by this method.
//
void indri::infnet::FieldEqualsNode::prepare( lemur::api::DOCID_T documentID ) {
  // reset the child / sibling pointer and the previous result
  initpointer();
  _extents.clear();

  if( _field ) {
    const indri::utility::greedy_vector<INT64>& values = _field->numbers();
    const indri::utility::greedy_vector<indri::index::Extent>& fieldExtents = _field->extents();

    size_t slot = 0;
    while( slot < values.size() ) {
      // keep only the field occurrences carrying the requested value
      if( values[slot] == _constant )
        _extents.push_back( fieldExtents[slot] );
      ++slot;
    }
  }
}
Ejemplo n.º 4
0
//
// Rebuilds _extents from the current entry of _list.
//
// If the current entry belongs to documentID, each recorded position p
// becomes the extent [p, p+1).  If there is no list, no current entry, or
// the entry is for a different document, _extents is left empty.
//
void indri::infnet::DocListIteratorNode::prepare( lemur::api::DOCID_T documentID ) {
  // reset the child / sibling pointer and all per-document state
  initpointer();

  _extents.clear();
  _lastExtent.begin = -1;
  _lastExtent.end = -1;

  if( !_list )
    return;

  indri::index::DocListIterator::DocumentData* entry = _list->currentEntry();

  if( entry && entry->document == documentID ) {
    indri::utility::greedy_vector<int>& positions = entry->positions;

    for( size_t p = 0; p < positions.size(); ++p ) {
      const int position = positions[p];
      _extents.push_back( indri::index::Extent( position, position + 1 ) );
    }
  }
}
Ejemplo n.º 5
0
//
// Rebuilds _extents with one extent per distinct consecutive (begin, end)
// pair among the fields of the given document, then sets _nextDocument.
//
// The document iterator is advanced one entry at a time until _docIterID
// reaches documentID.  Documents beyond _index->documentCount() produce no
// extents.  _nextDocument becomes documentID + 1, or MAX_INT32 once that
// would exceed _index->documentMaximum().
//
void indri::infnet::FieldWildcardNode::prepare( lemur::api::DOCID_T documentID ) {
  // initialize the child / sibling pointer
  initpointer();
  _extents.clear();

  if (documentID <= _index->documentCount()) {
    while (_docIterID < documentID) {
      _docIterID++;
      _docIter->nextEntry();
    }

    indri::index::TermList * termList = _docIter->currentEntry();

    // Guard against an iterator with no current entry rather than
    // dereferencing a null pointer; in that case no extents are produced.
    if (termList) {
      // Bind by reference: the field list is only read here, so copying
      // the whole vector for every document is unnecessary.
      const indri::utility::greedy_vector<indri::index::FieldExtent>& fields = termList->fields();

      int lastBegin = -1;
      int lastEnd = -1;

      // stuff all fields into the doc, skipping a field whose span is
      // identical to the one emitted just before it
      indri::index::Extent innerExtent;
      for (indri::utility::greedy_vector<indri::index::FieldExtent>::const_iterator field = fields.begin();
           field != fields.end();
           ++field) {
        innerExtent.begin = field->begin;
        innerExtent.end = field->end;
        if ( lastBegin != innerExtent.begin || lastEnd != innerExtent.end ) {
          _extents.push_back( innerExtent );
          lastBegin = innerExtent.begin;
          lastEnd = innerExtent.end;
        }
      }
    }
  }

  _nextDocument = documentID + 1;
  if (_nextDocument > _index->documentMaximum()) {
    _nextDocument = MAX_INT32;
  }
}
Ejemplo n.º 6
0
//
// Rebuilds _extents with the inner-node extents that lie inside some
// outer-node extent, where outer extents may overlap or nest.  Each emitted
// extent keeps the inner begin / end / ordinal and carries the product of
// the inner weight and the weight of the outer extent chosen below.
//
// documentID is not consulted by this method.
//
// NOTE(review): the single forward pass over both lists presumably relies on
// the extent lists being ordered by begin -- confirm against the producers
// of extents().
//
void indri::infnet::NestedExtentInsideNode::prepare( lemur::api::DOCID_T documentID ) {
  // initialize the child / sibling pointer
  initpointer();
  _extents.clear();
  _lastExtent.begin = -1;
  _lastExtent.end = -1;

  // both operands are required to produce any result
  if( !_inner || !_outer )
    return;

  const indri::utility::greedy_vector<indri::index::Extent>& inExtents = _inner->extents();
  const indri::utility::greedy_vector<indri::index::Extent>& outExtents = _outer->extents();

  indri::utility::greedy_vector<indri::index::Extent>::const_iterator innerIter = inExtents.begin();
  indri::utility::greedy_vector<indri::index::Extent>::const_iterator outerIter = outExtents.begin();


  // Walk through the inner list.
  // As we encounter a new extent in the inner list:
  // - remove extents from the active outer set whose end is less than the begin of the inner
  // - add outer extents whose begin is at or before the inner begin to the active outer set
  // Then check the active outer set for an extent that contains the inner.

  // The active outer set is kept sorted by the ends_before_less comparator
  // (presumably increasing end -- its definition is not visible here).
  // - When removing, the extents to remove will be at the beginning
  // - When scanning, check the last active outer extent.  If its end is at least the inner
  //   extent's end, then we can add the inner extent.

  // Active outer extents
  std::set<indri::index::Extent, indri::index::Extent::ends_before_less> activeOuterExtents;
  while ( innerIter != inExtents.end() ) {
    // remove outer extents we don't need anymore: find the first active
    // extent whose end reaches the inner begin and erase everything before it
    std::set<indri::index::Extent, indri::index::Extent::ends_before_less>::iterator activeIter = activeOuterExtents.begin();
    std::set<indri::index::Extent, indri::index::Extent::ends_before_less>::iterator activeEnd = activeOuterExtents.end();
    while ( activeIter != activeEnd ) {
      if ( activeIter->end >= innerIter->begin ) {
        break;  
      } 
      activeIter++;
    }
    activeOuterExtents.erase( activeOuterExtents.begin(), activeIter );

    // push new outer extents on that we may need; outerIter only moves
    // forward, so each outer extent is considered once
    while ( outerIter != outExtents.end() && outerIter->begin <= innerIter->begin ) {
      // only insert if still applicable
      if ( outerIter->end >= innerIter->begin ) {
        activeOuterExtents.insert( *outerIter );
      }
      outerIter++;
    }
    // check to see if the last extent in the active outer set contains the inner extent
    activeIter = activeOuterExtents.end();
    if (!activeOuterExtents.empty()) {
      // step back from end() to the last element of the set
      activeIter--;
      if ( activeIter->end >= innerIter->end ) {
        // Every active outer extent was inserted with a begin at or before
        // the begin of the inner extent current at that time, and this check
        // shows the last active extent's end is at least as large as the
        // inner end, so the inner extent is treated as contained by the last
        // extent in the active set (and possibly by others).

        // NOTE(review): using the activeIter weight here may be wrong.
        // If multiple outer extents match, only the weight of the LAST
        // extent in the active set is used; the other matches are ignored.
        indri::index::Extent extent( innerIter->weight * activeIter->weight, 
                                     innerIter->begin,
                                     innerIter->end,
                                     innerIter->ordinal );
        _extents.push_back( extent );
      }
    }
    innerIter++;
  }

}
Ejemplo n.º 7
0
//
// Rebuilds _extents with the inner-node extents for which some outer-node
// extent has a structure node whose parent (per DocumentStructure::parent)
// is one of the inner extent's nodes.
//
// For an extent with ordinal 0 the candidate nodes are looked up with
// DocumentStructure::findLeafs over its [begin, end] span; otherwise the
// ordinal itself is the only candidate.  Each inner extent is emitted at
// most once, on its first match, with weight inner.weight * outer.weight.
//
// documentID is not consulted by this method.
//
void indri::infnet::ExtentParentNode::prepare( lemur::api::DOCID_T documentID ) {
  // reset the child / sibling pointer and all per-document state
  initpointer();
  _extents.clear();
  _lastExtent.begin = -1;
  _lastExtent.end = -1;

  if( !( _inner && _outer ) )
    return;

  typedef indri::utility::greedy_vector<indri::index::Extent> ExtentVector;

  const ExtentVector& innerList = _inner->extents();
  const ExtentVector& outerList = _outer->extents();

  indri::index::DocumentStructure * docStruct = _docStructHolder.getDocumentStructure();

  // check each inner extent, searching the outer extents for a match
  for( ExtentVector::const_iterator inner = innerList.begin(); inner != innerList.end(); ++inner ) {

    // candidate nodes for the inner extent
    _leafs.clear();
    if( inner->ordinal != 0 ) {
      _leafs.insert( inner->ordinal );
    } else {
      docStruct->findLeafs( &_leafs, inner->begin, inner->end, true );
    }

    bool matched = false;

    for( std::set<int>::iterator leaf = _leafs.begin();
         leaf != _leafs.end() && !matched;
         ++leaf ) {

      // the outer list is rescanned from the start for every inner node
      for( ExtentVector::const_iterator outer = outerList.begin();
           outer != outerList.end() && !matched;
           ++outer ) {

        // candidate nodes for the outer extent
        _ancestors.clear();
        if( outer->ordinal != 0 ) {
          _ancestors.insert( outer->ordinal );
        } else {
          docStruct->findLeafs( &_ancestors, outer->begin, outer->end, true );
        }

        for( std::set<int>::iterator candidate = _ancestors.begin();
             candidate != _ancestors.end() && !matched;
             ++candidate ) {

          // the inner node must be the parent of the outer node
          if( docStruct->parent( *candidate ) == *leaf ) {
            matched = true;
            _extents.push_back( indri::index::Extent( inner->weight * outer->weight,
                                                      inner->begin,
                                                      inner->end,
                                                      inner->ordinal ) );
          }
        }
      }
    }
  }

}