//
// Rebuilds _extents for the current document: every extent reported by
// _inner that is contained (per Extent::contains) in the outer extent the
// merge is currently positioned on is copied to the output unchanged.
//
// The walk is a single forward pass over both lists.
// NOTE(review): this presumably relies on both lists being ordered by
// begin position -- confirm against the child nodes.
//
// documentID is not consulted here; the child nodes are expected to have
// been prepared already.
//
void indri::infnet::ExtentInsideNode::prepare( lemur::api::DOCID_T documentID ) {
  typedef indri::utility::greedy_vector<indri::index::Extent> ExtentVector;

  // initialize the child / sibling pointer
  initpointer();

  // reset the per-document state
  _extents.clear();
  _lastExtent.begin = -1;
  _lastExtent.end = -1;

  // nothing can match without both operands
  if( !_inner || !_outer )
    return;

  const ExtentVector& candidates = _inner->extents();
  const ExtentVector& containers = _outer->extents();

  ExtentVector::const_iterator candidate = candidates.begin();
  ExtentVector::const_iterator container = containers.begin();

  while( candidate != candidates.end() && container != containers.end() ) {
    const bool enclosed = container->contains( *candidate );

    if( enclosed )
      _extents.push_back( *candidate );

    // The candidate is finished when it matched, or when the current
    // container starts after it; otherwise try the next container.
    if( enclosed || container->begin > candidate->begin ) {
      candidate++;
    } else {
      container++;
    }
  }
}
//
// Rebuilds _extents for the current document by combining the extent lists
// of all children with _and().
//
//   - 0 or 1 children: _extents is left empty.
//   - 2 children:      _and() writes straight into _extents.
//   - 3+ children:     children are folded in two at a time, ping-ponging
//                      between two scratch vectors so that the result of one
//                      _and() can be the input of the next without copying.
//
// NOTE(review): this assumes _and() overwrites (rather than appends to) its
// first argument, as the original ping-pong code already did -- confirm.
//
// Fix: the pairwise loop used to stop at i < size-2, which skipped the final
// pair whenever the number of children was even and greater than two (with
// four children only children 0 and 1 were ever combined). The loop now runs
// for as long as a full pair (i, i+1) remains.
//
// documentID is not consulted here.
//
void indri::infnet::ExtentAndNode::prepare( lemur::api::DOCID_T documentID ) {
  // initialize the child / sibling pointer
  initpointer();
  _extents.clear();

  if( _children.size() == 2 ) {
    _and( _extents, _children[0]->extents(), _children[1]->extents() );
  } else if( _children.size() > 2 ) {
    indri::utility::greedy_vector<indri::index::Extent> first;
    indri::utility::greedy_vector<indri::index::Extent> second;
    unsigned int i;

    // seed the running result with the first two children
    _and( first, _children[0]->extents(), _children[1]->extents() );

    // fold in the remaining children a pair at a time; after each pass the
    // running result is back in 'first'
    for( i=2; i+1<_children.size(); i+=2 ) {
      _and( second, first, _children[i]->extents() );
      _and( first, second, _children[i+1]->extents() );
    }

    if( i==_children.size()-1 ) {
      // odd child count: exactly one child is left over
      _and( _extents, first, _children[i]->extents() );
    } else {
      // even child count: everything has already been folded into 'first'
      _extents = first;
    }
  }
}
//
// Rebuilds _extents for the current document: for every numeric value
// reported by _field that equals _constant, the extent at the same index in
// _field->extents() is appended to the output.
//
// NOTE(review): numbers() and extents() are indexed in parallel, so this
// assumes extents() has at least as many entries as numbers() -- confirm.
//
// documentID is not consulted here.
//
void indri::infnet::FieldEqualsNode::prepare( lemur::api::DOCID_T documentID ) {
  // initialize the child / sibling pointer
  initpointer();
  _extents.clear();

  // no field iterator, no matches
  if( !_field )
    return;

  const indri::utility::greedy_vector<INT64>& values = _field->numbers();
  const indri::utility::greedy_vector<indri::index::Extent>& fieldExtents = _field->extents();

  const size_t count = values.size();

  for( size_t k = 0; k != count; ++k ) {
    if( values[k] != _constant )
      continue;

    _extents.push_back( fieldExtents[k] );
  }
}
//
// Rebuilds _extents for the given document from the posting the list
// iterator is currently positioned on.
//
// If there is no list, no current entry, or the current entry belongs to a
// different document than documentID, the result is empty. Otherwise each
// recorded position p becomes the extent [p, p+1).
//
void indri::infnet::DocListIteratorNode::prepare( lemur::api::DOCID_T documentID ) {
  // initialize the child / sibling pointer
  initpointer();

  // reset the per-document state
  _extents.clear();
  _lastExtent.begin = -1;
  _lastExtent.end = -1;

  if( !_list )
    return;

  indri::index::DocListIterator::DocumentData* entry = _list->currentEntry();

  if( !entry )
    return;

  // the iterator is sitting on some other document
  if( entry->document != documentID )
    return;

  indri::utility::greedy_vector<int>& positions = entry->positions;
  const size_t count = positions.size();

  for( size_t k = 0; k != count; ++k ) {
    const int start = positions[k];
    _extents.push_back( indri::index::Extent( start, start + 1 ) );
  }
}
//
// Rebuilds _extents for the given document with the (begin, end) span of
// every field recorded in that document's term list, and then advances
// _nextDocument.
//
// The term-list iterator is stepped forward until _docIterID reaches
// documentID; it is never moved backwards. A field whose begin/end pair
// equals that of the field immediately before it is skipped, so runs of
// identical spans yield a single extent.
//
// _nextDocument is set to documentID + 1, or to MAX_INT32 once that would
// pass _index->documentMaximum().
//
// Changes from the previous version:
//   - the field list is read through a const reference instead of being
//     copied into a local vector on every call;
//   - a null term list from currentEntry() now yields no extents instead of
//     being dereferenced.
//
void indri::infnet::FieldWildcardNode::prepare( lemur::api::DOCID_T documentID ) {
  // initialize the child / sibling pointer
  initpointer();
  _extents.clear();

  if( documentID <= _index->documentCount() ) {
    // catch the term-list iterator up to the requested document
    while( _docIterID < documentID ) {
      _docIterID++;
      _docIter->nextEntry();
    }

    indri::index::TermList* termList = _docIter->currentEntry();

    if( termList ) {
      const indri::utility::greedy_vector<indri::index::FieldExtent>& inExtents = termList->fields();
      indri::utility::greedy_vector<indri::index::FieldExtent>::const_iterator innerIter = inExtents.begin();

      int lastBegin = -1;
      int lastEnd = -1;

      // copy the span of every field in the document into _extents,
      // skipping a span identical to the one just before it
      indri::index::Extent innerExtent;

      while( innerIter != inExtents.end() ) {
        innerExtent.begin = innerIter->begin;
        innerExtent.end = innerIter->end;

        if( lastBegin != innerExtent.begin || lastEnd != innerExtent.end ) {
          _extents.push_back( innerExtent );
          lastBegin = innerExtent.begin;
          lastEnd = innerExtent.end;
        }

        innerIter++;
      }
    }
  }

  _nextDocument = documentID + 1;

  if( _nextDocument > _index->documentMaximum() ) {
    _nextDocument = MAX_INT32;
  }
}
//
// Rebuilds _extents for the current document: an inner extent is emitted
// when some outer extent that begins at or before it also ends at or after
// it. Unlike a plain two-pointer merge this keeps a set of "active" outer
// extents, so overlapping or nested outer extents are handled.
//
// For each inner extent, in order:
//   1. drop leading active outer extents whose end is less than the inner
//      extent's begin;
//   2. add every not-yet-consumed outer extent whose begin is at or before
//      the inner begin (and whose end is not already behind it);
//   3. look only at the last element of the active set: if its end reaches
//      the inner end, the inner extent is contained and is emitted.
//
// The active set is ordered by Extent::ends_before_less.
// NOTE(review): steps 1 and 3 presumably rely on that ordering putting the
// smallest end first and the largest end last, and on both input lists
// being ordered by begin -- confirm.
//
// The emitted extent keeps the inner begin/end/ordinal; its weight is the
// inner weight times the weight of that last active outer extent. If several
// outer extents contain the inner one, only that one contributes a weight.
// NOTE(review): it is unclear whether that is the intended weight.
//
// documentID is not consulted here.
//
void indri::infnet::NestedExtentInsideNode::prepare( lemur::api::DOCID_T documentID ) {
  typedef indri::utility::greedy_vector<indri::index::Extent> ExtentVector;
  typedef std::set<indri::index::Extent, indri::index::Extent::ends_before_less> ActiveSet;

  // initialize the child / sibling pointer
  initpointer();

  // reset the per-document state
  _extents.clear();
  _lastExtent.begin = -1;
  _lastExtent.end = -1;

  if( !_inner || !_outer )
    return;

  const ExtentVector& innerList = _inner->extents();
  const ExtentVector& outerList = _outer->extents();

  // next outer extent that has not yet been considered for activation
  ExtentVector::const_iterator nextOuter = outerList.begin();

  // outer extents that may still contain the current or a later inner extent
  ActiveSet active;

  for( ExtentVector::const_iterator inner = innerList.begin(); inner != innerList.end(); inner++ ) {
    // 1. retire outer extents that end before this inner extent begins
    ActiveSet::iterator firstLive = active.begin();
    while( firstLive != active.end() && firstLive->end < inner->begin ) {
      firstLive++;
    }
    active.erase( active.begin(), firstLive );

    // 2. activate outer extents that begin at or before this inner extent
    for( ; nextOuter != outerList.end() && nextOuter->begin <= inner->begin; nextOuter++ ) {
      // only insert if still applicable
      if( nextOuter->end >= inner->begin ) {
        active.insert( *nextOuter );
      }
    }

    if( active.empty() )
      continue;

    // 3. every active outer extent begins at or before the inner begin, so
    //    the inner extent is contained as soon as the last active extent
    //    reaches the inner end
    ActiveSet::iterator last = active.end();
    last--;

    if( last->end >= inner->end ) {
      indri::index::Extent extent( inner->weight * last->weight,
                                   inner->begin,
                                   inner->end,
                                   inner->ordinal );
      _extents.push_back( extent );
    }
  }
}
//
// Rebuilds _extents for the current document: an inner extent is emitted
// (at most once) when one of its node ids equals docStruct->parent() of a
// node id belonging to some outer extent.
//
// Node ids for an extent are taken from its ordinal when that is non-zero;
// otherwise they are looked up from the extent's begin/end with
// DocumentStructure::findLeafs(). The member sets _leafs (inner side) and
// _ancestors (outer side) are used as scratch space for those ids.
//
// The search for a given inner extent stops at the first match. The emitted
// extent keeps the inner begin/end/ordinal and gets the inner weight times
// the weight of the outer extent that produced the match.
//
// NOTE(review): the document structure pointer is used without a null
// check -- confirm getDocumentStructure() cannot return null here.
//
// documentID is not consulted here.
//
void indri::infnet::ExtentParentNode::prepare( lemur::api::DOCID_T documentID ) {
  typedef indri::utility::greedy_vector<indri::index::Extent> ExtentVector;

  // initialize the child / sibling pointer
  initpointer();

  // reset the per-document state
  _extents.clear();
  _lastExtent.begin = -1;
  _lastExtent.end = -1;

  if( !_inner || !_outer )
    return;

  const ExtentVector& innerList = _inner->extents();
  const ExtentVector& outerList = _outer->extents();

  indri::index::DocumentStructure* docStruct = _docStructHolder.getDocumentStructure();

  for( ExtentVector::const_iterator inner = innerList.begin(); inner != innerList.end(); inner++ ) {
    // collect the node ids that represent this inner extent
    _leafs.clear();
    if( inner->ordinal != 0 ) {
      _leafs.insert( inner->ordinal );
    } else {
      docStruct->findLeafs( &_leafs, inner->begin, inner->end, true );
    }

    // once set, every enclosing loop below winds down for this inner extent
    bool matched = false;

    for( std::set<int>::iterator leaf = _leafs.begin();
         !matched && leaf != _leafs.end();
         leaf++ ) {
      for( ExtentVector::const_iterator outer = outerList.begin();
           !matched && outer != outerList.end();
           outer++ ) {
        // collect the node ids that represent this outer extent
        _ancestors.clear();
        if( outer->ordinal != 0 ) {
          _ancestors.insert( outer->ordinal );
        } else {
          docStruct->findLeafs( &_ancestors, outer->begin, outer->end, true );
        }

        for( std::set<int>::iterator ancestor = _ancestors.begin();
             !matched && ancestor != _ancestors.end();
             ancestor++ ) {
          if( docStruct->parent( *ancestor ) != *leaf )
            continue;

          // the inner node is the parent of this outer node
          matched = true;
          indri::index::Extent extent( inner->weight * outer->weight,
                                       inner->begin,
                                       inner->end,
                                       inner->ordinal );
          _extents.push_back( extent );
        }
      }
    }
  }
}