void IndexWriter::_writeDirectLists( WriterIndexContext* context, indri::file::SequentialWriteBuffer* directOutput, indri::file::SequentialWriteBuffer* lengthsOutput, indri::file::SequentialWriteBuffer* dataOutput ) { VocabularyIterator* vocabulary = context->index->frequentVocabularyIterator(); indri::index::Index* index = context->index; vocabulary->startIteration(); while( !vocabulary->finished() ) { indri::index::DiskTermData* diskTermData = vocabulary->currentEntry(); context->oldFrequent->add( diskTermData->termID, diskTermData->termData->term ); vocabulary->nextEntry(); } delete vocabulary; vocabulary = 0; TermListFileIterator* iterator = index->termListFileIterator(); TermTranslator* translator = _buildTermTranslator( _infrequentTermsReader, _frequentTermsReader, *context->oldFrequent, context->oldInfrequent, *context->newlyFrequent, index, context->bitmap ); iterator->startIteration(); TermList writeList; indri::utility::Buffer outputBuffer( 256*1024 ); indri::index::DocumentDataIterator* dataIterator = context->index->documentDataIterator(); dataIterator->startIteration(); while( !iterator->finished() ) { writeList.clear(); TermList* list = iterator->currentEntry(); assert( list ); int currentTerm; int translated; // copy and translate terms for( int i=0; i<list->terms().size(); i++ ) { currentTerm = list->terms()[i]; assert( currentTerm >= 0 ); assert( currentTerm <= index->uniqueTermCount() ); translated = (*translator)( currentTerm ); assert( translated > 0 || (translated == 0 && currentTerm == 0) ); writeList.addTerm( translated ); } // copy field data int fieldCount = list->fields().size(); const indri::utility::greedy_vector<indri::index::FieldExtent>& fields = list->fields(); for( int i=0; i<fieldCount; i++ ) { writeList.addField( fields[i] ); } // record the start position size_t writeStart = outputBuffer.position(); UINT32 length = 0; // write the list, leaving room for a length count outputBuffer.write( sizeof(UINT32) ); writeList.write( outputBuffer ); // record the end position, compute length size_t writeEnd = outputBuffer.position(); length = writeEnd - (writeStart + sizeof(UINT32)); // store length assert( outputBuffer.position() >= (sizeof(UINT32) + length + writeStart) ); memcpy( outputBuffer.front() + writeStart, &length, sizeof(UINT32) ); assert( dataIterator ); // get a copy of the document data assert( dataIterator ); assert( !dataIterator->finished() ); indri::index::DocumentData documentData = *dataIterator->currentEntry(); // store offset information documentData.byteLength = length; documentData.offset = directOutput->tell() + writeStart + sizeof(UINT32); // tell has to happen before a write or the offset will be wrong. if( outputBuffer.position() > 128*1024 ) { directOutput->write( outputBuffer.front(), outputBuffer.position() ); outputBuffer.clear(); } dataOutput->write( &documentData, sizeof(DocumentData) ); int termLength = documentData.totalLength; assert( termLength >= 0 ); lengthsOutput->write( &termLength, sizeof(UINT32) ); iterator->nextEntry(); dataIterator->nextEntry(); } delete iterator; delete dataIterator; delete translator; directOutput->write( outputBuffer.front(), outputBuffer.position() ); directOutput->flush(); lengthsOutput->flush(); outputBuffer.clear(); }