static void zlib_read_document( z_stream_s& stream, indri::file::File& infile, UINT64 offset, indri::utility::Buffer& outputBuffer ) { // read in data from the file until the stream ends // split up the data as necessary // decompress positional info // read some data char inputBuffer[INPUT_BUFFER_SIZE]; outputBuffer.grow( INPUT_BUFFER_SIZE ); outputBuffer.write( sizeof(indri::api::ParsedDocument) ); stream.avail_in = 0; stream.avail_out = 0; while(true) { if( !stream.avail_in ) { UINT64 readSize = infile.read( inputBuffer, offset, sizeof inputBuffer ); offset += readSize; stream.avail_in = readSize; stream.next_in = (Bytef*) inputBuffer; } stream.avail_out = outputBuffer.size() - outputBuffer.position(); stream.next_out = (Bytef*) outputBuffer.write( outputBuffer.size() - outputBuffer.position() ); int result = inflate( &stream, Z_NO_FLUSH ); outputBuffer.unwrite( stream.avail_out ); if( result == Z_STREAM_END ) { result = inflate( &stream, Z_FINISH ); if( result < 0 ) LEMUR_THROW( result, "Something bad happened while trying to finish decompressing a document." ); inflateEnd( &stream ); break; } if( result < 0 ) { LEMUR_THROW( result, "Something bad happened while trying to decompress a document." ); } if( stream.avail_out == 0 ) { outputBuffer.grow(); } } }
bool lemur::file::SortMergeTextFiles::_readLine(FILE *_in, char*& beginLine, size_t& lineLength, indri::utility::Buffer &_buffer ) { lineLength = 0; size_t actual; // make a buffer of a reasonable size so we're not always allocating if( _buffer.size() < 1024*1024 ) _buffer.grow( 1024*1024 ); // if we're running out of room, add 25MB if( (_buffer.size() - _buffer.position()) < 512*1024 ) { _buffer.grow( _buffer.size() + 1024*1024*25 ); } if( _buffer.position() ) { // get rid of null terminator from previous call _buffer.unwrite(1); } size_t readAmount = _buffer.size() - _buffer.position() - 2; // fetch next document line char* buffer = _buffer.write( readAmount ); char* result = fgets( buffer, (int)readAmount, _in ); if(!result) { return false; } actual = strlen(buffer); lineLength += actual; _buffer.unwrite( readAmount - actual ); // all finished reading *_buffer.write(1) = 0; beginLine = _buffer.front() + _buffer.position() - lineLength - 1; // strip the \n off the end if (beginLine[lineLength-1]=='\n') { beginLine[lineLength-1]=0; } return true; }