예제 #1
0
 /// Reads field definitions from `input` and registers each one via addInternal().
 ///
 /// @param input     stream positioned at the start of the field-infos data
 /// @param fileName  name used only in the error messages
 /// @throws CorruptIndexException if the format marker is neither FORMAT_PRE nor
 ///         FORMAT_START, or if the stream is not fully consumed afterwards
 void FieldInfos::read(IndexInputPtr input, const String& fileName)
 {
     // A negative first VInt is taken as the format marker; otherwise the format is FORMAT_PRE.
     const int32_t header = input->readVInt();
     if (header < 0)
         format = header;
     else
         format = FORMAT_PRE;
     
     const bool preFormat = (format == FORMAT_PRE);
     if (!preFormat && format != FORMAT_START)
         boost::throw_exception(CorruptIndexException(L"unrecognized format " + StringUtils::toString(format) + L" in file \"" + fileName + L"\""));
     
     // With FORMAT_PRE the first VInt already was the entry count; otherwise it follows the marker.
     int32_t fieldCount = header;
     if (!preFormat)
         fieldCount = input->readVInt();
     
     for (int32_t remaining = fieldCount; remaining > 0; --remaining)
     {
         const String fieldName(input->readString());
         const uint8_t flags = input->readByte();
         
         // Decode the per-field flag byte into the individual switches.
         const bool indexed = (flags & IS_INDEXED) != 0;
         const bool termVector = (flags & STORE_TERMVECTOR) != 0;
         const bool positionsWithTermVector = (flags & STORE_POSITIONS_WITH_TERMVECTOR) != 0;
         const bool offsetWithTermVector = (flags & STORE_OFFSET_WITH_TERMVECTOR) != 0;
         const bool omitNorms = (flags & OMIT_NORMS) != 0;
         const bool payloads = (flags & STORE_PAYLOADS) != 0;
         const bool omitTermFreqAndPositions = (flags & OMIT_TERM_FREQ_AND_POSITIONS) != 0;
         
         addInternal(fieldName, indexed, termVector, positionsWithTermVector, offsetWithTermVector,
                     omitNorms, payloads, omitTermFreqAndPositions);
     }
     
     // Anything left unread means the file does not match the layout just parsed.
     const int64_t consumed = input->getFilePointer();
     const int64_t total = input->length();
     if (consumed != total)
     {
         boost::throw_exception(CorruptIndexException(L"did not read all bytes from file \"" + fileName + L"\": read " + 
                                                      StringUtils::toString(consumed) + L" vs size " + 
                                                      StringUtils::toString(total)));
     }
 }
예제 #2
0
 /// Reads the next prefix-compressed term from `input` into this buffer.
 ///
 /// The stream supplies two VInts: `start` (how many leading characters/bytes are kept
 /// from the term already held in the buffer) and `length` (how many new ones follow),
 /// then the new data, then a VInt field number that is resolved through `fieldInfos`.
 ///
 /// @param input       stream positioned at the start of a term entry
 /// @param fieldInfos  used to map the trailing field number to a field name
 void TermBuffer::read(IndexInputPtr input, FieldInfosPtr fieldInfos)
 {
     this->term.reset(); // invalidate cache
     int32_t start = input->readVInt();
     int32_t length = input->readVInt();
     int32_t totalLength = start + length;
     if (preUTF8Strings)
     {
         // Size the character buffer for the whole term BEFORE reading into it; without this
         // readChars() writes `length` chars at offset `start` into whatever storage happens
         // to exist, which may be too small.  The UTF-8 branch below does the same for `bytes`.
         // NOTE(review): assumes setLength() grows `result` while keeping the first `start`
         // characters, as the UTF-8 branch already relies on for `bytes` - confirm.
         text->setLength(totalLength);
         text->setLength(start + input->readChars(text->result.get(), start, length));
     }
     else
     {
         // Re-encode the current text as UTF-8 so the shared prefix is present in `bytes`,
         // append the new suffix bytes after it, then decode the whole term back into `text`.
         StringUtils::toUTF8(text->result.get(), text->length, bytes);
         bytes->setLength(totalLength);
         input->readBytes(bytes->result.get(), start, length);
         StringUtils::toUnicode(bytes->result.get(), totalLength, text);
     }
     this->field = fieldInfos->fieldName(input->readVInt());
 }
/// Reads up to `size` bytes from `input` and checks them against byten().
///
/// @param input  stream under test; expected to be positioned at `pos`
/// @param size   requested number of bytes (clamped to what is left before TEST_FILE_LENGTH)
/// @param pos    file position the caller believes the stream is at
void checkReadBytes(IndexInputPtr input, int32_t size, int32_t pos)
{
    // Read into the array at a non-zero index so that the "offset" argument of readBytes()
    // is exercised as well; the value itself is arbitrary.
    const int32_t offset = size % 10;
    ByteArray buffer(ByteArray::newInstance(10));
    buffer.resize(MiscUtils::getNextSize(offset + size));

    BOOST_CHECK_EQUAL(pos, input->getFilePointer());

    // Clamp the request to the bytes remaining; nothing to do when none are left.
    const int64_t remaining = TEST_FILE_LENGTH - input->getFilePointer();
    if (remaining <= 0)
        return;
    if (remaining < size)
        size = static_cast<int32_t>(remaining);

    input->readBytes(buffer.get(), offset, size);
    BOOST_CHECK_EQUAL(pos + size, input->getFilePointer());

    // Every byte read must equal the expected content for its file position.
    int32_t index = 0;
    while (index < size)
    {
        BOOST_CHECK_EQUAL(byten(pos + index), buffer[offset + index]);
        ++index;
    }
}
/// Drives checkReadBytes() over `input` with three read-size patterns: slowly growing,
/// random, and a constant 7 bytes.  Whenever the tracked position reaches
/// TEST_FILE_LENGTH the stream is rewound to 0.
///
/// @param input       stream under test
/// @param bufferSize  scales the number of iterations of the first and third phases
void runReadBytes(IndexInputPtr input, int32_t bufferSize)
{
    int32_t pos = 0;
    RandomPtr random = newLucene<Random>();
    
    // Phase 1: gradually increasing size
    int32_t growing = 1;
    while (growing < bufferSize * 10)
    {
        checkReadBytes(input, growing, pos);
        pos += growing;
        if (pos >= TEST_FILE_LENGTH)
        {
            // wrap
            pos = 0;
            input->seek(0);
        }
        growing += growing / 200 + 1;
    }
    
    // Phase 2: wildly fluctuating size (1000 rounds)
    for (int32_t round = 1000; round > 0; --round)
    {
        const int32_t chunk = 1 + random->nextInt(10000);
        checkReadBytes(input, chunk, pos);
        pos += chunk;
        if (pos >= TEST_FILE_LENGTH)
        {
            // wrap
            pos = 0;
            input->seek(0);
        }
    }
    
    // Phase 3: constant small size (7 bytes)
    int32_t smallReads = 0;
    while (smallReads < bufferSize)
    {
        checkReadBytes(input, 7, pos);
        pos += 7;
        if (pos >= TEST_FILE_LENGTH)
        {
            // wrap
            pos = 0;
            input->seek(0);
        }
        ++smallReads;
    }
}
/// Runs runReadBytes() and closes `input` afterwards, whether or not a LuceneException
/// was raised; a caught exception is rethrown once the stream has been closed.
///
/// @param input       stream under test
/// @param bufferSize  forwarded to runReadBytes()
void runReadBytesAndClose(IndexInputPtr input, int32_t bufferSize)
{
    // Holds a failure from the read phase until the stream has been closed.
    LuceneException deferred;
    try
    {
        runReadBytes(input, bufferSize);
    }
    catch (LuceneException& caught)
    {
        deferred = caught;
    }

    input->close();
    deferred.throwException();
}
 /// Returns a new SeeksCountingStream built around a clone of `input`.
 /// The `other` argument is not used by this implementation.
 LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()) {
     LuceneObjectPtr clonedObject(input->clone());
     boost::shared_ptr<IndexInput> clonedInput(boost::dynamic_pointer_cast<IndexInput>(clonedObject));
     return newLucene<SeeksCountingStream>(clonedInput);
 }
 /// Forwards to input->length() and returns its result unchanged.
 virtual int64_t length() {
     const int64_t delegateLength = input->length();
     return delegateLength;
 }
 /// Forwards to input->getFilePointer() and returns its result unchanged.
 virtual int64_t getFilePointer() {
     const int64_t delegatePosition = input->getFilePointer();
     return delegatePosition;
 }
 /// Forwards the close request to `input`; no additional work is done here.
 virtual void close() {
     input->close();
 }
 /// Forwards to input->readBytes() with the same arguments.
 ///
 /// @param b       destination buffer
 /// @param offset  passed through unchanged to `input`
 /// @param length  passed through unchanged to `input`
 virtual void readBytes(uint8_t* b, int32_t offset, int32_t length) {
     input->readBytes(b, offset, length);
 }
 /// Forwards to input->readByte() and returns the byte it produced.
 virtual uint8_t readByte() {
     const uint8_t value = input->readByte();
     return value;
 }
/// Records one more seek in LazyProxSkippingTest::seeksCounter, then forwards the
/// seek to `input`.
///
/// @param pos  target position, passed through unchanged
void SeeksCountingStream::seek(int64_t pos) {
    // Count first, then delegate.
    LazyProxSkippingTest::seeksCounter += 1;
    input->seek(pos);
}
예제 #13
0
    /// Locates a segments file and invokes runBody() on it, retrying while a newer
    /// generation keeps appearing.
    ///
    /// When `commit` is non-null, runBody() is called once on that commit's segments file
    /// (after checking that the commit's directory is `directory`) and the function returns.
    /// Otherwise the current generation is determined from the directory listing and from
    /// the segments.gen file, with a "look ahead" fallback that simply increments the
    /// generation, and runBody() is attempted until it succeeds.
    ///
    /// @param commit  optional commit point to open; may be null
    /// @throws IOException            if `commit` belongs to a different directory
    /// @throws FileNotFoundException  if neither the listing nor segments.gen yields a generation
    /// @throws LuceneException        the first exception seen from runBody(), once the same
    ///                                generation has been tried twice in a row
    void FindSegmentsFile::doRun(IndexCommitPtr commit)
    {
        if (commit)
        {
            if (directory != commit->getDirectory())
                boost::throw_exception(IOException(L"The specified commit does not match the specified Directory"));
            runBody(commit->getSegmentsFileName());
            return;
        }
        
        String segmentFileName;
        int64_t lastGen = -1;           // generation tried on the previous loop iteration
        int64_t gen = 0;                // generation to try on this iteration
        int32_t genLookaheadCount = 0;  // number of look-ahead increments performed so far
        bool retry = false;             // true once the same generation has been tried before
        LuceneException exc;            // first exception raised by runBody(), kept for rethrow
        SegmentInfosPtr segmentInfos(_segmentInfos);
        
        // 0 = determine the generation from listing + segments.gen; 1 = look-ahead mode
        int32_t method = 0;
        
        // Loop until we succeed in calling runBody() without hitting an IOException.  An IOException most likely
        // means a commit was in process and has finished, in the time it took us to load the now-old infos files
        // (and segments files).  It's also possible it's a true error (corrupt index).  To distinguish these,
        // on each retry we must see "forward progress" on which generation we are trying to load.  If we don't,
        // then the original error is real and we throw it.

        // We have three methods for determining the current generation.  We try the first two in parallel, and
        // fall back to the third when necessary.
        
        while (true)
        {
            if (method == 0)
            {
                // Method 1: list the directory and use the highest segments_N file.  This method works well as long
                // as there is no stale caching on the directory contents (NOTE: NFS clients often have such stale caching)
                HashSet<String> files(directory->listAll());
                int64_t genA = segmentInfos->getCurrentSegmentGeneration(files);

                segmentInfos->message(L"directory listing genA=" + StringUtils::toString(genA));

                // Method 2: open segments.gen and read its contents.  Then we take the larger of the two gens.  This way,
                // if either approach is hitting a stale cache (NFS) we have a better chance of getting the right generation.
                int64_t genB = -1;
                for (int32_t i = 0; i < SegmentInfos::defaultGenFileRetryCount; ++i)
                {
                    IndexInputPtr genInput;
                    try
                    {
                        genInput = directory->openInput(IndexFileNames::SEGMENTS_GEN());
                    }
                    catch (FileNotFoundException& e)
                    {
                        // No segments.gen at all: stop probing it and keep genB == -1.
                        segmentInfos->message(L"Segments.gen open: FileNotFoundException " + e.getError());
                        break;
                    }
                    catch (IOException& e)
                    {
                        // Any other I/O failure: genInput stays null, so we fall through to the sleep and try again.
                        segmentInfos->message(L"Segments.gen open: IOException " + e.getError());
                    }
                    
                    if (genInput)
                    {
                        // Emulated try/finally: a non-IO exception is held in `finally` until genInput is closed.
                        LuceneException finally;
                        bool fileConsistent = false;
                        try
                        {
                            int32_t version = genInput->readInt();
                            if (version == SegmentInfos::FORMAT_LOCKLESS)
                            {
                                // The generation is stored twice; it is only accepted when both copies agree.
                                int64_t gen0 = genInput->readLong();
                                int64_t gen1 = genInput->readLong();
                                segmentInfos->message(L"fallback check: " + StringUtils::toString(gen0) + L"; " + StringUtils::toString(gen1));
                                if (gen0 == gen1)
                                {
                                    // the file is consistent
                                    genB = gen0;
                                    fileConsistent = true;
                                }
                            }
                        }
                        catch (IOException&)
                        {
                            // will retry
                        }
                        catch (LuceneException& e)
                        {
                            finally = e;
                        }
                        genInput->close();
                        finally.throwException();
                        if (fileConsistent)
                            break;
                    }
                    
                    // Pause before the next attempt at reading segments.gen.
                    LuceneThread::threadSleep(SegmentInfos::defaultGenFileRetryPauseMsec);
                }
                
                segmentInfos->message(String(IndexFileNames::SEGMENTS_GEN()) + L" check: genB=" + StringUtils::toString(genB));

                // pick the larger of the two gen's
                gen = std::max(genA, genB);
                
                // neither approach found a generation
                if (gen == -1)
                    boost::throw_exception(FileNotFoundException(L"No segments* file found in directory"));
            }
            
            // Third method (fallback if first & second methods are not reliable): since both directory cache and
            // file contents cache seem to be stale, just advance the generation.
            if (method == 1 || (method == 0 && lastGen == gen && retry))
            {
                // Once entered, look-ahead mode is never left (method stays 1).
                method = 1;
                
                // The number of look-ahead increments is capped; past the cap gen stops advancing.
                if (genLookaheadCount < SegmentInfos::defaultGenLookaheadCount)
                {
                    ++gen;
                    ++genLookaheadCount;
                    segmentInfos->message(L"look ahead increment gen to " + StringUtils::toString(gen));
                }
            }
            
            if (lastGen == gen)
            {
                // This means we're about to try the same segments_N last tried.  This is allowed, exactly once, because
                // writer could have been in the process of writing segments_N last time.
                
                if (retry)
                {
                    // OK, we've tried the same segments_N file twice in a row, so this must be a real error.
                    exc.throwException();
                }
                else
                    retry = true;
            }
            else if (method == 0)
            {
                // Segment file has advanced since our last loop, so reset retry
                retry = false;
            }
            
            lastGen = gen;
            
            segmentFileName = IndexFileNames::fileNameFromGeneration(IndexFileNames::SEGMENTS(), L"", gen);
            
            try
            {
                runBody(segmentFileName);
                segmentInfos->message(L"success on " + segmentFileName);
                return;
            }
            catch (LuceneException& err)
            {
                // Save the original root cause
                if (exc.isNull())
                    exc = err;
                
                segmentInfos->message(L"primary Exception on '" + segmentFileName + L"': " + err.getError() + L"'; will retry: retry=" + StringUtils::toString(retry) + L"; gen = " + StringUtils::toString(gen));
                
                if (!retry && gen > 1)
                {
                    // This is our first time trying this segments file (because retry is false), and, there is possibly a
                    // segments_(N-1) (because gen > 1). So, check if the segments_(N-1) exists and try it if so.
                    String prevSegmentFileName(IndexFileNames::fileNameFromGeneration(IndexFileNames::SEGMENTS(), L"", gen - 1));
                    
                    if (directory->fileExists(prevSegmentFileName))
                    {
                        segmentInfos->message(L"fallback to prior segment file '" + prevSegmentFileName + L"'");
                        
                        try
                        {
                            runBody(prevSegmentFileName);
                            if (!exc.isNull())
                                segmentInfos->message(L"success on fallback " + prevSegmentFileName);
                            return;
                        }
                        catch (LuceneException& err2)
                        {
                            // A failure on the previous generation is only logged; the outer loop goes around again.
                            segmentInfos->message(L"secondary Exception on '" + prevSegmentFileName + L"': " + err2.getError() + L"'; will retry");
                        }
                    }
                }
            }
        }
    }