/// Loads field definitions from `input` into this FieldInfos.
///
/// The stream starts with one VInt. A negative value is taken as the format marker and is
/// followed by a separate VInt holding the field count; a non-negative value is itself the
/// field count of the FORMAT_PRE layout. Each field is then stored as a name string followed
/// by a single byte of flag bits.
///
/// @param input    stream positioned at the start of the field infos data
/// @param fileName name used only to build error messages
/// @throws CorruptIndexException if the format marker is not recognised, or if the file
///         pointer does not equal the stream length once all fields have been read
void FieldInfos::read(IndexInputPtr input, const String& fileName) {
    const int32_t header = input->readVInt();

    // This is a real format?
    if (header < 0) {
        format = header;
    } else {
        format = FORMAT_PRE;
    }

    const bool preFormat = (format == FORMAT_PRE);
    if (!preFormat && format != FORMAT_START) {
        boost::throw_exception(CorruptIndexException(L"unrecognized format " + StringUtils::toString(format) + L" in file \"" + fileName + L"\""));
    }

    // read in the size if required
    int32_t fieldCount = header;
    if (!preFormat) {
        fieldCount = input->readVInt();
    }

    for (int32_t fieldIndex = 0; fieldIndex < fieldCount; ++fieldIndex) {
        String name(input->readString());
        const uint8_t flags = input->readByte();

        // Decode the per-field flag byte into the individual options.
        const bool indexed = (flags & IS_INDEXED) != 0;
        const bool storeTermVector = (flags & STORE_TERMVECTOR) != 0;
        const bool storePositions = (flags & STORE_POSITIONS_WITH_TERMVECTOR) != 0;
        const bool storeOffsets = (flags & STORE_OFFSET_WITH_TERMVECTOR) != 0;
        const bool omitNorms = (flags & OMIT_NORMS) != 0;
        const bool storePayloads = (flags & STORE_PAYLOADS) != 0;
        const bool omitTermFreqAndPositions = (flags & OMIT_TERM_FREQ_AND_POSITIONS) != 0;

        addInternal(name, indexed, storeTermVector, storePositions, storeOffsets,
                    omitNorms, storePayloads, omitTermFreqAndPositions);
    }

    // Everything in the file must have been consumed by now.
    if (input->getFilePointer() == input->length()) {
        return;
    }

    boost::throw_exception(CorruptIndexException(L"did not read all bytes from file \"" + fileName + L"\": read " + StringUtils::toString(input->getFilePointer()) + L" vs size " + StringUtils::toString(input->length())));
}
/// Reads the next term from `input` into this buffer.
///
/// The stream supplies two VInts, `start` and `length`: the first `start` units of the
/// previously buffered text are kept and `length` new units are read in after them. The
/// units are chars when `preUTF8Strings` is set and UTF-8 bytes otherwise. A trailing VInt
/// is passed to `fieldInfos->fieldName()` to obtain the field name.
///
/// @param input      stream positioned at the start of a term entry
/// @param fieldInfos used to map the stored field number to a field name
void TermBuffer::read(IndexInputPtr input, FieldInfosPtr fieldInfos) {
    this->term.reset(); // invalidate cache

    int32_t start = input->readVInt();
    int32_t length = input->readVInt();
    int32_t totalLength = start + length;

    if (preUTF8Strings) {
        // Size the text buffer for the whole term before readChars() writes `length` chars
        // at offset `start`, mirroring what the UTF-8 branch does for `bytes`. Without this,
        // a term longer than the current buffer would be written past its end.
        // NOTE(review): assumes setLength() grows the backing array while keeping the
        // existing prefix, as the UTF-8 branch already relies on - confirm.
        text->setLength(totalLength);
        text->setLength(start + input->readChars(text->result.get(), start, length));
    } else {
        // Re-encode the current text as UTF-8, overwrite the suffix with the new bytes,
        // then decode the combined bytes back into `text`.
        StringUtils::toUTF8(text->result.get(), text->length, bytes);
        bytes->setLength(totalLength);
        input->readBytes(bytes->result.get(), start, length);
        StringUtils::toUnicode(bytes->result.get(), totalLength, text);
    }

    this->field = fieldInfos->fieldName(input->readVInt());
}
/// Reads up to `size` bytes from `input`, which is expected to be at position `pos`, and
/// checks both the resulting file pointer and every byte read (against `byten()`).
///
/// The request is shortened to what remains before TEST_FILE_LENGTH; if nothing remains
/// the function returns after the initial position check.
void checkReadBytes(IndexInputPtr input, int32_t size, int32_t pos) {
    // Place the data at a non-zero position inside the destination array so that the
    // "offset" argument of readBytes() is exercised as well.
    int32_t offset = size % 10; // arbitrary
    ByteArray buffer(ByteArray::newInstance(10));
    buffer.resize(MiscUtils::getNextSize(offset + size));

    BOOST_CHECK_EQUAL(pos, input->getFilePointer());

    const int64_t remaining = TEST_FILE_LENGTH - input->getFilePointer();
    if (remaining <= 0) {
        return;
    }
    if (remaining < size) {
        size = static_cast<int32_t>(remaining);
    }

    input->readBytes(buffer.get(), offset, size);
    BOOST_CHECK_EQUAL(pos + size, input->getFilePointer());

    int32_t i = 0;
    while (i < size) {
        BOOST_CHECK_EQUAL(byten(pos + i), buffer[offset + i]);
        ++i;
    }
}
/// Drives checkReadBytes() over `input` with three read-size patterns: gradually growing
/// sizes, random sizes, and a constant 7-byte size. The expected position is tracked
/// locally and the input is rewound to 0 whenever it reaches TEST_FILE_LENGTH.
void runReadBytes(IndexInputPtr input, int32_t bufferSize) {
    int32_t pos = 0;
    RandomPtr random(newLucene<Random>());

    // gradually increasing size
    const int32_t sizeLimit = bufferSize * 10;
    int32_t size = 1;
    while (size < sizeLimit) {
        checkReadBytes(input, size, pos);
        pos += size;
        if (pos >= TEST_FILE_LENGTH) {
            // wrap
            pos = 0;
            input->seek(0);
        }
        size += size / 200 + 1;
    }

    // wildly fluctuating size
    for (int32_t round = 0; round < 1000; ++round) {
        const int32_t randomSize = 1 + random->nextInt(10000);
        checkReadBytes(input, randomSize, pos);
        pos += randomSize;
        if (pos >= TEST_FILE_LENGTH) {
            // wrap
            pos = 0;
            input->seek(0);
        }
    }

    // constant small size (7 bytes)
    const int32_t smallSize = 7;
    for (int32_t remaining = bufferSize; remaining > 0; --remaining) {
        checkReadBytes(input, smallSize, pos);
        pos += smallSize;
        if (pos >= TEST_FILE_LENGTH) {
            // wrap
            pos = 0;
            input->seek(0);
        }
    }
}
/// Runs runReadBytes() and then closes `input`, whether or not a LuceneException was raised.
///
/// A LuceneException from runReadBytes() is held back until after close() and then handed
/// to throwException() (presumably re-raising it; a no-op when nothing was caught - confirm).
void runReadBytesAndClose(IndexInputPtr input, int32_t bufferSize) {
    LuceneException deferred;

    try {
        runReadBytes(input, bufferSize);
    } catch (LuceneException& caught) {
        deferred = caught;
    }

    // Close first, report afterwards.
    input->close();
    deferred.throwException();
}
/// Creates a new SeeksCountingStream around a clone of `input`.
/// The `other` argument is not used by this implementation.
LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()) {
    IndexInputPtr clonedInput(boost::dynamic_pointer_cast<IndexInput>(input->clone()));
    return newLucene<SeeksCountingStream>(clonedInput);
}
/// Returns the length reported by `input`.
virtual int64_t length() {
    const int64_t totalLength = input->length();
    return totalLength;
}
/// Returns the current file pointer reported by `input`.
virtual int64_t getFilePointer() {
    const int64_t position = input->getFilePointer();
    return position;
}
/// Closes `input`.
virtual void close() {
    input->close();
}
/// Forwards the read to `input` with the same destination, offset and length.
virtual void readBytes(uint8_t* b, int32_t offset, int32_t length) {
    input->readBytes(b, offset, length);
}
/// Returns the next byte read from `input`.
virtual uint8_t readByte() {
    const uint8_t value = input->readByte();
    return value;
}
/// Increments LazyProxSkippingTest::seeksCounter, then seeks `input` to `pos`.
void SeeksCountingStream::seek(int64_t pos) {
    // Count first, so the seek is recorded before it is forwarded.
    ++LazyProxSkippingTest::seeksCounter;

    input->seek(pos);
}
/// Locates a segments file and runs runBody() on it.
///
/// When `commit` is given, its directory must equal `directory` (otherwise an IOException is
/// thrown) and runBody() is called once on the commit's segments file name.
///
/// Otherwise the function loops: it determines a generation number, builds the matching
/// segments file name and calls runBody() on it, returning on the first success. On failure it
/// may try the previous generation's file, then loops again. The loop ends by throwing either
/// a FileNotFoundException (no generation could be determined) or the first exception that was
/// saved from a failed runBody() call.
///
/// @param commit optional commit point; when set, no generation search is performed
void FindSegmentsFile::doRun(IndexCommitPtr commit) {
    if (commit) {
        if (directory != commit->getDirectory())
            boost::throw_exception(IOException(L"The specified commit does not match the specified Directory"));
        runBody(commit->getSegmentsFileName());
        return;
    }

    String segmentFileName;
    int64_t lastGen = -1; // generation tried on the previous loop iteration
    int64_t gen = 0; // generation to try on this iteration
    int32_t genLookaheadCount = 0; // number of times gen was advanced by the third method
    bool retry = false; // set once the same generation has been tried a second time
    LuceneException exc; // first exception raised by runBody(), kept as the root cause
    SegmentInfosPtr segmentInfos(_segmentInfos);
    int32_t method = 0; // 0: directory listing + segments.gen, 1: look-ahead fallback

    // Loop until we succeed in calling runBody() without hitting an IOException. An IOException most likely
    // means a commit was in process and has finished, in the time it took us to load the now-old infos files
    // (and segments files). It's also possible it's a true error (corrupt index). To distinguish these,
    // on each retry we must see "forward progress" on which generation we are trying to load. If we don't,
    // then the original error is real and we throw it.
    // NOTE(review): the handlers around runBody() below catch LuceneException, not only IOException.

    // We have three methods for determining the current generation. We try the first two in parallel, and
    // fall back to the third when necessary.
    while (true) {
        if (method == 0) {
            // Method 1: list the directory and use the highest segments_N file. This method works well as long
            // as there is no stale caching on the directory contents (NOTE: NFS clients often have such stale caching)
            HashSet<String> files(directory->listAll());
            int64_t genA = segmentInfos->getCurrentSegmentGeneration(files);

            segmentInfos->message(L"directory listing genA=" + StringUtils::toString(genA));

            // Method 2: open segments.gen and read its contents. Then we take the larger of the two gens. This way,
            // if either approach is hitting a stale cache (NFS) we have a better chance of getting the right generation.
            int64_t genB = -1;
            for (int32_t i = 0; i < SegmentInfos::defaultGenFileRetryCount; ++i) {
                IndexInputPtr genInput;
                try {
                    genInput = directory->openInput(IndexFileNames::SEGMENTS_GEN());
                } catch (FileNotFoundException& e) {
                    // No segments.gen at all: stop retrying and keep genB at -1.
                    segmentInfos->message(L"Segments.gen open: FileNotFoundException " + e.getError());
                    break;
                } catch (IOException& e) {
                    // Other open failures fall through to the sleep below and are retried.
                    segmentInfos->message(L"Segments.gen open: IOException " + e.getError());
                }

                if (genInput) {
                    // Holds a non-IO exception until genInput has been closed.
                    LuceneException finally;
                    bool fileConsistent = false;
                    try {
                        int32_t version = genInput->readInt();
                        if (version == SegmentInfos::FORMAT_LOCKLESS) {
                            // The generation is stored twice; the two copies must agree.
                            int64_t gen0 = genInput->readLong();
                            int64_t gen1 = genInput->readLong();
                            segmentInfos->message(L"fallback check: " + StringUtils::toString(gen0) + L"; " + StringUtils::toString(gen1));
                            if (gen0 == gen1) {
                                // the file is consistent
                                genB = gen0;
                                fileConsistent = true;
                            }
                        }
                    } catch (IOException&) {
                        // will retry
                    } catch (LuceneException& e) {
                        finally = e;
                    }
                    genInput->close();
                    finally.throwException();
                    if (fileConsistent)
                        break;
                }

                // Pause before the next attempt at reading segments.gen.
                LuceneThread::threadSleep(SegmentInfos::defaultGenFileRetryPauseMsec);
            }

            segmentInfos->message(String(IndexFileNames::SEGMENTS_GEN()) + L" check: genB=" + StringUtils::toString(genB));

            // pick the larger of the two gen's
            gen = std::max(genA, genB);

            // neither approach found a generation
            if (gen == -1)
                boost::throw_exception(FileNotFoundException(L"No segments* file found in directory"));
        }

        // Third method (fallback if first & second methods are not reliable): since both directory cache and
        // file contents cache seem to be stale, just advance the generation.
        if (method == 1 || (method == 0 && lastGen == gen && retry)) {
            // Once switched to method 1 the loop never goes back to method 0.
            method = 1;
            if (genLookaheadCount < SegmentInfos::defaultGenLookaheadCount) {
                ++gen;
                ++genLookaheadCount;
                segmentInfos->message(L"look ahead increment gen to " + StringUtils::toString(gen));
            }
        }

        if (lastGen == gen) {
            // This means we're about to try the same segments_N last tried. This is allowed, exactly once, because
            // writer could have been in the process of writing segments_N last time.
            if (retry) {
                // OK, we've tried the same segments_N file twice in a row, so this must be a real error.
                exc.throwException();
            } else
                retry = true;
        } else if (method == 0) {
            // Segment file has advanced since our last loop, so reset retry
            retry = false;
        }

        lastGen = gen;

        segmentFileName = IndexFileNames::fileNameFromGeneration(IndexFileNames::SEGMENTS(), L"", gen);

        try {
            runBody(segmentFileName);
            segmentInfos->message(L"success on " + segmentFileName);
            return;
        } catch (LuceneException& err) {
            // Save the original root cause
            if (exc.isNull())
                exc = err;

            segmentInfos->message(L"primary Exception on '" + segmentFileName + L"': " + err.getError() + L"'; will retry: retry=" + StringUtils::toString(retry) + L"; gen = " + StringUtils::toString(gen));

            if (!retry && gen > 1) {
                // This is our first time trying this segments file (because retry is false), and, there is possibly a
                // segments_(N-1) (because gen > 1). So, check if the segments_(N-1) exists and try it if so.
                String prevSegmentFileName(IndexFileNames::fileNameFromGeneration(IndexFileNames::SEGMENTS(), L"", gen - 1));

                if (directory->fileExists(prevSegmentFileName)) {
                    segmentInfos->message(L"fallback to prior segment file '" + prevSegmentFileName + L"'");

                    try {
                        runBody(prevSegmentFileName);
                        if (!exc.isNull())
                            segmentInfos->message(L"success on fallback " + prevSegmentFileName);
                        return;
                    } catch (LuceneException& err2) {
                        // The secondary failure is only logged; exc keeps the primary cause.
                        segmentInfos->message(L"secondary Exception on '" + prevSegmentFileName + L"': " + err2.getError() + L"'; will retry");
                    }
                }
            }
        }
    }
}