boost::any StringCache::createValue(IndexReaderPtr reader, EntryPtr key) {
    EntryPtr entry(key);
    String field(entry->field);
    Collection<String> retArray(Collection<String>::newInstance(reader->maxDoc()));
    TermDocsPtr termDocs(reader->termDocs());
    TermEnumPtr termEnum(reader->terms(newLucene<Term>(field)));
    LuceneException finally;
    try {
        do {
            TermPtr term(termEnum->term());
            if (!term || term->field() != field)
                break;
            String termval(term->text());
            termDocs->seek(termEnum);
            while (termDocs->next())
                retArray[termDocs->doc()] = termval;
        } while (termEnum->next());
    } catch (LuceneException& e) {
        finally = e;
    }
    termDocs->close();
    termEnum->close();
    finally.throwException();
    return retArray;
}
void SegmentInfos::write(DirectoryPtr directory) {
    String segmentFileName(getNextSegmentFileName());

    // always advance the generation on write
    if (generation == -1)
        generation = 1;
    else
        ++generation;

    ChecksumIndexOutputPtr segnOutput(newLucene<ChecksumIndexOutput>(directory->createOutput(segmentFileName)));

    bool success = false;
    LuceneException finally;
    try {
        segnOutput->writeInt(CURRENT_FORMAT); // write FORMAT
        segnOutput->writeLong(++version); // every write changes the index
        segnOutput->writeInt(counter); // write counter
        segnOutput->writeInt(segmentInfos.size()); // write infos
        for (Collection<SegmentInfoPtr>::iterator seginfo = segmentInfos.begin(); seginfo != segmentInfos.end(); ++seginfo)
            (*seginfo)->write(segnOutput);
        segnOutput->writeStringStringMap(userData);
        segnOutput->prepareCommit();
        success = true;
        pendingSegnOutput = segnOutput;
    } catch (LuceneException& e) {
        finally = e;
    }

    if (!success) {
        // We hit an exception above; try to close the file but suppress any exception
        try {
            segnOutput->close();
        } catch (...) {
            // Suppress so we keep throwing the original exception
        }
        try {
            // try not to leave a truncated segments_N file in the index
            directory->deleteFile(segmentFileName);
        } catch (...) {
            // Suppress so we keep throwing the original exception
        }
    }
    finally.throwException();
}
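// A minimal usage sketch (not part of the original source) of the two-phase commit
// implemented by write() above and finishCommit() further below: write() stages
// segments_N via prepareCommit() and stashes the stream in pendingSegnOutput;
// finishCommit() completes the checksum, syncs, and publishes the generation.
// The names infos/dir are assumed for illustration; note that finishCommit()
// already rolls back the staged file itself if it fails.
static void commitSketch(const SegmentInfosPtr& infos, const DirectoryPtr& dir) {
    infos->write(dir);        // phase 1: stage segments_N (prepareCommit)
    infos->finishCommit(dir); // phase 2: fsync and make the commit visible
}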
void ConcurrentMergeScheduler::merge(const IndexWriterPtr& writer) {
    BOOST_ASSERT(!writer->holdsLock());

    this->_writer = writer;
    initMergeThreadPriority();
    dir = writer->getDirectory();

    // First, quickly run through the newly proposed merges and add any orthogonal merges (ie a merge not
    // involving segments already pending to be merged) to the queue. If we are way behind on merging,
    // many of these newly proposed merges will likely already be registered.
    message(L"now merge");
    message(L" index: " + writer->segString());

    // Iterate, pulling from the IndexWriter's queue of pending merges, until it's empty
    while (true) {
        OneMergePtr merge(writer->getNextMerge());
        if (!merge) {
            message(L" no more merges pending; now return");
            return;
        }

        // We do this with the primary thread to keep deterministic assignment of segment names
        writer->mergeInit(merge);

        bool success = false;
        LuceneException finally;
        try {
            SyncLock syncLock(this);
            MergeThreadPtr merger;
            while (mergeThreadCount() >= maxThreadCount) {
                message(L" too many merge threads running; stalling...");
                wait(1000);
            }

            message(L" consider merge " + merge->segString(dir));
            BOOST_ASSERT(mergeThreadCount() < maxThreadCount);

            // OK to spawn a new merge thread to handle this merge
            merger = getMergeThread(writer, merge);
            mergeThreads.add(merger);
            message(L" launch new thread");
            merger->start();
            success = true;
        } catch (LuceneException& e) {
            finally = e;
        }
        if (!success)
            writer->mergeFinish(merge);
        finally.throwException();
    }
}
boost::any StringIndexCache::createValue(IndexReaderPtr reader, EntryPtr key) {
    EntryPtr entry(key);
    String field(entry->field);
    Collection<int32_t> retArray(Collection<int32_t>::newInstance(reader->maxDoc()));
    Collection<String> mterms(Collection<String>::newInstance(reader->maxDoc() + 1));
    TermDocsPtr termDocs(reader->termDocs());
    TermEnumPtr termEnum(reader->terms(newLucene<Term>(field)));
    int32_t t = 0; // current term number

    // An entry for documents that have no terms in this field. Should a document with no terms be at
    // the top or the bottom? This puts them at the top - if it is changed, FieldDocSortedHitQueue
    // needs to change as well.
    mterms[t++] = L"";

    LuceneException finally;
    try {
        do {
            TermPtr term(termEnum->term());
            if (!term || term->field() != field || t >= mterms.size())
                break;

            // store term text
            mterms[t] = term->text();

            termDocs->seek(termEnum);
            while (termDocs->next())
                retArray[termDocs->doc()] = t;

            ++t;
        } while (termEnum->next());
    } catch (LuceneException& e) {
        finally = e;
    }
    termDocs->close();
    termEnum->close();
    finally.throwException();

    if (t == 0) {
        // if there are no terms, make the term array have a single null entry
        mterms = Collection<String>::newInstance(1);
    } else if (t < mterms.size()) {
        // if there are fewer terms than documents, trim off the dead array space
        mterms.resize(t);
    }

    return newLucene<StringIndex>(retArray, mterms);
}
void MergeThread::run() {
    // First time through the while loop we do the merge that we were started with
    OneMergePtr merge(this->startMerge);
    ConcurrentMergeSchedulerPtr merger(_merger);

    LuceneException finally;
    try {
        merger->message(L" merge thread: start");
        IndexWriterPtr writer(_writer);

        while (true) {
            setRunningMerge(merge);
            merger->doMerge(merge);

            // Subsequent times through the loop we do any new merge that writer says is necessary
            merge = writer->getNextMerge();
            if (merge) {
                writer->mergeInit(merge);
                merger->message(L" merge thread: do another merge " + merge->segString(merger->dir));
            } else
                break;
        }

        merger->message(L" merge thread: done");
    } catch (MergeAbortedException&) {
        // Ignore the exception if it was due to abort
    } catch (LuceneException& e) {
        if (!merger->suppressExceptions) {
            // suppressExceptions is normally only set during testing.
            merger->anyExceptions = true;
            merger->handleMergeException(e);
        } else
            finally = e;
    }

    {
        SyncLock syncLock(merger);
        merger->notifyAll();
        bool removed = merger->mergeThreads.remove(shared_from_this());
        BOOST_ASSERT(removed);
    }
    finally.throwException();
}
boost::any DoubleCache::createValue(IndexReaderPtr reader, EntryPtr key) {
    EntryPtr entry(key);
    String field(entry->field);
    DoubleParserPtr parser(VariantUtils::get<DoubleParserPtr>(entry->custom));
    if (!parser) {
        FieldCachePtr wrapper(_wrapper);
        boost::any doubles;
        try {
            doubles = wrapper->getDoubles(reader, field, FieldCache::DEFAULT_DOUBLE_PARSER());
        } catch (NumberFormatException&) {
            doubles = wrapper->getDoubles(reader, field, FieldCache::NUMERIC_UTILS_DOUBLE_PARSER());
        }
        return doubles;
    }

    Collection<double> retArray;
    TermDocsPtr termDocs(reader->termDocs());
    TermEnumPtr termEnum(reader->terms(newLucene<Term>(field)));
    LuceneException finally;
    try {
        do {
            TermPtr term(termEnum->term());
            if (!term || term->field() != field)
                break;
            double termval = parser->parseDouble(term->text());
            if (!retArray) // late init
                retArray = Collection<double>::newInstance(reader->maxDoc());
            termDocs->seek(termEnum);
            while (termDocs->next())
                retArray[termDocs->doc()] = termval;
        } while (termEnum->next());
    } catch (StopFillCacheException&) {
    } catch (LuceneException& e) {
        finally = e;
    }
    termDocs->close();
    termEnum->close();
    finally.throwException();

    if (!retArray) // no values
        retArray = Collection<double>::newInstance(reader->maxDoc());
    return retArray;
}
void runReadBytesAndClose(IndexInputPtr input, int32_t bufferSize) {
    LuceneException finally;
    try {
        runReadBytes(input, bufferSize);
    } catch (LuceneException& e) {
        finally = e;
    }
    input->close();
    finally.throwException();
}
void FieldInfos::write(DirectoryPtr d, const String& name) {
    IndexOutputPtr output(d->createOutput(name));
    LuceneException finally;
    try {
        write(output);
    } catch (LuceneException& e) {
        finally = e;
    }
    output->close();
    finally.throwException();
}
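// The two functions above are the smallest instances of the cleanup idiom used
// throughout this code: C++ has no try/finally, so a pending LuceneException is
// captured, the resource is closed unconditionally, and the exception is rethrown
// afterwards via throwException() (which must be a no-op when nothing was caught,
// since every function here calls it on the success path too). A generic sketch
// of the pattern, with doWork() and closeMe() standing in for any work and cleanup:
//
//     LuceneException finally;
//     try {
//         doWork();             // may throw LuceneException
//     } catch (LuceneException& e) {
//         finally = e;          // remember it, don't unwind yet
//     }
//     closeMe();                // always runs, like a finally block
//     finally.throwException(); // rethrows the saved exception, if any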
QueryPtr FuzzyQuery::rewrite(const IndexReaderPtr& reader) {
    if (!termLongEnough) {
        // can only match if it's exact
        return newLucene<TermQuery>(term);
    }

    int32_t maxSize = BooleanQuery::getMaxClauseCount();
    ScoreTermQueuePtr stQueue(newLucene<ScoreTermQueue>(maxSize + 1));
    FilteredTermEnumPtr enumerator(getEnum(reader));
    LuceneException finally;
    try {
        ScoreTermPtr st = newLucene<ScoreTerm>();
        do {
            TermPtr t(enumerator->term());
            if (!t)
                break;
            double score = enumerator->difference();

            // ignore uncompetitive hits
            if (stQueue->size() >= maxSize && score <= stQueue->top()->score)
                continue;

            // add new entry in PQ
            st->term = t;
            st->score = score;
            stQueue->add(st);

            // possibly drop entries from queue
            st = (stQueue->size() > maxSize) ? stQueue->pop() : newLucene<ScoreTerm>();
        } while (enumerator->next());
    } catch (LuceneException& e) {
        finally = e;
    }
    enumerator->close();
    finally.throwException();

    BooleanQueryPtr query(newLucene<BooleanQuery>(true));
    int32_t size = stQueue->size();
    for (int32_t i = 0; i < size; ++i) {
        ScoreTermPtr st(stQueue->pop());
        TermQueryPtr tq(newLucene<TermQuery>(st->term)); // found a match
        tq->setBoost(getBoost() * st->score); // set the boost
        query->add(tq, BooleanClause::SHOULD); // add to query
    }

    return query;
}
FieldInfos::FieldInfos(DirectoryPtr d, const String& name) {
    format = 0;
    byNumber = Collection<FieldInfoPtr>::newInstance();
    byName = MapStringFieldInfo::newInstance();

    IndexInputPtr input(d->openInput(name));
    LuceneException finally;
    try {
        try {
            read(input, name);
        } catch (IOException& e) {
            if (format == FORMAT_PRE) {
                input->seek(0);
                input->setModifiedUTF8StringsMode();
                byNumber.clear();
                byName.clear();
                try {
                    read(input, name);
                } catch (...) {
                    // Ignore any new exception & throw original IOE
                    finally = e;
                }
            } else
                finally = e;
        }
    } catch (LuceneException& e) {
        finally = e;
    }
    input->close();
    finally.throwException();
}
int32_t IndexReader::deleteDocuments(const TermPtr& term) {
    ensureOpen();
    TermDocsPtr docs(termDocs(term));
    if (!docs) {
        return 0;
    }

    int32_t n = 0;
    LuceneException finally;
    try {
        while (docs->next()) {
            deleteDocument(docs->doc());
            ++n;
        }
    } catch (LuceneException& e) {
        finally = e;
    }
    docs->close();
    finally.throwException();
    return n;
}
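// A brief usage sketch for deleteDocuments() above (the field name and value are
// made up for illustration): delete every document whose indexed "id" field equals
// "42", and get back the number of documents that were marked deleted.
//
//     int32_t deleted = reader->deleteDocuments(newLucene<Term>(L"id", L"42"));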
boost::any ByteCache::createValue(IndexReaderPtr reader, EntryPtr key) {
    EntryPtr entry(key);
    String field(entry->field);
    ByteParserPtr parser(VariantUtils::get<ByteParserPtr>(entry->custom));
    if (!parser)
        return FieldCachePtr(_wrapper)->getBytes(reader, field, FieldCache::DEFAULT_BYTE_PARSER());

    Collection<uint8_t> retArray(Collection<uint8_t>::newInstance(reader->maxDoc()));
    TermDocsPtr termDocs(reader->termDocs());
    TermEnumPtr termEnum(reader->terms(newLucene<Term>(field)));
    LuceneException finally;
    try {
        do {
            TermPtr term(termEnum->term());
            if (!term || term->field() != field)
                break;
            uint8_t termval = parser->parseByte(term->text());
            termDocs->seek(termEnum);
            while (termDocs->next())
                retArray[termDocs->doc()] = termval;
        } while (termEnum->next());
    } catch (StopFillCacheException&) {
    } catch (LuceneException& e) {
        finally = e;
    }
    termDocs->close();
    termEnum->close();
    finally.throwException();
    return retArray;
}
void handle(const LuceneException& t) {
    // Record the failure first: gtest's FAIL() returns from the enclosing
    // function, so anything placed after it would never run.
    SyncLock syncLock(&failures);
    failures.add(t);
    FAIL() << t.getError();
}
void IndexReader::main(Collection<String> args) {
    String filename;
    bool extract = false;

    for (Collection<String>::iterator arg = args.begin(); arg != args.end(); ++arg) {
        if (*arg == L"-extract")
            extract = true;
        else if (filename.empty())
            filename = *arg;
    }

    if (filename.empty()) {
        std::wcout << L"Usage: IndexReader [-extract] <cfsfile>";
        return;
    }

    DirectoryPtr dir;
    CompoundFileReaderPtr cfr;
    LuceneException finally;
    try {
        String dirname(FileUtils::extractPath(filename));
        filename = FileUtils::extractFile(filename); // keep only the file name; dirname holds the path
        dir = FSDirectory::open(dirname);
        cfr = newLucene<CompoundFileReader>(dir, filename);

        HashSet<String> _files(cfr->listAll());
        Collection<String> files(Collection<String>::newInstance(_files.begin(), _files.end()));
        std::sort(files.begin(), files.end()); // sort the array of filenames so that the output is more readable

        for (Collection<String>::iterator file = files.begin(); file != files.end(); ++file) {
            int64_t len = cfr->fileLength(*file);

            if (extract) {
                std::wcout << L"extract " << *file << L" with " << len << L" bytes to local directory...";
                IndexInputPtr ii(cfr->openInput(*file));

                boost::filesystem::ofstream f(*file, std::ios::binary | std::ios::out);

                // read and write with a small buffer, which is more effective than reading byte by byte
                ByteArray buffer(ByteArray::newInstance(1024));
                int32_t chunk = buffer.size();
                while (len > 0) {
                    int32_t bufLen = std::min(chunk, (int32_t)len);
                    ii->readBytes(buffer.get(), 0, bufLen);
                    f.write((char*)buffer.get(), bufLen);
                    len -= bufLen;
                }
                ii->close();
            } else
                std::wcout << *file << L": " << len << L" bytes\n";
        }
    } catch (LuceneException& e) {
        finally = e;
    }

    if (dir)
        dir->close();
    if (cfr)
        cfr->close();
    finally.throwException();
}
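// Invocation sketch for the tool above, matching its own usage string (the .cfs
// file name is made up): plain invocation prints each contained file and its size,
// while -extract also writes each contained file into the current directory.
//
//     IndexReader _0.cfs            // list contents of the compound file
//     IndexReader -extract _0.cfs   // extract contents to the local directory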
void SegmentInfos::finishCommit(DirectoryPtr dir) {
    if (!pendingSegnOutput)
        boost::throw_exception(IllegalStateException(L"prepareCommit was not called"));

    bool success = false;
    LuceneException finally;
    try {
        pendingSegnOutput->finishCommit();
        pendingSegnOutput->close();
        pendingSegnOutput.reset();
        success = true;
    } catch (LuceneException& e) {
        finally = e;
    }
    if (!success)
        rollbackCommit(dir);
    finally.throwException();

    // NOTE: if we crash here, we have left a segments_N file in the directory in a possibly corrupt state (if
    // some bytes made it to stable storage and others didn't). But, the segments_N file includes a checksum
    // at the end, which should catch this case. So when a reader tries to read it, it will throw a
    // CorruptIndexException, which should cause the retry logic in SegmentInfos to kick in and load the last
    // good (previous) segments_N-1 file.
    String fileName(IndexFileNames::fileNameFromGeneration(IndexFileNames::SEGMENTS(), L"", generation));

    success = false;
    try {
        dir->sync(fileName);
        success = true;
    } catch (...) {
    }

    if (!success)
        dir->deleteFile(fileName);

    lastGeneration = generation;

    IndexOutputPtr genOutput;
    try {
        genOutput = dir->createOutput(IndexFileNames::SEGMENTS_GEN());
        try {
            genOutput->writeInt(FORMAT_LOCKLESS);
            genOutput->writeLong(generation);
            genOutput->writeLong(generation);
        } catch (LuceneException& e) {
            finally = e;
        }
        genOutput->close();
        finally.throwException();
    } catch (...) {
    }
}
void FindSegmentsFile::doRun(IndexCommitPtr commit) {
    if (commit) {
        if (directory != commit->getDirectory())
            boost::throw_exception(IOException(L"The specified commit does not match the specified Directory"));
        runBody(commit->getSegmentsFileName());
        return;
    }

    String segmentFileName;
    int64_t lastGen = -1;
    int64_t gen = 0;
    int32_t genLookaheadCount = 0;
    bool retry = false;
    LuceneException exc;
    SegmentInfosPtr segmentInfos(_segmentInfos);

    int32_t method = 0;

    // Loop until we succeed in calling runBody() without hitting an IOException. An IOException most likely
    // means a commit was in process and has finished, in the time it took us to load the now-old infos files
    // (and segments files). It's also possible it's a true error (corrupt index). To distinguish these,
    // on each retry we must see "forward progress" on which generation we are trying to load. If we don't,
    // then the original error is real and we throw it.

    // We have three methods for determining the current generation. We try the first two in parallel, and
    // fall back to the third when necessary.

    while (true) {
        if (method == 0) {
            // Method 1: list the directory and use the highest segments_N file. This method works well as long
            // as there is no stale caching on the directory contents (NOTE: NFS clients often have such stale caching)
            HashSet<String> files(directory->listAll());
            int64_t genA = segmentInfos->getCurrentSegmentGeneration(files);

            segmentInfos->message(L"directory listing genA=" + StringUtils::toString(genA));

            // Method 2: open segments.gen and read its contents. Then we take the larger of the two gens. This way,
            // if either approach is hitting a stale cache (NFS) we have a better chance of getting the right generation.
            int64_t genB = -1;
            for (int32_t i = 0; i < SegmentInfos::defaultGenFileRetryCount; ++i) {
                IndexInputPtr genInput;
                try {
                    genInput = directory->openInput(IndexFileNames::SEGMENTS_GEN());
                } catch (FileNotFoundException& e) {
                    segmentInfos->message(L"Segments.gen open: FileNotFoundException " + e.getError());
                    break;
                } catch (IOException& e) {
                    segmentInfos->message(L"Segments.gen open: IOException " + e.getError());
                }

                if (genInput) {
                    LuceneException finally;
                    bool fileConsistent = false;
                    try {
                        int32_t version = genInput->readInt();
                        if (version == SegmentInfos::FORMAT_LOCKLESS) {
                            int64_t gen0 = genInput->readLong();
                            int64_t gen1 = genInput->readLong();
                            segmentInfos->message(L"fallback check: " + StringUtils::toString(gen0) + L"; " + StringUtils::toString(gen1));
                            if (gen0 == gen1) {
                                // the file is consistent
                                genB = gen0;
                                fileConsistent = true;
                            }
                        }
                    } catch (IOException&) {
                        // will retry
                    } catch (LuceneException& e) {
                        finally = e;
                    }
                    genInput->close();
                    finally.throwException();
                    if (fileConsistent)
                        break;
                }

                LuceneThread::threadSleep(SegmentInfos::defaultGenFileRetryPauseMsec);
            }

            segmentInfos->message(String(IndexFileNames::SEGMENTS_GEN()) + L" check: genB=" + StringUtils::toString(genB));

            // pick the larger of the two gen's
            gen = std::max(genA, genB);

            // neither approach found a generation
            if (gen == -1)
                boost::throw_exception(FileNotFoundException(L"No segments* file found in directory"));
        }

        // Third method (fallback if the first & second methods are not reliable): since both the directory
        // cache and the file contents cache seem to be stale, just advance the generation.
        if (method == 1 || (method == 0 && lastGen == gen && retry)) {
            method = 1;
            if (genLookaheadCount < SegmentInfos::defaultGenLookaheadCount) {
                ++gen;
                ++genLookaheadCount;
                segmentInfos->message(L"look ahead increment gen to " + StringUtils::toString(gen));
            }
        }

        if (lastGen == gen) {
            // This means we're about to try the same segments_N last tried. This is allowed, exactly once, because
            // the writer could have been in the process of writing segments_N last time.
            if (retry) {
                // OK, we've tried the same segments_N file twice in a row, so this must be a real error.
                exc.throwException();
            } else
                retry = true;
        } else if (method == 0) {
            // Segment file has advanced since our last loop, so reset retry
            retry = false;
        }

        lastGen = gen;

        segmentFileName = IndexFileNames::fileNameFromGeneration(IndexFileNames::SEGMENTS(), L"", gen);

        try {
            runBody(segmentFileName);
            segmentInfos->message(L"success on " + segmentFileName);
            return;
        } catch (LuceneException& err) {
            // Save the original root cause
            if (exc.isNull())
                exc = err;

            segmentInfos->message(L"primary Exception on '" + segmentFileName + L"': " + err.getError() +
                                  L"'; will retry: retry=" + StringUtils::toString(retry) +
                                  L"; gen = " + StringUtils::toString(gen));

            if (!retry && gen > 1) {
                // This is our first time trying this segments file (because retry is false), and there is possibly a
                // segments_(N-1) (because gen > 1). So, check if the segments_(N-1) exists and try it if so.
                String prevSegmentFileName(IndexFileNames::fileNameFromGeneration(IndexFileNames::SEGMENTS(), L"", gen - 1));

                if (directory->fileExists(prevSegmentFileName)) {
                    segmentInfos->message(L"fallback to prior segment file '" + prevSegmentFileName + L"'");
                    try {
                        runBody(prevSegmentFileName);
                        if (!exc.isNull())
                            segmentInfos->message(L"success on fallback " + prevSegmentFileName);
                        return;
                    } catch (LuceneException& err2) {
                        segmentInfos->message(L"secondary Exception on '" + prevSegmentFileName + L"': " + err2.getError() + L"'; will retry");
                    }
                }
            }
        }
    }
}
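// A minimal sketch (class name and exact signatures assumed, not taken from this
// file) of how the retry protocol in doRun() above is typically consumed: a
// subclass supplies runBody(), which attempts to read one candidate segments_N
// file and throws on failure; doRun() handles generation discovery, look-ahead,
// and retrying older generations.
//
//     class FindSegmentsRead : public FindSegmentsFile {
//     public:
//         virtual void runBody(const String& segmentFileName) {
//             // read the candidate commit point; any exception triggers a retry
//             SegmentInfosPtr(_segmentInfos)->read(directory, segmentFileName);
//         }
//     };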
void SegmentInfos::read(DirectoryPtr directory, const String& segmentFileName) {
    bool success = false;

    // clear any previous segments
    segmentInfos.clear();

    ChecksumIndexInputPtr input(newLucene<ChecksumIndexInput>(directory->openInput(segmentFileName)));

    generation = generationFromSegmentsFileName(segmentFileName);
    lastGeneration = generation;
    LuceneException finally;
    try {
        int32_t format = input->readInt();

        if (format < 0) { // file contains explicit format info
            if (format < CURRENT_FORMAT)
                boost::throw_exception(CorruptIndexException(L"Unknown format version: " + StringUtils::toString(format)));
            version = input->readLong(); // read version
            counter = input->readInt(); // read counter
        } else
            counter = format;

        for (int32_t i = input->readInt(); i > 0; --i) // read segmentInfos
            segmentInfos.add(newLucene<SegmentInfo>(directory, format, input));

        // in old format the version number may be at the end of the file
        if (format >= 0) {
            if (input->getFilePointer() >= input->length())
                version = MiscUtils::currentTimeMillis(); // old file format without version number
            else
                version = input->readLong(); // read version
        }

        if (format <= FORMAT_USER_DATA) {
            if (format <= FORMAT_DIAGNOSTICS)
                userData = input->readStringStringMap();
            else if (input->readByte() != 0) {
                if (!singletonUserData)
                    singletonUserData = MapStringString::newInstance();
                singletonUserData[String(L"userData")] = input->readString();
                userData = singletonUserData;
            } else
                userData.clear();
        } else
            userData.clear();

        if (format <= FORMAT_CHECKSUM) {
            int64_t checksumNow = input->getChecksum();
            int64_t checksumThen = input->readLong();
            if (checksumNow != checksumThen)
                boost::throw_exception(CorruptIndexException(L"Checksum mismatch in segments file"));
        }

        success = true;
    } catch (LuceneException& e) {
        finally = e;
    }

    input->close();

    // clear any segment infos we had loaded so we have a clean slate on retry
    if (!success)
        segmentInfos.clear();

    finally.throwException();
}
void DocInverterPerField::processFields(Collection<FieldablePtr> fields, int32_t count) {
    fieldState->reset(docState->doc->getBoost());

    int32_t maxFieldLength = docState->maxFieldLength;
    bool doInvert = consumer->start(fields, count);

    DocumentsWriterPtr docWriter(docState->_docWriter);
    DocInverterPerThreadPtr perThread(_perThread);

    for (int32_t i = 0; i < count; ++i) {
        FieldablePtr field = fields[i];

        if (field->isIndexed() && doInvert) {
            bool anyToken;

            if (fieldState->length > 0)
                fieldState->position += docState->analyzer->getPositionIncrementGap(fieldInfo->name);

            if (!field->isTokenized()) {
                // un-tokenized field
                String stringValue(field->stringValue());
                int32_t valueLength = (int32_t)stringValue.length();
                perThread->singleToken->reinit(stringValue, 0, valueLength);
                fieldState->attributeSource = perThread->singleToken;
                consumer->start(field);

                bool success = false;
                LuceneException finally;
                try {
                    consumer->add();
                    success = true;
                } catch (LuceneException& e) {
                    finally = e;
                }
                if (!success)
                    docWriter->setAborting();
                finally.throwException();

                fieldState->offset += valueLength;
                ++fieldState->length;
                ++fieldState->position;
                anyToken = (valueLength > 0);
            } else {
                // tokenized field
                TokenStreamPtr stream;
                TokenStreamPtr streamValue(field->tokenStreamValue());

                if (streamValue)
                    stream = streamValue;
                else {
                    // the field does not have a TokenStream, so we have to obtain one from the analyzer
                    ReaderPtr reader; // find or make Reader
                    ReaderPtr readerValue(field->readerValue());

                    if (readerValue)
                        reader = readerValue;
                    else {
                        String stringValue(field->stringValue());
                        perThread->stringReader->init(stringValue);
                        reader = perThread->stringReader;
                    }

                    // Tokenize field and add to postingTable
                    stream = docState->analyzer->reusableTokenStream(fieldInfo->name, reader);
                }

                // reset the TokenStream to the first token
                stream->reset();

                int32_t startLength = fieldState->length;

                LuceneException finally;
                try {
                    int32_t offsetEnd = fieldState->offset - 1;
                    bool hasMoreTokens = stream->incrementToken();

                    fieldState->attributeSource = stream;

                    OffsetAttributePtr offsetAttribute(fieldState->attributeSource->addAttribute<OffsetAttribute>());
                    PositionIncrementAttributePtr posIncrAttribute(fieldState->attributeSource->addAttribute<PositionIncrementAttribute>());

                    consumer->start(field);

                    while (true) {
                        // If we hit an exception in stream.next below (which is fairly common, eg if analyzer
                        // chokes on a given document), then it's non-aborting and (above) this one document
                        // will be marked as deleted, but still consume a docID
                        if (!hasMoreTokens)
                            break;

                        int32_t posIncr = posIncrAttribute->getPositionIncrement();
                        fieldState->position += posIncr;
                        if (fieldState->position > 0)
                            --fieldState->position;

                        if (posIncr == 0)
                            ++fieldState->numOverlap;

                        bool success = false;
                        try {
                            // If we hit an exception in here, we abort all buffered documents since the last
                            // flush, on the likelihood that the internal state of the consumer is now corrupt
                            // and should not be flushed to a new segment
                            consumer->add();
                            success = true;
                        } catch (LuceneException& e) {
                            finally = e;
                        }
                        if (!success)
                            docWriter->setAborting();
                        finally.throwException();

                        ++fieldState->position;
                        offsetEnd = fieldState->offset + offsetAttribute->endOffset();
                        if (++fieldState->length >= maxFieldLength) {
                            if (docState->infoStream)
                                *docState->infoStream << L"maxFieldLength " << StringUtils::toString(maxFieldLength)
                                                      << L" reached for field " << fieldInfo->name
                                                      << L", ignoring following tokens\n";
                            break;
                        }

                        hasMoreTokens = stream->incrementToken();
                    }

                    // trigger streams to perform end-of-stream operations
                    stream->end();

                    fieldState->offset += offsetAttribute->endOffset();
                    anyToken = (fieldState->length > startLength);
                } catch (LuceneException& e) {
                    finally = e;
                }
                stream->close();
                finally.throwException();
            }

            if (anyToken)
                fieldState->offset += docState->analyzer->getOffsetGap(field);
            fieldState->boost *= field->getBoost();
        }

        // don't hang onto the field
        fields[i].reset();
    }

    consumer->finish();
    endConsumer->finish();
}
TermInfosReader::TermInfosReader(DirectoryPtr dir, const String& seg, FieldInfosPtr fis, int32_t readBufferSize, int32_t indexDivisor) {
    bool success = false;

    if (indexDivisor < 1 && indexDivisor != -1)
        boost::throw_exception(IllegalArgumentException(L"indexDivisor must be -1 (don't load terms index) or greater than 0: got " + StringUtils::toString(indexDivisor)));

    LuceneException finally;
    try {
        directory = dir;
        segment = seg;
        fieldInfos = fis;

        origEnum = newLucene<SegmentTermEnum>(directory->openInput(segment + L"." + IndexFileNames::TERMS_EXTENSION(), readBufferSize), fieldInfos, false);
        _size = origEnum->size;

        if (indexDivisor != -1) {
            // Load terms index
            totalIndexInterval = origEnum->indexInterval * indexDivisor;
            SegmentTermEnumPtr indexEnum(newLucene<SegmentTermEnum>(directory->openInput(segment + L"." + IndexFileNames::TERMS_INDEX_EXTENSION(), readBufferSize), fieldInfos, true));

            try {
                int32_t indexSize = 1 + ((int32_t)indexEnum->size - 1) / indexDivisor; // otherwise read index

                indexTerms = Collection<TermPtr>::newInstance(indexSize);
                indexInfos = Collection<TermInfoPtr>::newInstance(indexSize);
                indexPointers = Collection<int64_t>::newInstance(indexSize);

                for (int32_t i = 0; indexEnum->next(); ++i) {
                    indexTerms[i] = indexEnum->term();
                    indexInfos[i] = indexEnum->termInfo();
                    indexPointers[i] = indexEnum->indexPointer;

                    for (int32_t j = 1; j < indexDivisor; ++j) {
                        if (!indexEnum->next())
                            break;
                    }
                }
            } catch (LuceneException& e) {
                finally = e;
            }
            indexEnum->close();
        } else {
            // Do not load terms index
            totalIndexInterval = -1;
        }

        success = true;
    } catch (LuceneException& e) {
        finally = e;
    }

    // With lock-less commits, it's entirely possible (and fine) to hit a FileNotFound exception above.
    // In this case, we want to explicitly close any subset of things that were opened.
    if (!success)
        close();

    finally.throwException();
}