/// Build a randomish document for both RAMDirectory and MemoryIndex, and run all the queries against it. void checkAgainstRAMDirectory() { StringStream fooField; StringStream termField; // add up to 250 terms to field "foo" int32_t fieldCount = random->nextInt(250) + 1; for (int32_t i = 0; i < fieldCount; ++i) { fooField << L" " << randomTerm(); } // add up to 250 terms to field "foo" int32_t termCount = random->nextInt(250) + 1; for (int32_t i = 0; i < termCount; ++i) { termField << L" " << randomTerm(); } RAMDirectoryPtr ramdir = newLucene<RAMDirectory>(); AnalyzerPtr analyzer = randomAnalyzer(); IndexWriterPtr writer = newLucene<IndexWriter>(ramdir, analyzer, IndexWriter::MaxFieldLengthUNLIMITED); DocumentPtr doc = newLucene<Document>(); FieldPtr field1 = newLucene<Field>(L"foo", fooField.str(), Field::STORE_NO, Field::INDEX_ANALYZED); FieldPtr field2 = newLucene<Field>(L"term", termField.str(), Field::STORE_NO, Field::INDEX_ANALYZED); doc->add(field1); doc->add(field2); writer->addDocument(doc); writer->close(); MemoryIndexPtr memory = newLucene<MemoryIndex>(); memory->addField(L"foo", fooField.str(), analyzer); memory->addField(L"term", termField.str(), analyzer); checkAllQueries(memory, ramdir, analyzer); }
/// Return a random unicode term, like StressIndexingTest. String randomString() { int32_t end = random->nextInt(20); if (buffer.size() < 1 + end) { buffer.resize((int32_t)((double)(1 + end) * 1.25)); } for (int32_t i = 0; i < end; ++i) { int32_t t = random->nextInt(5); if (t == 0 && i < end - 1) { #ifdef LPP_UNICODE_CHAR_SIZE_2 // Make a surrogate pair // High surrogate buffer[i++] = (wchar_t)nextInt(0xd800, 0xdc00); // Low surrogate buffer[i] = (wchar_t)nextInt(0xdc00, 0xe000); #else buffer[i] = (wchar_t)nextInt(0xdc00, 0xe000); #endif } else if (t <= 1) { buffer[i] = (wchar_t)nextInt(0x01, 0x80); } else if (t == 2) { buffer[i] = (wchar_t)nextInt(0x80, 0x800); } else if (t == 3) { buffer[i] = (wchar_t)nextInt(0x800, 0xd800); } else if (t == 4) { buffer[i] = (wchar_t)nextInt(0xe000, 0xfff0); } } return String(buffer.get(), end); }
void testSorting(int32_t precisionStep) { RandomPtr rnd = newLucene<Random>(); String field = L"field" + StringUtils::toString(precisionStep); // 10 random tests, the index order is ascending, so using a reverse sort field should return descending documents for (int32_t i = 0; i < 10; ++i) { int32_t lower = (int32_t)(rnd->nextDouble() * noDocs * distance) + startOffset; int32_t upper = (int32_t)(rnd->nextDouble() * noDocs * distance) + startOffset; if (lower > upper) { std::swap(lower, upper); } QueryPtr tq = NumericRangeQuery::newIntRange(field, precisionStep, lower, upper, true, true); TopDocsPtr topDocs = searcher->search(tq, FilterPtr(), noDocs, newLucene<Sort>(newLucene<SortField>(field, SortField::INT, true))); if (topDocs->totalHits == 0) { continue; } Collection<ScoreDocPtr> sd = topDocs->scoreDocs; EXPECT_TRUE(sd); int32_t last = StringUtils::toInt(searcher->doc(sd[0]->doc)->get(field)); for (int32_t j = 1; j < sd.size(); ++j) { int32_t act = StringUtils::toInt(searcher->doc(sd[j]->doc)->get(field)); EXPECT_TRUE(last > act); last = act; } } }
void testRangeSplit(int32_t precisionStep) { RandomPtr rnd = newLucene<Random>(); String field = L"ascfield" + StringUtils::toString(precisionStep); // 50 random tests for (int32_t i = 0; i < 50; ++i) { int32_t lower = (int32_t)(rnd->nextDouble() * noDocs - noDocs / 2.0); int32_t upper = (int32_t)(rnd->nextDouble() * noDocs - noDocs / 2.0); if (lower > upper) { std::swap(lower, upper); } // test inclusive range QueryPtr tq = NumericRangeQuery::newIntRange(field, precisionStep, lower, upper, true, true); TopDocsPtr tTopDocs = searcher->search(tq, 1); EXPECT_EQ(upper - lower + 1, tTopDocs->totalHits); // test exclusive range tq = NumericRangeQuery::newIntRange(field, precisionStep, lower, upper, false, false); tTopDocs = searcher->search(tq, 1); EXPECT_EQ(std::max(upper - lower - 1, (int32_t)0), tTopDocs->totalHits); // test left exclusive range tq = NumericRangeQuery::newIntRange(field, precisionStep, lower, upper, false, true); tTopDocs = searcher->search(tq, 1); EXPECT_EQ(upper - lower, tTopDocs->totalHits); // test right exclusive range tq = NumericRangeQuery::newIntRange(field, precisionStep, lower, upper, true, false); tTopDocs = searcher->search(tq, 1); EXPECT_EQ(upper - lower, tTopDocs->totalHits); } }
/// half of the time, returns a random term from TEST_TERMS. /// the other half of the time, returns a random unicode string. String randomTerm() { if (random->nextInt() % 2 == 1) { // return a random TEST_TERM return TEST_TERMS[random->nextInt(TEST_TERMS.size())]; } else { // return a random unicode term return randomString(); } }
/// Return a random lowercase token of 1..7 characters drawn from a-z.
String randomToken() {
    static const wchar_t* alphabet = L"abcdefghijklmnopqrstuvwxyz";
    static const int32_t alphabetSize = 26;
    int32_t tl = 1 + rndToken->nextInt(7);
    StringStream sb;
    for (int32_t cx = 0; cx < tl; ++cx) {
        // Fix: the bound was nextInt(25), which indexes 0..24 and could
        // never pick the last letter 'z'; nextInt's bound is exclusive.
        sb << alphabet[rndToken->nextInt(alphabetSize)];
    }
    return sb.str();
}
/// For each of 50 random int ranges, run the same range both as a trie-encoded
/// NumericRangeQuery and as a classic TermRangeQuery over the prefix-coded
/// terms, and require identical hit counts for all four inclusive/exclusive
/// boundary combinations. Also accumulates the number of terms each query type
/// visited, compared at the end for the degenerate precision step.
void testRandomTrieAndClassicRangeQuery(int32_t precisionStep) {
    RandomPtr rnd = newLucene<Random>();
    String field = L"field" + StringUtils::toString(precisionStep);
    // running totals of terms visited by the trie (T) and classic (C) queries
    int32_t termCountT = 0;
    int32_t termCountC = 0;
    for (int32_t i = 0; i < 50; ++i) {
        int32_t lower = (int32_t)(rnd->nextDouble() * noDocs * distance) + startOffset;
        int32_t upper = (int32_t)(rnd->nextDouble() * noDocs * distance) + startOffset;
        if (lower > upper) {
            std::swap(lower, upper);
        }
        // test inclusive range
        NumericRangeQueryPtr tq = NumericRangeQuery::newIntRange(field, precisionStep, lower, upper, true, true);
        TermRangeQueryPtr cq = newLucene<TermRangeQuery>(field, NumericUtils::intToPrefixCoded(lower), NumericUtils::intToPrefixCoded(upper), true, true);
        TopDocsPtr tTopDocs = searcher->search(tq, 1);
        TopDocsPtr cTopDocs = searcher->search(cq, 1);
        EXPECT_EQ(cTopDocs->totalHits, tTopDocs->totalHits);
        termCountT += tq->getTotalNumberOfTerms();
        termCountC += cq->getTotalNumberOfTerms();
        // test exclusive range
        tq = NumericRangeQuery::newIntRange(field, precisionStep, lower, upper, false, false);
        cq = newLucene<TermRangeQuery>(field, NumericUtils::intToPrefixCoded(lower), NumericUtils::intToPrefixCoded(upper), false, false);
        tTopDocs = searcher->search(tq, 1);
        cTopDocs = searcher->search(cq, 1);
        EXPECT_EQ(cTopDocs->totalHits, tTopDocs->totalHits);
        termCountT += tq->getTotalNumberOfTerms();
        termCountC += cq->getTotalNumberOfTerms();
        // test left exclusive range
        tq = NumericRangeQuery::newIntRange(field, precisionStep, lower, upper, false, true);
        cq = newLucene<TermRangeQuery>(field, NumericUtils::intToPrefixCoded(lower), NumericUtils::intToPrefixCoded(upper), false, true);
        tTopDocs = searcher->search(tq, 1);
        cTopDocs = searcher->search(cq, 1);
        EXPECT_EQ(cTopDocs->totalHits, tTopDocs->totalHits);
        termCountT += tq->getTotalNumberOfTerms();
        termCountC += cq->getTotalNumberOfTerms();
        // test right exclusive range
        tq = NumericRangeQuery::newIntRange(field, precisionStep, lower, upper, true, false);
        cq = newLucene<TermRangeQuery>(field, NumericUtils::intToPrefixCoded(lower), NumericUtils::intToPrefixCoded(upper), true, false);
        tTopDocs = searcher->search(tq, 1);
        cTopDocs = searcher->search(cq, 1);
        EXPECT_EQ(cTopDocs->totalHits, tTopDocs->totalHits);
        termCountT += tq->getTotalNumberOfTerms();
        termCountC += cq->getTotalNumberOfTerms();
    }
    // With precisionStep == INT_MAX the trie degenerates to plain per-value terms,
    // so both query types must have visited exactly the same number of terms.
    if (precisionStep == INT_MAX) {
        EXPECT_EQ(termCountT, termCountC);
    }
}
/// Build a random field value: 1..3 random tokens, each followed by a space.
String randomField() {
    StringStream out;
    int32_t tokenCount = 1 + rndToken->nextInt(3);
    while (tokenCount-- > 0) {
        out << randomToken() << L" ";
    }
    return out.str();
}
/// Exercise checkReadBytes over the test file with three read-size patterns
/// (growing, random, constant), wrapping the position back to offset 0 and
/// seeking to the start whenever a read runs past the end of the file.
void runReadBytes(IndexInputPtr input, int32_t bufferSize) {
    RandomPtr random = newLucene<Random>();
    int32_t pos = 0;
    // gradually increasing size
    int32_t size = 1;
    while (size < bufferSize * 10) {
        checkReadBytes(input, size, pos);
        pos += size;
        if (pos >= TEST_FILE_LENGTH) {
            // wrap
            pos = 0;
            input->seek(0);
        }
        size += size / 200 + 1;
    }
    // wildly fluctuating size
    for (int32_t i = 0; i < 1000; ++i) {
        int32_t chunk = 1 + random->nextInt(10000);
        checkReadBytes(input, chunk, pos);
        pos += chunk;
        if (pos >= TEST_FILE_LENGTH) {
            // wrap
            pos = 0;
            input->seek(0);
        }
    }
    // constant small size (7 bytes)
    for (int32_t i = 0; i < bufferSize; ++i) {
        checkReadBytes(input, 7, pos);
        pos += 7;
        if (pos >= TEST_FILE_LENGTH) {
            // wrap
            pos = 0;
            input->seek(0);
        }
    }
}
/// Pick one of three analyzers uniformly at random: Simple, Stop, or Standard.
AnalyzerPtr randomAnalyzer() {
    int32_t choice = random->nextInt(3);
    if (choice == 0) {
        return newLucene<SimpleAnalyzer>();
    }
    if (choice == 1) {
        return newLucene<StopAnalyzer>(LuceneVersion::LUCENE_CURRENT);
    }
    return newLucene<StandardAnalyzer>(LuceneVersion::LUCENE_CURRENT);
}
/// Test hook: on roughly one call in four, yield the current thread to
/// encourage different interleavings during concurrency tests. Always
/// returns true so execution continues normally.
virtual bool testPoint(const String& name) {
    bool shouldYield = (random->nextInt(4) == 2);
    if (shouldYield) {
        LuceneThread::threadYield();
    }
    return true;
}
/// start is inclusive and end is exclusive int32_t nextInt(int32_t start, int32_t end) { return start + random->nextInt(end - start); }
/// Return a unique-ish temp path of the form "<tempdir>/<desc>.<random int>".
String getTempDir(const String& desc) {
    String suffix = StringUtils::toString(randomTest->nextInt());
    return FileUtils::joinPath(getTempDir(), desc + L"." + suffix);
}
namespace Lucene { static RandomPtr randomTest = newLucene<Random>(); static String testDir; void setTestDir(const String& dir) { testDir = dir; } String getTestDir() { if (testDir.empty()) { boost::throw_exception(RuntimeException(L"test directory not set")); } return testDir; } String getTempDir() { static String tempDir; if (tempDir.empty()) { tempDir = FileUtils::joinPath(getTestDir(), L"temp"); FileUtils::createDirectory(tempDir); } return tempDir; } String getTempDir(const String& desc) { return FileUtils::joinPath(getTempDir(), desc + L"." + StringUtils::toString(randomTest->nextInt())); } void syncConcurrentMerges(const IndexWriterPtr& writer) { syncConcurrentMerges(writer->getMergeScheduler()); } void syncConcurrentMerges(const MergeSchedulerPtr& ms) { if (MiscUtils::typeOf<ConcurrentMergeScheduler>(ms)) { boost::dynamic_pointer_cast<ConcurrentMergeScheduler>(ms)->sync(); } } String intToEnglish(int32_t i) { String english(_intToEnglish(i)); boost::trim(english); return english; } String _intToEnglish(int32_t i) { String english; if (i == 0) { return L"zero"; } if (i < 0) { english += L"minus "; i = -i; } if (i >= 1000000000) { // billions english += _intToEnglish(i / 1000000000); english += L"billion, "; i = i % 1000000000; } if (i >= 1000000) { // millions english += _intToEnglish(i / 1000000); english += L"million, "; i = i % 1000000; } if (i >= 1000) { // thousands english += _intToEnglish(i / 1000); english += L"thousand, "; i = i % 1000; } if (i >= 100) { // hundreds english += _intToEnglish(i / 100); english += L"hundred "; i = i % 100; } if (i >= 20) { switch (i/10) { case 9: english += L"ninety"; break; case 8: english += L"eighty"; break; case 7: english += L"seventy"; break; case 6: english += L"sixty"; break; case 5: english += L"fifty"; break; case 4: english += L"forty"; break; case 3: english += L"thirty"; break; case 2: english += L"twenty"; break; } i = i % 10; english += i == 0 ? 
L" " : L"-"; } switch (i) { case 19: english += L"nineteen "; break; case 18: english += L"eighteen "; break; case 17: english += L"seventeen "; break; case 16: english += L"sixteen "; break; case 15: english += L"fifteen "; break; case 14: english += L"fourteen "; break; case 13: english += L"thirteen "; break; case 12: english += L"twelve "; break; case 11: english += L"eleven "; break; case 10: english += L"ten "; break; case 9: english += L"nine "; break; case 8: english += L"eight "; break; case 7: english += L"seven "; break; case 6: english += L"six "; break; case 5: english += L"five "; break; case 4: english += L"four "; break; case 3: english += L"three "; break; case 2: english += L"two "; break; case 1: english += L"one "; break; } return english; } bool checkIndex(const DirectoryPtr& dir) { CheckIndexPtr checker = newLucene<CheckIndex>(dir); IndexStatusPtr indexStatus = checker->checkIndex(); if (!indexStatus || !indexStatus->clean) { boost::throw_exception(RuntimeException(L"CheckIndex failed")); return false; } return true; } }