void IndexContentTestCase::testIndexContent_DL() { Index* pIndex; IndexReaderPtr pReader; const Term* pTerm; TermIteratorPtr pTermIter; int docCount = 0; int termCount = 0; uint32_t i; uint32_t indexTermId; string fileName; //Check posting list Path indexPath = TestHelper::getTestDataPath(); indexPath.makeDirectory(); indexPath.pushDirectory(_T("test_dlindex")); pIndex = new Index(indexPath.toString().c_str(), Index::READ, NULL); auto_ptr<Index> indexPtr(pIndex); pReader = pIndex->acquireReader(); TermReaderPtr pTermReader = pReader->termReader(); pTermIter = pTermReader->termIterator("BODY"); StoredFieldsReaderPtr pDocReader = pReader->createStoredFieldsReader(); //Iterator all terms while(pTermIter->next()) { pTerm = pTermIter->term(); CPPUNIT_ASSERT(pTermReader->seek(pTerm)); indexTermId = (pTerm->cast<int32_t>())->getValue(); docCount = 0; TermPostingIteratorPtr pTermDocFreqs = pTermReader->termPostings(); while(pTermDocFreqs->nextDoc()) { DocumentPtr pDoc = pDocReader->document(pTermDocFreqs->doc()); docCount++; // 获取文件路径 fileName.assign(pDoc->getField("PATH")->getValue().c_str()); TermList* pTermIdList = m_pDocScanner->getTermListOfFile(fileName); CPPUNIT_ASSERT(pTermIdList != NULL); for(i = 0, termCount = 0; i < pTermIdList->getSize(); i++) { if(indexTermId == pTermIdList->getValue(i)) { termCount++; } } CPPUNIT_ASSERT_EQUAL((tf_t)termCount, pTermDocFreqs->freq()); }//end while nextDoc() CPPUNIT_ASSERT_EQUAL((df_t)docCount, pTermDocFreqs->getDocFreq()); } CPPUNIT_ASSERT(m_pDocScanner->getTotalTermCount() == pReader->getNumTerms()); }
/// Opens a reader on `directory` and compares the "count" field of every
/// document slot below maxDoc() with the decimal string of (slot + startAt).
///
/// Note the polarity of the result: it is a failure flag.
///
/// @param directory  index directory to open.
/// @param startAt    value expected in the "count" field of document 0.
/// @return true if at least one document did NOT match, false if all matched.
static bool verifyIndex(DirectoryPtr directory, int32_t startAt)
{
    IndexReaderPtr reader = IndexReader::open(directory, true);
    int32_t docLimit = reader->maxDoc();

    bool mismatchFound = false;
    for (int32_t docId = 0; docId < docLimit; ++docId)
    {
        DocumentPtr doc = reader->document(docId);
        // No early exit: every document is still read even after a mismatch.
        if (doc->getField(L"count")->stringValue() != StringUtils::toString(docId + startAt))
        {
            mismatchFound = true;
        }
    }

    reader->close();
    return mismatchFound;
}
void IndexContentTestCase::testIndexContent_WL() { Index* pIndex; IndexReaderPtr pReader; const Term* pTerm; TermIteratorPtr pTermIter; int docCount = 0; int termCount = 0; int pos = -1; uint32_t indexTermId; string fileName; //Check posting list Path indexPath = TestHelper::getTestDataPath(); indexPath.makeDirectory(); indexPath.pushDirectory(_T("test_wlindex")); pIndex = new Index(indexPath.toString().c_str(), Index::READ, NULL); auto_ptr<Index> indexPtr(pIndex); CPPUNIT_ASSERT(pIndex != NULL); pReader = pIndex->acquireReader(); TermReaderPtr pTermReader = pReader->termReader(); pTermIter = pTermReader->termIterator("BODY"); StoredFieldsReaderPtr pDocReader = pReader->createStoredFieldsReader(); //Iterator all terms while(pTermIter->next()) { pTerm = pTermIter->term(); CPPUNIT_ASSERT(pTermReader->seek(pTerm)); indexTermId = (pTerm->cast<int32_t>())->getValue(); TermPositionIteratorPtr pPositions = pTermReader->termPositions(); docCount = 0; while(pPositions->nextDoc()) { DocumentPtr pDoc = pDocReader->document(pPositions->doc()); docCount++; fileName.assign(pDoc->getField("PATH")->getValue().c_str()); TermList* pTermIdList = m_pDocScanner->getTermListOfFile(fileName); CPPUNIT_ASSERT(pTermIdList != NULL); pos = pPositions->nextPosition(); termCount = 0; while(pos != -1) { termCount++; CPPUNIT_ASSERT(indexTermId == pTermIdList->getValue(pos)); pos = pPositions->nextPosition(); } CPPUNIT_ASSERT(termCount == pPositions->freq()); }//end while nextDoc() CPPUNIT_ASSERT(docCount == pPositions->getDocFreq()); } CPPUNIT_ASSERT_EQUAL((int64_t)m_pDocScanner->getTotalTermCount(), (int64_t)pReader->getNumTerms()); }