void IndexContentTestCase::testIndexContent_DL() { Index* pIndex; IndexReaderPtr pReader; const Term* pTerm; TermIteratorPtr pTermIter; int docCount = 0; int termCount = 0; uint32_t i; uint32_t indexTermId; string fileName; //Check posting list Path indexPath = TestHelper::getTestDataPath(); indexPath.makeDirectory(); indexPath.pushDirectory(_T("test_dlindex")); pIndex = new Index(indexPath.toString().c_str(), Index::READ, NULL); auto_ptr<Index> indexPtr(pIndex); pReader = pIndex->acquireReader(); TermReaderPtr pTermReader = pReader->termReader(); pTermIter = pTermReader->termIterator("BODY"); StoredFieldsReaderPtr pDocReader = pReader->createStoredFieldsReader(); //Iterator all terms while(pTermIter->next()) { pTerm = pTermIter->term(); CPPUNIT_ASSERT(pTermReader->seek(pTerm)); indexTermId = (pTerm->cast<int32_t>())->getValue(); docCount = 0; TermPostingIteratorPtr pTermDocFreqs = pTermReader->termPostings(); while(pTermDocFreqs->nextDoc()) { DocumentPtr pDoc = pDocReader->document(pTermDocFreqs->doc()); docCount++; // 获取文件路径 fileName.assign(pDoc->getField("PATH")->getValue().c_str()); TermList* pTermIdList = m_pDocScanner->getTermListOfFile(fileName); CPPUNIT_ASSERT(pTermIdList != NULL); for(i = 0, termCount = 0; i < pTermIdList->getSize(); i++) { if(indexTermId == pTermIdList->getValue(i)) { termCount++; } } CPPUNIT_ASSERT_EQUAL((tf_t)termCount, pTermDocFreqs->freq()); }//end while nextDoc() CPPUNIT_ASSERT_EQUAL((df_t)docCount, pTermDocFreqs->getDocFreq()); } CPPUNIT_ASSERT(m_pDocScanner->getTotalTermCount() == pReader->getNumTerms()); }
void QueryResult::init(const FieldSelector& selector, const IndexReaderPtr& pIndexReader, const QueryHits& hits) { StoredFieldsReaderPtr pStoredFieldsReader = pIndexReader->createStoredFieldsReader(); if (pStoredFieldsReader.isNull()) { return; } TimeProbe probe; probe.start(); m_docs.reserve(hits.size()); setTracer(hits.getTracer()); setTotalHits(hits.getTotalHits()); QueryHits::Iterator it = hits.iterator(); while (it.hasNext()) { const QueryHits::HitDoc& hitDoc = it.next(); ResultDocPtr pResDoc(new ResultDoc(hitDoc.getDocId(), hitDoc.getScore(), selector.size())); addDoc(pResDoc); pStoredFieldsReader->getDocument(selector, *pResDoc); } probe.stop(); FX_QUERY_TRACE(INFO, getTracer(), "fetch field time [%d] ms", (int32_t)probe.elapsed() / 1000); }
void IndexTestCase::testTextIndex() { DocumentSchema schema; schema.addUnIndexedField("PATH"); schema.addTextField("CONTENT"); buildIndex(schema, "file1.txt, hello world."); tstring str = getTestPath(); Index index; index.open(str, Index::READ, NULL); IndexReaderPtr pReader = index.acquireReader(); CPPUNIT_ASSERT(pReader != NULL); TermReaderPtr pTermReader = pReader->termReader(); CPPUNIT_ASSERT(pTermReader); TermIteratorPtr pTermIterator = pTermReader->termIterator("CONTENT"); CPPUNIT_ASSERT(pTermIterator != NULL); while (pTermIterator->hasNext()) { TermIterator::TermEntry entry = pTermIterator->next(); const TermMeta& termMeta = entry.postingIterator->getTermMeta(); CPPUNIT_ASSERT_EQUAL((df_t)1, termMeta.getDocFreq()); CPPUNIT_ASSERT_EQUAL((ctf_t)1, termMeta.getCTF()); } Term term("CONTENT", "hello"); TermPostingIteratorPtr pPost = pTermReader->seek(&term); CPPUNIT_ASSERT(pPost); docid_t docId = pPost->skipTo(0); CPPUNIT_ASSERT_EQUAL((docid_t)0, docId); docId = pPost->skipTo(++docId); CPPUNIT_ASSERT_EQUAL((docid_t)INVALID_DOCID, docId); StoredFieldsReaderPtr pDocReader = pReader->createStoredFieldsReader(); CPPUNIT_ASSERT(pDocReader); FieldSelector selector(pReader->getDocSchema(), true, false); ResultDoc resultDoc(0); bool ret = pDocReader->getDocument(selector, resultDoc); CPPUNIT_ASSERT(ret); CPPUNIT_ASSERT(resultDoc.size() > 0); }
void IndexContentTestCase::testIndexContent_WL() { Index* pIndex; IndexReaderPtr pReader; const Term* pTerm; TermIteratorPtr pTermIter; int docCount = 0; int termCount = 0; int pos = -1; uint32_t indexTermId; string fileName; //Check posting list Path indexPath = TestHelper::getTestDataPath(); indexPath.makeDirectory(); indexPath.pushDirectory(_T("test_wlindex")); pIndex = new Index(indexPath.toString().c_str(), Index::READ, NULL); auto_ptr<Index> indexPtr(pIndex); CPPUNIT_ASSERT(pIndex != NULL); pReader = pIndex->acquireReader(); TermReaderPtr pTermReader = pReader->termReader(); pTermIter = pTermReader->termIterator("BODY"); StoredFieldsReaderPtr pDocReader = pReader->createStoredFieldsReader(); //Iterator all terms while(pTermIter->next()) { pTerm = pTermIter->term(); CPPUNIT_ASSERT(pTermReader->seek(pTerm)); indexTermId = (pTerm->cast<int32_t>())->getValue(); TermPositionIteratorPtr pPositions = pTermReader->termPositions(); docCount = 0; while(pPositions->nextDoc()) { DocumentPtr pDoc = pDocReader->document(pPositions->doc()); docCount++; fileName.assign(pDoc->getField("PATH")->getValue().c_str()); TermList* pTermIdList = m_pDocScanner->getTermListOfFile(fileName); CPPUNIT_ASSERT(pTermIdList != NULL); pos = pPositions->nextPosition(); termCount = 0; while(pos != -1) { termCount++; CPPUNIT_ASSERT(indexTermId == pTermIdList->getValue(pos)); pos = pPositions->nextPosition(); } CPPUNIT_ASSERT(termCount == pPositions->freq()); }//end while nextDoc() CPPUNIT_ASSERT(docCount == pPositions->getDocFreq()); } CPPUNIT_ASSERT_EQUAL((int64_t)m_pDocScanner->getTotalTermCount(), (int64_t)pReader->getNumTerms()); }