void verifyIndex(const DirectoryPtr& dir) { IndexReaderPtr ir = IndexReader::open(dir, false); verifyIndex(ir); ir->close(); }
void RPCSearchService::handleQuery(std::string& sResult, const Statement& state) const { IndexReaderPtr pIndexReader = m_searchRes.getIndexReader(); FIRTEX_ASSERT2(pIndexReader.isNotNull()); try { TimeProbe probe; probe.start(); QueryParser parser(pIndexReader->getAnalyzerMapper(), m_searchRes.getDefaultField()); IndexSearcher searcher(pIndexReader); QueryHitsPtr pHits = searcher.search(state, parser); QueryResult result; if (pHits.isNotNull()) { FieldSelectClausePtr pFieldClause = state.getFieldSelectClause(); QueryClausePtr pQueryClause = state.getQueryClause(); if (pFieldClause.isNotNull() && pQueryClause.isNotNull()) { QueryPtr pQuery = parser.parse(pQueryClause->getQueryString()); FIRTEX_ASSERT2(pQuery.isNotNull()); FieldSelector selector(pIndexReader->getDocSchema()); for (size_t i = 0; i < pFieldClause->getFieldCount(); ++i) { const FieldSelectClause::SnippetParam& param = pFieldClause->getField(i); FieldFilterPtr pFieldFilter; if (param.snippet) { SnippetGenerator* pSnippetGen = new SnippetGenerator(); pFieldFilter.reset(pSnippetGen); if (!pSnippetGen->init(pQuery, parser.getAnalyzerMapper(), param.field, param.preTag, param.postTag, param.separator)) { FX_LOG(ERROR, "Init snippet generator for field: [%s] FAILED", param.field.c_str()); formatErrorMessage(sResult, "Init snippet generator for field: " + param.field + " FAILED"); } } if (!selector.addField(param.field, pFieldFilter)) { FX_LOG(ERROR, "Invalid field: [%s]", param.field.c_str()); } } result.init(selector, pIndexReader, *pHits); } else { result.init(pIndexReader, *pHits); } } probe.stop(); result.setTimeCost(probe.elapsed() / 1000); FX_QUERY_TRACE(INFO, result.getTracer(), "search phase time [%d] ms", (int32_t)result.getTimeCost()); std::stringstream ss; XMLResultFormatter formatter; formatter.format(result, ss); sResult = ss.str(); } catch(const FirteXException& e) { stringstream ss; state.toString(ss); FX_LOG(ERROR, "Handle request FAILED: [%s], reason: [%s]", ss.str().c_str(), e.what().c_str()); formatErrorMessage(sResult, "Handle request failed"); } }
void IndexTestCase::testDocumentDeletion() { DocumentSchema schema; schema.addField("URL", "PRIMARY_KEY", true); schema.addTextField("BODY"); schema.addField("MODIFIED", "INT64", true); stringstream ss1; const size_t NUM_DOCS = 1000; size_t i = 0; for (; i < NUM_DOCS; ++i) { ss1 << "url" << i << ", body" << i << " hot," << (i * 100) % 1000 << ";"; } buildIndex(schema, ss1.str()); stringstream ss2; for (; i < 2 * NUM_DOCS; ++i) { ss2 << "url" << i << ", body" << i << " hot," << (i * 100) % 1000 << ";"; } buildIndex(schema, ss2.str(), true); StandardAnalyzerPtr sa(new StandardAnalyzer()); sa->init(); TokenViewPtr pTokens = sa->tokenize("hot", 3); CPPUNIT_ASSERT(pTokens.isNull() != true); CPPUNIT_ASSERT(pTokens->getNumTokens() == 1); TokenView::Iterator it = pTokens->iterator(); TermPtr pTerm(new Term("BODY", it.next().getTextValue())); tstring str = getTestPath(); std::set<docid_t> answer; { Index index; index.open(str, Index::RDWR, NULL); IndexWriterPtr pIndexWriter = index.acquireWriter(); CPPUNIT_ASSERT(pIndexWriter != NULL); IndexReaderPtr pIndexReader = index.acquireReader(); CPPUNIT_ASSERT(pIndexReader != NULL); for (size_t i = 0; i < 2 * NUM_DOCS; ++i) { stringstream ss; ss << "url" << i; if (i == 1000 || i == 1500 || i == 1505 || i == 1999) { pIndexWriter->deleteDocument(ss.str()); } else { TermReaderPtr pTermReader = pIndexReader->termReader(); TermPtr pTerm(new Term("URL", ss.str())); TermPostingIteratorPtr pIt = pTermReader->seek(pTerm.get()); docid_t docId = pIt->skipTo(0); answer.insert(docId); } } TermReaderPtr pTermReader = pIndexReader->termReader(); TermPostingIteratorPtr pDocFreqs = pTermReader->seek(pTerm.get()); CPPUNIT_ASSERT(!pDocFreqs.isNull()); CPPUNIT_ASSERT_EQUAL((df_t)NUM_DOCS * 2, pDocFreqs->getTermMeta().getDocFreq()); std::set<docid_t>::const_iterator it = answer.begin(); for (docid_t i = 0; i < (docid_t)(2 * NUM_DOCS); ) { docid_t docId = pDocFreqs->skipTo((docid_t)i); i = docId + 1; if (docId == INVALID_DOCID) { break; } CPPUNIT_ASSERT_EQUAL(*it, docId); ++it; } CPPUNIT_ASSERT(it == answer.end()); } { Index index; index.open(str, Index::READ, NULL); IndexReaderPtr pIndexReader = index.acquireReader(); CPPUNIT_ASSERT(pIndexReader != NULL); TermReaderPtr pTermReader = pIndexReader->termReader(); TermPostingIteratorPtr pDocFreqs = pTermReader->seek(pTerm.get()); CPPUNIT_ASSERT(!pDocFreqs.isNull()); CPPUNIT_ASSERT_EQUAL((df_t)(2 * NUM_DOCS), pDocFreqs->getTermMeta().getDocFreq()); std::set<docid_t>::const_iterator it = answer.begin(); for (docid_t i = 0; i < (docid_t)(2 * NUM_DOCS); ) { docid_t docId = pDocFreqs->skipTo((docid_t)i); i = docId + 1; if (docId == INVALID_DOCID) { break; } CPPUNIT_ASSERT_EQUAL(*it, docId); ++it; } CPPUNIT_ASSERT(it == answer.end()); // for (std::set<docid_t>::const_iterator it = answer.begin(); // it != answer.end(); ++it) // { // docid_t docId = pDocFreqs->skipTo(*it); // CPPUNIT_ASSERT_EQUAL(*it, docId); // } // docid_t docId = pDocFreqs->skipTo(NUM_DOCS + 0); // CPPUNIT_ASSERT_EQUAL((docid_t)NUM_DOCS + 1, docId); // docId = pDocFreqs->skipTo(NUM_DOCS + 500); // CPPUNIT_ASSERT_EQUAL((docid_t)NUM_DOCS + 501, docId); // docId = pDocFreqs->skipTo(NUM_DOCS + 505); // CPPUNIT_ASSERT_EQUAL((docid_t)NUM_DOCS + 506, docId); // docId = pDocFreqs->skipTo(2 * NUM_DOCS - 1); // CPPUNIT_ASSERT_EQUAL((docid_t)INVALID_DOCID, docId); } }