// Opens an IndexReader on `dir`, runs the verifyIndex overload that takes a
// reader on it, and closes the reader again.
//
// The reader is closed on the failure path as well: if verification throws,
// the reader is released first and the original exception is rethrown, so a
// failing check does not leave the reader open.
void verifyIndex(const DirectoryPtr& dir) {
    IndexReaderPtr ir = IndexReader::open(dir, false);
    try {
        verifyIndex(ir);
    } catch (...) {
        // Release the reader before letting the failure propagate.
        ir->close();
        throw;
    }
    ir->close();
}
Exemplo n.º 2
0
// Runs the search described by `state` against the service's index reader and
// writes the serialized outcome into `sResult`.
//
// sResult  out: receives the output of XMLResultFormatter on success, or the
//          text produced by formatErrorMessage() on failure.
// state    the statement to execute; its field-select and query clauses
//          decide whether selected fields (optionally with snippets) are
//          attached to the result.
//
// A FirteXException raised while handling the request is logged together with
// the statement text and reported through formatErrorMessage(); other
// exception types are not caught here.
void RPCSearchService::handleQuery(std::string& sResult, const Statement& state) const
{
    IndexReaderPtr pIndexReader = m_searchRes.getIndexReader();
    FIRTEX_ASSERT2(pIndexReader.isNotNull());

    try
    {
        TimeProbe probe;
        probe.start();

        QueryParser parser(pIndexReader->getAnalyzerMapper(),
                           m_searchRes.getDefaultField());

        IndexSearcher searcher(pIndexReader);
        QueryHitsPtr pHits = searcher.search(state, parser);
        QueryResult result;

        // With no hits the result stays default-constructed and is formatted
        // as-is below.
        if (pHits.isNotNull())
        {
            FieldSelectClausePtr pFieldClause = state.getFieldSelectClause();
            QueryClausePtr pQueryClause = state.getQueryClause();
            if (pFieldClause.isNotNull() && pQueryClause.isNotNull())
            {
                // The parsed query is handed to every snippet generator.
                QueryPtr pQuery = parser.parse(pQueryClause->getQueryString());
                FIRTEX_ASSERT2(pQuery.isNotNull());

                FieldSelector selector(pIndexReader->getDocSchema());

                for (size_t i = 0; i < pFieldClause->getFieldCount(); ++i)
                {
                    const FieldSelectClause::SnippetParam& param =
                        pFieldClause->getField(i);

                    // Stays empty for fields that do not request a snippet.
                    FieldFilterPtr pFieldFilter;
                    if (param.snippet)
                    {
                        SnippetGenerator* pSnippetGen = new SnippetGenerator();
                        // Hand ownership to the smart pointer before init()
                        // so the generator is released on every exit path.
                        pFieldFilter.reset(pSnippetGen);

                        if (!pSnippetGen->init(pQuery, parser.getAnalyzerMapper(),
                                        param.field, param.preTag, param.postTag,
                                        param.separator))
                        {
                            FX_LOG(ERROR, "Init snippet generator for field: [%s] FAILED", param.field.c_str());
                            formatErrorMessage(sResult, "Init snippet generator for field: "
                                    + param.field + " FAILED");
                            // Stop here: falling through would register the
                            // un-initialised generator and overwrite the
                            // error just stored in sResult with the formatted
                            // result further down.
                            return;
                        }
                    }

                    // An unknown field is logged and skipped; the remaining
                    // fields are still processed.
                    if (!selector.addField(param.field, pFieldFilter))
                    {
                        FX_LOG(ERROR, "Invalid field: [%s]", param.field.c_str());
                    }
                }
                result.init(selector, pIndexReader, *pHits);
            }
            else
            {
                result.init(pIndexReader, *pHits);
            }
        }

        probe.stop();
        // NOTE(review): the trace below reports this value as milliseconds,
        // so elapsed() presumably returns microseconds - confirm.
        result.setTimeCost(probe.elapsed() / 1000);

        FX_QUERY_TRACE(INFO, result.getTracer(), "search phase time [%d] ms",
                       static_cast<int32_t>(result.getTimeCost()));

        std::stringstream ss;
        XMLResultFormatter formatter;
        formatter.format(result, ss);
        sResult = ss.str();
    }
    catch (const FirteXException& e)
    {
        // Include the textual form of the statement in the log entry.
        std::stringstream ss;
        state.toString(ss);
        FX_LOG(ERROR, "Handle request FAILED: [%s], reason: [%s]",
               ss.str().c_str(), e.what().c_str());
        formatErrorMessage(sResult, "Handle request failed");
    }
}
Exemplo n.º 3
0
void IndexTestCase::testDocumentDeletion()
{
    DocumentSchema schema;
    schema.addField("URL", "PRIMARY_KEY", true);
    schema.addTextField("BODY");
    schema.addField("MODIFIED", "INT64", true);
    
    stringstream ss1;
    const size_t NUM_DOCS = 1000;
    size_t i = 0;
    for (; i < NUM_DOCS; ++i)
    {
        ss1 << "url" << i << ", body" << i << " hot," 
            << (i * 100) % 1000 << ";";
    }
    buildIndex(schema, ss1.str());

    stringstream ss2;
    for (; i < 2 * NUM_DOCS; ++i)
    {
        ss2 << "url" << i << ", body" << i << " hot," 
            << (i * 100) % 1000 << ";";
    }

    buildIndex(schema, ss2.str(), true);

    StandardAnalyzerPtr sa(new StandardAnalyzer());
    sa->init();

    TokenViewPtr pTokens = sa->tokenize("hot", 3);
    CPPUNIT_ASSERT(pTokens.isNull() != true);
    CPPUNIT_ASSERT(pTokens->getNumTokens() == 1);
    TokenView::Iterator it = pTokens->iterator();
    TermPtr pTerm(new Term("BODY", it.next().getTextValue()));
    
    tstring str = getTestPath();
    
    std::set<docid_t> answer;

    {
        Index index;
        index.open(str, Index::RDWR, NULL); 
        IndexWriterPtr pIndexWriter = index.acquireWriter();
        CPPUNIT_ASSERT(pIndexWriter != NULL);

        IndexReaderPtr pIndexReader = index.acquireReader();
        CPPUNIT_ASSERT(pIndexReader != NULL);

        for (size_t i = 0; i < 2 * NUM_DOCS; ++i)
        {
            stringstream ss;
            ss << "url" << i;
            if (i == 1000 || i == 1500 || i == 1505 || i == 1999)
            {
                pIndexWriter->deleteDocument(ss.str());
            }
            else
            {
                TermReaderPtr pTermReader = pIndexReader->termReader();
                TermPtr pTerm(new Term("URL", ss.str()));
                TermPostingIteratorPtr pIt = pTermReader->seek(pTerm.get());
                docid_t docId = pIt->skipTo(0);
                answer.insert(docId);
            }
        }

        TermReaderPtr pTermReader = pIndexReader->termReader();
        TermPostingIteratorPtr pDocFreqs = pTermReader->seek(pTerm.get());
        CPPUNIT_ASSERT(!pDocFreqs.isNull());

        CPPUNIT_ASSERT_EQUAL((df_t)NUM_DOCS * 2, pDocFreqs->getTermMeta().getDocFreq());

        std::set<docid_t>::const_iterator it = answer.begin();
        for (docid_t i = 0; i < (docid_t)(2 * NUM_DOCS); )
        {        
            docid_t docId = pDocFreqs->skipTo((docid_t)i);
            i = docId + 1;
            if (docId == INVALID_DOCID)
            {
                break;
            }
            CPPUNIT_ASSERT_EQUAL(*it, docId);
            ++it;
        }
        CPPUNIT_ASSERT(it == answer.end());
    }

    {
        Index index;
        index.open(str, Index::READ, NULL); 
        IndexReaderPtr pIndexReader = index.acquireReader();
        CPPUNIT_ASSERT(pIndexReader != NULL);

        TermReaderPtr pTermReader = pIndexReader->termReader();
        TermPostingIteratorPtr pDocFreqs = pTermReader->seek(pTerm.get());
        CPPUNIT_ASSERT(!pDocFreqs.isNull());

        CPPUNIT_ASSERT_EQUAL((df_t)(2 * NUM_DOCS), pDocFreqs->getTermMeta().getDocFreq());
        std::set<docid_t>::const_iterator it = answer.begin();
        for (docid_t i = 0; i < (docid_t)(2 * NUM_DOCS); )
        {        
            docid_t docId = pDocFreqs->skipTo((docid_t)i);
            i = docId + 1;
            if (docId == INVALID_DOCID)
            {
                break;
            }
            CPPUNIT_ASSERT_EQUAL(*it, docId);
            ++it;
        }
        CPPUNIT_ASSERT(it == answer.end());

        // for (std::set<docid_t>::const_iterator it = answer.begin();
        //      it != answer.end(); ++it)
        // {
        //     docid_t docId = pDocFreqs->skipTo(*it);
        //     CPPUNIT_ASSERT_EQUAL(*it, docId);
        // }

        // docid_t docId = pDocFreqs->skipTo(NUM_DOCS + 0);
        // CPPUNIT_ASSERT_EQUAL((docid_t)NUM_DOCS + 1, docId);
        // docId = pDocFreqs->skipTo(NUM_DOCS + 500);
        // CPPUNIT_ASSERT_EQUAL((docid_t)NUM_DOCS + 501, docId);
        // docId = pDocFreqs->skipTo(NUM_DOCS + 505);
        // CPPUNIT_ASSERT_EQUAL((docid_t)NUM_DOCS + 506, docId);
        // docId = pDocFreqs->skipTo(2 * NUM_DOCS - 1);
        // CPPUNIT_ASSERT_EQUAL((docid_t)INVALID_DOCID, docId);
    }
}