Exemple #1
0
/**
 * Add one document to the index.
 *
 * The document is marked with the Document::AT_ADD action, passed to
 * m_pDocConsumer, and m_nOpCountSinceLastCommit is incremented. If
 * autoCommit() then returns true, commit() is called before returning.
 *
 * @param pDocument document to add; it is dereferenced without a null check
 */
void IndexWriter::addDocument(const DocumentPtr& pDocument)
{
    FX_DEBUG("Add one document");

    // Tag the document as an addition before handing it to the consumer.
    pDocument->setAction(Document::AT_ADD);
    m_pDocConsumer->consume(pDocument);
    ++m_nOpCountSinceLastCommit;

    if (!autoCommit())
    {
        return;
    }

    // autoCommit() reported true: commit right away.
    commit();
}
/**
 * Read the next non-empty document of a TREC file into a document acquired
 * from @p docSource.
 *
 * Each call yields at most one document. When @p docSource is not flagged
 * "to be continued", a reader is first initialized for @p sFilePath;
 * otherwise parsing resumes at offset m_nReadUpTo of m_pBuffer. Documents in
 * which no tag was consumed are logged and skipped.
 *
 * On return the continuation flag of @p docSource tells the caller whether
 * to call again for the same file:
 *  - true:  a document was filled and more input remains;
 *  - false: the input is exhausted (the reader is released as well).
 * If the reader cannot be initialized, the function returns early without
 * touching the flag.
 *
 * @param sFilePath path of the file to process
 * @param docSource source that provides the document to fill and carries
 *                  the continuation flag
 */
void TrecDocumentProcessor::doProcessFile(const string& sFilePath, 
        DocumentSource& docSource)
{
    FX_DEBUG("Process file: [%s]", sFilePath.c_str());
    if (!docSource.toBeContinued())
    {
        // Not resuming a previous call: set up a reader for this file.
        if (!initDocumentReader(sFilePath))
        {
            FX_LOG(ERROR, "Skip documents: init document reader for [%s] FAILED.", 
                   sFilePath.c_str());
            return;
        }
    }

    if (m_pDocReader.isNull())
    {
        FX_LOG(WARN, "Skip document: document reader for [%s] was not initialized.", 
               sFilePath.c_str());
        return;
    }

    DocumentPtr pDoc = docSource.acquireDocument("trec");
    pDoc->setAction(Document::AT_ADD);

    // Local parse cursor; kept equal to m_pBuffer + m_nReadUpTo whenever the
    // loop starts a new iteration.
    const char* pReadUpTo = m_pBuffer + m_nReadUpTo;
    while (!m_pDocReader->isEof() || m_nReadUpTo < m_nBytesInBuffer)
    {
        bool bNotEmpty = false;
        // NOTE(review): pDocEnd is NULL here and used after the call, so
        // processDocTag presumably sets it by reference - confirm.
        const char* pDocEnd = NULL;
        size_t nDocLen = processDocTag(pReadUpTo, pDocEnd, m_tagConsumer.getDocTag());
        if (nDocLen == 0)
        {
            // No document found at the cursor: ask for more buffered data.
            pReadUpTo = ensureBuffer(pReadUpTo);
            if (!pReadUpTo)
            {
                // Nothing more to read; finish below.
                break;
            }
            m_nReadUpTo = (pReadUpTo - m_pBuffer);
            continue;
        }

        m_tagConsumer.reset(nDocLen);

        while (m_tagConsumer.hasNext())
        {
            if (m_tagConsumer.consumeNext(pReadUpTo, pDocEnd, pDoc))
            {
                bNotEmpty = true;
            }
        }

        skipBlank(pDocEnd);

        m_nReadUpTo = pDocEnd - m_pBuffer;

        if (!bNotEmpty)
        {
            FX_LOG(INFO, "Get empty document");
            // Resume exactly where a fresh call would (m_pBuffer +
            // m_nReadUpTo), so the cursor and the stored offset stay in sync.
            pReadUpTo = pDocEnd;
            continue;
        }

        if (m_nReadUpTo >= m_nBytesInBuffer && m_pDocReader->isEof())
        {
            FX_DEBUG("Completed the processing of  file: [%s], "
                     "read to: [%u], in buffer size: [%u], isEof[%d]",
                     sFilePath.c_str(), m_nReadUpTo, m_nBytesInBuffer, m_pDocReader->isEof());
            docSource.setToBeContinued(false);
            m_pDocReader.reset();
        }
        else 
        {
            FX_DEBUG("The processing of  file: [%s] need to be continued, "
                     "read to: [%u], in buffer size: [%u], isEof[%d]",
                     sFilePath.c_str(), m_nReadUpTo, m_nBytesInBuffer, m_pDocReader->isEof());

            docSource.setToBeContinued(true);
        }
        return;
    } // end while 

    // Reached when the input is exhausted without yielding a document:
    // either ensureBuffer() returned null, or the loop condition became
    // false (e.g. the last document in the file was empty). Clear the
    // continuation flag and release the reader in both cases so a stale
    // "to be continued" state cannot survive this call.
    docSource.setToBeContinued(false);
    m_pDocReader.reset();
}