void IndexWriter::addDocument(const DocumentPtr& pDocument) { FX_DEBUG("Add one document"); pDocument->setAction(Document::AT_ADD); m_pDocConsumer->consume(pDocument); ++m_nOpCountSinceLastCommit; if (autoCommit()) { // Perform commit automatically commit(); } }
void TrecDocumentProcessor::doProcessFile(const string& sFilePath, DocumentSource& docSource) { FX_DEBUG("Process file: [%s]", sFilePath.c_str()); if (!docSource.toBeContinued()) { // a new raw document if (!initDocumentReader(sFilePath)) { FX_LOG(ERROR, "Skip documents: init document reader for [%s] FAILED.", sFilePath.c_str()); return; } } if (m_pDocReader.isNull()) { FX_LOG(WARN, "Skip document: document reader for [%s] FAILED did not initialize.", sFilePath.c_str()); return; } DocumentPtr pDoc = docSource.acquireDocument("trec"); pDoc->setAction(Document::AT_ADD); const char* pReadUpTo = m_pBuffer + m_nReadUpTo; while (!m_pDocReader->isEof() || m_nReadUpTo < m_nBytesInBuffer) { bool bNotEmty = false; const char* pDocEnd = NULL; size_t nDocLen = processDocTag(pReadUpTo, pDocEnd, m_tagConsumer.getDocTag()); if (nDocLen == 0) { if (!(pReadUpTo = ensureBuffer(pReadUpTo))) { docSource.setToBeContinued(false); m_pDocReader.reset(); break; } m_nReadUpTo = (pReadUpTo - m_pBuffer); continue; } m_tagConsumer.reset(nDocLen); while (m_tagConsumer.hasNext()) { if (m_tagConsumer.consumeNext(pReadUpTo, pDocEnd, pDoc)) { bNotEmty = true; } } skipBlank(pDocEnd); m_nReadUpTo = pDocEnd - m_pBuffer; if (!bNotEmty) { FX_LOG(INFO, "Get empty document"); continue; } if (m_nReadUpTo >= m_nBytesInBuffer && m_pDocReader->isEof()) { FX_DEBUG("Completed the processing of file: [%s], " "read to: [%u], in buffer size: [%u], isEof[%d]", sFilePath.c_str(), m_nReadUpTo, m_nBytesInBuffer, m_pDocReader->isEof()); docSource.setToBeContinued(false); m_pDocReader.reset(); } else { FX_DEBUG("The processing of file: [%s] need to be continued, " "read to: [%u], in buffer size: [%u], isEof[%d]", sFilePath.c_str(), m_nReadUpTo, m_nBytesInBuffer, m_pDocReader->isEof()); docSource.setToBeContinued(true); } return; } // end while }