Example #1
    bool Pipeline::run(BSONObjBuilder &result, string &errmsg) {
        massert(16600, "should not have an empty pipeline",
                !sources.empty());

        /* chain together the sources we found */
        DocumentSource* prevSource = sources.front().get();
        for(SourceContainer::iterator iter(sources.begin() + 1),
                                      listEnd(sources.end());
                                    iter != listEnd;
                                    ++iter) {
            intrusive_ptr<DocumentSource> pTemp(*iter);
            pTemp->setSource(prevSource);
            prevSource = pTemp.get();
        }

        /*
          Iterate through the resulting documents, and add them to the result.
          We do this even if we're doing an explain, in order to capture
          the document counts and other stats.  However, we don't capture
          the result documents for explain.
        */
        if (explain) {
            if (!pCtx->getInRouter())
                writeExplainShard(result);
            else {
                writeExplainMongos(result);
            }
        }
        else {
            // the array in which the aggregation results reside
            // can't use subArrayStart() due to error handling
            BSONArrayBuilder resultArray;
            DocumentSource* finalSource = sources.back().get();
            for(bool hasDoc = !finalSource->eof(); hasDoc; hasDoc = finalSource->advance()) {
                Document pDocument(finalSource->getCurrent());

                /* add the document to the result set */
                BSONObjBuilder documentBuilder (resultArray.subobjStart());
                pDocument->toBson(&documentBuilder);
                documentBuilder.doneFast();
                // if the result object would be too large, assert; the extra 1KB leaves room for headers
                uassert(16389,
                        str::stream() << "aggregation result exceeds maximum document size ("
                                      << BSONObjMaxUserSize / (1024 * 1024) << "MB)",
                        resultArray.len() < BSONObjMaxUserSize - 1024);
            }

            resultArray.done();
            result.appendArray("result", resultArray.arr());
        }

        return true;
    }
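
Example #1 does two things: it first links the pipeline stages so that each one pulls from its predecessor via setSource(), and it then drains the final stage into a BSON array while keeping the accumulated result below BSONObjMaxUserSize (16MB), with about 1KB of headroom for headers. The standalone sketch below mirrors that structure; Stage, Doc, runChain and kMaxResultBytes are illustrative stand-ins, not MongoDB's actual classes.

#include <cstddef>
#include <memory>
#include <stdexcept>
#include <string>
#include <vector>

using Doc = std::string;

// Illustrative stage interface: the eof()/getCurrent()/advance() protocol used above.
struct Stage {
    virtual ~Stage() = default;
    virtual bool eof() = 0;        // no more documents?
    virtual Doc getCurrent() = 0;  // document under the cursor
    virtual bool advance() = 0;    // move the cursor; false once exhausted
    void setSource(Stage* source) { _source = source; }
protected:
    Stage* _source = nullptr;      // concrete stages would pull their input from here
};

constexpr std::size_t kMaxResultBytes = 16 * 1024 * 1024;  // stand-in for BSONObjMaxUserSize

std::vector<Doc> runChain(std::vector<std::unique_ptr<Stage>>& stages) {
    // Wire the stages together as the loop above does: stage i reads from stage i-1.
    for (std::size_t i = 1; i < stages.size(); ++i)
        stages[i]->setSource(stages[i - 1].get());

    // Drain the last stage, checking the accumulated size after every document.
    std::vector<Doc> results;
    std::size_t totalBytes = 0;
    Stage* finalStage = stages.back().get();
    for (bool hasDoc = !finalStage->eof(); hasDoc; hasDoc = finalStage->advance()) {
        Doc d = finalStage->getCurrent();
        totalBytes += d.size();
        // Keep roughly 1KB of headroom, mirroring the uassert(16389, ...) above.
        if (totalBytes >= kMaxResultBytes - 1024)
            throw std::runtime_error("result exceeds maximum document size (16MB)");
        results.push_back(std::move(d));
    }
    return results;
}
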
Example #2
    void Pipeline::run(BSONObjBuilder& result) {
        /*
          Iterate through the resulting documents, and add them to the result.
          We do this even if we're doing an explain, in order to capture
          the document counts and other stats.  However, we don't capture
          the result documents for explain.
        */
        if (explain) {
            if (!pCtx->getInRouter())
                writeExplainShard(result);
            else {
                writeExplainMongos(result);
            }
        }
        else {
            // the array in which the aggregation results reside
            // can't use subArrayStart() due to error handling
            BSONArrayBuilder resultArray;
            DocumentSource* finalSource = sources.back().get();
            for (bool hasDoc = !finalSource->eof(); hasDoc; hasDoc = finalSource->advance()) {
                Document pDocument(finalSource->getCurrent());

                /* add the document to the result set */
                BSONObjBuilder documentBuilder (resultArray.subobjStart());
                pDocument->toBson(&documentBuilder);
                documentBuilder.doneFast();
                // if the result object would be too large, assert; the extra 1KB leaves room for headers
                uassert(16389,
                        str::stream() << "aggregation result exceeds maximum document size ("
                                      << BSONObjMaxUserSize / (1024 * 1024) << "MB)",
                        resultArray.len() < BSONObjMaxUserSize - 1024);
            }

            resultArray.done();
            result.appendArray("result", resultArray.arr());
        }
    }
Example #3
void Pipeline::run(BSONObjBuilder& result) {
    // We should not get here in the explain case.
    verify(!pCtx->isExplain);

    // the array in which the aggregation results reside
    // can't use subArrayStart() due to error handling
    BSONArrayBuilder resultArray;
    DocumentSource* finalSource = _sources.back().get();
    while (boost::optional<Document> next = finalSource->getNext()) {
        // add the document to the result set
        BSONObjBuilder documentBuilder(resultArray.subobjStart());
        next->toBson(&documentBuilder);
        documentBuilder.doneFast();
        // if the result object would be too large, assert; the extra 1KB leaves room for headers
        uassert(16389,
                str::stream() << "aggregation result exceeds maximum document size ("
                              << BSONObjMaxUserSize / (1024 * 1024)
                              << "MB)",
                resultArray.len() < BSONObjMaxUserSize - 1024);
    }

    resultArray.done();
    result.appendArray("result", resultArray.arr());
}
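
Example #3 is a later revision of the same function: explain is handled before run() is ever reached (hence the verify(!pCtx->isExplain)), and the eof()/getCurrent()/advance() protocol has been collapsed into a single getNext() that returns boost::optional<Document>. A minimal standalone sketch of that pull model, using std::optional in place of boost and illustrative names rather than MongoDB's:

#include <optional>
#include <string>
#include <vector>

using Doc = std::string;

struct Stage {
    virtual ~Stage() = default;
    // One call replaces the eof()/getCurrent()/advance() triple of the older interface:
    // it yields the next document, or nothing once the stage is exhausted.
    virtual std::optional<Doc> getNext() = 0;
};

std::vector<Doc> drain(Stage& finalStage) {
    std::vector<Doc> results;
    while (std::optional<Doc> next = finalStage.getNext())
        results.push_back(std::move(*next));
    return results;
}
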
Example #4
void TrecDocumentProcessor::doProcessFile(const string& sFilePath,
        DocumentSource& docSource)
{
    FX_DEBUG("Process file: [%s]", sFilePath.c_str());
    if (!docSource.toBeContinued())
    {
        // a new raw document
        if (!initDocumentReader(sFilePath))
        {
            FX_LOG(ERROR, "Skip documents: init document reader for [%s] FAILED.", 
                   sFilePath.c_str());
            return;
        }
    }

    if (m_pDocReader.isNull())
    {
        FX_LOG(WARN, "Skip document: document reader for [%s] did not initialize.",
               sFilePath.c_str());
        return;
    }

    DocumentPtr pDoc = docSource.acquireDocument("trec");
    pDoc->setAction(Document::AT_ADD);

    const char* pReadUpTo = m_pBuffer + m_nReadUpTo;
    while (!m_pDocReader->isEof() || m_nReadUpTo < m_nBytesInBuffer)
    {
        bool bNotEmpty = false;
        const char* pDocEnd = NULL;
        size_t nDocLen = processDocTag(pReadUpTo, pDocEnd, m_tagConsumer.getDocTag());
        if (nDocLen == 0)
        {
            if (!(pReadUpTo = ensureBuffer(pReadUpTo)))
            {
                docSource.setToBeContinued(false);
                m_pDocReader.reset();
                break;
            }
            m_nReadUpTo = (pReadUpTo - m_pBuffer);
            continue;
        }

        m_tagConsumer.reset(nDocLen);
    
        while (m_tagConsumer.hasNext())
        {
            if (m_tagConsumer.consumeNext(pReadUpTo, pDocEnd, pDoc))
            {
                bNotEmpty = true;
            }
        }

        skipBlank(pDocEnd);

        m_nReadUpTo = pDocEnd - m_pBuffer;

        if (!bNotEmpty)
        {
            FX_LOG(INFO, "Got an empty document");
            continue;
        }

        if (m_nReadUpTo >= m_nBytesInBuffer && m_pDocReader->isEof())
        {
            FX_DEBUG("Completed the processing of  file: [%s], "
                     "read to: [%u], in buffer size: [%u], isEof[%d]",
                     sFilePath.c_str(), m_nReadUpTo, m_nBytesInBuffer, m_pDocReader->isEof());
            docSource.setToBeContinued(false);
            m_pDocReader.reset();
        }
        else 
        {
            FX_DEBUG("The processing of  file: [%s] need to be continued, "
                     "read to: [%u], in buffer size: [%u], isEof[%d]",
                     sFilePath.c_str(), m_nReadUpTo, m_nBytesInBuffer, m_pDocReader->isEof());

            docSource.setToBeContinued(true);
        }
        return;
    } // end while 
}
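
Example #4 uses a different DocumentSource class: a document-processing pipeline for TREC collections rather than MongoDB's aggregation pipeline. The processor parses at most one document per call and records, via setToBeContinued(), whether the same file still holds unread data, so the caller knows to hand it back for another pass. A rough, self-contained sketch of that resume contract, with hypothetical names (Source, ChunkedProcessor) and placeholder parsing:

#include <cstddef>
#include <string>

// Stand-in for the toBeContinued()/setToBeContinued() flag on the source.
struct Source {
    bool continued = false;
};

class ChunkedProcessor {
public:
    // Parse at most one document per call; ask to be handed the same file
    // again while unread data remains, as the example above does.
    void processFile(const std::string& path, Source& source) {
        if (!source.continued)
            _offset = 0;  // fresh file: start reading from the beginning

        _offset += parseOneDocument(path, _offset);

        source.continued = (_offset < fileSize(path));
    }

private:
    // Placeholders: a real processor would read, buffer and parse here.
    std::size_t parseOneDocument(const std::string&, std::size_t) { return 1; }
    std::size_t fileSize(const std::string&) { return 1; }

    std::size_t _offset = 0;
};
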
Example #5
    bool Pipeline::run(BSONObjBuilder &result, string &errmsg,
                       const intrusive_ptr<DocumentSource> &pInputSource) {

        /* chain together the sources we found */
        DocumentSource *pSource = pInputSource.get();
        for(SourceVector::iterator iter(sourceVector.begin()),
                listEnd(sourceVector.end()); iter != listEnd; ++iter) {
            intrusive_ptr<DocumentSource> pTemp(*iter);
            pTemp->setSource(pSource);
            pSource = pTemp.get();
        }
        /* pSource is left pointing at the last source in the chain */

        /*
          Iterate through the resulting documents, and add them to the result.
          We do this even if we're doing an explain, in order to capture
          the document counts and other stats.  However, we don't capture
          the result documents for explain.

          We wrap all the BSONObjBuilder calls with a try/catch in case the
          objects get too large and cause an exception.
        */
        try {
            if (explain) {
                if (!pCtx->getInRouter())
                    writeExplainShard(result, pInputSource);
                else {
                    writeExplainMongos(result, pInputSource);
                }
            }
            else
            {
                BSONArrayBuilder resultArray; // where we'll stash the results
                for(bool hasDocument = !pSource->eof(); hasDocument;
                    hasDocument = pSource->advance()) {
                    intrusive_ptr<Document> pDocument(pSource->getCurrent());

                    /* add the document to the result set */
                    BSONObjBuilder documentBuilder;
                    pDocument->toBson(&documentBuilder);
                    resultArray.append(documentBuilder.done());
                }

                result.appendArray("result", resultArray.arr());
            }
         } catch(AssertionException &ae) {
            /* 
               If it's not the "object too large" error, rethrow.
               At time of writing, that error code comes from
               mongo/src/mongo/bson/util/builder.h
            */
            if (ae.getCode() != 13548)
                throw;

            /* throw the nicer human-readable error */
            uassert(16029, str::stream() <<
                    "aggregation result exceeds maximum document size limit ("
                    << (BSONObjMaxUserSize / (1024 * 1024)) << "MB)",
                    false);
         }

        return true;
    }
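
Example #5 handles oversized results differently from the later versions: instead of checking the array length after each append, it wraps the BSONObjBuilder calls in try/catch, catches the builder's own "object too large" assertion (code 13548) and re-raises it as the friendlier uassert 16029. A generic sketch of that catch-and-rewrap pattern, with hypothetical names (BufferTooLargeError, buildResult) rather than MongoDB types:

#include <cstddef>
#include <stdexcept>
#include <string>
#include <vector>

// Low-level error thrown while appending; stands in for the builder's
// "object too large" assertion mentioned above.
struct BufferTooLargeError : std::runtime_error {
    using std::runtime_error::runtime_error;
};

constexpr std::size_t kMaxBytes = 16 * 1024 * 1024;  // stand-in for BSONObjMaxUserSize

// Append documents into one result buffer; throws the low-level error on overflow.
std::string buildResult(const std::vector<std::string>& docs) {
    std::string out;
    for (const std::string& d : docs) {
        if (out.size() + d.size() > kMaxBytes)
            throw BufferTooLargeError("internal buffer limit exceeded");
        out += d;
    }
    return out;
}

std::string runAndRewrap(const std::vector<std::string>& docs) {
    try {
        return buildResult(docs);
    } catch (const BufferTooLargeError&) {
        // Re-raise as the clearer, user-facing message, mirroring uassert(16029, ...) above.
        throw std::runtime_error(
            "aggregation result exceeds maximum document size limit (16MB)");
    }
}
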