Example #1
    bool Pipeline::run(BSONObjBuilder &result, string &errmsg) {
        massert(16600, "should not have an empty pipeline",
                !sources.empty());

        /* chain together the sources we found */
        DocumentSource* prevSource = sources.front().get();
        for(SourceContainer::iterator iter(sources.begin() + 1),
                                      listEnd(sources.end());
                                    iter != listEnd;
                                    ++iter) {
            intrusive_ptr<DocumentSource> pTemp(*iter);
            pTemp->setSource(prevSource);
            prevSource = pTemp.get();
        }

        /*
          Iterate through the resulting documents, and add them to the result.
          We do this even if we're doing an explain, in order to capture
          the document counts and other stats.  However, we don't capture
          the result documents for explain.
        */
        if (explain) {
            if (!pCtx->getInRouter()) {
                writeExplainShard(result);
            }
            else {
                writeExplainMongos(result);
            }
        }
        else {
            // the array in which the aggregation results reside
            // can't use subArrayStart() due to error handling
            BSONArrayBuilder resultArray;
            DocumentSource* finalSource = sources.back().get();
            for(bool hasDoc = !finalSource->eof(); hasDoc; hasDoc = finalSource->advance()) {
                Document pDocument(finalSource->getCurrent());

                /* add the document to the result set */
                BSONObjBuilder documentBuilder (resultArray.subobjStart());
                pDocument->toBson(&documentBuilder);
                documentBuilder.doneFast();
                // if the object would be too large, assert; the extra 1KB allows for headers
                uassert(16389,
                        str::stream() << "aggregation result exceeds maximum document size ("
                                      << BSONObjMaxUserSize / (1024 * 1024) << "MB)",
                        resultArray.len() < BSONObjMaxUserSize - 1024);
            }

            resultArray.done();
            result.appendArray("result", resultArray.arr());
        }

        return true;
    }
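
The result loop above relies on a pull-style cursor protocol: eof() reports whether a current document exists, getCurrent() reads it, and advance() steps forward and reports whether another document remains. Below is a minimal standalone sketch of that contract; the Source interface mirrors the snippet, while VectorSource and drain() are hypothetical stand-ins, not MongoDB code.

    #include <cstddef>
    #include <iostream>
    #include <string>
    #include <utility>
    #include <vector>

    // Minimal stand-in for the DocumentSource cursor contract used above.
    class Source {
    public:
        virtual ~Source() {}
        virtual bool eof() = 0;                  // true when there is no current document
        virtual bool advance() = 0;              // step forward; true if a document remains
        virtual std::string getCurrent() = 0;    // read the current document
    };

    // Hypothetical source that replays a fixed vector of "documents".
    class VectorSource : public Source {
    public:
        explicit VectorSource(std::vector<std::string> docs)
            : docs_(std::move(docs)), pos_(0) {}
        bool eof() override { return pos_ >= docs_.size(); }
        bool advance() override { ++pos_; return !eof(); }
        std::string getCurrent() override { return docs_[pos_]; }
    private:
        std::vector<std::string> docs_;
        std::size_t pos_;
    };

    // Mirrors the shape of the drain loop in Pipeline::run().
    void drain(Source* finalSource) {
        for (bool hasDoc = !finalSource->eof(); hasDoc; hasDoc = finalSource->advance())
            std::cout << finalSource->getCurrent() << '\n';
    }

    int main() {
        VectorSource src({"{a:1}", "{a:2}", "{a:3}"});
        drain(&src);
        return 0;
    }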
Example #2
 void Pipeline::Optimizations::Local::moveMatchBeforeSort(Pipeline* pipeline) {
     SourceContainer& sources = pipeline->sources;
     for (size_t srcn = sources.size(), srci = 1; srci < srcn; ++srci) {
         intrusive_ptr<DocumentSource> &pSource = sources[srci];
         if (dynamic_cast<DocumentSourceMatch *>(pSource.get())) {
             intrusive_ptr<DocumentSource> &pPrevious = sources[srci - 1];
             if (dynamic_cast<DocumentSourceSort *>(pPrevious.get())) {
                 /* swap this item with the previous */
                 intrusive_ptr<DocumentSource> pTemp(pPrevious);
                 pPrevious = pSource;
                 pSource = pTemp;
             }
         }
     }
 }
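
The pass above swaps a $match that immediately follows a $sort, using dynamic_cast for stage-type dispatch. Here is a toy rendition of the same single-pass reordering over plain tagged stages; all class names below are illustrative, not MongoDB's.

    #include <cstddef>
    #include <iostream>
    #include <memory>
    #include <utility>
    #include <vector>

    struct Stage { virtual ~Stage() {} virtual const char* name() const = 0; };
    struct Match : Stage { const char* name() const override { return "$match"; } };
    struct Sort  : Stage { const char* name() const override { return "$sort"; } };
    struct Limit : Stage { const char* name() const override { return "$limit"; } };

    // Single left-to-right pass: swap each $match with a $sort directly before
    // it, mirroring moveMatchBeforeSort() above.
    void moveMatchBeforeSort(std::vector<std::unique_ptr<Stage>>& stages) {
        for (std::size_t i = 1; i < stages.size(); ++i) {
            if (dynamic_cast<Match*>(stages[i].get()) &&
                dynamic_cast<Sort*>(stages[i - 1].get())) {
                std::swap(stages[i - 1], stages[i]);
            }
        }
    }

    int main() {
        std::vector<std::unique_ptr<Stage>> p;
        p.emplace_back(new Sort);
        p.emplace_back(new Match);
        p.emplace_back(new Limit);
        moveMatchBeforeSort(p);
        for (const auto& s : p) std::cout << s->name() << ' ';  // $match $sort $limit
        std::cout << '\n';
        return 0;
    }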
Example #3
    void Pipeline::stitch() {
        massert(16600, "should not have an empty pipeline",
                !sources.empty());

        /* chain together the sources we found */
        DocumentSource* prevSource = sources.front().get();
        for(SourceContainer::iterator iter(sources.begin() + 1),
                                      listEnd(sources.end());
                                    iter != listEnd;
                                    ++iter) {
            intrusive_ptr<DocumentSource> pTemp(*iter);
            pTemp->setSource(prevSource);
            prevSource = pTemp.get();
        }
    }
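
stitch() factors the chaining out of run() (compare Example #1): each stage is told to pull from its predecessor via setSource(). A bare-bones illustration of that wiring follows; the Stage type here is hypothetical.

    #include <cstddef>
    #include <iostream>
    #include <vector>

    // Each stage pulls its input from a single upstream source.
    struct Stage {
        const char* name;
        Stage* source = nullptr;
        explicit Stage(const char* n) : name(n) {}
        void setSource(Stage* s) { source = s; }
    };

    // Mirrors Pipeline::stitch(): link stages[1..n) each to its predecessor.
    void stitch(std::vector<Stage*>& stages) {
        Stage* prev = stages.front();
        for (std::size_t i = 1; i < stages.size(); ++i) {
            stages[i]->setSource(prev);
            prev = stages[i];
        }
    }

    int main() {
        Stage a("$match"), b("$sort"), c("$limit");
        std::vector<Stage*> stages{&a, &b, &c};
        stitch(stages);
        // Walk the chain backwards from the last stage to show the wiring.
        for (Stage* s = stages.back(); s; s = s->source)
            std::cout << s->name << (s->source ? " <- " : "\n");
        return 0;
    }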
Example #4
 void Pipeline::Optimizations::Local::moveMatchBeforeSort(Pipeline* pipeline) {
     // TODO Keep moving matches across multiple sorts as moveLimitBeforeSkip does below.
     // TODO Check sort for limit. Not an issue currently due to the order in which
     // optimizations are applied, but it should be fixed.
     SourceContainer& sources = pipeline->sources;
     for (size_t srcn = sources.size(), srci = 1; srci < srcn; ++srci) {
         intrusive_ptr<DocumentSource> &pSource = sources[srci];
         DocumentSourceMatch* match = dynamic_cast<DocumentSourceMatch *>(pSource.get());
         if (match && !match->isTextQuery()) {
             intrusive_ptr<DocumentSource> &pPrevious = sources[srci - 1];
             if (dynamic_cast<DocumentSourceSort *>(pPrevious.get())) {
                 /* swap this item with the previous */
                 intrusive_ptr<DocumentSource> pTemp(pPrevious);
                 pPrevious = pSource;
                 pSource = pTemp;
             }
         }
     }
 }
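
This revision adds the isTextQuery() guard (a $match that scores text search must stay after its input) but, per the first TODO, a single pass still moves a $match across only one $sort: with [$sort, $sort, $match], one pass yields [$sort, $match, $sort] and stops. One hedged way to realize the TODO is to repeat the scan until a full pass makes no swap; the sketch below shows that fixed-point loop over plain stage tags and is a sketch of the suggested fix, not MongoDB's code.

    #include <cstddef>
    #include <iostream>
    #include <utility>
    #include <vector>

    enum class Kind { Match, Sort, Other };

    // Repeat the match-before-sort pass until it reaches a fixed point, so a
    // $match can bubble across several adjacent $sort stages (the behaviour the
    // TODO above asks for).
    void moveMatchesBeforeSorts(std::vector<Kind>& stages) {
        bool changed = true;
        while (changed) {
            changed = false;
            for (std::size_t i = 1; i < stages.size(); ++i) {
                if (stages[i] == Kind::Match && stages[i - 1] == Kind::Sort) {
                    std::swap(stages[i - 1], stages[i]);
                    changed = true;
                }
            }
        }
    }

    int main() {
        // sort, sort, match  ->  match, sort, sort
        std::vector<Kind> p{Kind::Sort, Kind::Sort, Kind::Match};
        moveMatchesBeforeSorts(p);
        for (Kind k : p)
            std::cout << (k == Kind::Match ? "$match " : k == Kind::Sort ? "$sort " : "$other ");
        std::cout << '\n';
        return 0;
    }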
Example #5
    intrusive_ptr<Pipeline> Pipeline::parseCommand(
        string &errmsg, BSONObj &cmdObj,
        const intrusive_ptr<ExpressionContext> &pCtx) {
        intrusive_ptr<Pipeline> pPipeline(new Pipeline(pCtx));
        vector<BSONElement> pipeline;

        /* gather the specification for the aggregation */
        for(BSONObj::iterator cmdIterator = cmdObj.begin();
                cmdIterator.more(); ) {
            BSONElement cmdElement(cmdIterator.next());
            const char *pFieldName = cmdElement.fieldName();

            // ignore top-level fields prefixed with $. They are for the command processor, not us.
            if (pFieldName[0] == '$') {
                continue;
            }

            /* look for the aggregation command */
            if (!strcmp(pFieldName, commandName)) {
                pPipeline->collectionName = cmdElement.String();
                continue;
            }

            /* check for the collection name */
            if (!strcmp(pFieldName, pipelineName)) {
                pipeline = cmdElement.Array();
                continue;
            }

            /* check for explain option */
            if (!strcmp(pFieldName, explainName)) {
                pPipeline->explain = cmdElement.Bool();
                continue;
            }

            /* if the request came from the router, we're in a shard */
            if (!strcmp(pFieldName, fromRouterName)) {
                pCtx->setInShard(cmdElement.Bool());
                continue;
            }

            /* check for debug options */
            if (!strcmp(pFieldName, splitMongodPipelineName)) {
                pPipeline->splitMongodPipeline = true;
                continue;
            }

            /* we didn't recognize a field in the command */
            ostringstream sb;
            sb << "unrecognized field \"" << cmdElement.fieldName() << "\"";
            errmsg = sb.str();
            return intrusive_ptr<Pipeline>();
        }

        /*
          If we get here, we've harvested the fields we expect for a pipeline.

          Set up the specified document source pipeline.
        */
        SourceContainer& sources = pPipeline->sources; // shorthand

        /* iterate over the steps in the pipeline */
        const size_t nSteps = pipeline.size();
        for(size_t iStep = 0; iStep < nSteps; ++iStep) {
            /* pull out the pipeline element as an object */
            BSONElement pipeElement(pipeline[iStep]);
            uassert(15942, str::stream() << "pipeline element " <<
                    iStep << " is not an object",
                    pipeElement.type() == Object);
            BSONObj bsonObj(pipeElement.Obj());

            // Parse a pipeline stage from 'bsonObj'.
            uassert(16435, "A pipeline stage specification object must contain exactly one field.",
                    bsonObj.nFields() == 1);
            BSONElement stageSpec = bsonObj.firstElement();
            const char* stageName = stageSpec.fieldName();

            // Create a DocumentSource pipeline stage from 'stageSpec'.
            StageDesc key;
            key.pName = stageName;
            const StageDesc* pDesc = (const StageDesc*)
                    bsearch(&key, stageDesc, nStageDesc, sizeof(StageDesc),
                            stageDescCmp);

            uassert(16436,
                    str::stream() << "Unrecognized pipeline stage name: '" << stageName << "'",
                    pDesc);
            intrusive_ptr<DocumentSource> stage = (*pDesc->pFactory)(&stageSpec, pCtx);
            verify(stage);
            stage->setPipelineStep(iStep);
            sources.push_back(stage);
        }

        /* if there aren't any pipeline stages, there's nothing more to do */
        if (sources.empty())
            return pPipeline;

        /*
          Move filters up where possible.

          CW TODO -- move filters past projections where possible, noting
          the corresponding field renamings.
        */

        /*
          Wherever there is a match immediately following a sort, swap them.
          This means we sort fewer items.  Neither changes the documents in
          the stream, so this transformation shouldn't affect the result.

          We do this first, because then when we coalesce operators below,
          any adjacent matches will be combined.
         */
        for (size_t srcn = sources.size(), srci = 1; srci < srcn; ++srci) {
            intrusive_ptr<DocumentSource> &pSource = sources[srci];
            if (dynamic_cast<DocumentSourceMatch *>(pSource.get())) {
                intrusive_ptr<DocumentSource> &pPrevious = sources[srci - 1];
                if (dynamic_cast<DocumentSourceSort *>(pPrevious.get())) {
                    /* swap this item with the previous */
                    intrusive_ptr<DocumentSource> pTemp(pPrevious);
                    pPrevious = pSource;
                    pSource = pTemp;
                }
            }
        }

        /* Move limits in front of skips. This is more optimal for sharding
         * since currently, we can only split the pipeline at a single source
         * and it is better to limit the results coming from each shard
         */
        for(int i = sources.size() - 1; i >= 1 /* not looking at 0 */; i--) {
            DocumentSourceLimit* limit =
                dynamic_cast<DocumentSourceLimit*>(sources[i].get());
            DocumentSourceSkip* skip =
                dynamic_cast<DocumentSourceSkip*>(sources[i-1].get());
            if (limit && skip) {
                // Increase limit by skip since the skipped docs now pass through the $limit
                limit->setLimit(limit->getLimit() + skip->getSkip());
                swap(sources[i], sources[i-1]);

                // Start at back again. This is needed to handle cases with more than 1 $limit
                // (S means skip, L means limit)
                //
                // These two would work without second pass (assuming back to front ordering)
                // SL   -> LS
                // SSL  -> LSS
                //
                // The following cases need a second pass to handle the second limit
                // SLL  -> LLS
                // SSLL -> LLSS
                // SLSL -> LLSS
                i = sources.size(); // decremented before next pass
            }
        }

        /*
          Coalesce adjacent filters where possible.  Two adjacent filters
          are equivalent to one filter whose predicate is the conjunction of
          the two original filters' predicates.  For now, capture this by
          giving any DocumentSource the option to absorb its successor; this
          will also allow adjacent projections to coalesce when possible.

          Run through the DocumentSources, and give each one the opportunity
          to coalesce with its successor.  If successful, remove the
          successor.

          Move all document sources to a temporary list.
        */
        SourceContainer tempSources;
        sources.swap(tempSources);

        /* move the first one to the final list */
        sources.push_back(tempSources[0]);

        /* run through the sources, coalescing them or keeping them */
        for (size_t tempn = tempSources.size(), tempi = 1; tempi < tempn; ++tempi) {
            /*
              If we can't coalesce the source with the last, then move it
              to the final list, and make it the new last.  (If we succeeded,
              then we're still on the same last, and there's no need to move
              or do anything with the source -- the destruction of tempSources
              will take care of the rest.)
            */
            intrusive_ptr<DocumentSource> &pLastSource = sources.back();
            intrusive_ptr<DocumentSource> &pTemp = tempSources[tempi];
            verify(pTemp && pLastSource);
            if (!pLastSource->coalesce(pTemp))
                sources.push_back(pTemp);
        }

        /* optimize the elements in the pipeline */
        for(SourceContainer::iterator iter(sources.begin()),
                                      listEnd(sources.end());
                                    iter != listEnd;
                                    ++iter) {
            if (!*iter) {
                errmsg = "Pipeline received empty document as argument";
                return intrusive_ptr<Pipeline>();
            }

            (*iter)->optimize();
        }

        return pPipeline;
    }
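
The limit-before-skip pass rests on a small identity: skipping s documents and then taking l is the same as taking l + s and then skipping s, which is why setLimit(limit + skip) precedes the swap. A quick standalone check of that identity on plain vectors (illustrative only):

    #include <algorithm>
    #include <cassert>
    #include <cstddef>
    #include <numeric>
    #include <vector>

    std::vector<int> skipThenLimit(std::vector<int> v, std::size_t s, std::size_t l) {
        v.erase(v.begin(), v.begin() + std::min(s, v.size()));   // $skip s
        if (v.size() > l) v.resize(l);                           // $limit l
        return v;
    }

    std::vector<int> limitThenSkip(std::vector<int> v, std::size_t s, std::size_t l) {
        if (v.size() > l + s) v.resize(l + s);                   // $limit l + s
        v.erase(v.begin(), v.begin() + std::min(s, v.size()));   // $skip s
        return v;
    }

    int main() {
        std::vector<int> docs(20);
        std::iota(docs.begin(), docs.end(), 0);   // 0, 1, ..., 19
        // skip 2 then limit 3 == limit (3 + 2) then skip 2 == {2, 3, 4}
        assert(skipThenLimit(docs, 2, 3) == limitThenSkip(docs, 2, 3));
        return 0;
    }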
Example #6
    intrusive_ptr<Pipeline> Pipeline::parseCommand(
        string &errmsg, BSONObj &cmdObj,
        const intrusive_ptr<ExpressionContext> &pCtx) {
        intrusive_ptr<Pipeline> pPipeline(new Pipeline(pCtx));
        vector<BSONElement> pipeline;

        /* gather the specification for the aggregation */
        for(BSONObj::iterator cmdIterator = cmdObj.begin();
                cmdIterator.more(); ) {
            BSONElement cmdElement(cmdIterator.next());
            const char *pFieldName = cmdElement.fieldName();

            /* look for the aggregation command */
            if (!strcmp(pFieldName, commandName)) {
                pPipeline->collectionName = cmdElement.String();
                continue;
            }

            /* check for the collection name */
            if (!strcmp(pFieldName, pipelineName)) {
                pipeline = cmdElement.Array();
                continue;
            }

            /* check for explain option */
            if (!strcmp(pFieldName, explainName)) {
                pPipeline->explain = cmdElement.Bool();
                continue;
            }

            /* if the request came from the router, we're in a shard */
            if (!strcmp(pFieldName, fromRouterName)) {
                pCtx->setInShard(cmdElement.Bool());
                continue;
            }

            /* check for debug options */
            if (!strcmp(pFieldName, splitMongodPipelineName)) {
                pPipeline->splitMongodPipeline = true;
                continue;
            }

            /* Ignore $auth information sent along with the command. The authentication system will
             * use it; it's not part of the pipeline.
             */
            if (!strcmp(pFieldName, AuthenticationTable::fieldName.c_str())) {
                continue;
            }

            /* we didn't recognize a field in the command */
            ostringstream sb;
            sb << "unrecognized field \"" << cmdElement.fieldName() << "\"";
            errmsg = sb.str();
            return intrusive_ptr<Pipeline>();
        }

        /*
          If we get here, we've harvested the fields we expect for a pipeline.

          Set up the specified document source pipeline.
        */
        SourceVector *pSourceVector = &pPipeline->sourceVector; // shorthand

        /* iterate over the steps in the pipeline */
        const size_t nSteps = pipeline.size();
        for(size_t iStep = 0; iStep < nSteps; ++iStep) {
            /* pull out the pipeline element as an object */
            BSONElement pipeElement(pipeline[iStep]);
            uassert(15942, str::stream() << "pipeline element " <<
                    iStep << " is not an object",
                    pipeElement.type() == Object);
            BSONObj bsonObj(pipeElement.Obj());

            // Parse a pipeline stage from 'bsonObj'.
            uassert(16435, "A pipeline stage specification object must contain exactly one field.",
                    bsonObj.nFields() == 1);
            BSONElement stageSpec = bsonObj.firstElement();
            const char* stageName = stageSpec.fieldName();

            // Create a DocumentSource pipeline stage from 'stageSpec'.
            StageDesc key;
            key.pName = stageName;
            const StageDesc* pDesc = (const StageDesc*)
                    bsearch(&key, stageDesc, nStageDesc, sizeof(StageDesc),
                            stageDescCmp);

            uassert(16436,
                    str::stream() << "Unrecognized pipeline stage name: '" << stageName << "'",
                    pDesc);
            intrusive_ptr<DocumentSource> stage = (*pDesc->pFactory)(&stageSpec, pCtx);
            verify(stage);
            stage->setPipelineStep(iStep);
            pSourceVector->push_back(stage);
        }

        /* if there aren't any pipeline stages, there's nothing more to do */
        if (!pSourceVector->size())
            return pPipeline;

        /*
          Move filters up where possible.

          CW TODO -- move filters past projections where possible, noting
          the corresponding field renamings.
        */

        /*
          Wherever there is a match immediately following a sort, swap them.
          This means we sort fewer items.  Neither changes the documents in
          the stream, so this transformation shouldn't affect the result.

          We do this first, because then when we coalesce operators below,
          any adjacent matches will be combined.
         */
        for(size_t srcn = pSourceVector->size(), srci = 1;
            srci < srcn; ++srci) {
            intrusive_ptr<DocumentSource> &pSource = pSourceVector->at(srci);
            if (dynamic_cast<DocumentSourceMatch *>(pSource.get())) {
                intrusive_ptr<DocumentSource> &pPrevious =
                    pSourceVector->at(srci - 1);
                if (dynamic_cast<DocumentSourceSort *>(pPrevious.get())) {
                    /* swap this item with the previous */
                    intrusive_ptr<DocumentSource> pTemp(pPrevious);
                    pPrevious = pSource;
                    pSource = pTemp;
                }
            }
        }

        /*
          Coalesce adjacent filters where possible.  Two adjacent filters
          are equivalent to one filter whose predicate is the conjunction of
          the two original filters' predicates.  For now, capture this by
          giving any DocumentSource the option to absorb its successor; this
          will also allow adjacent projections to coalesce when possible.

          Run through the DocumentSources, and give each one the opportunity
          to coalesce with its successor.  If successful, remove the
          successor.

          Move all document sources to a temporary list.
        */
        SourceVector tempVector(*pSourceVector);
        pSourceVector->clear();

        /* move the first one to the final list */
        pSourceVector->push_back(tempVector[0]);

        /* run through the sources, coalescing them or keeping them */
        for(size_t tempn = tempVector.size(), tempi = 1;
            tempi < tempn; ++tempi) {
            /*
              If we can't coalesce the source with the last, then move it
              to the final list, and make it the new last.  (If we succeeded,
              then we're still on the same last, and there's no need to move
              or do anything with the source -- the destruction of tempVector
              will take care of the rest.)
            */
            intrusive_ptr<DocumentSource> &pLastSource = pSourceVector->back();
            intrusive_ptr<DocumentSource> &pTemp = tempVector.at(tempi);
            verify(pTemp && pLastSource);
            if (!pLastSource->coalesce(pTemp))
                pSourceVector->push_back(pTemp);
        }

        /* optimize the elements in the pipeline */
        for(SourceVector::iterator iter(pSourceVector->begin()),
                listEnd(pSourceVector->end()); iter != listEnd; ++iter) {
            if (!*iter) {
                errmsg = "Pipeline received empty document as argument";
                return intrusive_ptr<Pipeline>();
            }

            (*iter)->optimize();
        }

        return pPipeline;
    }
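
The coalescing pass in both parseCommand() versions is a left fold: each surviving stage gets one chance to absorb its successor, and the successor is kept only if absorption fails. For instance, two adjacent $limit stages can collapse into a single limit of the smaller value. A toy rendition of that fold; the coalesce() semantics below are illustrative, not lifted from DocumentSourceLimit.

    #include <algorithm>
    #include <cstddef>
    #include <iostream>
    #include <vector>

    struct Limit {
        long long limit;
        explicit Limit(long long n) : limit(n) {}
        // Absorb a following $limit: the tighter bound wins. Returns true when
        // the successor has been swallowed and should be dropped.
        bool coalesce(const Limit& next) {
            limit = std::min(limit, next.limit);
            return true;
        }
    };

    int main() {
        std::vector<Limit> temp{Limit(10), Limit(4), Limit(7)};
        std::vector<Limit> out;
        out.push_back(temp[0]);
        // Mirrors the fold in parseCommand(): keep a stage only if the
        // current last stage refuses to absorb it.
        for (std::size_t i = 1; i < temp.size(); ++i)
            if (!out.back().coalesce(temp[i]))
                out.push_back(temp[i]);
        std::cout << out.size() << " stage(s), limit = " << out.front().limit << '\n';  // 1 stage(s), limit = 4
        return 0;
    }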
Example #7
    bool Pipeline::run(BSONObjBuilder &result, string &errmsg,
                       const intrusive_ptr<DocumentSource> &pInputSource) {

        /* chain together the sources we found */
        DocumentSource *pSource = pInputSource.get();
        for(SourceVector::iterator iter(sourceVector.begin()),
                listEnd(sourceVector.end()); iter != listEnd; ++iter) {
            intrusive_ptr<DocumentSource> pTemp(*iter);
            pTemp->setSource(pSource);
            pSource = pTemp.get();
        }
        /* pSource is left pointing at the last source in the chain */

        /*
          Iterate through the resulting documents, and add them to the result.
          We do this even if we're doing an explain, in order to capture
          the document counts and other stats.  However, we don't capture
          the result documents for explain.

          We wrap all the BSONObjBuilder calls with a try/catch in case the
          objects get too large and cause an exception.
        */
        try {
            if (explain) {
                if (!pCtx->getInRouter()) {
                    writeExplainShard(result, pInputSource);
                }
                else {
                    writeExplainMongos(result, pInputSource);
                }
            }
            else {
                BSONArrayBuilder resultArray; // where we'll stash the results
                for(bool hasDocument = !pSource->eof(); hasDocument;
                    hasDocument = pSource->advance()) {
                    intrusive_ptr<Document> pDocument(pSource->getCurrent());

                    /* add the document to the result set */
                    BSONObjBuilder documentBuilder;
                    pDocument->toBson(&documentBuilder);
                    resultArray.append(documentBuilder.done());
                }

                result.appendArray("result", resultArray.arr());
            }
        } catch(AssertionException &ae) {
            /*
               If it's not the "object too large" error, rethrow.
               At time of writing, that error code comes from
               mongo/src/mongo/bson/util/builder.h
            */
            if (ae.getCode() != 13548)
                throw;

            /* throw the nicer human-readable error */
            uassert(16029, str::stream() <<
                    "aggregation result exceeds maximum document size limit ("
                    << (BSONObjMaxUserSize / (1024 * 1024)) << "MB)",
                    false);
        }

        return true;
    }
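
Note how this version handles oversize results differently from Example #1: instead of checking the builder's length after every document, it lets the builder throw and then translates the one specific "object too large" code (13548) into a friendlier uassert, rethrowing everything else untouched. The catch-inspect-rethrow pattern in isolation; the CodedError type below is a hypothetical stand-in for AssertionException.

    #include <iostream>
    #include <sstream>
    #include <stdexcept>
    #include <string>

    // Hypothetical stand-in for AssertionException: an error carrying a numeric code.
    struct CodedError : std::runtime_error {
        int code;
        CodedError(int c, const std::string& what) : std::runtime_error(what), code(c) {}
    };

    const int kObjectTooLarge = 13548;  // low-level builder error (per the comment above)

    void buildResult(bool tooBig) {
        if (tooBig) throw CodedError(kObjectTooLarge, "BufBuilder grow() past max");
    }

    void run(bool tooBig) {
        try {
            buildResult(tooBig);
        } catch (const CodedError& e) {
            if (e.code != kObjectTooLarge)
                throw;  // unrelated failure: propagate untouched
            // Translate the low-level code into a user-facing message.
            std::ostringstream msg;
            msg << "aggregation result exceeds maximum document size limit";
            throw CodedError(16029, msg.str());
        }
    }

    int main() {
        try {
            run(true);
        } catch (const CodedError& e) {
            std::cout << e.code << ": " << e.what() << '\n';  // 16029: ...
        }
        return 0;
    }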
Example #8
Mat CHOG::get(const Mat &img, int nBins, SqNeighbourhood nbhd)
{
	DGM_ASSERT_MSG(nBins < CV_CN_MAX, "Number of bins (%d) exceeds the maximum allowed number (%d)", nBins, CV_CN_MAX);
	
	int	i;						// bins index
	int	x, y;
	int	width	= img.cols;
	int	height	= img.rows;

	// Converting to one channel image
	Mat	I;
	if (img.channels() != 1) cvtColor(img, I, cv::ColorConversionCodes::COLOR_RGB2GRAY);
	else img.copyTo(I);
	
	// Derivatives
	Mat Ix = CGradient::getDerivativeX(I);
	Mat Iy = CGradient::getDerivativeY(I);

	// Initializing bins and integrals
	vec_mat_t vTemp(nBins);
	vec_mat_t vBins(nBins);
	vec_mat_t vInts(nBins);

	for (i = 0; i < nBins; i++) {
		vTemp[i].create(img.size(), CV_8UC1);
		vBins[i].create(img.size(), CV_32FC1);	
		vBins[i].setTo(0);
	}

	std::vector<float *>	pBins(nBins);
	std::vector<double *>	pInts0(nBins);
	std::vector<double *>	pInts1(nBins);
	std::vector<byte *>		pTemp(nBins);
	
	// Calculating the bins
	for (y = 0; y < height; y++) {
		float *pIx = Ix.ptr<float>(y);
		float *pIy = Iy.ptr<float>(y);
		for (i = 0; i < nBins; i++) pBins[i] = vBins[i].ptr<float>(y);
		for (x = 0; x < width; x++) {
			float ix = pIx[x];
			float iy = pIy[x];
			
			// gradient Magnitude
			float gMgn = sqrtf(ix*ix + iy*iy);

			// gradient Orientation
			if (fabs(ix) < FLT_EPSILON) ix = SIGN(ix) * FLT_EPSILON;
			float tg = iy / ix;
			float gOrt = (0.5f + atanf(tg) / (float)Pi) * 180.0f;			// [0°; 180°]

			// filling in the bins
			float gOrtStep = 180.0f / nBins;
			for (i = 0; i < nBins; i++)
				if (gOrt <= (i + 1) * gOrtStep) {
					pBins[i][x] = gMgn;
					break;
				}
		}
	}

	// Calculating the integrals
	for (i = 0; i < nBins; i++) integral(vBins[i], vInts[i]);
	
	for (y = 0; y < height; y++) {	
		int y0 = MAX(0, y - nbhd.upperGap);		
		int y1 = MIN(y + nbhd.lowerGap, height - 1);
		for (i = 0; i < nBins; i++) pInts0[i] = vInts[i].ptr<double>(y0);
		for (i = 0; i < nBins; i++) pInts1[i] = vInts[i].ptr<double>(y1 + 1);
		for (i = 0; i < nBins; i++) pTemp[i]  = vTemp[i].ptr<byte>(y);
		for (x = 0; x < width; x++) {
			int x0 = MAX(0, x - nbhd.leftGap);
			int x1 = MIN(x + nbhd.rightGap, width - 1);

			Mat HOGcell(cv::Size(nBins, 1), CV_64FC1);
			double *pHOGcell = HOGcell.ptr<double>(0);
			for (i = 0; i < nBins; i++) pHOGcell[i] = pInts1[i][x1 + 1] - pInts1[i][x0] - pInts0[i][x1 + 1] + pInts0[i][x0];
			normalize(HOGcell, HOGcell, 255, 0, cv::NormTypes::NORM_MINMAX);
			for (i = 0; i < nBins; i++) pTemp[i][x] = static_cast<byte>(pHOGcell[i]);
			HOGcell.release();
		} // x
	} // y

	Mat res;
	merge(vTemp, res);

	return res;	
}
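
CHOG::get() avoids re-summing each neighbourhood at every pixel by precomputing one integral image per bin; any axis-aligned box sum then costs four lookups, S = I(y1+1, x1+1) - I(y1+1, x0) - I(y0, x1+1) + I(y0, x0), which is exactly the expression filling pHOGcell above. A plain-C++ sketch of that identity, checked against a brute-force sum (no OpenCV; sizes and values are illustrative):

    #include <cassert>
    #include <cstddef>
    #include <vector>

    // Integral image with one extra row/column of zeros, matching cv::integral:
    // I[y][x] holds the sum of src over the rectangle [0, y) x [0, x).
    std::vector<std::vector<double>> integralImage(const std::vector<std::vector<float>>& src) {
        std::size_t h = src.size(), w = src[0].size();
        std::vector<std::vector<double>> I(h + 1, std::vector<double>(w + 1, 0.0));
        for (std::size_t y = 0; y < h; ++y)
            for (std::size_t x = 0; x < w; ++x)
                I[y + 1][x + 1] = src[y][x] + I[y][x + 1] + I[y + 1][x] - I[y][x];
        return I;
    }

    // Sum of src over rows [y0, y1] and columns [x0, x1], via four lookups.
    double boxSum(const std::vector<std::vector<double>>& I,
                  std::size_t y0, std::size_t y1, std::size_t x0, std::size_t x1) {
        return I[y1 + 1][x1 + 1] - I[y1 + 1][x0] - I[y0][x1 + 1] + I[y0][x0];
    }

    int main() {
        std::vector<std::vector<float>> img(8, std::vector<float>(8));
        for (std::size_t y = 0; y < 8; ++y)
            for (std::size_t x = 0; x < 8; ++x)
                img[y][x] = static_cast<float>(y * 8 + x);

        auto I = integralImage(img);
        // Brute-force check on the box spanning rows 2..5, columns 1..6.
        double brute = 0.0;
        for (std::size_t y = 2; y <= 5; ++y)
            for (std::size_t x = 1; x <= 6; ++x)
                brute += img[y][x];
        assert(boxSum(I, 2, 5, 1, 6) == brute);
        return 0;
    }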