Esempio n. 1
0
    /**
     * Renders this object as "{ elem, elem, ... }" (or "{}" when empty).
     *
     * While walking the elements the layout is sanity-checked: every element
     * must have a positive, bounded size, must lie inside the object, and the
     * EOO marker must be the very last element. Any violation raises via
     * massert().
     */
    string BSONObj::toString() const {
        if ( isEmpty() )
            return "{}";

        stringstream out;
        out << "{ ";

        // Empty before the first element, ", " before every later one.
        const char *separator = "";

        BSONObjIterator it(*this);
        for ( ;; ) {
            massert( "Object does not end with EOO", it.more() );
            BSONElement elem = it.next( true );

            const int elemSize = elem.size();
            massert( "Invalid element size", elemSize > 0 );
            massert( "Element too large", elemSize < ( 1 << 30 ) );

            // Offset one past the element's last byte, relative to the object start.
            const int offset = elem.rawdata() - this->objdata();
            const int elemEnd = elemSize + offset;
            massert( "Element extends past end of object",
                    elemEnd <= this->objsize() );
            elem.validate();

            if ( elem.eoo() ) {
                // The terminator has to coincide with the end of the object.
                massert( "EOO Before end of object", elemEnd == this->objsize() );
                break;
            }

            out << separator << elem.toString();
            separator = ", ";
        }

        out << " }";
        return out.str();
    }
Esempio n. 2
0
    bool debug( const BSONObj& o , int depth=0) {
        string prefix = "";
        for ( int i=0; i<depth; i++ ) {
            prefix += "\t\t\t";
        }

        int read = 4;

        try {
            cout << prefix << "--- new object ---\n";
            cout << prefix << "\t size : " << o.objsize() << "\n";
            BSONObjIterator i(o);
            while ( i.more() ) {
                BSONElement e = i.next();
                cout << prefix << "\t\t " << e.fieldName() << "\n" << prefix << "\t\t\t type:" << setw(3) << e.type() << " size: " << e.size() << endl;
                if ( ( read + e.size() ) > o.objsize() ) {
                    cout << prefix << " SIZE DOES NOT WORK" << endl;
                    return false;
                }
                read += e.size();
                try {
                    e.validate();
                    if ( e.isABSONObj() ) {
                        if ( ! debug( e.Obj() , depth + 1 ) ) {
                            //return false;
                            cout << prefix << "\t\t\t BAD BAD BAD" << endl;

                            if ( e.size() < 1000 ) {
                                cout << "---\n" << e.Obj().hexDump() << "\n---" << endl;
                            }
                        }
                    }
                    else if ( e.type() == String && ! isValidUTF8( e.valuestr() ) ) {
                        cout << prefix << "\t\t\t" << "bad utf8 String!" << endl;
                    }
                    else if ( logger::globalLogDomain()->shouldLog(logger::LogSeverity::Debug(1)) ) {
                        cout << prefix << "\t\t\t" << e << endl;
                    }

                }
                catch ( std::exception& e ) {
                    cout << prefix << "\t\t\t bad value: " << e.what() << endl;
                }
            }
        }
        catch ( std::exception& e ) {
            cout << prefix << "\tbad\t" << e.what() << endl;
            cout << "----\n" << o.hexDump() << "\n---" << endl;
        }
        return true;
    }
Esempio n. 3
0
void BtreeKeyGenerator::getKeys(const BSONObj& obj,
                                BSONObjSet* keys,
                                MultikeyPaths* multikeyPaths) const {
    if (_isIdIndex) {
        // we special case for speed
        BSONElement e = obj["_id"];
        if (e.eoo()) {
            keys->insert(_nullKey);
        } else {
            int size = e.size() + 5 /* bson over head*/ - 3 /* remove _id string */;
            BSONObjBuilder b(size);
            b.appendAs(e, "");
            keys->insert(b.obj());
            invariant(keys->begin()->objsize() == size);
        }
        return;
    }

    // '_fieldNames' and '_fixed' are passed by value so that they can be mutated as part of the
    // getKeys call.  :|
    getKeysImpl(_fieldNames, _fixed, obj, keys, multikeyPaths);
    if (keys->empty() && !_isSparse) {
        keys->insert(_nullKey);
    }
}
/**
 * Returns true when this element and 'rhs' have the same size and identical
 * raw bytes. Two zero-sized elements compare equal without touching memory.
 */
bool BSONElement::binaryEqual(const BSONElement& rhs) const {
    const int lhsBytes = size();

    if (lhsBytes == rhs.size()) {
        if (lhsBytes == 0) {
            return true;
        }
        return memcmp(data, rhs.rawdata(), lhsBytes) == 0;
    }

    return false;
}
Esempio n. 5
0
void BtreeKeyGenerator::getKeys(const BSONObj& obj,
                                BSONObjSet* keys,
                                MultikeyPaths* multikeyPaths) const {
    if (_isIdIndex) {
        // we special case for speed
        BSONElement e = obj["_id"];
        if (e.eoo()) {
            keys->insert(_nullKey);
        } else if (_collator) {
            BSONObjBuilder b;
            CollationIndexKey::collationAwareIndexKeyAppend(e, _collator, &b);

            // Insert a copy so its buffer size fits the object size.
            keys->insert(b.obj().copy());
        } else {
            int size = e.size() + 5 /* bson over head*/ - 3 /* remove _id string */;
            BSONObjBuilder b(size);
            b.appendAs(e, "");
            keys->insert(b.obj());
            invariant(keys->begin()->objsize() == size);
        }

        // The {_id: 1} index can never be multikey because the _id field isn't allowed to be an
        // array value. We therefore always set 'multikeyPaths' as [ [ ] ].
        if (multikeyPaths) {
            multikeyPaths->resize(1);
        }
    } else {
        if (multikeyPaths) {
            invariant(multikeyPaths->empty());
            multikeyPaths->resize(_fieldNames.size());
        }
        // '_fieldNames' and '_fixed' are passed by value so that their copies can be mutated as
        // part of the _getKeysWithArray method.
        _getKeysWithArray(_fieldNames, _fixed, obj, keys, 0, _emptyPositionalInfo, multikeyPaths);
    }
    if (keys->empty() && !_isSparse) {
        keys->insert(_nullKey);
    }
}
Esempio n. 6
0
void BtreeKeyGeneratorV1::getKeysImpl(std::vector<const char*> fieldNames,
                                      std::vector<BSONElement> fixed,
                                      const BSONObj& obj,
                                      BSONObjSet* keys,
                                      MultikeyPaths* multikeyPaths) const {
    if (_isIdIndex) {
        // we special case for speed
        BSONElement e = obj["_id"];
        if (e.eoo()) {
            keys->insert(_nullKey);
        } else if (_collator) {
            BSONObjBuilder b;
            CollationIndexKey::collationAwareIndexKeyAppend(e, _collator, &b);

            // Insert a copy so its buffer size fits the object size.
            keys->insert(b.obj().copy());
        } else {
            int size = e.size() + 5 /* bson over head*/ - 3 /* remove _id string */;
            BSONObjBuilder b(size);
            b.appendAs(e, "");
            keys->insert(b.obj());
            invariant(keys->begin()->objsize() == size);
        }

        // The {_id: 1} index can never be multikey because the _id field isn't allowed to be an
        // array value. We therefore always set 'multikeyPaths' as [ [ ] ].
        if (multikeyPaths) {
            multikeyPaths->resize(1);
        }
        return;
    }

    if (multikeyPaths) {
        invariant(multikeyPaths->empty());
        multikeyPaths->resize(fieldNames.size());
    }
    getKeysImplWithArray(
        std::move(fieldNames), std::move(fixed), obj, keys, 0, _emptyPositionalInfo, multikeyPaths);
}
Esempio n. 7
0
void BtreeKeyGeneratorV0::getKeysImpl(std::vector<const char*> fieldNames,
                                      std::vector<BSONElement> fixed,
                                      const BSONObj& obj,
                                      BSONObjSet* keys,
                                      MultikeyPaths* multikeyPaths) const {
    if (_isIdIndex) {
        // we special case for speed
        BSONElement e = obj["_id"];
        if (e.eoo()) {
            keys->insert(_nullKey);
        } else {
            int size = e.size() + 5 /* bson over head*/ - 3 /* remove _id string */;
            BSONObjBuilder b(size);
            b.appendAs(e, "");
            keys->insert(b.obj());
            invariant(keys->begin()->objsize() == size);
        }
        return;
    }

    BSONElement arrElt;
    unsigned arrIdx = ~0;
    unsigned numNotFound = 0;

    for (unsigned i = 0; i < fieldNames.size(); ++i) {
        if (*fieldNames[i] == '\0')
            continue;

        BSONElement e = dps::extractElementAtPathOrArrayAlongPath(obj, fieldNames[i]);

        if (e.eoo()) {
            e = nullElt;  // no matching field
            numNotFound++;
        }

        if (e.type() != Array)
            fieldNames[i] = "";  // no matching field or non-array match

        if (*fieldNames[i] == '\0')
            // no need for further object expansion (though array expansion still possible)
            fixed[i] = e;

        if (e.type() == Array && arrElt.eoo()) {
            // we only expand arrays on a single path -- track the path here
            arrIdx = i;
            arrElt = e;
        }

        // enforce single array path here
        if (e.type() == Array && e.rawdata() != arrElt.rawdata()) {
            assertParallelArrays(e.fieldName(), arrElt.fieldName());
        }
    }

    bool allFound = true;  // have we found elements for all field names in the key spec?
    for (std::vector<const char*>::const_iterator i = fieldNames.begin(); i != fieldNames.end();
         ++i) {
        if (**i != '\0') {
            allFound = false;
            break;
        }
    }

    if (_isSparse && numNotFound == _fieldNames.size()) {
        // we didn't find any fields
        // so we're not going to index this document
        return;
    }

    bool insertArrayNull = false;

    if (allFound) {
        if (arrElt.eoo()) {
            // no terminal array element to expand
            BSONObjBuilder b(_sizeTracker);
            for (std::vector<BSONElement>::iterator i = fixed.begin(); i != fixed.end(); ++i)
                b.appendAs(*i, "");
            keys->insert(b.obj());
        } else {
            // terminal array element to expand, so generate all keys
            BSONObjIterator i(arrElt.embeddedObject());
            if (i.more()) {
                while (i.more()) {
                    BSONObjBuilder b(_sizeTracker);
                    for (unsigned j = 0; j < fixed.size(); ++j) {
                        if (j == arrIdx)
                            b.appendAs(i.next(), "");
                        else
                            b.appendAs(fixed[j], "");
                    }
                    keys->insert(b.obj());
                }
            } else if (fixed.size() > 1) {
                insertArrayNull = true;
            }
        }
    } else {
        // nonterminal array element to expand, so recurse
        verify(!arrElt.eoo());
        BSONObjIterator i(arrElt.embeddedObject());
        if (i.more()) {
            while (i.more()) {
                BSONElement e = i.next();
                if (e.type() == Object) {
                    getKeysImpl(fieldNames, fixed, e.embeddedObject(), keys, multikeyPaths);
                }
            }
        } else {
            insertArrayNull = true;
        }
    }

    if (insertArrayNull) {
        // x : [] - need to insert undefined
        BSONObjBuilder b(_sizeTracker);
        for (unsigned j = 0; j < fixed.size(); ++j) {
            if (j == arrIdx) {
                b.appendUndefined("");
            } else {
                BSONElement e = fixed[j];
                if (e.eoo())
                    b.appendNull("");
                else
                    b.appendAs(e, "");
            }
        }
        keys->insert(b.obj());
    }
}
Esempio n. 8
0
/**
 * Generates the text-index keys for 'obj' and inserts them into 'keys'.
 *
 * One key is produced per distinct term scored by 'spec'. Each key consists of
 * the "extra before" elements, the (weight, term) entry written by
 * _appendIndexKey(), and the "extra after" elements. Missing extra fields are
 * indexed as null.
 *
 * Raises (uassert):
 *   16675 - an "extra before" field is an array
 *   16732 - the document has more than 400000 distinct terms
 *   16733 - the accumulated key size exceeds MaxKeyBSONSizeMB megabytes
 *
 * @param spec  text index specification
 * @param obj   document to index
 * @param keys  output set receiving the generated keys
 */
void FTSIndexFormat::getKeys(const FTSSpec& spec, const BSONObj& obj, BSONObjSet* keys) {
    int extraSize = 0;
    vector<BSONElement> extrasBefore;
    vector<BSONElement> extrasAfter;

    // compute the non FTS key elements
    for (unsigned i = 0; i < spec.numExtraBefore(); i++) {
        BSONElement e = obj.getFieldDotted(spec.extraBefore(i));
        if (e.eoo())
            e = nullElt;
        uassert(16675, "cannot have a multi-key as a prefix to a text index", e.type() != Array);
        extrasBefore.push_back(e);
        extraSize += e.size();
    }
    for (unsigned i = 0; i < spec.numExtraAfter(); i++) {
        BSONElement e = obj.getFieldDotted(spec.extraAfter(i));
        if (e.eoo())
            e = nullElt;
        extrasAfter.push_back(e);
        extraSize += e.size();
    }


    TermFrequencyMap term_freqs;
    spec.scoreDocument(obj, &term_freqs);

    // create index keys from raw scores
    // only 1 per string

    // Report the real limit in the message (and the offending count next to it) rather than
    // presenting the document's own term count as the maximum.
    const unsigned MaxUniqueTerms = 400000;
    uassert(16732,
            mongolutils::str::stream() << "too many unique keys for a single document to"
                                      << " have a text index, max is " << MaxUniqueTerms
                                      << ", got " << term_freqs.size() << " " << obj["_id"],
            term_freqs.size() <= MaxUniqueTerms);

    long long keyBSONSize = 0;
    const int MaxKeyBSONSizeMB = 4;

    for (TermFrequencyMap::const_iterator i = term_freqs.begin(); i != term_freqs.end(); ++i) {
        const string& term = i->first;
        double weight = i->second;

        // guess the total size of the btree entry based on the size of the weight, term tuple
        int guess = 5 /* bson overhead */ + 10 /* weight */ + 8 /* term overhead */ +
            /* term size (could be truncated/hashed) */
            guessTermSize(term, spec.getTextIndexVersion()) + extraSize;

        BSONObjBuilder b(guess);  // builds a BSON object with guess length.
        for (unsigned k = 0; k < extrasBefore.size(); k++) {
            b.appendAs(extrasBefore[k], "");
        }
        _appendIndexKey(b, weight, term, spec.getTextIndexVersion());
        for (unsigned k = 0; k < extrasAfter.size(); k++) {
            b.appendAs(extrasAfter[k], "");
        }
        BSONObj res = b.obj();

        // The guess must be an upper bound on the size of the key actually built.
        verify(guess >= res.objsize());

        keys->insert(res);
        keyBSONSize += res.objsize();

        // Checked after every key so an oversized document is rejected as early as possible.
        uassert(16733,
                mongolutils::str::stream()
                    << "trying to index text where term list is too big, max is "
                    << MaxKeyBSONSizeMB << "mb " << obj["_id"],
                keyBSONSize <= (MaxKeyBSONSizeMB * 1024 * 1024));
    }
}
Esempio n. 9
0
        bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result,
                 bool fromRepl ) {

            Timer t;
            string ns = dbname + '.' + cmdObj.firstElement().valuestr();

            string key = cmdObj["key"].valuestrsafe();
            BSONObj keyPattern = BSON( key << 1 );

            BSONObj query = getQuery( cmdObj );

            int bufSize = BSONObjMaxUserSize - 4096;
            BufBuilder bb( bufSize );
            char * start = bb.buf();

            BSONArrayBuilder arr( bb );
            BSONElementSet values;

            long long nscanned = 0; // locations looked at
            long long nscannedObjects = 0; // full objects looked at
            long long n = 0; // matches

            NamespaceDetails * d = nsdetails( ns );

            string cursorName;

            if (!d) {
                result.appendArray( "values" , BSONObj() );
                result.append("stats", BSON("n" << 0 <<
                                            "nscanned" << 0 <<
                                            "nscannedObjects" << 0));
                return true;
            }

            CanonicalQuery* cq;
            // XXX: project out just the field we're distinct-ing.  May be covered...
            if (!CanonicalQuery::canonicalize(ns, query, &cq).isOK()) {
                uasserted(17215, "Can't canonicalize query " + query.toString());
                return 0;
            }

            Runner* rawRunner;
            if (!getRunner(cq, &rawRunner).isOK()) {
                uasserted(17216, "Can't get runner for query " + query.toString());
                return 0;
            }

            auto_ptr<Runner> runner(rawRunner);
            auto_ptr<DeregisterEvenIfUnderlyingCodeThrows> safety;
            ClientCursor::registerRunner(runner.get());
            runner->setYieldPolicy(Runner::YIELD_AUTO);
            safety.reset(new DeregisterEvenIfUnderlyingCodeThrows(runner.get()));

            BSONObj obj;
            Runner::RunnerState state;
            while (Runner::RUNNER_ADVANCED == (state = runner->getNext(&obj, NULL))) {
                BSONElementSet elts;
                obj.getFieldsDotted(key, elts);

                for (BSONElementSet::iterator it = elts.begin(); it != elts.end(); ++it) {
                    BSONElement elt = *it;
                    if (values.count(elt)) { continue; }
                    int currentBufPos = bb.len();

                    uassert(17217, "distinct too big, 16mb cap",
                            (currentBufPos + elt.size() + 1024) < bufSize);

                    arr.append(elt);
                    BSONElement x(start + currentBufPos);
                    values.insert(x);
                }
            }
            TypeExplain* bareExplain;
            Status res = runner->getExplainPlan(&bareExplain);
            if (res.isOK()) {
                auto_ptr<TypeExplain> explain(bareExplain);
                if (explain->isCursorSet()) {
                    cursorName = explain->getCursor();
                }
                n = explain->getN();
                nscanned = explain->getNScanned();
                nscannedObjects = explain->getNScannedObjects();
            }

            verify( start == bb.buf() );

            result.appendArray( "values" , arr.done() );

            {
                BSONObjBuilder b;
                b.appendNumber( "n" , n );
                b.appendNumber( "nscanned" , nscanned );
                b.appendNumber( "nscannedObjects" , nscannedObjects );
                b.appendNumber( "timems" , t.millis() );
                b.append( "cursor" , cursorName );
                result.append( "stats" , b.obj() );
            }

            return true;
        }
Esempio n. 10
0
    /**
     * Executes the distinct command: collects the distinct values of the field named by
     * cmdObj[kKeyField] from the documents produced by the plan executor.
     *
     * On success 'result' holds a "values" array and a "stats" sub-document
     * (n, nscanned, nscannedObjects, timems, planSummary). Failure to obtain an executor, or
     * an executor that ends in FAILURE/DEAD, is reported through appendCommandStatus().
     *
     * Raises (uassert) 17217 when the collected values would overflow the buffer.
     *
     * @return true on success, otherwise the value of appendCommandStatus()
     */
    bool run(OperationContext* txn,
             const string& dbname,
             BSONObj& cmdObj,
             int,
             string& errmsg,
             BSONObjBuilder& result) {
        Timer t;

        const string ns = parseNs(dbname, cmdObj);
        AutoGetCollectionForRead ctx(txn, ns);

        Collection* collection = ctx.getCollection();

        auto executor = getPlanExecutor(txn, collection, ns, cmdObj, false);
        if (!executor.isOK()) {
            return appendCommandStatus(result, executor.getStatus());
        }

        string key = cmdObj[kKeyField].valuestrsafe();

        // All distinct values are appended into one fixed buffer. 'start' is remembered so that
        // elements can later be addressed inside that buffer by offset.
        int bufSize = BSONObjMaxUserSize - 4096;
        BufBuilder bb(bufSize);
        char* start = bb.buf();

        BSONArrayBuilder arr(bb);
        BSONElementSet values;

        BSONObj obj;
        PlanExecutor::ExecState state;
        while (PlanExecutor::ADVANCED == (state = executor.getValue()->getNext(&obj, NULL))) {
            // Distinct expands arrays.
            //
            // If our query is covered, each value of the key should be in the index key and
            // available to us without this.  If a collection scan is providing the data, we may
            // have to expand an array.
            BSONElementSet elts;
            obj.getFieldsDotted(key, elts);

            for (BSONElementSet::iterator it = elts.begin(); it != elts.end(); ++it) {
                BSONElement elt = *it;
                if (values.count(elt)) {
                    continue;
                }
                int currentBufPos = bb.len();

                uassert(17217,
                        "distinct too big, 16mb cap",
                        (currentBufPos + elt.size() + 1024) < bufSize);

                arr.append(elt);

                // Track the copy that now lives in our own buffer rather than the element
                // inside 'obj'.
                BSONElement x(start + currentBufPos);
                values.insert(x);
            }
        }

        // Return an error if execution fails for any reason.
        if (PlanExecutor::FAILURE == state || PlanExecutor::DEAD == state) {
            const std::unique_ptr<PlanStageStats> stats(executor.getValue()->getStats());
            log() << "Plan executor error during distinct command: "
                  << PlanExecutor::statestr(state) << ", stats: " << Explain::statsToBSON(*stats);

            return appendCommandStatus(result,
                                       Status(ErrorCodes::OperationFailed,
                                              str::stream()
                                                  << "Executor error during distinct command: "
                                                  << WorkingSetCommon::toStatusString(obj)));
        }


        // Get summary information about the plan.
        PlanSummaryStats stats;
        Explain::getSummaryStats(*executor.getValue(), &stats);

        // 'collection' comes straight from ctx.getCollection() and is never checked above, so
        // guard the dereference (presumably it is null when the namespace does not exist --
        // TODO confirm).
        if (collection) {
            collection->infoCache()->notifyOfQuery(txn, stats.indexesUsed);
        }
        CurOp::get(txn)->debug().fromMultiPlanner = stats.fromMultiPlanner;
        CurOp::get(txn)->debug().replanned = stats.replanned;

        // The buffer must still start where it did when 'start' was captured.
        verify(start == bb.buf());

        result.appendArray("values", arr.done());

        {
            BSONObjBuilder b;
            b.appendNumber("n", stats.nReturned);
            b.appendNumber("nscanned", stats.totalKeysExamined);
            b.appendNumber("nscannedObjects", stats.totalDocsExamined);
            b.appendNumber("timems", t.millis());
            b.append("planSummary", Explain::getPlanSummary(executor.getValue().get()));
            result.append("stats", b.obj());
        }

        return true;
    }
Esempio n. 11
0
    /**
     * Executes the distinct command.
     *
     * The command is parsed into a ParsedDistinct. A collation is rejected while the
     * featureCompatibilityVersion is 3.2. When the namespace is a view, the command is
     * rewritten as an aggregation, run through the "aggregate" command, and the response is
     * reformatted as a distinct response. Otherwise the distinct values of
     * cmdObj[ParsedDistinct::kKeyField] are collected from the plan executor and returned in
     * the "values" array of 'result'.
     *
     * Raises (uassert) 17217 when the collected values would overflow the buffer.
     *
     * @return true on success; false or the value of appendCommandStatus() on failure
     */
    bool run(OperationContext* txn,
             const string& dbname,
             BSONObj& cmdObj,
             int options,
             string& errmsg,
             BSONObjBuilder& result) {
        const string ns = parseNs(dbname, cmdObj);
        const NamespaceString nss(ns);

        const ExtensionsCallbackReal extensionsCallback(txn, &nss);
        auto parsedDistinct = ParsedDistinct::parse(txn, nss, cmdObj, extensionsCallback, false);
        if (!parsedDistinct.isOK()) {
            return appendCommandStatus(result, parsedDistinct.getStatus());
        }

        if (!parsedDistinct.getValue().getQuery()->getQueryRequest().getCollation().isEmpty() &&
            serverGlobalParams.featureCompatibility.version.load() ==
                ServerGlobalParams::FeatureCompatibility::Version::k32) {
            return appendCommandStatus(
                result,
                Status(ErrorCodes::InvalidOptions,
                       "The featureCompatibilityVersion must be 3.4 to use collation. See "
                       "http://dochub.mongodb.org/core/3.4-feature-compatibility."));
        }

        AutoGetCollectionOrViewForRead ctx(txn, ns);
        Collection* collection = ctx.getCollection();

        if (ctx.getView()) {
            ctx.releaseLocksForView();

            auto viewAggregation = parsedDistinct.getValue().asAggregationCommand();
            if (!viewAggregation.isOK()) {
                return appendCommandStatus(result, viewAggregation.getStatus());
            }
            BSONObjBuilder aggResult;

            // The return value is deliberately ignored; the outcome is read from 'aggResult'.
            (void)Command::findCommand("aggregate")
                ->run(txn, dbname, viewAggregation.getValue(), options, errmsg, aggResult);

            if (ResolvedView::isResolvedViewErrorResponse(aggResult.asTempObj())) {
                result.appendElements(aggResult.obj());
                return false;
            }

            ViewResponseFormatter formatter(aggResult.obj());
            Status formatStatus = formatter.appendAsDistinctResponse(&result);
            if (!formatStatus.isOK()) {
                return appendCommandStatus(result, formatStatus);
            }
            return true;
        }

        auto executor = getExecutorDistinct(
            txn, collection, ns, &parsedDistinct.getValue(), PlanExecutor::YIELD_AUTO);
        if (!executor.isOK()) {
            return appendCommandStatus(result, executor.getStatus());
        }

        {
            // The guard must be a named object: an unnamed temporary would be destroyed at the
            // end of its own statement and the lock would not be held for the call below.
            stdx::lock_guard<Client> lk(*txn->getClient());
            CurOp::get(txn)->setPlanSummary_inlock(
                Explain::getPlanSummary(executor.getValue().get()));
        }

        string key = cmdObj[ParsedDistinct::kKeyField].valuestrsafe();

        // All distinct values are appended into one fixed buffer. 'start' is remembered so that
        // elements can later be addressed inside that buffer by offset.
        int bufSize = BSONObjMaxUserSize - 4096;
        BufBuilder bb(bufSize);
        char* start = bb.buf();

        BSONArrayBuilder arr(bb);
        BSONElementSet values(executor.getValue()->getCanonicalQuery()->getCollator());

        BSONObj obj;
        PlanExecutor::ExecState state;
        while (PlanExecutor::ADVANCED == (state = executor.getValue()->getNext(&obj, NULL))) {
            // Distinct expands arrays.
            //
            // If our query is covered, each value of the key should be in the index key and
            // available to us without this.  If a collection scan is providing the data, we may
            // have to expand an array.
            BSONElementSet elts;
            dps::extractAllElementsAlongPath(obj, key, elts);

            for (BSONElementSet::iterator it = elts.begin(); it != elts.end(); ++it) {
                BSONElement elt = *it;
                if (values.count(elt)) {
                    continue;
                }
                int currentBufPos = bb.len();

                uassert(17217,
                        "distinct too big, 16mb cap",
                        (currentBufPos + elt.size() + 1024) < bufSize);

                arr.append(elt);

                // Track the copy that now lives in our own buffer rather than the element
                // inside 'obj'.
                BSONElement x(start + currentBufPos);
                values.insert(x);
            }
        }

        // Return an error if execution fails for any reason.
        if (PlanExecutor::FAILURE == state || PlanExecutor::DEAD == state) {
            log() << "Plan executor error during distinct command: "
                  << redact(PlanExecutor::statestr(state))
                  << ", stats: " << redact(Explain::getWinningPlanStats(executor.getValue().get()));

            return appendCommandStatus(result,
                                       Status(ErrorCodes::OperationFailed,
                                              str::stream()
                                                  << "Executor error during distinct command: "
                                                  << WorkingSetCommon::toStatusString(obj)));
        }


        auto curOp = CurOp::get(txn);

        // Get summary information about the plan.
        PlanSummaryStats stats;
        Explain::getSummaryStats(*executor.getValue(), &stats);
        if (collection) {
            collection->infoCache()->notifyOfQuery(txn, stats.indexesUsed);
        }
        curOp->debug().setPlanSummaryMetrics(stats);

        if (curOp->shouldDBProfile()) {
            BSONObjBuilder execStatsBob;
            Explain::getWinningPlanStats(executor.getValue().get(), &execStatsBob);
            curOp->debug().execStats = execStatsBob.obj();
        }

        // The buffer must still start where it did when 'start' was captured.
        verify(start == bb.buf());

        result.appendArray("values", arr.done());

        return true;
    }
Esempio n. 12
0
        /**
         * Executes the distinct command using a Cursor: collects the distinct values of the
         * field named by cmdObj["key"] over the documents matching the command's query.
         *
         * On return 'result' holds a "values" array and a "stats" sub-document
         * (n, nscanned, nscannedObjects, timems, cursor). A missing namespace yields an empty
         * array with zeroed stats.
         *
         * Raises (uassert) 10044 when the collected values would overflow the buffer.
         *
         * @return always true (errors are raised rather than returned)
         */
        bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
            Timer t;
            // The namespace is "<db>.<string value of the command's first element>".
            string ns = dbname + '.' + cmdObj.firstElement().valuestr();

            string key = cmdObj["key"].valuestrsafe();
            BSONObj keyPattern = BSON( key << 1 );

            BSONObj query = getQuery( cmdObj );

            // All distinct values are appended into one fixed buffer. 'start' is remembered so
            // that elements can later be addressed inside that buffer by offset.
            int bufSize = BSONObjMaxUserSize - 4096;
            BufBuilder bb( bufSize );
            char * start = bb.buf();

            BSONArrayBuilder arr( bb );
            BSONElementSet values;

            long long nscanned = 0; // locations looked at
            long long nscannedObjects = 0; // full objects looked at
            long long n = 0; // matches
            MatchDetails md;

            NamespaceDetails * d = nsdetails( ns );

            if ( ! d ) {
                // Unknown namespace: report an empty result.
                result.appendArray( "values" , BSONObj() );
                result.append( "stats" , BSON( "n" << 0 << "nscanned" << 0 << "nscannedObjects" << 0 ) );
                return true;
            }

            shared_ptr<Cursor> cursor;
            if ( ! query.isEmpty() ) {
                cursor = getOptimizedCursor( ns.c_str(), query, BSONObj() );
            }
            else {

                // query is empty, so lets see if we can find an index
                // with the key so we don't have to hit the raw data
                NamespaceDetails::IndexIterator ii = d->ii();
                while ( ii.more() ) {
                    IndexDetails& idx = ii.next();

                    // Multikey indexes are skipped; ii.pos() has already moved past 'idx',
                    // hence the -1.
                    if ( d->isMultikey( ii.pos() - 1 ) )
                        continue;

                    if ( idx.inKeyPattern( key ) ) {
                        cursor = getBestGuessCursor( ns.c_str(), BSONObj(), idx.keyPattern() );
                        if( cursor.get() ) break;
                    }

                }

                // No usable index was found: fall back to the optimizer's choice.
                if ( ! cursor.get() )
                    cursor = getOptimizedCursor(ns.c_str() , query , BSONObj() );

            }

            
            verify( cursor );
            string cursorName = cursor->toString();
            
            auto_ptr<ClientCursor> cc (new ClientCursor(QueryOption_NoCursorTimeout, cursor, ns));

            // map from indexed field to offset in key object
            map<string, int> indexedFields;  
            if (!cursor->modifiedKeys()) {
                // store index information so we can decide if we can
                // get something out of the index key rather than full object

                int x = 0;
                BSONObjIterator i( cursor->indexKeyPattern() );
                while ( i.more() ) {
                    BSONElement e = i.next();
                    if ( e.isNumber() ) {
                        // only want basic index fields, not "2d" etc
                        indexedFields[e.fieldName()] = x;
                    }
                    // The position counts every pattern field, numeric or not.
                    x++;
                }
            }

            while ( cursor->ok() ) {
                nscanned++;
                bool loadedRecord = false;

                // Only positions that match the query and whose location has not been
                // returned before contribute values.
                if ( cursor->currentMatches( &md ) && !cursor->getsetdup( cursor->currLoc() ) ) {
                    n++;

                    BSONObj holder;
                    BSONElementSet temp;
                    // Try to get the record from the key fields.
                    // NOTE(review): getFieldsDotted presumably returns true when the values
                    // came from the index key, i.e. no record was loaded -- confirm.
                    loadedRecord = !getFieldsDotted(indexedFields, cursor, key, temp, holder);

                    for ( BSONElementSet::iterator i=temp.begin(); i!=temp.end(); ++i ) {
                        BSONElement e = *i;
                        if ( values.count( e ) )
                            continue;

                        // Offset at which the next element will land in the buffer.
                        int now = bb.len();

                        uassert(10044,  "distinct too big, 16mb cap", ( now + e.size() + 1024 ) < bufSize );

                        arr.append( e );
                        // Track the copy that now lives in our own buffer rather than the
                        // element handed out by the cursor.
                        BSONElement x( start + now );

                        values.insert( x );
                    }
                }

                if ( loadedRecord || md.hasLoadedRecord() )
                    nscannedObjects++;

                cursor->advance();

                // When the yield reports failure, ownership of the ClientCursor is given up
                // instead of deleting it (presumably it is no longer ours to delete -- TODO
                // confirm), and the scan stops.
                if (!cc->yieldSometimes( ClientCursor::MaybeCovered )) {
                    cc.release();
                    break;
                }

                RARELY killCurrentOp.checkForInterrupt();
            }

            // The buffer must still start where it did when 'start' was captured.
            verify( start == bb.buf() );

            result.appendArray( "values" , arr.done() );

            {
                BSONObjBuilder b;
                b.appendNumber( "n" , n );
                b.appendNumber( "nscanned" , nscanned );
                b.appendNumber( "nscannedObjects" , nscannedObjects );
                b.appendNumber( "timems" , t.millis() );
                b.append( "cursor" , cursorName );
                result.append( "stats" , b.obj() );
            }

            return true;
        }
Esempio n. 13
0
        bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result,
                 bool fromRepl ) {

            Timer t;
            string ns = dbname + '.' + cmdObj.firstElement().valuestr();

            string key = cmdObj["key"].valuestrsafe();
            BSONObj keyPattern = BSON( key << 1 );

            BSONObj query = getQuery( cmdObj );

            int bufSize = BSONObjMaxUserSize - 4096;
            BufBuilder bb( bufSize );
            char * start = bb.buf();

            BSONArrayBuilder arr( bb );
            BSONElementSet values;

            long long nscanned = 0; // locations looked at
            long long nscannedObjects = 0; // full objects looked at
            long long n = 0; // matches

            Collection* collection = cc().database()->getCollection( ns );

            if (!collection) {
                result.appendArray( "values" , BSONObj() );
                result.append("stats", BSON("n" << 0 <<
                                            "nscanned" << 0 <<
                                            "nscannedObjects" << 0));
                return true;
            }

            Runner* rawRunner;
            Status status = getRunnerDistinct(collection, query, key, &rawRunner);
            if (!status.isOK()) {
                uasserted(17216, mongoutils::str::stream() << "Can't get runner for query "
                              << query << ": " << status.toString());
                return 0;
            }

            auto_ptr<Runner> runner(rawRunner);
            const ScopedRunnerRegistration safety(runner.get());
            runner->setYieldPolicy(Runner::YIELD_AUTO);

            string cursorName;
            BSONObj obj;
            Runner::RunnerState state;
            while (Runner::RUNNER_ADVANCED == (state = runner->getNext(&obj, NULL))) {
                // Distinct expands arrays.
                //
                // If our query is covered, each value of the key should be in the index key and
                // available to us without this.  If a collection scan is providing the data, we may
                // have to expand an array.
                BSONElementSet elts;
                obj.getFieldsDotted(key, elts);

                for (BSONElementSet::iterator it = elts.begin(); it != elts.end(); ++it) {
                    BSONElement elt = *it;
                    if (values.count(elt)) { continue; }
                    int currentBufPos = bb.len();

                    uassert(17217, "distinct too big, 16mb cap",
                            (currentBufPos + elt.size() + 1024) < bufSize);

                    arr.append(elt);
                    BSONElement x(start + currentBufPos);
                    values.insert(x);
                }
            }
            TypeExplain* bareExplain;
            Status res = runner->getInfo(&bareExplain, NULL);
            if (res.isOK()) {
                auto_ptr<TypeExplain> explain(bareExplain);
                if (explain->isCursorSet()) {
                    cursorName = explain->getCursor();
                }
                n = explain->getN();
                nscanned = explain->getNScanned();
                nscannedObjects = explain->getNScannedObjects();
            }

            verify( start == bb.buf() );

            result.appendArray( "values" , arr.done() );

            {
                BSONObjBuilder b;
                b.appendNumber( "n" , n );
                b.appendNumber( "nscanned" , nscanned );
                b.appendNumber( "nscannedObjects" , nscannedObjects );
                b.appendNumber( "timems" , t.millis() );
                b.append( "cursor" , cursorName );
                result.append( "stats" , b.obj() );
            }

            return true;
        }
Esempio n. 14
0
    bool debug( const BSONObj& o , int depth=0) {
        string prefix = "";
        for ( int i=0; i<depth; i++ ) {
            prefix += "\t\t\t";
        }

        int read = 4;

        try {
            cout << prefix << "--- new object ---\n";
            cout << prefix << "\t size : " << o.objsize() << "\n";

            // Note: this will recursively check each level of the bson and will also be called by
            // this function at each level. While inefficient, it shouldn't effect correctness.
            const Status status = validateBSON(o.objdata(), o.objsize());
            if (!status.isOK()) {
                cout << prefix << "\t OBJECT IS INVALID: " << status.reason() << '\n'
                     << prefix << "\t attempting to print as much as possible" << endl;
            }
            
            BSONObjIterator i(o);
            while ( i.more() ) {
                // This call verifies it is safe to call size() and fieldName() but doesn't check
                // whether the element extends past the end of the object. That is done below.
                BSONElement e = i.next(/*checkEnd=*/true);

                cout << prefix << "\t\t " << e.fieldName() << "\n"
                     << prefix << "\t\t\t type:" << setw(3) << e.type() << " size: " << e.size()
                     << endl;

                if ( ( read + e.size() ) > o.objsize() ) {
                    cout << prefix << " SIZE DOES NOT WORK" << endl;
                    return false;
                }
                read += e.size();
                try {
                    if ( e.isABSONObj() ) {
                        if ( ! debug( e.Obj() , depth + 1 ) ) {
                            //return false;
                            cout << prefix << "\t\t\t BAD BAD BAD" << endl;
                            
                            if ( e.size() < 1000 ) {
                                cout << "---\n" << e.Obj().hexDump() << "\n---" << endl;
                            }
                        }
                    }
                    else if ( e.type() == String && ! isValidUTF8( e.valuestr() ) ) {
                        cout << prefix << "\t\t\t" << "bad utf8 String!" << endl;
                    }
                    else if ( logger::globalLogDomain()->shouldLog(logger::LogSeverity::Debug(1)) ) {
                        cout << prefix << "\t\t\t" << e << endl;
                    }
                }
                catch ( std::exception& e ) {
                    cout << prefix << "\t\t\t bad value: " << e.what() << endl;
                }
            }
        }
        catch ( std::exception& e ) {
            cout << prefix << "\tbad\t" << e.what() << endl;
            cout << "----\n" << o.hexDump() << "\n---" << endl;
        }
        return true;
    }
Esempio n. 15
0
        /**
         * "distinct" command entry point, Cursor/ClientCursor-based variant.
         *
         * Scans the namespace with a cursor, collects the distinct values of the field named
         * by cmdObj["key"] among the matching entries, and writes them to result["values"]
         * together with counters in result["stats"].
         *
         * @param dbname   database name; joined with the string value of cmdObj's first
         *                 element to form the namespace.
         * @param cmdObj   command document; "key" is read here, the query via getQuery().
         * @param errmsg   not written by this function.
         * @param result   builder that receives "values" and "stats".
         * @param fromRepl not read by this function.
         * @return true on every path that returns normally.
         */
        bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
            Timer t;
            string ns = dbname + '.' + cmdObj.firstElement().valuestr();

            string key = cmdObj["key"].valuestrsafe();
            // Not read again below.
            BSONObj keyPattern = BSON( key << 1 );

            BSONObj query = getQuery( cmdObj );

            // The output array is built directly in this buffer; the uassert in the scan loop
            // keeps the write position at least 1024 bytes under bufSize.
            int bufSize = BSONObjMaxUserSize - 4096;
            BufBuilder bb( bufSize );
            char * start = bb.buf();

            BSONArrayBuilder arr( bb );
            // Members of this set are constructed from addresses inside bb (see the insert
            // in the scan loop).
            BSONElementSet values;

            long long nscanned = 0; // locations looked at
            long long nscannedObjects = 0; // full objects looked at
            long long n = 0; // matches
            MatchDetails md;

            NamespaceDetails * d = nsdetails( ns.c_str() );

            if ( ! d ) {
                // Unknown namespace: report an empty value list and all-zero stats.
                result.appendArray( "values" , BSONObj() );
                result.append( "stats" , BSON( "n" << 0 << "nscanned" << 0 << "nscannedObjects" << 0 ) );
                return true;
            }

            shared_ptr<Cursor> cursor;
            if ( ! query.isEmpty() ) {
                cursor = NamespaceDetailsTransient::getCursor(ns.c_str() , query , BSONObj() );
            }
            else {

                // query is empty, so lets see if we can find an index
                // with the key so we don't have to hit the raw data
                NamespaceDetails::IndexIterator ii = d->ii();
                while ( ii.more() ) {
                    IndexDetails& idx = ii.next();

                    // Multikey indexes are skipped. NOTE(review): ii.pos() - 1 is presumably
                    // the position of the index next() just returned -- confirm.
                    if ( d->isMultikey( ii.pos() - 1 ) )
                        continue;

                    if ( idx.inKeyPattern( key ) ) {
                        cursor = bestGuessCursor( ns.c_str() , BSONObj() , idx.keyPattern() );
                        if( cursor.get() ) break;
                    }

                }

                // No usable index cursor was found: fall back to the same call used for a
                // non-empty query.
                if ( ! cursor.get() )
                    cursor = NamespaceDetailsTransient::getCursor(ns.c_str() , query , BSONObj() );

            }

            
            assert( cursor );
            string cursorName = cursor->toString();
            
            auto_ptr<ClientCursor> cc (new ClientCursor(QueryOption_NoCursorTimeout, cursor, ns));

            while ( cursor->ok() ) {
                nscanned++;
                bool loadedObject = false;

                // Count the entry only if it matches (or the cursor has no matcher) and
                // getsetdup() reports false for its location.
                if ( ( !cursor->matcher() || cursor->matcher()->matchesCurrent( cursor.get() , &md ) ) &&
                    !cursor->getsetdup( cursor->currLoc() ) ) {
                    n++;

                    BSONElementSet temp;
                    // NOTE(review): the return value is negated to obtain loadedObject, so
                    // getFieldsDotted presumably returns true when the values could be taken
                    // without loading the full object -- confirm.
                    loadedObject = ! cc->getFieldsDotted( key , temp );

                    for ( BSONElementSet::iterator i=temp.begin(); i!=temp.end(); ++i ) {
                        BSONElement e = *i;
                        if ( values.count( e ) )
                            continue;

                        // Write position before the append; the appended element starts here.
                        int now = bb.len();

                        uassert(10044,  "distinct too big, 16mb cap", ( now + e.size() + 1024 ) < bufSize );

                        arr.append( e );
                        // Build the set member from the copy inside bb rather than from e.
                        BSONElement x( start + now );

                        values.insert( x );
                    }
                }

                if ( loadedObject || md._loadedObject )
                    nscannedObjects++;

                cursor->advance();

                // NOTE(review): when yieldSometimes() returns false the auto_ptr gives up
                // ownership without deleting; presumably the ClientCursor was already
                // destroyed during the yield -- confirm against ClientCursor's contract.
                if (!cc->yieldSometimes( ClientCursor::MaybeCovered )) {
                    cc.release();
                    break;
                }

                RARELY killCurrentOp.checkForInterrupt();
            }

            // The set members were built from `start`, so the buffer must still begin at the
            // same address.
            assert( start == bb.buf() );

            result.appendArray( "values" , arr.done() );

            {
                BSONObjBuilder b;
                b.appendNumber( "n" , n );
                b.appendNumber( "nscanned" , nscanned );
                b.appendNumber( "nscannedObjects" , nscannedObjects );
                b.appendNumber( "timems" , t.millis() );
                b.append( "cursor" , cursorName );
                result.append( "stats" , b.obj() );
            }

            return true;
        }
Esempio n. 16
0
        /**
         * actually applies a reduce, to a list of tuples (key, value).
         * After the call, tuples will hold a single tuple {"0": key, "1": value}
         */
        void JSReducer::_reduce( const BSONList& tuples , BSONObj& key , int& endSizeEstimate ) {
            uassert( 10074 ,  "need values" , tuples.size() );

            int sizeEstimate = ( tuples.size() * tuples.begin()->getField( "value" ).size() ) + 128;

            // need to build the reduce args: ( key, [values] )
            BSONObjBuilder reduceArgs( sizeEstimate );
            boost::scoped_ptr<BSONArrayBuilder>  valueBuilder;
            int sizeSoFar = 0;
            unsigned n = 0;
            for ( ; n<tuples.size(); n++ ) {
                BSONObjIterator j(tuples[n]);
                BSONElement keyE = j.next();
                if ( n == 0 ) {
                    reduceArgs.append( keyE );
                    key = keyE.wrap();
                    sizeSoFar = 5 + keyE.size();
                    valueBuilder.reset(new BSONArrayBuilder( reduceArgs.subarrayStart( "tuples" ) ));
                }

                BSONElement ee = j.next();

                uassert( 13070 , "value too large to reduce" , ee.size() < ( BSONObjMaxUserSize / 2 ) );

                if ( sizeSoFar + ee.size() > BSONObjMaxUserSize ) {
                    assert( n > 1 ); // if not, inf. loop
                    break;
                }

                valueBuilder->append( ee );
                sizeSoFar += ee.size();
            }
            assert(valueBuilder);
            valueBuilder->done();
            BSONObj args = reduceArgs.obj();

            Scope * s = _func.scope();

            s->invokeSafe( _func.func() , args );

            if ( s->type( "return" ) == Array ) {
                uasserted( 10075 , "reduce -> multiple not supported yet");
                return;
            }

            endSizeEstimate = key.objsize() + ( args.objsize() / tuples.size() );

            if ( n == tuples.size() )
                return;

            // the input list was too large, add the rest of elmts to new tuples and reduce again
            // note: would be better to use loop instead of recursion to avoid stack overflow
            BSONList x;
            for ( ; n < tuples.size(); n++ ) {
                x.push_back( tuples[n] );
            }
            BSONObjBuilder temp( endSizeEstimate );
            temp.append( key.firstElement() );
            s->append( temp , "1" , "return" );
            x.push_back( temp.obj() );
            _reduce( x , key , endSizeEstimate );
        }
Esempio n. 17
0
        bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
            Timer t;
            string ns = dbname + '.' + cmdObj.firstElement().valuestr();

            string key = cmdObj["key"].valuestrsafe();
            BSONObj keyPattern = BSON( key << 1 );

            BSONObj query = getQuery( cmdObj );

            int bufSize = BSONObjMaxUserSize - 4096;
            BufBuilder bb( bufSize );
            char * start = bb.buf();

            BSONArrayBuilder arr( bb );
            BSONElementSet values;

            long long nscanned = 0; // locations looked at
            long long nscannedObjects = 0; // full objects looked at
            long long n = 0; // matches
            MatchDetails md;

            Collection *cl = getCollection( ns );

            if ( ! cl ) {
                result.appendArray( "values" , BSONObj() );
                result.append( "stats" , BSON( "n" << 0 << "nscanned" << 0 << "nscannedObjects" << 0 ) );
                return true;
            }

            shared_ptr<Cursor> cursor;
            if ( ! query.isEmpty() ) {
                cursor = getOptimizedCursor(ns.c_str() , query , BSONObj() );
            }
            else {

                // query is empty, so lets see if we can find an index
                // with the key so we don't have to hit the raw data
                for (int i = 0; i < cl->nIndexes(); i++) {
                    IndexDetails &idx = cl->idx(i);
                    if (cl->isMultikey(i)) {
                        continue;
                    }

                    if ( idx.inKeyPattern( key ) ) {
                        cursor = getBestGuessCursor( ns.c_str() ,
                                                     BSONObj() ,
                                                     idx.keyPattern() );
                        if( cursor.get() ) break;
                    }

                }

                if ( ! cursor.get() ) {
                    cursor = getOptimizedCursor(ns.c_str() , query , BSONObj() );
                }

            }

            
            verify( cursor );
            string cursorName = cursor->toString();
            
            auto_ptr<ClientCursor> cc (new ClientCursor(QueryOption_NoCursorTimeout, cursor, ns));

            for ( ; cursor->ok(); cursor->advance() ) {
                nscanned++;
                bool loadedRecord = false;

                if ( cursor->currentMatches( &md ) && !cursor->getsetdup( cursor->currPK() ) ) {
                    n++;

                    BSONObj holder;
                    BSONElementSet temp;
                    loadedRecord = ! cc->getFieldsDotted( key , temp, holder );

                    for ( BSONElementSet::iterator i=temp.begin(); i!=temp.end(); ++i ) {
                        BSONElement e = *i;
                        if ( values.count( e ) )
                            continue;

                        int now = bb.len();

                        uassert(10044,  "distinct too big, 16mb cap", ( now + e.size() + 1024 ) < bufSize );

                        arr.append( e );
                        BSONElement x( start + now );

                        values.insert( x );
                    }
                }

                if ( loadedRecord || md.hasLoadedRecord() )
                    nscannedObjects++;

                RARELY killCurrentOp.checkForInterrupt();
            }

            verify( start == bb.buf() );

            result.appendArray( "values" , arr.done() );

            {
                BSONObjBuilder b;
                b.appendNumber( "n" , n );
                b.appendNumber( "nscanned" , nscanned );
                b.appendNumber( "nscannedObjects" , nscannedObjects );
                b.appendNumber( "timems" , t.millis() );
                b.append( "cursor" , cursorName );
                result.append( "stats" , b.obj() );
            }

            return true;
        }
Esempio n. 18
0
    /**
     * "distinct" command entry point, PlanExecutor-based variant.
     *
     * Validates the command's "key" and "query" fields, then gathers the distinct values
     * of the key field from the documents the executor returns. The values are written to
     * result["values"] and summary figures to result["stats"].
     *
     * @param txn    operation context passed on to the collection lookup and the executor.
     * @param dbname database name, passed to parseNs() with the command.
     * @param cmdObj command document; "key" must be a String (uassert 18510) and "query",
     *               when present and not null, must be an Object (uassert 18511).
     * @param errmsg not written by this function.
     * @param result builder that receives "values" and "stats".
     * @return true on the normal paths. The `return false` after uasserted() is only
     *         reached if uasserted() returns.
     */
    bool run(OperationContext* txn,
             const string& dbname,
             BSONObj& cmdObj,
             int,
             string& errmsg,
             BSONObjBuilder& result) {
        Timer elapsed;

        // The key has to be a string.
        const BSONElement keyElt = cmdObj["key"];
        uassert(18510,
                mongoutils::str::stream() << "The first argument to the distinct command "
                                          << "must be a string but was a "
                                          << typeName(keyElt.type()),
                keyElt.type() == mongo::String);

        // The filter, when supplied and not null, has to be a document.
        const BSONElement queryElt = cmdObj["query"];
        if (!queryElt.isNull() && !queryElt.eoo()) {
            uassert(18511,
                    mongoutils::str::stream() << "The query for the distinct command must be a "
                                              << "document but was a "
                                              << typeName(queryElt.type()),
                    queryElt.type() == mongo::Object);
        }

        string fieldPath = keyElt.valuestrsafe();
        // Not read again below.
        BSONObj keyPattern = BSON(fieldPath << 1);

        BSONObj filter = getQuery(cmdObj);

        // The output array is built directly in this buffer; the uassert in the loop below
        // keeps the write position at least 1024 bytes under `capacity`.
        int capacity = BSONObjMaxUserSize - 4096;
        BufBuilder buffer(capacity);
        char* bufferBase = buffer.buf();

        BSONArrayBuilder distinctArr(buffer);
        // Members of this set are constructed from addresses inside `buffer`.
        BSONElementSet seen;

        const string ns = parseNs(dbname, cmdObj);
        AutoGetCollectionForRead ctx(txn, ns);

        Collection* collection = ctx.getCollection();
        if (NULL == collection) {
            // No such collection: report an empty value list and all-zero stats.
            result.appendArray("values", BSONObj());
            result.append("stats", BSON("n" << 0 << "nscanned" << 0 << "nscannedObjects" << 0));
            return true;
        }

        auto statusWithPlanExecutor =
            getExecutorDistinct(txn, collection, filter, fieldPath, PlanExecutor::YIELD_AUTO);
        if (!statusWithPlanExecutor.isOK()) {
            uasserted(17216,
                      mongoutils::str::stream() << "Can't get executor for query " << filter << ": "
                                                << statusWithPlanExecutor.getStatus().toString());
            return false;
        }

        unique_ptr<PlanExecutor> exec = std::move(statusWithPlanExecutor.getValue());

        BSONObj doc;
        // NOTE(review): the state that ends the loop is never inspected afterwards, so any
        // non-ADVANCED state simply stops the scan -- confirm this is intended.
        PlanExecutor::ExecState state;
        for (;;) {
            state = exec->getNext(&doc, NULL);
            if (PlanExecutor::ADVANCED != state) {
                break;
            }

            // Distinct expands arrays.
            //
            // For a covered query each value of the key should already be in the index key
            // and available without this call. When a collection scan supplies the data an
            // array may have to be expanded here.
            BSONElementSet found;
            doc.getFieldsDotted(fieldPath, found);

            for (BSONElementSet::iterator it = found.begin(); it != found.end(); ++it) {
                BSONElement candidate = *it;
                if (seen.count(candidate) != 0) {
                    continue;
                }

                const int writeOffset = buffer.len();

                uassert(17217,
                        "distinct too big, 16mb cap",
                        (writeOffset + candidate.size() + 1024) < capacity);

                distinctArr.append(candidate);
                // Remember the copy that starts at the pre-append write position of the
                // buffer rather than the element taken from `doc`.
                seen.insert(BSONElement(bufferBase + writeOffset));
            }
        }

        // Summary figures for the executed plan.
        PlanSummaryStats stats;
        Explain::getSummaryStats(*exec, &stats);

        // The set members were built from `bufferBase`, so the buffer must still begin at
        // the same address.
        verify(bufferBase == buffer.buf());

        result.appendArray("values", distinctArr.done());

        {
            BSONObjBuilder statsBuilder;
            statsBuilder.appendNumber("n", stats.nReturned);
            statsBuilder.appendNumber("nscanned", stats.totalKeysExamined);
            statsBuilder.appendNumber("nscannedObjects", stats.totalDocsExamined);
            statsBuilder.appendNumber("timems", elapsed.millis());
            statsBuilder.append("planSummary", Explain::getPlanSummary(exec.get()));
            result.append("stats", statsBuilder.obj());
        }

        return true;
    }