Ejemplo n.º 1
0
    bool CoveredIndexMatcher::matches( const BSONObj& key, const DiskLoc& recLoc,
                                       MatchDetails* details, bool keyUsable ) const {

        LOG(5) << "CoveredIndexMatcher::matches() " << key.toString() << ' ' << recLoc.toString() << ' ' << keyUsable << endl;

        dassert( key.isValid() );

        if ( details )
            details->resetOutput();

        if ( keyUsable ) {
            if ( !_keyMatcher.matches(key, details ) ) {
                return false;
            }
            bool needRecordForDetails = details && details->needRecord();
            if ( !_needRecord && !needRecordForDetails ) {
                return true;
            }
        }

        if ( details )
            details->setLoadedRecord( true );

        BSONObj obj = recLoc.obj();
        bool res =
            _docMatcher->matches( obj, details ) &&
            !isOrClauseDup( obj );
        LOG(5) << "CoveredIndexMatcher _docMatcher->matches() returns " << res << endl;
        return res;
    }
Ejemplo n.º 2
0
    bool Helpers::findById(Client& c, const char *ns, BSONObj query, BSONObj& result ,
                           bool * nsFound , bool * indexFound ) {
        Lock::assertAtLeastReadLocked(ns);
        Database *database = c.database();
        verify( database );
        NamespaceDetails *d = database->namespaceIndex.details(ns);
        if ( ! d )
            return false;
        if ( nsFound )
            *nsFound = 1;

        int idxNo = d->findIdIndex();
        if ( idxNo < 0 )
            return false;
        if ( indexFound )
            *indexFound = 1;

        IndexDetails& i = d->idx( idxNo );

        BSONObj key = i.getKeyFromQuery( query );

        DiskLoc loc = QueryRunner::fastFindSingle(i, key);
        if ( loc.isNull() )
            return false;
        result = loc.obj();
        return true;
    }
Ejemplo n.º 3
0
    DiskLoc _repairExtent( Database* db , string ns, bool forward , DiskLoc eLoc ){
        LogIndentLevel lil;
        
        if ( eLoc.getOfs() <= 0 ){
            error() << "invalid extent ofs: " << eLoc.getOfs() << endl;
            return DiskLoc();
        }
        

        MongoDataFile * mdf = db->getFile( eLoc.a() );

        Extent * e = mdf->debug_getExtent( eLoc );
        if ( ! e->isOk() ){
            warning() << "Extent not ok magic: " << e->magic << " going to try to continue" << endl;
        }
        
        log() << "length:" << e->length << endl;
        
        LogIndentLevel lil2;
        
        DiskLoc loc = forward ? e->firstRecord : e->lastRecord;
        while ( ! loc.isNull() ){
            if ( loc.getOfs() <= 0 ){
                error() << "offset is 0 for record which should be impossible" << endl;
                break;
            }
            log() << loc << endl;
            Record* rec = loc.rec();
            log() << loc.obj() << endl;
            loc = forward ? rec->getNext( loc ) : rec->getPrev( loc );
        }
        return forward ? e->xnext : e->xprev;
        
    }
Ejemplo n.º 4
0
 // Consider the point in loc, and keep it if it's within _maxDistance (and we have space for
 // it)
 void consider(const DiskLoc& loc) {
     if (limitReached()) return;
     Point p(loc.obj().getFieldDotted(_geoField));
     if (distance(_near, p) > _maxDistance)
         return;
     _locs.push_back(loc);
 }
Ejemplo n.º 5
0
 /* fetch a single object from collection ns that matches query
    set your db SavedContext first
 */
 bool Helpers::findOne(Collection* collection, const BSONObj &query, BSONObj& result, bool requireIndex) {
     DiskLoc loc = findOne( collection, query, requireIndex );
     if ( loc.isNull() )
         return false;
     result = loc.obj();
     return true;
 }
Ejemplo n.º 6
0
    PlanStage::StageState TextStage::returnResults(WorkingSetID* out) {
        if (_scoreIterator == _scores.end()) {
            _internalState = DONE;
            return PlanStage::IS_EOF;
        }

        // Filter for phrases and negative terms, score and truncate.
        DiskLoc loc = _scoreIterator->first;
        double score = _scoreIterator->second;
        _scoreIterator++;

        // Ignore non-matched documents.
        if (score < 0) {
            return PlanStage::NEED_TIME;
        }

        // Filter for phrases and negated terms
        if (_params.query.hasNonTermPieces()) {
            if (!_ftsMatcher.matchesNonTerm(loc.obj())) {
                return PlanStage::NEED_TIME;
            }
        }

        *out = _ws->allocate();
        WorkingSetMember* member = _ws->get(*out);
        member->loc = loc;
        member->obj = member->loc.obj();
        member->state = WorkingSetMember::LOC_AND_UNOWNED_OBJ;
        member->addComputed(new TextScoreComputedData(score));
        return PlanStage::ADVANCED;
    }
Ejemplo n.º 7
0
 /* fetch a single object from collection ns that matches query
    set your db SavedContext first
 */
 bool Helpers::findOne(const StringData& ns, const BSONObj &query, BSONObj& result, bool requireIndex) {
     DiskLoc loc = findOne( ns, query, requireIndex );
     if ( loc.isNull() )
         return false;
     result = loc.obj();
     return true;
 }
Ejemplo n.º 8
0
    long long Helpers::removeRange( const string& ns , const BSONObj& min , const BSONObj& max , bool yield , bool maxInclusive , RemoveCallback * callback, bool fromMigrate ) {
        BSONObj keya , keyb;
        BSONObj minClean = toKeyFormat( min , keya );
        BSONObj maxClean = toKeyFormat( max , keyb );
        verify( keya == keyb );

        Client::Context ctx(ns);

        shared_ptr<Cursor> c;
        auto_ptr<ClientCursor> cc;
        {
            NamespaceDetails* nsd = nsdetails( ns.c_str() );
            if ( ! nsd )
                return 0;
            
            int ii = nsd->findIndexByKeyPattern( keya );
            verify( ii >= 0 );
            
            IndexDetails& i = nsd->idx( ii );
            
            c.reset( BtreeCursor::make( nsd , ii , i , minClean , maxClean , maxInclusive, 1 ) );
            cc.reset( new ClientCursor( QueryOption_NoCursorTimeout , c , ns ) );
            cc->setDoingDeletes( true );
        }

        long long num = 0;

        while ( cc->ok() ) {

            if ( yield && ! cc->yieldSometimes( ClientCursor::WillNeed) ) {
                // cursor got finished by someone else, so we're done
                cc.release(); // if the collection/db is dropped, cc may be deleted
                break;
            }

            if ( ! cc->ok() )
                break;

            DiskLoc rloc = cc->currLoc();

            if ( callback )
                callback->goingToDelete( cc->current() );

            cc->advance();
            c->prepareToTouchEarlierIterate();

            logOp( "d" , ns.c_str() , rloc.obj()["_id"].wrap() , 0 , 0 , fromMigrate );
            theDataFileMgr.deleteRecord(ns.c_str() , rloc.rec(), rloc);
            num++;

            c->recoverFromTouchingEarlierIterate();

            getDur().commitIfNeeded();


        }

        return num;
    }
Ejemplo n.º 9
0
 void IndexSpec::reset( const DiskLoc& loc ){
     info = loc.obj();
     keyPattern = info["key"].embeddedObjectUserCheck();
     if ( keyPattern.objsize() == 0 ) {
         out() << info.toString() << endl;
         assert(false);
     }
     _init();
 }
Ejemplo n.º 10
0
    void ClientCursor::storeOpForSlave( DiskLoc last ) {
        if ( ! ( _queryOptions & QueryOption_OplogReplay ))
            return;

        if ( last.isNull() )
            return;

        BSONElement e = last.obj()["ts"];
        if ( e.type() == Date || e.type() == Timestamp )
            _slaveReadTill = e._opTime();
    }
Ejemplo n.º 11
0
    /**
     * analyzeDiskStorage helper which processes a single record.
     */
    void processRecord(const DiskLoc& dl, const DiskLoc& prevDl, const Record* r, int extentOfs,
                       const AnalyzeParams& params, vector<DiskStorageData>& sliceData,
                       BSONArrayBuilder* recordsArrayBuilder) {
        killCurrentOp.checkForInterrupt();

        BSONObj obj = dl.obj();
        int recBytes = r->lengthWithHeaders();
        double characteristicFieldValue = 0;
        bool hasCharacteristicField = extractCharacteristicFieldValue(obj, params,
                                                                      characteristicFieldValue);
        bool isLocatedBeforePrevious = dl.a() < prevDl.a();

        RecPos pos = RecPos::from(dl.getOfs(), recBytes, extentOfs, params);
        bool spansRequestedArea = false;
        for (RecPos::SliceIterator it = pos.iterateSlices(); !it.end(); ++it) {
            spansRequestedArea = true;
            DiskStorageData& slice = sliceData[it->sliceNum];
            slice.numEntries += it->ratioHere;
            slice.recBytes += it->sizeHere;
            slice.bsonBytes += static_cast<long long>(it->ratioHere * obj.objsize());
            if (hasCharacteristicField) {
                slice.characteristicCount += it->ratioHere;
                slice.characteristicSum += it->ratioHere * characteristicFieldValue;
            }
            if (isLocatedBeforePrevious) {
                slice.outOfOrderRecs += it->ratioHere;
            }
        }

        if (recordsArrayBuilder != NULL && spansRequestedArea) {
            DEV {
                int startsAt = dl.getOfs() - extentOfs;
                int endsAt = startsAt + recBytes;
                verify((startsAt < params.startOfs && endsAt > params.startOfs) ||
                       (startsAt < params.endOfs && endsAt >= params.endOfs) ||
                       (startsAt >= params.startOfs && endsAt < params.endOfs));
            }
            BSONObjBuilder recordBuilder(recordsArrayBuilder->subobjStart());
            recordBuilder.append("ofs", dl.getOfs() - extentOfs);
            recordBuilder.append("recBytes", recBytes);
            recordBuilder.append("bsonBytes", obj.objsize());
            recordBuilder.append("_id", obj["_id"]);
            if (hasCharacteristicField) {
                recordBuilder.append("characteristic", characteristicFieldValue);
            }
            recordBuilder.doneFast();
        }
Ejemplo n.º 12
0
    long long Helpers::removeRange( const string& ns , const BSONObj& min , const BSONObj& max , bool yield , bool maxInclusive , RemoveCallback * callback ) {
        BSONObj keya , keyb;
        BSONObj minClean = toKeyFormat( min , keya );
        BSONObj maxClean = toKeyFormat( max , keyb );
        assert( keya == keyb );

        Client::Context ctx(ns);
        NamespaceDetails* nsd = nsdetails( ns.c_str() );
        if ( ! nsd )
            return 0;

        int ii = nsd->findIndexByKeyPattern( keya );
        assert( ii >= 0 );

        long long num = 0;

        IndexDetails& i = nsd->idx( ii );

        shared_ptr<Cursor> c( new BtreeCursor( nsd , ii , i , minClean , maxClean , maxInclusive, 1 ) );
        auto_ptr<ClientCursor> cc( new ClientCursor( QueryOption_NoCursorTimeout , c , ns ) );
        cc->setDoingDeletes( true );

        while ( c->ok() ) {
            DiskLoc rloc = c->currLoc();
            BSONObj key = c->currKey();

            if ( callback )
                callback->goingToDelete( c->current() );

            c->advance();
            c->noteLocation();

            logOp( "d" , ns.c_str() , rloc.obj()["_id"].wrap() );
            theDataFileMgr.deleteRecord(ns.c_str() , rloc.rec(), rloc);
            num++;

            c->checkLocation();

            if ( yield && ! cc->yieldSometimes() ) {
                // cursor got finished by someone else, so we're done
                cc.release(); // if the collection/db is dropped, cc may be deleted
                break;
            }
        }

        return num;
    }
Ejemplo n.º 13
0
    bool BtreeBasedAccessMethod::removeOneKey(const BSONObj& key, const DiskLoc& loc) {
        bool ret = false;

        try {
            ret = _newInterface->unindex(key, loc);
        } catch (AssertionException& e) {
            problem() << "Assertion failure: _unindex failed "
                << _descriptor->indexNamespace() << endl;
            out() << "Assertion failure: _unindex failed: " << e.what() << '\n';
            out() << "  obj:" << loc.obj().toString() << '\n';
            out() << "  key:" << key.toString() << '\n';
            out() << "  dl:" << loc.toString() << endl;
            logContext();
        }

        return ret;
    }
Ejemplo n.º 14
0
    /**
     * @param loc the location in system.indexes where the index spec is
     */
    void NOINLINE_DECL insert_makeIndex(Collection* collectionToIndex,
                                        const DiskLoc& loc,
                                        bool mayInterrupt) {
        uassert(13143,
                "can't create index on system.indexes",
                collectionToIndex->ns().coll() != "system.indexes");

        BSONObj info = loc.obj();
        std::string idxName = info["name"].valuestr();

        // Set curop description before setting indexBuildInProg, so that there's something
        // commands can find and kill as soon as indexBuildInProg is set. Only set this if it's a
        // killable index, so we don't overwrite commands in currentOp.
        if (mayInterrupt) {
            cc().curop()->setQuery(info);
        }

        IndexCatalog::IndexBuildBlock indexBuildBlock( collectionToIndex->getIndexCatalog(), idxName, loc );
        verify( indexBuildBlock.indexDetails() );

        try {
            buildAnIndex( collectionToIndex->ns(), collectionToIndex->details(),
                          *indexBuildBlock.indexDetails(), mayInterrupt);
            indexBuildBlock.success();
        }
        catch (DBException& e) {
            // save our error msg string as an exception or dropIndexes will overwrite our message
            LastError *le = lastError.get();
            int savecode = 0;
            string saveerrmsg;
            if ( le ) {
                savecode = le->code;
                saveerrmsg = le->msg;
            }
            else {
                savecode = e.getCode();
                saveerrmsg = e.what();
            }

            verify(le && !saveerrmsg.empty());
            setLastError(savecode,saveerrmsg.c_str());
            throw;
        }
    }
Ejemplo n.º 15
0
    // Remove the provided doc from the index.
    Status BtreeBasedAccessMethod::remove(const BSONObj &obj, const DiskLoc& loc,
        const InsertDeleteOptions &options, int64_t* numDeleted) {

        BSONObjSet keys;
        getKeys(obj, &keys);
        *numDeleted = 0;

        for (BSONObjSet::const_iterator i = keys.begin(); i != keys.end(); ++i) {
            bool thisKeyOK = removeOneKey(*i, loc);

            if (thisKeyOK) {
                ++*numDeleted;
            } else if (options.logIfError) {
                log() << "unindex failed (key too big?) " << _descriptor->indexNamespace()
                      << " key: " << *i << " " << loc.obj()["_id"] << endl;
            }
        }

        return Status::OK();
    }
Ejemplo n.º 16
0
    bool CoveredIndexMatcher::matches(const BSONObj &key, const DiskLoc &recLoc , MatchDetails * details , bool keyUsable ) {
        if ( details )
            details->reset();

        if ( keyUsable ) {

            if ( !_keyMatcher.matches(key, details ) ) {
                return false;
            }

            if ( ! _needRecord ) {
                return true;
            }

        }

        if ( details )
            details->loadedObject = true;

        return _docMatcher->matches(recLoc.obj() , details );
    }
Ejemplo n.º 17
0
    /* ns:      namespace, e.g. <database>.<collection>
       pattern: the "where" clause / criteria
       justOne: stop after 1 match
       god:     allow access to system namespaces, and don't yield
    */
    long long deleteObjects(const char *ns, BSONObj pattern, bool justOneOrig, bool logop, bool god, RemoveSaver * rs ) {
        if( !god ) {
            if ( strstr(ns, ".system.") ) {
                /* note a delete from system.indexes would corrupt the db
                if done here, as there are pointers into those objects in
                NamespaceDetails.
                */
                uassert(12050, "cannot delete from system namespace", legalClientSystemNS( ns , true ) );
            }
            if ( strchr( ns , '$' ) ) {
                log() << "cannot delete from collection with reserved $ in name: " << ns << endl;
                uassert( 10100 ,  "cannot delete from collection with reserved $ in name", strchr(ns, '$') == 0 );
            }
        }

        {
            NamespaceDetails *d = nsdetails( ns );
            if ( ! d )
                return 0;
            uassert( 10101 ,  "can't remove from a capped collection" , ! d->capped );
        }

        long long nDeleted = 0;

        shared_ptr< Cursor > creal = NamespaceDetailsTransient::getCursor( ns, pattern, BSONObj(), false, 0 );

        if( !creal->ok() )
            return nDeleted;

        shared_ptr< Cursor > cPtr = creal;
        auto_ptr<ClientCursor> cc( new ClientCursor( QueryOption_NoCursorTimeout, cPtr, ns) );
        cc->setDoingDeletes( true );

        CursorId id = cc->cursorid();

        bool justOne = justOneOrig;
        bool canYield = !god && !(creal->matcher() && creal->matcher()->docMatcher().atomic());

        do {
            // TODO: we can generalize this I believe
            //       
            bool willNeedRecord = (creal->matcher() && creal->matcher()->needRecord()) || pattern.isEmpty() || isSimpleIdQuery( pattern );
            if ( ! willNeedRecord ) {
                // TODO: this is a total hack right now
                // check if the index full encompasses query
                
                if ( pattern.nFields() == 1 && 
                     str::equals( pattern.firstElement().fieldName() , creal->indexKeyPattern().firstElement().fieldName() ) )
                    willNeedRecord = true;
            }
            
            if ( canYield && ! cc->yieldSometimes( willNeedRecord ? ClientCursor::WillNeed : ClientCursor::MaybeCovered ) ) {
                cc.release(); // has already been deleted elsewhere
                // TODO should we assert or something?
                break;
            }
            if ( !cc->ok() ) {
                break; // if we yielded, could have hit the end
            }

            // this way we can avoid calling updateLocation() every time (expensive)
            // as well as some other nuances handled
            cc->setDoingDeletes( true );

            DiskLoc rloc = cc->currLoc();
            BSONObj key = cc->currKey();

            bool match = creal->currentMatches();
            bool dup = cc->c()->getsetdup(rloc);

            if ( ! cc->advance() )
                justOne = true;

            if ( ! match )
                continue;

            assert( !dup ); // can't be a dup, we deleted it!

            if ( !justOne ) {
                /* NOTE: this is SLOW.  this is not good, noteLocation() was designed to be called across getMore
                    blocks.  here we might call millions of times which would be bad.
                    */
                cc->c()->prepareToTouchEarlierIterate();
            }

            if ( logop ) {
                BSONElement e;
                if( BSONObj( rloc.rec() ).getObjectID( e ) ) {
                    BSONObjBuilder b;
                    b.append( e );
                    bool replJustOne = true;
                    logOp( "d", ns, b.done(), 0, &replJustOne );
                }
                else {
                    problem() << "deleted object without id, not logging" << endl;
                }
            }

            if ( rs )
                rs->goingToDelete( rloc.obj() /*cc->c->current()*/ );

            theDataFileMgr.deleteRecord(ns, rloc.rec(), rloc);
            nDeleted++;
            if ( justOne ) {
                break;
            }
            cc->c()->recoverFromTouchingEarlierIterate();
         
            if( !god ) 
                getDur().commitIfNeeded();

            if( debug && god && nDeleted == 100 ) 
                log() << "warning high number of deletes with god=true which could use significant memory" << endl;
        }
        while ( cc->ok() );

        if ( cc.get() && ClientCursor::find( id , false ) == 0 ) {
            // TODO: remove this and the id declaration above if this doesn't trigger
            //       if it does, then i'm very confused (ERH 06/2011)
            error() << "this should be impossible" << endl;
            printStackTrace();
            cc.release();
        }

        return nDeleted;
    }
Ejemplo n.º 18
0
    DiskLoc _repairExtent( Database* db , string ns, bool forward , DiskLoc eLoc , Writer& w ){
        LogIndentLevel lil;
        
        if ( eLoc.getOfs() <= 0 ){
            error() << "invalid extent ofs: " << eLoc.getOfs() << endl;
            return DiskLoc();
        }
        

        MongoDataFile * mdf = db->getFile( eLoc.a() );

        Extent * e = mdf->debug_getExtent( eLoc );
        if ( ! e->isOk() ){
            warning() << "Extent not ok magic: " << e->magic << " going to try to continue" << endl;
        }
        
        log() << "length:" << e->length << endl;
        
        LogIndentLevel lil2;
        
        set<DiskLoc> seen;

        DiskLoc loc = forward ? e->firstRecord : e->lastRecord;
        while ( ! loc.isNull() ){
            
            if ( ! seen.insert( loc ).second ) {
                error() << "infinite loop in extend, seen: " << loc << " before" << endl;
                break;
            }

            if ( loc.getOfs() <= 0 ){
                error() << "offset is 0 for record which should be impossible" << endl;
                break;
            }
            log(1) << loc << endl;
            Record* rec = loc.rec();
            BSONObj obj;
            try {
                obj = loc.obj();
                assert( obj.valid() );
                LOG(1) << obj << endl;
                w( obj );
            }
            catch ( std::exception& e ) {
                log() << "found invalid document @ " << loc << " " << e.what() << endl;
                if ( ! obj.isEmpty() ) {
                    try {
                        BSONElement e = obj.firstElement();
                        stringstream ss;
                        ss << "first element: " << e;
                        log() << ss.str();
                    }
                    catch ( std::exception& ) {
                    }
                }
            }
            loc = forward ? rec->getNext( loc ) : rec->getPrev( loc );
        }
        return forward ? e->xnext : e->xprev;
        
    }
Ejemplo n.º 19
0
    PlanStage::StageState TextStage::fillOutResults() {
        Database* db = cc().database();
        Collection* collection = db->getCollection( _params.ns );
        if (NULL == collection) {
            warning() << "TextStage params namespace error";
            return PlanStage::FAILURE;
        }
        vector<IndexDescriptor*> idxMatches;
        collection->getIndexCatalog()->findIndexByType("text", idxMatches);
        if (1 != idxMatches.size()) {
            warning() << "Expected exactly one text index";
            return PlanStage::FAILURE;
        }

        // Get all the index scans for each term in our query.
        OwnedPointerVector<PlanStage> scanners;
        for (size_t i = 0; i < _params.query.getTerms().size(); i++) {
            const string& term = _params.query.getTerms()[i];
            IndexScanParams params;
            params.bounds.startKey = FTSIndexFormat::getIndexKey(MAX_WEIGHT, term,
                                                                 _params.indexPrefix);
            params.bounds.endKey = FTSIndexFormat::getIndexKey(0, term, _params.indexPrefix);
            params.bounds.endKeyInclusive = true;
            params.bounds.isSimpleRange = true;
            params.descriptor = idxMatches[0];
            params.direction = -1;
            IndexScan* ixscan = new IndexScan(params, _ws, NULL);
            scanners.mutableVector().push_back(ixscan);
        }

        // Map: diskloc -> aggregate score for doc.
        typedef unordered_map<DiskLoc, double, DiskLoc::Hasher> ScoreMap;
        ScoreMap scores;

        // For each index scan, read all results and store scores.
        size_t currentIndexScanner = 0;
        while (currentIndexScanner < scanners.size()) {
            BSONObj keyObj;
            DiskLoc loc;

            WorkingSetID id;
            PlanStage::StageState state = scanners.vector()[currentIndexScanner]->work(&id);

            if (PlanStage::ADVANCED == state) {
                WorkingSetMember* wsm = _ws->get(id);
                IndexKeyDatum& keyDatum = wsm->keyData.back();
                filterAndScore(keyDatum.keyData, wsm->loc, &scores[wsm->loc]);
                _ws->free(id);
            }
            else if (PlanStage::IS_EOF == state) {
                // Done with this scan.
                ++currentIndexScanner;
            }
            else if (PlanStage::NEED_FETCH == state) {
                // We're calling work() on ixscans and they have no way to return a fetch.
                verify(false);
            }
            else if (PlanStage::NEED_TIME == state) {
                // We are a blocking stage, so ignore scanner's request for more time.
            }
            else {
                verify(PlanStage::FAILURE == state);
                warning() << "error from index scan during text stage: invalid FAILURE state";
                return PlanStage::FAILURE;
            }
        }

        // Filter for phrases and negative terms, score and truncate.
        for (ScoreMap::iterator i = scores.begin(); i != scores.end(); ++i) {
            DiskLoc loc = i->first;
            double score = i->second;

            // Ignore non-matched documents.
            if (score < 0) {
                continue;
            }

            // Filter for phrases and negated terms
            if (_params.query.hasNonTermPieces()) {
                if (!_ftsMatcher.matchesNonTerm(loc.obj())) {
                    continue;
                }
            }

            // Add results to working set as LOC_AND_UNOWNED_OBJ initially.
            // On invalidation, we copy the object and change the state to
            // OWNED_OBJ.
            // Fill out a WSM.
            WorkingSetID id = _ws->allocate();
            WorkingSetMember* member = _ws->get(id);
            member->loc = loc;
            member->obj = member->loc.obj();
            member->state = WorkingSetMember::LOC_AND_UNOWNED_OBJ;
            member->addComputed(new TextScoreComputedData(score));

            _results.push_back(id);
            _wsidByDiskLoc[member->loc] = id;
        }

        _filledOutResults = true;

        if (_results.size() == 0) {
            return PlanStage::IS_EOF;
        }
        return PlanStage::NEED_TIME;
    }
Ejemplo n.º 20
0
    /* note: this is only (as-is) called for

             - not multi
             - not mods is indexed
             - not upsert
    */
    static UpdateResult _updateById(bool isOperatorUpdate,
                                    int idIdxNo,
                                    ModSet* mods,
                                    NamespaceDetails* d,
                                    NamespaceDetailsTransient *nsdt,
                                    bool su,
                                    const char* ns,
                                    const BSONObj& updateobj,
                                    BSONObj patternOrig,
                                    bool logop,
                                    OpDebug& debug,
                                    bool fromMigrate = false) {

        DiskLoc loc;
        {
            IndexDetails& i = d->idx(idIdxNo);
            BSONObj key = i.getKeyFromQuery( patternOrig );
            loc = QueryRunner::fastFindSingle(i, key);
            if( loc.isNull() ) {
                // no upsert support in _updateById yet, so we are done.
                return UpdateResult( 0 , 0 , 0 , BSONObj() );
            }
        }
        Record* r = loc.rec();

        if ( cc().allowedToThrowPageFaultException() && ! r->likelyInPhysicalMemory() ) {
            throw PageFaultException( r );
        }

        /* look for $inc etc.  note as listed here, all fields to inc must be this type, you can't set some
           regular ones at the moment. */
        BSONObj newObj;
        if ( isOperatorUpdate ) {
            const BSONObj& onDisk = loc.obj();
            auto_ptr<ModSetState> mss = mods->prepare( onDisk, false /* not an insertion */ );

            if( mss->canApplyInPlace() ) {
                mss->applyModsInPlace(true);
                debug.fastmod = true;
                DEBUGUPDATE( "\t\t\t updateById doing in place update" );

                newObj = onDisk;
            }
            else {
                newObj = mss->createNewFromMods();
                checkTooLarge(newObj);
                verify(nsdt);
                theDataFileMgr.updateRecord(ns, d, nsdt, r, loc , newObj.objdata(), newObj.objsize(), debug);
            }

            if ( logop ) {
                DEV verify( mods->size() );
                BSONObj pattern = patternOrig;
                BSONObj logObj = mss->getOpLogRewrite();
                DEBUGUPDATE( "\t rewrite update: " << logObj );

                // It is possible that the entire mod set was a no-op over this document.  We
                // would have an empty log record in that case. If we call logOp, with an empty
                // record, that would be replicated as "clear this record", which is not what
                // we want. Therefore, to get a no-op in the replica, we simply don't log.
                if ( logObj.nFields() ) {
                    logOp("u", ns, logObj, &pattern, 0, fromMigrate, &newObj );
                }
            }
            return UpdateResult( 1 , 1 , 1 , BSONObj() );

        } // end $operator update

        // regular update
        BSONElementManipulator::lookForTimestamps( updateobj );
        checkNoMods( updateobj );
        verify(nsdt);
        theDataFileMgr.updateRecord(ns, d, nsdt, r, loc , updateobj.objdata(), updateobj.objsize(), debug );
        if ( logop ) {
            logOp("u", ns, updateobj, &patternOrig, 0, fromMigrate, &updateobj );
        }
        return UpdateResult( 1 , 0 , 1 , BSONObj() );
    }
Ejemplo n.º 21
0
    UpdateResult update(const UpdateRequest& request, OpDebug* opDebug, UpdateDriver* driver) {

        LOG(3) << "processing update : " << request;
        const NamespaceString& nsString = request.getNamespaceString();

        validateUpdate( nsString.ns().c_str(), request.getUpdates(), request.getQuery() );

        NamespaceDetails* nsDetails = nsdetails( nsString.ns() );
        NamespaceDetailsTransient* nsDetailsTransient =
            &NamespaceDetailsTransient::get( nsString.ns().c_str() );

        // TODO: This seems a bit circuitious.
        opDebug->updateobj = request.getUpdates();

        driver->refreshIndexKeys( nsDetailsTransient->indexKeys() );

        shared_ptr<Cursor> cursor = getOptimizedCursor(
            nsString.ns(), request.getQuery(), BSONObj(), request.getQueryPlanSelectionPolicy() );

        // If the update was marked with '$isolated' (a.k.a '$atomic'), we are not allowed to
        // yield while evaluating the update loop below.
        //
        // TODO: Old code checks this repeatedly within the update loop. Is that necessary? It seems
        // that once atomic should be always atomic.
        const bool isolated =
            cursor->ok() &&
            cursor->matcher() &&
            cursor->matcher()->docMatcher().atomic();

        // The 'cursor' the optimizer gave us may contain query plans that generate duplicate
        // diskloc's. We set up here the mechanims that will prevent us from processing those
        // twice if we see them. We also set up a 'ClientCursor' so that we can support
        // yielding.
        //
        // TODO: Is it valid to call this on a non-ok cursor?
        const bool dedupHere = cursor->autoDedup();

        //
        // We'll start assuming we have one or more documents for this update. (Othwerwise,
        // we'll fallback to upserting.)
        //

        // We record that this will not be an upsert, in case a mod doesn't want to be applied
        // when in strict update mode.
        driver->setContext( ModifierInterface::ExecInfo::UPDATE_CONTEXT );

        // Let's fetch each of them and pipe them through the update expression, making sure to
        // keep track of the necessary stats. Recall that we'll be pulling documents out of
        // cursors and some of them do not deduplicate the entries they generate. We have
        // deduping logic in here, too -- for now.
        unordered_set<DiskLoc, DiskLoc::Hasher> seenLocs;
        int numMatched = 0;

        // Reset these counters on each call. We might re-enter this function to retry this
        // update if we throw a page fault exception below, and we rely on these counters
        // reflecting only the actions taken locally. In particlar, we must have the no-op
        // counter reset so that we can meaningfully comapre it with numMatched above.
        opDebug->nscanned = 0;
        opDebug->nupdateNoops = 0;

        Client& client = cc();

        mutablebson::Document doc;
        mutablebson::DamageVector damages;

        // If we are going to be yielding, we will need a ClientCursor scoped to this loop. We
        // only loop as long as the underlying cursor is OK.
        for ( auto_ptr<ClientCursor> clientCursor; cursor->ok(); ) {

            // If we haven't constructed a ClientCursor, and if the client allows us to throw
            // page faults, and if we are referring to a location that is likely not in
            // physical memory, then throw a PageFaultException. The entire operation will be
            // restarted.
            if ( clientCursor.get() == NULL &&
                 client.allowedToThrowPageFaultException() &&
                 !cursor->currLoc().isNull() &&
                 !cursor->currLoc().rec()->likelyInPhysicalMemory() ) {

                // We should never throw a PFE if we have already updated items. The numMatched
                // variable includes no-ops, which do not prevent us from raising a PFE, so if
                // numMatched is non-zero, we are still OK to throw as long all matched items
                // resulted in a no-op.
                dassert((numMatched == 0) || (numMatched == opDebug->nupdateNoops));

                throw PageFaultException( cursor->currLoc().rec() );
            }

            if ( !isolated && opDebug->nscanned != 0 ) {

                // We are permitted to yield. To do so we need a ClientCursor, so create one
                // now if we have not yet done so.
                if ( !clientCursor.get() )
                    clientCursor.reset(
                        new ClientCursor( QueryOption_NoCursorTimeout, cursor, nsString.ns() ) );

                // Ask the client cursor to yield. We get two bits of state back: whether or not
                // we yielded, and whether or not we correctly recovered from yielding.
                bool yielded = false;
                const bool recovered = clientCursor->yieldSometimes(
                    ClientCursor::WillNeed, &yielded );

                if ( !recovered ) {
                    // If we failed to recover from the yield, then the ClientCursor is already
                    // gone. Release it so we don't destroy it a second time.
                    clientCursor.release();
                    break;
                }

                if ( !cursor->ok() ) {
                    // If the cursor died while we were yielded, just get out of the update loop.
                    break;
                }

                if ( yielded ) {
                    // We yielded and recovered OK, and our cursor is still good. Details about
                    // our namespace may have changed while we were yielded, so we re-acquire
                    // them here. If we can't do so, escape the update loop. Otherwise, refresh
                    // the driver so that it knows about what is currently indexed.
                    nsDetails = nsdetails( nsString.ns() );
                    if ( !nsDetails )
                        break;
                    nsDetailsTransient = &NamespaceDetailsTransient::get( nsString.ns().c_str() );

                    // TODO: This copies the index keys, but it may not need to do so.
                    driver->refreshIndexKeys( nsDetailsTransient->indexKeys() );
                }

            }

            // Let's fetch the next candidate object for this update.
            Record* record = cursor->_current();
            DiskLoc loc = cursor->currLoc();
            const BSONObj oldObj = loc.obj();

            // We count how many documents we scanned even though we may skip those that are
            // deemed duplicated. The final 'numUpdated' and 'nscanned' numbers may differ for
            // that reason.
            opDebug->nscanned++;

            // Skips this document if it:
            // a) doesn't match the query portion of the update
            // b) was deemed duplicate by the underlying cursor machinery
            //
            // Now, if we are going to update the document,
            // c) we don't want to do so while the cursor is at it, as that may invalidate
            // the cursor. So, we advance to next document, before issuing the update.
            MatchDetails matchDetails;
            matchDetails.requestElemMatchKey();
            if ( !cursor->currentMatches( &matchDetails ) ) {
                // a)
                cursor->advance();
                continue;
            }
            else if ( cursor->getsetdup( loc ) && dedupHere ) {
                // b)
                cursor->advance();
                continue;
            }
            else if (!driver->isDocReplacement() && request.isMulti()) {
                // c)
                cursor->advance();
                if ( dedupHere ) {
                    if ( seenLocs.count( loc ) ) {
                        continue;
                    }
                }

                // There are certain kind of cursors that hold multiple pointers to data
                // underneath. $or cursors is one example. In a $or cursor, it may be the case
                // that when we did the last advance(), we finished consuming documents from
                // one of $or child and started consuming the next one. In that case, it is
                // possible that the last document of the previous child is the same as the
                // first document of the next (see SERVER-5198 and jstests/orp.js).
                //
                // So we advance the cursor here until we see a new diskloc.
                //
                // Note that we won't be yielding, and we may not do so for a while if we find
                // a particularly duplicated sequence of loc's. That is highly unlikely,
                // though.  (See SERVER-5725, if curious, but "stage" based $or will make that
                // ticket moot).
                while( cursor->ok() && loc == cursor->currLoc() ) {
                    cursor->advance();
                }
            }

            // For some (unfortunate) historical reasons, not all cursors would be valid after
            // a write simply because we advanced them to a document not affected by the write.
            // To protect in those cases, not only we engaged in the advance() logic above, but
            // we also tell the cursor we're about to write a document that we've just seen.
            // prepareToTouchEarlierIterate() requires calling later
            // recoverFromTouchingEarlierIterate(), so we make a note here to do so.
            bool touchPreviousDoc = request.isMulti() && cursor->ok();
            if ( touchPreviousDoc ) {
                if ( clientCursor.get() )
                    clientCursor->setDoingDeletes( true );
                cursor->prepareToTouchEarlierIterate();
            }

            // Found a matching document
            numMatched++;

            // Ask the driver to apply the mods. It may be that the driver can apply those "in
            // place", that is, some values of the old document just get adjusted without any
            // change to the binary layout on the bson layer. It may be that a whole new
            // document is needed to accomodate the new bson layout of the resulting document.
            doc.reset( oldObj, mutablebson::Document::kInPlaceEnabled );
            BSONObj logObj;

            // If there was a matched field, obtain it.
            string matchedField;
            if (matchDetails.hasElemMatchKey())
                matchedField = matchDetails.elemMatchKey();

            Status status = driver->update( matchedField, &doc, &logObj );
            if ( !status.isOK() ) {
                uasserted( 16837, status.reason() );
            }

            // If the driver applied the mods in place, we can ask the mutable for what
            // changed. We call those changes "damages". :) We use the damages to inform the
            // journal what was changed, and then apply them to the original document
            // ourselves. If, however, the driver applied the mods out of place, we ask it to
            // generate a new, modified document for us. In that case, the file manager will
            // take care of the journaling details for us.
            //
            // This code flow is admittedly odd. But, right now, journaling is baked in the file
            // manager. And if we aren't using the file manager, we have to do jounaling
            // ourselves.
            bool objectWasChanged = false;
            BSONObj newObj;
            const char* source = NULL;
            bool inPlace = doc.getInPlaceUpdates(&damages, &source);
            if ( inPlace && !driver->modsAffectIndices() ) {
                // If a set of modifiers were all no-ops, we are still 'in place', but there is
                // no work to do, in which case we want to consider the object unchanged.
                if (!damages.empty() ) {
                    nsDetails->paddingFits();

                    // All updates were in place. Apply them via durability and writing pointer.
                    mutablebson::DamageVector::const_iterator where = damages.begin();
                    const mutablebson::DamageVector::const_iterator end = damages.end();
                    for( ; where != end; ++where ) {
                        const char* sourcePtr = source + where->sourceOffset;
                        void* targetPtr = getDur().writingPtr(
                            const_cast<char*>(oldObj.objdata()) + where->targetOffset,
                            where->size);
                        std::memcpy(targetPtr, sourcePtr, where->size);
                    }
                    objectWasChanged = true;
                    opDebug->fastmod = true;
                }
                newObj = oldObj;
            }
            else {

                // The updates were not in place. Apply them through the file manager.
                newObj = doc.getObject();
                DiskLoc newLoc = theDataFileMgr.updateRecord(nsString.ns().c_str(),
                                                             nsDetails,
                                                             nsDetailsTransient,
                                                             record,
                                                             loc,
                                                             newObj.objdata(),
                                                             newObj.objsize(),
                                                             *opDebug);

                // If we've moved this object to a new location, make sure we don't apply
                // that update again if our traversal picks the objecta again.
                //
                // We also take note that the diskloc if the updates are affecting indices.
                // Chances are that we're traversing one of them and they may be multi key and
                // therefore duplicate disklocs.
                if ( newLoc != loc || driver->modsAffectIndices()  ) {
                    seenLocs.insert( newLoc );
                }

                objectWasChanged = true;
            }

            // Log Obj
            if ( request.shouldUpdateOpLog() ) {
                if ( driver->isDocReplacement() || !logObj.isEmpty() ) {
                    BSONObj idQuery = driver->makeOplogEntryQuery(newObj, request.isMulti());
                    logOp("u", nsString.ns().c_str(), logObj , &idQuery,
                          NULL, request.isFromMigration(), &newObj);
                }
            }

            // If it was noop since the document didn't change, record that.
            if (!objectWasChanged)
                opDebug->nupdateNoops++;

            if (!request.isMulti()) {
                break;
            }

            // If we used the cursor mechanism that prepares an earlier seen document for a
            // write we need to tell such mechanisms that the write is over.
            if ( touchPreviousDoc ) {
                cursor->recoverFromTouchingEarlierIterate();
            }

            getDur().commitIfNeeded();

        }

        // TODO: Can this be simplified?
        if ((numMatched > 0) || (numMatched == 0 && !request.isUpsert()) ) {
            opDebug->nupdated = numMatched;
            return UpdateResult( numMatched > 0 /* updated existing object(s) */,
                                 !driver->isDocReplacement() /* $mod or obj replacement */,
                                 numMatched /* # of docments update, even no-ops */,
                                 BSONObj() );
        }

        //
        // We haven't found any existing document so an insert is done
        // (upsert is true).
        //
        opDebug->upsert = true;

        // Since this is an insert (no docs found and upsert:true), we will be logging it
        // as an insert in the oplog. We don't need the driver's help to build the
        // oplog record, then. We also set the context of the update driver to the INSERT_CONTEXT.
        // Some mods may only work in that context (e.g. $setOnInsert).
        driver->setLogOp( false );
        driver->setContext( ModifierInterface::ExecInfo::INSERT_CONTEXT );

        BSONObj baseObj;

        // Reset the document we will be writing to
        doc.reset( baseObj, mutablebson::Document::kInPlaceDisabled );
        if ( request.getQuery().hasElement("_id") ) {
            uassertStatusOK(doc.root().appendElement(request.getQuery().getField("_id")));
        }


        // If this is a $mod base update, we need to generate a document by examining the
        // query and the mods. Otherwise, we can use the object replacement sent by the user
        // update command that was parsed by the driver before.
        // In the following block we handle the query part, and then do the regular mods after.
        if ( *request.getUpdates().firstElementFieldName() == '$' ) {
            uassertStatusOK(UpdateDriver::createFromQuery(request.getQuery(), doc));
            opDebug->fastmodinsert = true;
        }

        // Apply the update modifications and then log the update as an insert manually.
        Status status = driver->update( StringData(), &doc, NULL /* no oplog record */);
        if ( !status.isOK() ) {
            uasserted( 16836, status.reason() );
        }

        BSONObj newObj = doc.getObject();
        theDataFileMgr.insertWithObjMod( nsString.ns().c_str(), newObj, false, request.isGod() );
        if ( request.shouldUpdateOpLog() ) {
            logOp( "i", nsString.ns().c_str(), newObj,
                   NULL, NULL, request.isFromMigration(), &newObj );
        }

        opDebug->nupdated = 1;
        return UpdateResult( false /* updated a non existing document */,
                             !driver->isDocReplacement() /* $mod or obj replacement? */,
                             1 /* count of updated documents */,
                             newObj /* object that was upserted */ );
    }
Ejemplo n.º 22
0
    DiskLoc _repairExtent( Database* db , string ns, bool forward , DiskLoc eLoc , Writer& w ){
        LogIndentLevel lil;
        
        if ( eLoc.getOfs() <= 0 ){
            toolError() << "invalid extent ofs: " << eLoc.getOfs() << std::endl;
            return DiskLoc();
        }

        const ExtentManager& extentManager = db->getExtentManager();

        Extent* e = extentManager.getExtent( eLoc, false );
        if ( ! e->isOk() ){
            toolError() << "Extent not ok magic: " << e->magic << " going to try to continue"
                      << std::endl;
        }

        toolInfoLog() << "length:" << e->length << std::endl;
        
        LogIndentLevel lil2;
        
        set<DiskLoc> seen;

        DiskLoc loc = forward ? e->firstRecord : e->lastRecord;
        while ( ! loc.isNull() ){
            
            if ( ! seen.insert( loc ).second ) {
                toolError() << "infinite loop in extent, seen: " << loc << " before" << std::endl;
                break;
            }

            if ( loc.getOfs() <= 0 ){
                toolError() << "offset is 0 for record which should be impossible" << std::endl;
                break;
            }
            if (logger::globalLogDomain()->shouldLog(logger::LogSeverity::Debug(1))) {
                toolInfoLog() << loc << std::endl;
            }
            BSONObj obj;
            try {
                obj = loc.obj();
                verify( obj.valid() );
                if (logger::globalLogDomain()->shouldLog(logger::LogSeverity::Debug(1))) {
                    toolInfoLog() << obj << std::endl;
                }
                w( obj );
            }
            catch ( std::exception& e ) {
                toolError() << "found invalid document @ " << loc << " " << e.what() << std::endl;
                if ( ! obj.isEmpty() ) {
                    try {
                        BSONElement e = obj.firstElement();
                        stringstream ss;
                        ss << "first element: " << e;
                        toolError() << ss.str() << std::endl;
                    }
                    catch ( std::exception& ) {
                        toolError() << "unable to log invalid document @ " << loc << std::endl;
                    }
                }
            }
            loc = forward ?
                extentManager.getNextRecordInExtent( loc )
                : extentManager.getPrevRecordInExtent( loc );

            // break when new loc is outside current extent boundary
            if ( loc.isNull() ) {
                break;
            }
        }
        toolInfoLog() << "wrote " << seen.size() << " documents" << std::endl;
        return forward ? e->xnext : e->xprev;
    }
Ejemplo n.º 23
0
    Runner::RunnerState IDHackRunner::getNext(BSONObj* objOut, DiskLoc* dlOut) {
        if (_killed) { return Runner::RUNNER_DEAD; }
        if (_done) { return Runner::RUNNER_EOF; }

        // Use the index catalog to get the id index.
        const IndexCatalog* catalog = _collection->getIndexCatalog();

        // Find the index we use.
        IndexDescriptor* idDesc = catalog->findIdIndex();
        if (NULL == idDesc) {
            _done = true;
            return Runner::RUNNER_EOF;
        }

        // This may not be valid always.  See SERVER-12397.
        const BtreeBasedAccessMethod* accessMethod =
            static_cast<const BtreeBasedAccessMethod*>(catalog->getIndex(idDesc));

        // Look up the key by going directly to the Btree.
        DiskLoc loc = accessMethod->findSingle( _key );

        _done = true;

        // Key not found.
        if (loc.isNull()) {
            return Runner::RUNNER_EOF;
        }

        _nscanned++;

        // Set out parameters and note that we're done w/lookup.
        if (NULL == objOut) {
            // No object requested - nothing to do.
        }
        else if (hasIDProjection(_query.get())) {
            // Covered query on _id field only.
            // Set object to search key.
            // Search key is retrieved from the canonical query at
            // construction and always contains the _id field name.
            // It is possible to construct the ID hack runner with just the collection
            // and the key object (which could be {"": my_obj_id}) but _query would be null
            // in that case and the query would never be seen as covered.
            *objOut = _key.getOwned();
        }
        else {
            invariant(!hasIDProjection(_query.get()));

            // Fetch object from storage.
            Record* record = loc.rec();

            _nscannedObjects++;

            // If the record isn't in memory...
            if (!Record::likelyInPhysicalMemory(record->dataNoThrowing())) {
                // And we're allowed to yield ourselves...
                if (Runner::YIELD_AUTO == _policy) {
                    // Note what we're yielding to fetch so that we don't crash if the loc is
                    // deleted during a yield.
                    _locFetching = loc;
                    // Yield.  TODO: Do we want to bother yielding if micros < 0?
                    int micros = ClientCursor::suggestYieldMicros();
                    ClientCursor::staticYield(micros, "", record);
                    // This can happen when we're yielded for various reasons (e.g. db/idx dropped).
                    if (_killed) {
                        return Runner::RUNNER_DEAD;
                    }
                }
            }

            // Either the data was in memory or we paged it in.
            *objOut = loc.obj();

            // If we're sharded make sure the key belongs to us.  We need the object to do this.
            if (shardingState.needCollectionMetadata(_collection->ns().ns())) {
                CollectionMetadataPtr m = shardingState.getCollectionMetadata(_collection->ns().ns());
                if (m) {
                    KeyPattern kp(m->getKeyPattern());
                    if (!m->keyBelongsToMe( kp.extractSingleKey(*objOut))) {
                        // We have something with a matching _id but it doesn't belong to me.
                        return Runner::RUNNER_EOF;
                    }
                }
            }
        }

        // Return the DiskLoc if the caller wants it.
        if (NULL != dlOut) {
            *dlOut = loc;
        }

        return Runner::RUNNER_ADVANCED;
    }
Ejemplo n.º 24
0
    Runner::RunnerState IDHackRunner::getNext(BSONObj* objOut, DiskLoc* dlOut) {
        if (_killed) { return Runner::RUNNER_DEAD; }
        if (_done) { return Runner::RUNNER_EOF; }

        // Use the index catalog to get the id index.
        IndexCatalog* catalog = _collection->getIndexCatalog();

        // Find the index we use.
        const IndexDescriptor* idDesc = catalog->findIdIndex();
        if (NULL == idDesc) {
            _done = true;
            return Runner::RUNNER_EOF;
        }

        BtreeBasedAccessMethod* accessMethod = catalog->getBtreeBasedIndex( idDesc );

        BSONObj key = _query->getQueryObj()["_id"].wrap();

        // Look up the key by going directly to the Btree.
        DiskLoc loc = accessMethod->findSingle( key );

        _done = true;

        // Key not found.
        if (loc.isNull()) {
            return Runner::RUNNER_EOF;
        }

        // Set out parameters and note that we're done w/lookup.
        if (NULL != objOut) {
            Record* record = loc.rec();

            // If the record isn't in memory...
            if (!Record::likelyInPhysicalMemory(record->dataNoThrowing())) {
                // And we're allowed to yield ourselves...
                if (Runner::YIELD_AUTO == _policy) {
                    // Note what we're yielding to fetch so that we don't crash if the loc is
                    // deleted during a yield.
                    _locFetching = loc;
                    // Yield.  TODO: Do we want to bother yielding if micros < 0?
                    int micros = ClientCursor::suggestYieldMicros();
                    ClientCursor::staticYield(micros, "", record);
                    // This can happen when we're yielded for various reasons (e.g. db/idx dropped).
                    if (_killed) {
                        return Runner::RUNNER_DEAD;
                    }
                }
            }

            // Either the data was in memory or we paged it in.
            *objOut = loc.obj();

            // If we're sharded make sure the key belongs to us.  We need the object to do this.
            if (shardingState.needCollectionMetadata(_query->ns())) {
                CollectionMetadataPtr m = shardingState.getCollectionMetadata(_query->ns());
                if (m) {
                    KeyPattern kp(m->getKeyPattern());
                    if (!m->keyBelongsToMe( kp.extractSingleKey(*objOut))) {
                        // We have something with a matching _id but it doesn't belong to me.
                        return Runner::RUNNER_EOF;
                    }
                }
            }

            // If there is a projection...
            if (NULL != _query->getProj()) {
                // Create something to execute it.
                auto_ptr<ProjectionExec> projExec(new ProjectionExec(_query->getParsed().getProj(),
                                                                     _query->root()));
                projExec->transform(*objOut, objOut);
            }
        }

        // Return the DiskLoc if the caller wants it.
        if (NULL != dlOut) {
            *dlOut = loc;
        }

        return Runner::RUNNER_ADVANCED;
    }
Ejemplo n.º 25
0
    long long Helpers::removeRange( const string& ns ,
                                    const BSONObj& min ,
                                    const BSONObj& max ,
                                    const BSONObj& keyPattern ,
                                    bool maxInclusive ,
                                    bool secondaryThrottle ,
                                    RemoveCallback * callback,
                                    bool fromMigrate ) {
        
        Client& c = cc();

        long long numDeleted = 0;
        PageFaultRetryableSection pgrs;
        
        long long millisWaitingForReplication = 0;

        while ( 1 ) {
            try {

                Client::WriteContext ctx(ns);
                
                scoped_ptr<Cursor> c;
                
                {
                    NamespaceDetails* nsd = nsdetails( ns.c_str() );
                    if ( ! nsd )
                        break;
                    
                    int ii = nsd->findIndexByKeyPattern( keyPattern );
                    verify( ii >= 0 );
                    
                    IndexDetails& i = nsd->idx( ii );

                    // Extend min to get (min, MinKey, MinKey, ....)
                    BSONObj newMin = Helpers::modifiedRangeBound( min , keyPattern , -1 );
                    // If upper bound is included, extend max to get (max, MaxKey, MaxKey, ...)
                    // If not included, extend max to get (max, MinKey, MinKey, ....)
                    int minOrMax = maxInclusive ? 1 : -1;
                    BSONObj newMax = Helpers::modifiedRangeBound( max , keyPattern , minOrMax );
                    
                    c.reset( BtreeCursor::make( nsd , ii , i , newMin , newMax , maxInclusive , 1 ) );
                }
                
                if ( ! c->ok() ) {
                    // we're done
                    break;
                }
                
                DiskLoc rloc = c->currLoc();
                BSONObj obj = c->current();
                
                // this is so that we don't have to handle this cursor in the delete code
                c.reset(0);
                
                if ( callback )
                    callback->goingToDelete( obj );
                
                logOp( "d" , ns.c_str() , rloc.obj()["_id"].wrap() , 0 , 0 , fromMigrate );
                theDataFileMgr.deleteRecord(ns.c_str() , rloc.rec(), rloc);
                numDeleted++;
            }
            catch( PageFaultException& e ) {
                e.touch();
                continue;
            }

            Timer secondaryThrottleTime;

            if ( secondaryThrottle ) {
                if ( ! waitForReplication( c.getLastOp(), 2, 60 /* seconds to wait */ ) ) {
                    warning() << "replication to secondaries for removeRange at least 60 seconds behind" << endl;
                }
                millisWaitingForReplication += secondaryThrottleTime.millis();
            }
            
            if ( ! Lock::isLocked() ) {
                int micros = ( 2 * Client::recommendedYieldMicros() ) - secondaryThrottleTime.micros();
                if ( micros > 0 ) {
                    LOG(1) << "Helpers::removeRangeUnlocked going to sleep for " << micros << " micros" << endl;
                    sleepmicros( micros );
                }
            }
                
        }
        
        if ( secondaryThrottle )
            log() << "Helpers::removeRangeUnlocked time spent waiting for replication: "  
                  << millisWaitingForReplication << "ms" << endl;
        
        return numDeleted;
    }
Ejemplo n.º 26
0
    UpdateResult _updateObjects( bool su,
                                 const char* ns,
                                 const BSONObj& updateobj,
                                 const BSONObj& patternOrig,
                                 bool upsert,
                                 bool multi,
                                 bool logop ,
                                 OpDebug& debug,
                                 RemoveSaver* rs,
                                 bool fromMigrate,
                                 const QueryPlanSelectionPolicy& planPolicy,
                                 bool forReplication ) {

        DEBUGUPDATE( "update: " << ns
                     << " update: " << updateobj
                     << " query: " << patternOrig
                     << " upsert: " << upsert << " multi: " << multi );

        Client& client = cc();

        debug.updateobj = updateobj;

        // The idea with these here it to make them loop invariant for
        // multi updates, and thus be a bit faster for that case.  The
        // pointers may be left invalid on a failed or terminal yield
        // recovery.
        NamespaceDetails* d = nsdetails(ns); // can be null if an upsert...
        NamespaceDetailsTransient* nsdt = &NamespaceDetailsTransient::get(ns);

        auto_ptr<ModSet> mods;
        bool isOperatorUpdate = updateobj.firstElementFieldName()[0] == '$';
        int modsIsIndexed = false; // really the # of indexes
        if ( isOperatorUpdate ) {
            mods.reset( new ModSet(updateobj, nsdt->indexKeys(), forReplication) );
            modsIsIndexed = mods->maxNumIndexUpdated();
        }

        if( planPolicy.permitOptimalIdPlan() && !multi && isSimpleIdQuery(patternOrig) && d &&
           !modsIsIndexed ) {
            int idxNo = d->findIdIndex();
            if( idxNo >= 0 ) {
                debug.idhack = true;

                UpdateResult result = _updateById( isOperatorUpdate,
                                                   idxNo,
                                                   mods.get(),
                                                   d,
                                                   nsdt,
                                                   su,
                                                   ns,
                                                   updateobj,
                                                   patternOrig,
                                                   logop,
                                                   debug,
                                                   fromMigrate);
                if ( result.existing || ! upsert ) {
                    return result;
                }
                else if ( upsert && ! isOperatorUpdate ) {
                    // this handles repl inserts
                    checkNoMods( updateobj );
                    debug.upsert = true;
                    BSONObj no = updateobj;
                    theDataFileMgr.insertWithObjMod(ns, no, false, su);
                    if ( logop )
                        logOp( "i", ns, no, 0, 0, fromMigrate, &no );

                    return UpdateResult( 0 , 0 , 1 , no );
                }
            }
        }

        int numModded = 0;
        debug.nscanned = 0;
        shared_ptr<Cursor> c = getOptimizedCursor( ns, patternOrig, BSONObj(), planPolicy );
        d = nsdetails(ns);
        nsdt = &NamespaceDetailsTransient::get(ns);
        bool autoDedup = c->autoDedup();

        if( c->ok() ) {
            set<DiskLoc> seenObjects;
            MatchDetails details;
            auto_ptr<ClientCursor> cc;
            do {

                if ( cc.get() == 0 &&
                     client.allowedToThrowPageFaultException() &&
                     ! c->currLoc().isNull() &&
                     ! c->currLoc().rec()->likelyInPhysicalMemory() ) {
                    throw PageFaultException( c->currLoc().rec() );
                }

                bool atomic = c->matcher() && c->matcher()->docMatcher().atomic();

                if ( ! atomic && debug.nscanned > 0 ) {
                    // we need to use a ClientCursor to yield
                    if ( cc.get() == 0 ) {
                        shared_ptr< Cursor > cPtr = c;
                        cc.reset( new ClientCursor( QueryOption_NoCursorTimeout , cPtr , ns ) );
                    }

                    bool didYield;
                    if ( ! cc->yieldSometimes( ClientCursor::WillNeed, &didYield ) ) {
                        cc.release();
                        break;
                    }
                    if ( !c->ok() ) {
                        break;
                    }

                    if ( didYield ) {
                        d = nsdetails(ns);
                        if ( ! d )
                            break;
                        nsdt = &NamespaceDetailsTransient::get(ns);
                        if ( mods.get() ) {
                            mods->setIndexedStatus( nsdt->indexKeys() );
                            modsIsIndexed = mods->maxNumIndexUpdated();
                        }

                    }

                } // end yielding block

                debug.nscanned++;

                if ( mods.get() && mods->hasDynamicArray() ) {
                    details.requestElemMatchKey();
                }

                if ( !c->currentMatches( &details ) ) {
                    c->advance();
                    continue;
                }

                Record* r = c->_current();
                DiskLoc loc = c->currLoc();

                if ( c->getsetdup( loc ) && autoDedup ) {
                    c->advance();
                    continue;
                }

                BSONObj js = BSONObj::make(r);

                BSONObj pattern = patternOrig;

                if ( logop ) {
                    BSONObjBuilder idPattern;
                    BSONElement id;
                    // NOTE: If the matching object lacks an id, we'll log
                    // with the original pattern.  This isn't replay-safe.
                    // It might make sense to suppress the log instead
                    // if there's no id.
                    if ( js.getObjectID( id ) ) {
                        idPattern.append( id );
                        pattern = idPattern.obj();
                    }
                    else {
                        uassert( 10157 ,  "multi-update requires all modified objects to have an _id" , ! multi );
                    }
                }

                /* look for $inc etc.  note as listed here, all fields to inc must be this type, you can't set some
                    regular ones at the moment. */
                if ( isOperatorUpdate ) {

                    if ( multi ) {
                        // go to next record in case this one moves
                        c->advance();

                        // Update operations are deduped for cursors that implement their own
                        // deduplication.  In particular, some geo cursors are excluded.
                        if ( autoDedup ) {

                            if ( seenObjects.count( loc ) ) {
                                continue;
                            }

                            // SERVER-5198 Advance past the document to be modified, provided
                            // deduplication is enabled, but see SERVER-5725.
                            while( c->ok() && loc == c->currLoc() ) {
                                c->advance();
                            }
                        }
                    }

                    const BSONObj& onDisk = loc.obj();

                    ModSet* useMods = mods.get();

                    auto_ptr<ModSet> mymodset;
                    if ( details.hasElemMatchKey() && mods->hasDynamicArray() ) {
                        useMods = mods->fixDynamicArray( details.elemMatchKey() );
                        mymodset.reset( useMods );
                    }

                    auto_ptr<ModSetState> mss = useMods->prepare( onDisk,
                                                                  false /* not an insertion */ );

                    bool willAdvanceCursor = multi && c->ok() && ( modsIsIndexed || ! mss->canApplyInPlace() );

                    if ( willAdvanceCursor ) {
                        if ( cc.get() ) {
                            cc->setDoingDeletes( true );
                        }
                        c->prepareToTouchEarlierIterate();
                    }

                    // If we've made it this far, "ns" must contain a valid collection name, and so
                    // is of the form "db.collection".  Therefore, the following expression must
                    // always be valid.  "system.users" updates must never be done in place, in
                    // order to ensure that they are validated inside DataFileMgr::updateRecord(.).
                    bool isSystemUsersMod = (NamespaceString(ns).coll == "system.users");

                    BSONObj newObj;
                    if ( !mss->isUpdateIndexed() && mss->canApplyInPlace() && !isSystemUsersMod ) {
                        mss->applyModsInPlace( true );// const_cast<BSONObj&>(onDisk) );

                        DEBUGUPDATE( "\t\t\t doing in place update" );
                        if ( !multi )
                            debug.fastmod = true;

                        if ( modsIsIndexed ) {
                            seenObjects.insert( loc );
                        }
                        newObj = loc.obj();
                        d->paddingFits();
                    }
                    else {
                        newObj = mss->createNewFromMods();
                        checkTooLarge(newObj);
                        DiskLoc newLoc = theDataFileMgr.updateRecord(ns,
                                                                     d,
                                                                     nsdt,
                                                                     r,
                                                                     loc,
                                                                     newObj.objdata(),
                                                                     newObj.objsize(),
                                                                     debug);

                        if ( newLoc != loc || modsIsIndexed ){
                            // log() << "Moved obj " << newLoc.obj()["_id"] << " from " << loc << " to " << newLoc << endl;
                            // object moved, need to make sure we don' get again
                            seenObjects.insert( newLoc );
                        }

                    }

                    if ( logop ) {
                        DEV verify( mods->size() );
                        BSONObj logObj = mss->getOpLogRewrite();
                        DEBUGUPDATE( "\t rewrite update: " << logObj );

                        // It is possible that the entire mod set was a no-op over this
                        // document.  We would have an empty log record in that case. If we
                        // call logOp, with an empty record, that would be replicated as "clear
                        // this record", which is not what we want. Therefore, to get a no-op
                        // in the replica, we simply don't log.
                        if ( logObj.nFields() ) {
                            logOp("u", ns, logObj , &pattern, 0, fromMigrate, &newObj );
                        }
                    }
                    numModded++;
                    if ( ! multi )
                        return UpdateResult( 1 , 1 , numModded , BSONObj() );
                    if ( willAdvanceCursor )
                        c->recoverFromTouchingEarlierIterate();

                    getDur().commitIfNeeded();

                    continue;
                }

                uassert( 10158 ,  "multi update only works with $ operators" , ! multi );

                BSONElementManipulator::lookForTimestamps( updateobj );
                checkNoMods( updateobj );
                theDataFileMgr.updateRecord(ns, d, nsdt, r, loc , updateobj.objdata(), updateobj.objsize(), debug, su);
                if ( logop ) {
                    DEV wassert( !su ); // super used doesn't get logged, this would be bad.
                    logOp("u", ns, updateobj, &pattern, 0, fromMigrate, &updateobj );
                }
                return UpdateResult( 1 , 0 , 1 , BSONObj() );
            } while ( c->ok() );
        } // endif

        if ( numModded )
            return UpdateResult( 1 , 1 , numModded , BSONObj() );

        if ( upsert ) {
            if ( updateobj.firstElementFieldName()[0] == '$' ) {
                // upsert of an $operation. build a default object
                BSONObj newObj = mods->createNewFromQuery( patternOrig );
                checkNoMods( newObj );
                debug.fastmodinsert = true;
                theDataFileMgr.insertWithObjMod(ns, newObj, false, su);
                if ( logop )
                    logOp( "i", ns, newObj, 0, 0, fromMigrate, &newObj );

                return UpdateResult( 0 , 1 , 1 , newObj );
            }
            uassert( 10159 ,  "multi update only works with $ operators" , ! multi );
            checkNoMods( updateobj );
            debug.upsert = true;
            BSONObj no = updateobj;
            theDataFileMgr.insertWithObjMod(ns, no, false, su);
            if ( logop )
                logOp( "i", ns, no, 0, 0, fromMigrate, &no );
            return UpdateResult( 0 , 0 , 1 , no );
        }

        return UpdateResult( 0 , isOperatorUpdate , 0 , BSONObj() );
    }
Ejemplo n.º 27
0
 void got( const DiskLoc& loc ) {
     Point p( loc.obj().getFieldDotted( _geoField ) );
     if ( _near.distance( p ) > _maxDistance )
         return;
     _locs.push_back( loc );
 }
Ejemplo n.º 28
0
    UpdateResult _updateObjectsNEW( bool su,
                                    const char* ns,
                                    const BSONObj& updateobj,
                                    const BSONObj& patternOrig,
                                    bool upsert,
                                    bool multi,
                                    bool logop ,
                                    OpDebug& debug,
                                    RemoveSaver* rs,
                                    bool fromMigrate,
                                    const QueryPlanSelectionPolicy& planPolicy,
                                    bool forReplication ) {

        // TODO
        // + Separate UpdateParser from UpdateRunner (the latter should be "stage-y")
        //   + All the yield and deduplicate logic would move to the query stage
        //     portion of it
        //
        // + Replication related
        //   + fast path for update for query by _id
        //   + support for relaxing viable path constraint in replication
        //
        // + Field Management
        //   + Force all upsert to contain _id
        //   + Prevent changes to immutable fields (_id, and those mentioned by sharding)
        //
        // + Yiedling related
        //   + $atomic support (or better, support proper yielding if not)
        //   + page fault support

        debug.updateobj = updateobj;

        NamespaceDetails* d = nsdetails( ns );
        NamespaceDetailsTransient* nsdt = &NamespaceDetailsTransient::get( ns );

        // TODO: Put this logic someplace central and check based on constants (maybe using the
        // list of actually excluded config collections, and not global for the config db).
        NamespaceString nsStr( ns );

        // Should the modifiers validdate their embedded docs via okForStorage
        bool shouldValidate = true;

        // Config db docs shouldn't get checked for valid field names since the shard key can have
        // a dot (".") in it. Therefore we disable validation for storage.
        if ( nsStr.db() == "config" ) {
            LOG(0) << "disabling okForStorage on config db";
            shouldValidate = false;
        }

        UpdateDriver::Options opts;
        opts.multi = multi;
        opts.upsert = upsert;
        opts.logOp = logop;
        opts.modOptions = ModifierInterface::Options( forReplication, shouldValidate );
        UpdateDriver driver( opts );

        // TODO: This copies the index keys, but we may not actually need to.
        Status status = driver.parse( nsdt->indexKeys(), updateobj );
        if ( !status.isOK() ) {
            uasserted( 16840, status.reason() );
        }

        shared_ptr<Cursor> cursor = getOptimizedCursor( ns, patternOrig, BSONObj(), planPolicy );

        // If the update was marked with '$isolated' (a.k.a '$atomic'), we are not allowed to
        // yield while evaluating the update loop below.
        //
        // TODO: Old code checks this repeatedly within the update loop. Is that necessary? It seems
        // that once atomic should be always atomic.
        const bool canYield =
            cursor->ok() &&
            cursor->matcher() &&
            cursor->matcher()->docMatcher().atomic();

        // The 'cursor' the optimizer gave us may contain query plans that generate duplicate
        // diskloc's. We set up here the mechanims that will prevent us from processing those
        // twice if we see them. We also set up a 'ClientCursor' so that we can support
        // yielding.
        //
        // TODO: Is it valid to call this on a non-ok cursor?
        const bool dedupHere = cursor->autoDedup();

        //
        // We'll start assuming we have one or more documents for this update. (Othwerwise,
        // we'll fallback to upserting.)
        //

        // We record that this will not be an upsert, in case a mod doesn't want to be applied
        // when in strict update mode.
        driver.setContext( ModifierInterface::ExecInfo::UPDATE_CONTEXT );

        // Let's fetch each of them and pipe them through the update expression, making sure to
        // keep track of the necessary stats. Recall that we'll be pulling documents out of
        // cursors and some of them do not deduplicate the entries they generate. We have
        // deduping logic in here, too -- for now.
        unordered_set<DiskLoc, DiskLoc::Hasher> seenLocs;
        int numUpdated = 0;
        debug.nscanned = 0;

        Client& client = cc();

        mutablebson::Document doc;

        // If we are going to be yielding, we will need a ClientCursor scoped to this loop. We
        // only loop as long as the underlying cursor is OK.
        for ( auto_ptr<ClientCursor> clientCursor; cursor->ok(); ) {

            // If we haven't constructed a ClientCursor, and if the client allows us to throw
            // page faults, and if we are referring to a location that is likely not in
            // physical memory, then throw a PageFaultException. The entire operation will be
            // restarted.
            if ( clientCursor.get() == NULL &&
                 client.allowedToThrowPageFaultException() &&
                 !cursor->currLoc().isNull() &&
                 !cursor->currLoc().rec()->likelyInPhysicalMemory() ) {
                // We should never throw a PFE if we have already updated items.
                dassert(numUpdated == 0);
                throw PageFaultException( cursor->currLoc().rec() );
            }

            if ( !canYield && debug.nscanned != 0 ) {

                // We are permitted to yield. To do so we need a ClientCursor, so create one
                // now if we have not yet done so.
                if ( !clientCursor.get() )
                    clientCursor.reset(
                        new ClientCursor( QueryOption_NoCursorTimeout, cursor, ns ) );

                // Ask the client cursor to yield. We get two bits of state back: whether or not
                // we yielded, and whether or not we correctly recovered from yielding.
                bool yielded = false;
                const bool recovered = clientCursor->yieldSometimes(
                    ClientCursor::WillNeed, &yielded );

                // If we couldn't recover from the yield, or if the cursor died while we were
                // yielded, get out of the update loop right away. We don't need to reset
                // 'clientCursor' since we are leaving the scope.
                if ( !recovered || !cursor->ok() )
                    break;

                if ( yielded ) {
                    // Details about our namespace may have changed while we were yielded, so
                    // we re-acquire them here. If we can't do so, escape the update
                    // loop. Otherwise, refresh the driver so that it knows about what is
                    // currently indexed.
                    d = nsdetails( ns );
                    if ( !d )
                        break;
                    nsdt = &NamespaceDetailsTransient::get( ns );

                    // TODO: This copies the index keys, but it may not need to do so.
                    driver.refreshIndexKeys( nsdt->indexKeys() );
                }

            }

            // Let's fetch the next candidate object for this update.
            Record* r = cursor->_current();
            DiskLoc loc = cursor->currLoc();
            const BSONObj oldObj = loc.obj();

            // We count how many documents we scanned even though we may skip those that are
            // deemed duplicated. The final 'numUpdated' and 'nscanned' numbers may differ for
            // that reason.
            debug.nscanned++;

            // Skips this document if it:
            // a) doesn't match the query portion of the update
            // b) was deemed duplicate by the underlying cursor machinery
            //
            // Now, if we are going to update the document,
            // c) we don't want to do so while the cursor is at it, as that may invalidate
            // the cursor. So, we advance to next document, before issuing the update.
            MatchDetails matchDetails;
            matchDetails.requestElemMatchKey();
            if ( !cursor->currentMatches( &matchDetails ) ) {
                // a)
                cursor->advance();
                continue;
            }
            else if ( cursor->getsetdup( loc ) && dedupHere ) {
                // b)
                cursor->advance();
                continue;
            }
            else if (driver.dollarModMode() && multi) {
                // c)
                cursor->advance();
                if ( dedupHere ) {
                    if ( seenLocs.count( loc ) ) {
                        continue;
                    }
                }

                // There are certain kind of cursors that hold multiple pointers to data
                // underneath. $or cursors is one example. In a $or cursor, it may be the case
                // that when we did the last advance(), we finished consuming documents from
                // one of $or child and started consuming the next one. In that case, it is
                // possible that the last document of the previous child is the same as the
                // first document of the next (see SERVER-5198 and jstests/orp.js).
                //
                // So we advance the cursor here until we see a new diskloc.
                //
                // Note that we won't be yielding, and we may not do so for a while if we find
                // a particularly duplicated sequence of loc's. That is highly unlikely,
                // though.  (See SERVER-5725, if curious, but "stage" based $or will make that
                // ticket moot).
                while( cursor->ok() && loc == cursor->currLoc() ) {
                    cursor->advance();
                }
            }

            // For some (unfortunate) historical reasons, not all cursors would be valid after
            // a write simply because we advanced them to a document not affected by the write.
            // To protect in those cases, not only we engaged in the advance() logic above, but
            // we also tell the cursor we're about to write a document that we've just seen.
            // prepareToTouchEarlierIterate() requires calling later
            // recoverFromTouchingEarlierIterate(), so we make a note here to do so.
            bool touchPreviousDoc = multi && cursor->ok();
            if ( touchPreviousDoc ) {
                if ( clientCursor.get() )
                    clientCursor->setDoingDeletes( true );
                cursor->prepareToTouchEarlierIterate();
            }

            // Ask the driver to apply the mods. It may be that the driver can apply those "in
            // place", that is, some values of the old document just get adjusted without any
            // change to the binary layout on the bson layer. It may be that a whole new
            // document is needed to accomodate the new bson layout of the resulting document.
            doc.reset( oldObj, mutablebson::Document::kInPlaceEnabled );
            BSONObj logObj;
            StringData matchedField = matchDetails.hasElemMatchKey() ?
                                                    matchDetails.elemMatchKey():
                                                    StringData();
            status = driver.update( matchedField, &doc, &logObj );
            if ( !status.isOK() ) {
                uasserted( 16837, status.reason() );
            }

            // If the driver applied the mods in place, we can ask the mutable for what
            // changed. We call those changes "damages". :) We use the damages to inform the
            // journal what was changed, and then apply them to the original document
            // ourselves. If, however, the driver applied the mods out of place, we ask it to
            // generate a new, modified document for us. In that case, the file manager will
            // take care of the journaling details for us.
            //
            // This code flow is admittedly odd. But, right now, journaling is baked in the file
            // manager. And if we aren't using the file manager, we have to do jounaling
            // ourselves.
            bool objectWasChanged = false;
            BSONObj newObj;
            const char* source = NULL;
            mutablebson::DamageVector damages;
            bool inPlace = doc.getInPlaceUpdates(&damages, &source);
            if ( inPlace && !damages.empty() && !driver.modsAffectIndices() ) {
                d->paddingFits();

                // All updates were in place. Apply them via durability and writing pointer.
                mutablebson::DamageVector::const_iterator where = damages.begin();
                const mutablebson::DamageVector::const_iterator end = damages.end();
                for( ; where != end; ++where ) {
                    const char* sourcePtr = source + where->sourceOffset;
                    void* targetPtr = getDur().writingPtr(
                        const_cast<char*>(oldObj.objdata()) + where->targetOffset,
                        where->size);
                    std::memcpy(targetPtr, sourcePtr, where->size);
                }
                newObj = oldObj;
                debug.fastmod = true;

                objectWasChanged = true;
            }
            else {

                // The updates were not in place. Apply them through the file manager.
                newObj = doc.getObject();
                DiskLoc newLoc = theDataFileMgr.updateRecord(ns,
                                                             d,
                                                             nsdt,
                                                             r,
                                                             loc,
                                                             newObj.objdata(),
                                                             newObj.objsize(),
                                                             debug);

                // If we've moved this object to a new location, make sure we don't apply
                // that update again if our traversal picks the objecta again.
                //
                // We also take note that the diskloc if the updates are affecting indices.
                // Chances are that we're traversing one of them and they may be multi key and
                // therefore duplicate disklocs.
                if ( newLoc != loc || driver.modsAffectIndices()  ) {
                    seenLocs.insert( newLoc );
                }

                objectWasChanged = true;
            }

            // Log Obj
            if ( logop ) {
                if ( !logObj.isEmpty() ) {
                    BSONObj idQuery = driver.makeOplogEntryQuery(newObj, multi);
                    logOp("u", ns, logObj , &idQuery, 0, fromMigrate, &newObj);
                }
            }

            // If we applied any in-place updates, or asked the DataFileMgr to write for us,
            // then count this as an update.
            if (objectWasChanged)
                numUpdated++;

            if (!multi) {
                break;
            }

            // If we used the cursor mechanism that prepares an earlier seen document for a
            // write we need to tell such mechanisms that the write is over.
            if ( touchPreviousDoc ) {
                cursor->recoverFromTouchingEarlierIterate();
            }

            getDur().commitIfNeeded();

        }

        if (numUpdated > 0) {
            return UpdateResult( true /* updated existing object(s) */,
                                 driver.dollarModMode() /* $mod or obj replacement */,
                                 numUpdated /* # of docments update */,
                                 BSONObj() );
        }
        else if (numUpdated == 0 && !upsert) {
            return UpdateResult( false /* no object updated */,
                                 driver.dollarModMode() /* $mod or obj replacement */,
                                 0 /* no updates */,
                                 BSONObj() );
        }

        //
        // We haven't succeeded updating any existing document but upserts are allowed.
        //

        // If this is a $mod base update, we need to generate a document by examining the
        // query and the mods. Otherwise, we can use the object replacement sent by the user
        // update command that was parsed by the driver before.
        BSONObj oldObj;
        if ( *updateobj.firstElementFieldName() == '$' ) {
            if ( !driver.createFromQuery( patternOrig, &oldObj ) ) {
                uasserted( 16835, "cannot create object to update" );
            }
            debug.fastmodinsert = true;
        }
        else {
            // Copy the _id
            if (patternOrig.hasElement("_id")) {
                oldObj = patternOrig.getField("_id").wrap();
            }
            debug.upsert = true;
        }

        // Since this is an upsert, we will be oplogging it as an insert. We don't
        // need the driver's help to build the oplog record, then. We also set the
        // context of the update driver to an "upsert". Some mods may only work in that
        // context (e.g. $setOnInsert).
        driver.setLogOp( false );
        driver.setContext( ModifierInterface::ExecInfo::INSERT_CONTEXT );

        doc.reset( oldObj, mutablebson::Document::kInPlaceDisabled );
        status = driver.update( StringData(), &doc, NULL /* no oplog record */);
        if ( !status.isOK() ) {
            uasserted( 16836, status.reason() );
        }
        BSONObj newObj = doc.getObject();

        theDataFileMgr.insertWithObjMod( ns, newObj, false, su );

        if ( logop ) {
            logOp( "i", ns, newObj, 0, 0, fromMigrate, &newObj );
        }

        return UpdateResult( false /* updated a non existing document */,
                             driver.dollarModMode() /* $mod or obj replacement? */,
                             1 /* count of updated documents */,
                             newObj /* object that was upserted */ );
    }
Ejemplo n.º 29
0
    UpdateResult _updateObjectsNEW( bool su,
                                    const char* ns,
                                    const BSONObj& updateobj,
                                    const BSONObj& patternOrig,
                                    bool upsert,
                                    bool multi,
                                    bool logop ,
                                    OpDebug& debug,
                                    RemoveSaver* rs,
                                    bool fromMigrate,
                                    const QueryPlanSelectionPolicy& planPolicy,
                                    bool forReplication ) {

        // TODO
        // + Separate UpdateParser from UpdateRunner (the latter should be "stage-y")
        //   + All the yield and deduplicate logic would move to the query stage
        //     portion of it
        //
        // + Replication related
        //   + fast path for update for query by _id
        //   + support for relaxing viable path constraint in replication
        //
        // + Field Management
        //   + Force all upsert to contain _id
        //   + Prevent changes to immutable fields (_id, and those mentioned by sharding)
        //
        // + Yiedling related
        //   + $atomic support (or better, support proper yielding if not)
        //   + page fault support

        debug.updateobj = updateobj;

        NamespaceDetails* d = nsdetails( ns );
        NamespaceDetailsTransient* nsdt = &NamespaceDetailsTransient::get( ns );

        UpdateDriver::Options opts;
        opts.multi = multi;
        opts.upsert = upsert;
        opts.logOp = logop;
        UpdateDriver driver( opts );
        Status status = driver.parse( nsdt->indexKeys(), updateobj );
        if ( !status.isOK() ) {
            uasserted( 16840, status.reason() );
        }

        shared_ptr<Cursor> cursor = getOptimizedCursor( ns, patternOrig, BSONObj(), planPolicy );

        // The 'cursor' the optimizer gave us may contain query plans that generate duplicate
        // diskloc's. We set up here the mechanims that will prevent us from processing those
        // twice if we see them. We also set up a 'ClientCursor' so that we can support
        // yielding.
        const bool dedupHere = cursor->autoDedup();
        shared_ptr<Cursor> cPtr = cursor;
        auto_ptr<ClientCursor> clientCursor( new ClientCursor( QueryOption_NoCursorTimeout,
                                                               cPtr,
                                                               ns ) );

        //
        // Upsert Logic
        //

        // We may or may not have documents for this update. If we don't, then try to upsert,
        // if allowed.
        if ( !cursor->ok() && upsert ) {

            // If this is a $mod base update, we need to generate a document by examining the
            // query and the mods. Otherwise, we can use the object replacement sent by the user
            // update command that was parsed by the driver before.
            BSONObj oldObj;
            if ( *updateobj.firstElementFieldName() == '$' ) {
                if ( !driver.createFromQuery( patternOrig, &oldObj ) ) {
                    uasserted( 16835, "cannot create object to update" );
                }
                debug.fastmodinsert = true;
            }
            else {
                debug.upsert = true;
            }

            // Since this is an upsert, we will be oplogging it as an insert. We don't
            // need the driver's help to build the oplog record, then. We also set the
            // context of the update driver to an "upsert". Some mods may only work in that
            // context (e.g. $setOnInsert).
            driver.setLogOp( false );
            driver.setContext( ModifierInterface::ExecInfo::INSERT_CONTEXT );

            mutablebson::Document doc( oldObj, mutablebson::Document::kInPlaceDisabled );
            status = driver.update( StringData(), &doc, NULL /* no oplog record */);
            if ( !status.isOK() ) {
                uasserted( 16836, status.reason() );
            }
            BSONObj newObj = doc.getObject();

            theDataFileMgr.insertWithObjMod( ns, newObj, false, su );

            if ( logop ) {
                logOp( "i", ns, newObj, 0, 0, fromMigrate, &newObj );
            }

            return UpdateResult( false /* updated a non existing document */,
                                 driver.dollarModMode() /* $mod or obj replacement? */,
                                 1 /* count of updated documents */,
                                 newObj /* object that was upserted */ );
        }

        //
        // We have one or more documents for this update.
        //

        // We record that this will not be an upsert, in case a mod doesn't want to be applied
        // when in strict update mode.
        driver.setContext( ModifierInterface::ExecInfo::UPDATE_CONTEXT );

        // Let's fetch each of them and pipe them through the update expression, making sure to
        // keep track of the necessary stats. Recall that we'll be pulling documents out of
        // cursors and some of them do not deduplicate the entries they generate. We have
        // deduping logic in here, too -- for now.
        unordered_set<DiskLoc, DiskLoc::Hasher> seenLocs;
        int numUpdated = 0;
        debug.nscanned = 0;
        while ( cursor->ok() ) {

            // Let's fetch the next candidate object for this update.
            Record* r = cursor->_current();
            DiskLoc loc = cursor->currLoc();
            const BSONObj oldObj = loc.obj();

            // We count how many documents we scanned even though we may skip those that are
            // deemed duplicated. The final 'numUpdated' and 'nscanned' numbers may differ for
            // that reason.
            debug.nscanned++;

            // Skips this document if it:
            // a) doesn't match the query portion of the update
            // b) was deemed duplicate by the underlying cursor machinery
            //
            // Now, if we are going to update the document,
            // c) we don't want to do so while the cursor is at it, as that may invalidate
            // the cursor. So, we advance to next document, before issuing the update.
            MatchDetails matchDetails;
            matchDetails.requestElemMatchKey();
            if ( !cursor->currentMatches( &matchDetails ) ) {
                // a)
                cursor->advance();
                continue;
            }
            else if ( cursor->getsetdup( loc ) && dedupHere ) {
                // b)
                cursor->advance();
                continue;
            }
            else if (driver.dollarModMode() && multi) {
                // c)
                cursor->advance();
                if ( dedupHere ) {
                    if ( seenLocs.count( loc ) ) {
                        continue;
                    }
                }

                // There are certain kind of cursors that hold multiple pointers to data
                // underneath. $or cursors is one example. In a $or cursor, it may be the case
                // that when we did the last advance(), we finished consuming documents from
                // one of $or child and started consuming the next one. In that case, it is
                // possible that the last document of the previous child is the same as the
                // first document of the next (see SERVER-5198 and jstests/orp.js).
                //
                // So we advance the cursor here until we see a new diskloc.
                //
                // Note that we won't be yielding, and we may not do so for a while if we find
                // a particularly duplicated sequence of loc's. That is highly unlikely,
                // though.  (See SERVER-5725, if curious, but "stage" based $or will make that
                // ticket moot).
                while( cursor->ok() && loc == cursor->currLoc() ) {
                    cursor->advance();
                }
            }

            // For some (unfortunate) historical reasons, not all cursors would be valid after
            // a write simply because we advanced them to a document not affected by the write.
            // To protect in those cases, not only we engaged in the advance() logic above, but
            // we also tell the cursor we're about to write a document that we've just seen.
            // prepareToTouchEarlierIterate() requires calling later
            // recoverFromTouchingEarlierIterate(), so we make a note here to do so.
            bool touchPreviousDoc = multi && cursor->ok();
            if ( touchPreviousDoc  ) {
                clientCursor->setDoingDeletes( true );
                cursor->prepareToTouchEarlierIterate();
            }

            // Ask the driver to apply the mods. It may be that the driver can apply those "in
            // place", that is, some values of the old document just get adjusted without any
            // change to the binary layout on the bson layer. It may be that a whole new
            // document is needed to accomodate the new bson layout of the resulting document.
            mutablebson::Document doc( oldObj, mutablebson::Document::kInPlaceEnabled );
            BSONObj logObj;
            StringData matchedField = matchDetails.hasElemMatchKey() ?
                                                    matchDetails.elemMatchKey():
                                                    StringData();
            status = driver.update( matchedField, &doc, &logObj );
            if ( !status.isOK() ) {
                uasserted( 16837, status.reason() );
            }

            // If the driver applied the mods in place, we can ask the mutable for what
            // changed. We call those changes "damages". :) We use the damages to inform the
            // journal what was changed, and then apply them to the original document
            // ourselves. If, however, the driver applied the mods out of place, we ask it to
            // generate a new, modified document for us. In that case, the file manager will
            // take care of the journaling details for us.
            //
            // This code flow is admittedly odd. But, right now, journaling is baked in the file
            // manager. And if we aren't using the file manager, we have to do jounaling
            // ourselves.
            BSONObj newObj;
            const char* source = NULL;
            mutablebson::DamageVector damages;
            bool inPlace = doc.getInPlaceUpdates(&damages, &source);
            if ( inPlace && !driver.modsAffectIndices() ) {

                // All updates were in place. Apply them via durability and writing pointer.
                mutablebson::DamageVector::const_iterator where = damages.begin();
                const mutablebson::DamageVector::const_iterator end = damages.end();
                for( ; where != end; ++where ) {
                    const char* sourcePtr = source + where->sourceOffset;
                    void* targetPtr = getDur().writingPtr(
                        const_cast<char*>(oldObj.objdata()) + where->targetOffset,
                        where->size);
                    std::memcpy(targetPtr, sourcePtr, where->size);
                }
                newObj = oldObj;
                debug.fastmod = true;
            }
            else {

                // The updates were not in place. Apply them through the file manager.
                newObj = doc.getObject();
                DiskLoc newLoc = theDataFileMgr.updateRecord(ns,
                                                             d,
                                                             nsdt,
                                                             r,
                                                             loc,
                                                             newObj.objdata(),
                                                             newObj.objsize(),
                                                             debug);

                // If we've moved this object to a new location, make sure we don't apply
                // that update again if our traversal picks the objecta again.
                //
                // We also take note that the diskloc if the updates are affecting indices.
                // Chances are that we're traversing one of them and they may be multi key and
                // therefore duplicate disklocs.
                if ( newLoc != loc || driver.modsAffectIndices()  ) {
                    seenLocs.insert( newLoc );
                }
            }

            // Log Obj
            if ( logop ) {
                if ( !logObj.isEmpty() ) {
                    BSONObj pattern = patternOrig;
                    logOp("u", ns, logObj , &pattern, 0, fromMigrate, &newObj );
                }
            }

            // One more document updated.
            numUpdated++;

            if (!multi) {
                break;
            }

            // If we used the cursor mechanism that prepares an earlier seen document for a
            // write we need to tell such mechanisms that the write is over.
            if ( touchPreviousDoc ) {
                cursor->recoverFromTouchingEarlierIterate();
            }

            getDur().commitIfNeeded();

        }

        return UpdateResult( true /* updated existing object(s) */,
                             driver.dollarModMode() /* $mod or obj replacement */,
                             numUpdated /* # of docments update */,
                             BSONObj() );
    }
Ejemplo n.º 30
0
    /* note: this is only (as-is) called for

             - not multi
             - not mods is indexed
             - not upsert
    */
    static UpdateResult _updateById(bool isOperatorUpdate,
                                    int idIdxNo,
                                    ModSet* mods,
                                    int profile,
                                    NamespaceDetails* d,
                                    NamespaceDetailsTransient *nsdt,
                                    bool su,
                                    const char* ns,
                                    const BSONObj& updateobj,
                                    BSONObj patternOrig,
                                    bool logop,
                                    OpDebug& debug,
                                    bool fromMigrate = false) {

        DiskLoc loc;
        {
            IndexDetails& i = d->idx(idIdxNo);
            BSONObj key = i.getKeyFromQuery( patternOrig );
            loc = i.idxInterface().findSingle(i, i.head, key);
            if( loc.isNull() ) {
                // no upsert support in _updateById yet, so we are done.
                return UpdateResult( 0 , 0 , 0 , BSONObj() );
            }
        }
        Record* r = loc.rec();

        if ( cc().allowedToThrowPageFaultException() && ! r->likelyInPhysicalMemory() ) {
            throw PageFaultException( r );
        }

        /* look for $inc etc.  note as listed here, all fields to inc must be this type, you can't set some
           regular ones at the moment. */
        if ( isOperatorUpdate ) {
            const BSONObj& onDisk = loc.obj();
            auto_ptr<ModSetState> mss = mods->prepare( onDisk );

            if( mss->canApplyInPlace() ) {
                mss->applyModsInPlace(true);
                DEBUGUPDATE( "\t\t\t updateById doing in place update" );
            }
            else {
                BSONObj newObj = mss->createNewFromMods();
                checkTooLarge(newObj);
                verify(nsdt);
                theDataFileMgr.updateRecord(ns, d, nsdt, r, loc , newObj.objdata(), newObj.objsize(), debug);
            }

            if ( logop ) {
                DEV verify( mods->size() );

                BSONObj pattern = patternOrig;
                if ( mss->haveArrayDepMod() ) {
                    BSONObjBuilder patternBuilder;
                    patternBuilder.appendElements( pattern );
                    mss->appendSizeSpecForArrayDepMods( patternBuilder );
                    pattern = patternBuilder.obj();
                }

                if( mss->needOpLogRewrite() ) {
                    DEBUGUPDATE( "\t rewrite update: " << mss->getOpLogRewrite() );
                    logOp("u", ns, mss->getOpLogRewrite() ,
                          &pattern, 0, fromMigrate );
                }
                else {
                    logOp("u", ns, updateobj, &pattern, 0, fromMigrate );
                }
            }
            return UpdateResult( 1 , 1 , 1 , BSONObj() );
        } // end $operator update

        // regular update
        BSONElementManipulator::lookForTimestamps( updateobj );
        checkNoMods( updateobj );
        verify(nsdt);
        theDataFileMgr.updateRecord(ns, d, nsdt, r, loc , updateobj.objdata(), updateobj.objsize(), debug );
        if ( logop ) {
            logOp("u", ns, updateobj, &patternOrig, 0, fromMigrate );
        }
        return UpdateResult( 1 , 0 , 1 , BSONObj() );
    }