Пример #1
0
    long long runCount( const char *ns, const BSONObj &cmd, string &err, int &errCode ) {
        Collection *cl = getCollection( ns );
        if (cl == NULL) {
            err = "ns missing";
            return -1;
        }
        BSONObj query = cmd.getObjectField("query");

        long long count = 0;
        long long skip = cmd["skip"].numberLong();
        long long limit = cmd["limit"].numberLong();

        if ( limit < 0 ) {
            limit  = -limit;
        }

        Client::WithOpSettings wos(OpSettings().setQueryCursorMode(DEFAULT_LOCK_CURSOR).setBulkFetch(true));

        Lock::assertAtLeastReadLocked(ns);
        try {
            for (shared_ptr<Cursor> cursor = getOptimizedCursor( ns, query, BSONObj(), _countPlanPolicies );
                 cursor->ok() ; cursor->advance() ) {
                if ( cursor->currentMatches() && !cursor->getsetdup( cursor->currPK() ) ) {
                    if ( skip > 0 ) {
                        --skip;
                    }
                    else {
                        ++count;
                        if ( limit > 0 && count >= limit ) {
                            break;
                        }
                    }
                }
            }
            return count;
        }
        catch ( const DBException &e ) {
            err = e.toString();
            errCode = e.getCode();
            count = -2;
        }
        catch ( const std::exception &e ) {
            err = e.what();
            errCode = 0;
            count = -2;
        }
        if ( count == -2 ) {
            // Historically we have returned zero in many count assertion cases - see SERVER-2291.
            log() << "Count with ns: " << ns << " and query: " << query
                  << " failed with exception: " << err << " code: " << errCode
                  << endl;
        }
        return count;
    }
Пример #2
0
 /* fetch a single object from collection ns that matches query
    set your db SavedContext first
 */
 DiskLoc Helpers::findOne(const StringData& ns, const BSONObj &query, bool requireIndex) {
     shared_ptr<Cursor> c =
         getOptimizedCursor( ns,
                             query,
                             BSONObj(),
                             requireIndex ?
                                 QueryPlanSelectionPolicy::indexOnly() :
                                 QueryPlanSelectionPolicy::any() );
     while( c->ok() ) {
         if ( c->currentMatches() && !c->getsetdup( c->currLoc() ) ) {
             return c->currLoc();
         }
         c->advance();
     }
     return DiskLoc();
 }
Пример #3
0
    vector<BSONObj> Helpers::findAll( const string& ns , const BSONObj& query ) {
        Lock::assertAtLeastReadLocked( ns );

        vector<BSONObj> all;

        Client::Context tx( ns );
        
        shared_ptr<Cursor> c = getOptimizedCursor( ns.c_str(), query );

        while( c->ok() ) {
            if ( c->currentMatches() && !c->getsetdup( c->currLoc() ) ) {
                all.push_back( c->current() );
            }
            c->advance();
        }

        return all;
    }
Пример #4
0
            void run() {
                client().insert( ns(), fromjson( "{ a:[ {}, { b:1 } ] }" ) );
                
                Client::Transaction transaction(DB_SERIALIZABLE);
                Client::ReadContext context( ns(), mongo::unittest::EMPTY_STRING );

                CoveredIndexMatcher matcher( BSON( "a.b" << 1 ), BSON( "$natural" << 1 ) );
                MatchDetails details;
                details.requestElemMatchKey();
                boost::shared_ptr<Cursor> cursor = getOptimizedCursor( ns(), BSONObj() );
                // Verify that the cursor is unindexed.
                ASSERT_EQUALS( "BasicCursor", cursor->toString() );
                ASSERT( matcher.matchesCurrent( cursor.get(), &details ) );
                // The '1' entry of the 'a' array is matched.
                ASSERT( details.hasElemMatchKey() );
                ASSERT_EQUALS( string( "1" ), details.elemMatchKey() );
                transaction.commit();
            }
Пример #5
0
 void run() {
     client().ensureIndex( ns(), BSON( "a.b" << 1 ) );
     client().insert( ns(), fromjson( "{ a:[ { b:1 } ] }" ) );
     
     Client::Transaction transaction(DB_SERIALIZABLE);
     Client::ReadContext context( ns(), mongo::unittest::EMPTY_STRING );
     
     BSONObj query = BSON( "a.b" << 1 );
     CoveredIndexMatcher matcher( query, BSON( "a.b" << 1 ) );
     MatchDetails details;
     details.requestElemMatchKey();
     boost::shared_ptr<Cursor> cursor = getOptimizedCursor( ns(), query );
     // Verify that the cursor is indexed.
     ASSERT_EQUALS( "IndexCursor a.b_1", cursor->toString() );
     // Verify that the cursor is not multikey.
     ASSERT( !cursor->isMultiKey() );
     ASSERT( matcher.matchesCurrent( cursor.get(), &details ) );
     // The '0' entry of the 'a' array is matched.
     ASSERT( details.hasElemMatchKey() );
     ASSERT_EQUALS( string( "0" ), details.elemMatchKey() );
     transaction.commit();
 }
Пример #6
0
        bool group( const std::string& realdbname,
                    const std::string& ns,
                    const BSONObj& query,
                    BSONObj keyPattern,
                    const std::string& keyFunctionCode,
                    const std::string& reduceCode,
                    const char * reduceScope,
                    BSONObj initial,
                    const std::string& finalize,
                    string& errmsg,
                    BSONObjBuilder& result ) {

            auto_ptr<Scope> s = globalScriptEngine->getPooledScope( realdbname, "group");

            if ( reduceScope )
                s->init( reduceScope );

            s->setObject( "$initial" , initial , true );

            s->exec( "$reduce = " + reduceCode , "$group reduce setup" , false , true , true , 100 );
            s->exec( "$arr = [];" , "$group reduce setup 2" , false , true , true , 100 );
            ScriptingFunction f = s->createFunction(
                                      "function(){ "
                                      "  if ( $arr[n] == null ){ "
                                      "    next = {}; "
                                      "    Object.extend( next , $key ); "
                                      "    Object.extend( next , $initial , true ); "
                                      "    $arr[n] = next; "
                                      "    next = null; "
                                      "  } "
                                      "  $reduce( obj , $arr[n] ); "
                                      "}" );

            ScriptingFunction keyFunction = 0;
            if ( keyFunctionCode.size() ) {
                keyFunction = s->createFunction( keyFunctionCode.c_str() );
            }


            double keysize = keyPattern.objsize() * 3;
            double keynum = 1;

            map<BSONObj,int,BSONObjCmp> map;
            list<BSONObj> blah;

            shared_ptr<Cursor> cursor = getOptimizedCursor(ns.c_str() , query);
            ClientCursor::Holder ccPointer( new ClientCursor( QueryOption_NoCursorTimeout, cursor,
                                                             ns ) );

            while ( cursor->ok() ) {
                
                if ( !ccPointer->yieldSometimes( ClientCursor::MaybeCovered ) ||
                    !cursor->ok() ) {
                    break;
                }
                
                if ( !cursor->currentMatches() || cursor->getsetdup( cursor->currLoc() ) ) {
                    cursor->advance();
                    continue;
                }

                if ( !ccPointer->yieldSometimes( ClientCursor::WillNeed ) ||
                    !cursor->ok() ) {
                    break;
                }
                
                BSONObj obj = cursor->current();
                cursor->advance();

                BSONObj key = getKey( obj , keyPattern , keyFunction , keysize / keynum , s.get() );
                keysize += key.objsize();
                keynum++;

                int& n = map[key];
                if ( n == 0 ) {
                    n = map.size();
                    s->setObject( "$key" , key , true );

                    uassert( 10043 ,  "group() can't handle more than 20000 unique keys" , n <= 20000 );
                }

                s->setObject( "obj" , obj , true );
                s->setNumber( "n" , n - 1 );
                if ( s->invoke( f , 0, 0 , 0 , true ) ) {
                    throw UserException( 9010 , (string)"reduce invoke failed: " + s->getError() );
                }
            }
            ccPointer.reset();

            if (!finalize.empty()) {
                s->exec( "$finalize = " + finalize , "$group finalize define" , false , true , true , 100 );
                ScriptingFunction g = s->createFunction(
                                          "function(){ "
                                          "  for(var i=0; i < $arr.length; i++){ "
                                          "  var ret = $finalize($arr[i]); "
                                          "  if (ret !== undefined) "
                                          "    $arr[i] = ret; "
                                          "  } "
                                          "}" );
                s->invoke( g , 0, 0 , 0 , true );
            }

            result.appendArray( "retval" , s->getObject( "$arr" ) );
            result.append( "count" , keynum - 1 );
            result.append( "keys" , (int)(map.size()) );
            s->exec( "$arr = [];" , "$group reduce setup 2" , false , true , true , 100 );
            s->gc();

            return true;
        }
Пример #7
0
        bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
            Timer t;
            string ns = dbname + '.' + cmdObj.firstElement().valuestr();

            string key = cmdObj["key"].valuestrsafe();
            BSONObj keyPattern = BSON( key << 1 );

            BSONObj query = getQuery( cmdObj );

            int bufSize = BSONObjMaxUserSize - 4096;
            BufBuilder bb( bufSize );
            char * start = bb.buf();

            BSONArrayBuilder arr( bb );
            BSONElementSet values;

            long long nscanned = 0; // locations looked at
            long long nscannedObjects = 0; // full objects looked at
            long long n = 0; // matches
            MatchDetails md;

            Collection *cl = getCollection( ns );

            if ( ! cl ) {
                result.appendArray( "values" , BSONObj() );
                result.append( "stats" , BSON( "n" << 0 << "nscanned" << 0 << "nscannedObjects" << 0 ) );
                return true;
            }

            shared_ptr<Cursor> cursor;
            if ( ! query.isEmpty() ) {
                cursor = getOptimizedCursor(ns.c_str() , query , BSONObj() );
            }
            else {

                // query is empty, so lets see if we can find an index
                // with the key so we don't have to hit the raw data
                for (int i = 0; i < cl->nIndexes(); i++) {
                    IndexDetails &idx = cl->idx(i);
                    if (cl->isMultikey(i)) {
                        continue;
                    }

                    if ( idx.inKeyPattern( key ) ) {
                        cursor = getBestGuessCursor( ns.c_str() ,
                                                     BSONObj() ,
                                                     idx.keyPattern() );
                        if( cursor.get() ) break;
                    }

                }

                if ( ! cursor.get() ) {
                    cursor = getOptimizedCursor(ns.c_str() , query , BSONObj() );
                }

            }

            
            verify( cursor );
            string cursorName = cursor->toString();
            
            auto_ptr<ClientCursor> cc (new ClientCursor(QueryOption_NoCursorTimeout, cursor, ns));

            for ( ; cursor->ok(); cursor->advance() ) {
                nscanned++;
                bool loadedRecord = false;

                if ( cursor->currentMatches( &md ) && !cursor->getsetdup( cursor->currPK() ) ) {
                    n++;

                    BSONObj holder;
                    BSONElementSet temp;
                    loadedRecord = ! cc->getFieldsDotted( key , temp, holder );

                    for ( BSONElementSet::iterator i=temp.begin(); i!=temp.end(); ++i ) {
                        BSONElement e = *i;
                        if ( values.count( e ) )
                            continue;

                        int now = bb.len();

                        uassert(10044,  "distinct too big, 16mb cap", ( now + e.size() + 1024 ) < bufSize );

                        arr.append( e );
                        BSONElement x( start + now );

                        values.insert( x );
                    }
                }

                if ( loadedRecord || md.hasLoadedRecord() )
                    nscannedObjects++;

                RARELY killCurrentOp.checkForInterrupt();
            }

            verify( start == bb.buf() );

            result.appendArray( "values" , arr.done() );

            {
                BSONObjBuilder b;
                b.appendNumber( "n" , n );
                b.appendNumber( "nscanned" , nscanned );
                b.appendNumber( "nscannedObjects" , nscannedObjects );
                b.appendNumber( "timems" , t.millis() );
                b.append( "cursor" , cursorName );
                result.append( "stats" , b.obj() );
            }

            return true;
        }
Пример #8
0
    UpdateResult _updateObjects( const char* ns,
                                 const BSONObj& updateobj,
                                 const BSONObj& patternOrig,
                                 bool upsert,
                                 bool multi,
                                 bool logop ,
                                 OpDebug& debug,
                                 bool fromMigrate,
                                 const QueryPlanSelectionPolicy& planPolicy ) {

        TOKULOG(2) << "update: " << ns
                   << " update: " << updateobj
                   << " query: " << patternOrig
                   << " upsert: " << upsert << " multi: " << multi << endl;

        debug.updateobj = updateobj;

        NamespaceDetails *d = getAndMaybeCreateNS(ns, logop);

        auto_ptr<ModSet> mods;
        const bool isOperatorUpdate = updateobj.firstElementFieldName()[0] == '$';
        bool modsAreIndexed = false;

        if ( isOperatorUpdate ) {
            if ( d->indexBuildInProgress() ) {
                set<string> bgKeys;
                d->inProgIdx().keyPattern().getFieldNames(bgKeys);
                mods.reset( new ModSet(updateobj, d->indexKeys(), &bgKeys) );
            }
            else {
                mods.reset( new ModSet(updateobj, d->indexKeys()) );
            }
            modsAreIndexed = mods->isIndexed();
        }


        int idIdxNo = -1;
        if ( planPolicy.permitOptimalIdPlan() && !multi && !modsAreIndexed &&
             (idIdxNo = d->findIdIndex()) >= 0 && mayUpdateById(d, patternOrig) ) {
            debug.idhack = true;
            IndexDetails &idx = d->idx(idIdxNo);
            BSONObj pk = idx.getKeyFromQuery(patternOrig);
            TOKULOG(3) << "_updateObjects using simple _id query, pattern " << patternOrig << ", pk " << pk << endl;
            UpdateResult result = _updateById( pk,
                                               isOperatorUpdate,
                                               mods.get(),
                                               d,
                                               ns,
                                               updateobj,
                                               patternOrig,
                                               logop,
                                               debug,
                                               fromMigrate);
            if ( result.existing || ! upsert ) {
                return result;
            }
            else if ( upsert && ! isOperatorUpdate && ! logop) {
                debug.upsert = true;
                BSONObj objModified = updateobj;
                insertAndLog( ns, d, objModified, logop, fromMigrate );
                return UpdateResult( 0 , 0 , 1 , updateobj );
            }
        }

        int numModded = 0;
        debug.nscanned = 0;
        shared_ptr<Cursor> c = getOptimizedCursor( ns, patternOrig, BSONObj(), planPolicy );

        if( c->ok() ) {
            set<BSONObj> seenObjects;
            MatchDetails details;
            auto_ptr<ClientCursor> cc;
            do {

                debug.nscanned++;

                if ( mods.get() && mods->hasDynamicArray() ) {
                    // The Cursor must have a Matcher to record an elemMatchKey.  But currently
                    // a modifier on a dynamic array field may be applied even if there is no
                    // elemMatchKey, so a matcher cannot be required.
                    //verify( c->matcher() );
                    details.requestElemMatchKey();
                }

                if ( !c->currentMatches( &details ) ) {
                    c->advance();
                    continue;
                }

                BSONObj currPK = c->currPK();
                if ( c->getsetdup( currPK ) ) {
                    c->advance();
                    continue;
                }

                BSONObj currentObj = c->current();
                BSONObj pattern = patternOrig;

                if ( logop ) {
                    BSONObjBuilder idPattern;
                    BSONElement id;
                    // NOTE: If the matching object lacks an id, we'll log
                    // with the original pattern.  This isn't replay-safe.
                    // It might make sense to suppress the log instead
                    // if there's no id.
                    if ( currentObj.getObjectID( id ) ) {
                        idPattern.append( id );
                        pattern = idPattern.obj();
                    }
                    else {
                        uassert( 10157 ,  "multi-update requires all modified objects to have an _id" , ! multi );
                    }
                }

                /* look for $inc etc.  note as listed here, all fields to inc must be this type, you can't set some
                   regular ones at the moment. */
                struct LogOpUpdateDetails loud;
                loud.logop = logop;
                loud.ns = ns;
                loud.fromMigrate = fromMigrate;
                if ( isOperatorUpdate ) {

                    if ( multi ) {
                        // Make our own copies of the currPK and currentObj before we invalidate
                        // them by advancing the cursor.
                        currPK = currPK.copy();
                        currentObj = currentObj.copy();

                        // Advance past the document to be modified. This used to be because of SERVER-5198,
                        // but TokuMX does it because we want to avoid needing to do manual deduplication
                        // of this PK on the next iteration if the current update modifies the next
                        // entry in the index. For example, an index scan over a:1 with mod {$inc: {a:1}}
                        // would cause every other key read to be a duplicate if we didn't advance here.
                        while ( c->ok() && currPK == c->currPK() ) {
                            c->advance();
                        }

                        // Multi updates need to do their own deduplication because updates may modify the
                        // keys the cursor is in the process of scanning over.
                        if ( seenObjects.count( currPK ) ) {
                            continue;
                        } else {
                            seenObjects.insert( currPK );
                        }
                    }

                    ModSet* useMods = mods.get();

                    auto_ptr<ModSet> mymodset;
                    if ( details.hasElemMatchKey() && mods->hasDynamicArray() ) {
                        useMods = mods->fixDynamicArray( details.elemMatchKey() );
                        mymodset.reset( useMods );
                    }

                    auto_ptr<ModSetState> mss = useMods->prepare( currentObj );
                    updateUsingMods( d, currPK, currentObj, *mss, &loud );

                    numModded++;
                    if ( ! multi )
                        return UpdateResult( 1 , 1 , numModded , BSONObj() );

                    continue;
                } // end if operator is update

                uassert( 10158 ,  "multi update only works with $ operators" , ! multi );

                updateNoMods( d, currPK, currentObj, updateobj, &loud );

                return UpdateResult( 1 , 0 , 1 , BSONObj() );
            } while ( c->ok() );
        } // endif

        if ( numModded )
            return UpdateResult( 1 , 1 , numModded , BSONObj() );

        if ( upsert ) {
            BSONObj newObj = updateobj;
            if ( updateobj.firstElementFieldName()[0] == '$' ) {
                // upsert of an $operation. build a default object
                BSONObj newObj = mods->createNewFromQuery( patternOrig );
                debug.fastmodinsert = true;
                insertAndLog( ns, d, newObj, logop, fromMigrate );
                return UpdateResult( 0 , 1 , 1 , newObj );
            }
            uassert( 10159 ,  "multi update only works with $ operators" , ! multi );
            debug.upsert = true;
            insertAndLog( ns, d, newObj, logop, fromMigrate );
            return UpdateResult( 0 , 0 , 1 , newObj );
        }

        return UpdateResult( 0 , isOperatorUpdate , 0 , BSONObj() );
    }
Пример #9
0
    /**
     * Run a query with a cursor provided by the query optimizer, or FindingStartCursor.
     * @yields the db lock.
     */
    string queryWithQueryOptimizer( int queryOptions, const string& ns,
                                    const BSONObj &jsobj, CurOp& curop,
                                    const BSONObj &query, const BSONObj &order,
                                    const shared_ptr<ParsedQuery> &pq_shared,
                                    const BSONObj &oldPlan,
                                    const ChunkVersion &shardingVersionAtStart,
                                    scoped_ptr<PageFaultRetryableSection>& parentPageFaultSection,
                                    scoped_ptr<NoPageFaultsAllowed>& noPageFault,
                                    Message &result ) {

        const ParsedQuery &pq( *pq_shared );
        shared_ptr<Cursor> cursor;
        QueryPlanSummary queryPlan;
        
        if ( pq.hasOption( QueryOption_OplogReplay ) ) {
            cursor = FindingStartCursor::getCursor( ns.c_str(), query, order );
        }
        else {
            cursor = getOptimizedCursor( ns.c_str(),
                                         query,
                                         order,
                                         QueryPlanSelectionPolicy::any(),
                                         pq_shared,
                                         false,
                                         &queryPlan );
        }
        verify( cursor );
        
        scoped_ptr<QueryResponseBuilder> queryResponseBuilder
                ( QueryResponseBuilder::make( pq, cursor, queryPlan, oldPlan ) );
        bool saveClientCursor = false;
        OpTime slaveReadTill;
        ClientCursorHolder ccPointer( new ClientCursor( QueryOption_NoCursorTimeout, cursor,
                                                         ns ) );
        
        for( ; cursor->ok(); cursor->advance() ) {

            bool yielded = false;
            if ( !ccPointer->yieldSometimes( ClientCursor::MaybeCovered, &yielded ) ||
                !cursor->ok() ) {
                cursor.reset();
                queryResponseBuilder->noteYield();
                // !!! TODO The queryResponseBuilder still holds cursor.  Currently it will not do
                // anything unsafe with the cursor in handoff(), but this is very fragile.
                //
                // We don't fail the query since we're fine with returning partial data if the
                // collection was dropped.
                // NOTE see SERVER-2454.
                // TODO This is wrong.  The cursor could be gone if the closeAllDatabases command
                // just ran.
                break;
            }

            if ( yielded ) {
                queryResponseBuilder->noteYield();
            }
            
            if ( pq.getMaxScan() && cursor->nscanned() > pq.getMaxScan() ) {
                break;
            }
            
            if ( !queryResponseBuilder->addMatch() ) {
                continue;
            }
            
            // Note slave's position in the oplog.
            if ( pq.hasOption( QueryOption_OplogReplay ) ) {
                BSONObj current = cursor->current();
                BSONElement e = current["ts"];
                if ( e.type() == Date || e.type() == Timestamp ) {
                    slaveReadTill = e._opTime();
                }
            }
            
            if ( !cursor->supportGetMore() || pq.isExplain() ) {
                if ( queryResponseBuilder->enoughTotalResults() ) {
                    break;
                }
            }
            else if ( queryResponseBuilder->enoughForFirstBatch() ) {
                // if only 1 requested, no cursor saved for efficiency...we assume it is findOne()
                if ( pq.wantMore() && pq.getNumToReturn() != 1 ) {
                    queryResponseBuilder->finishedFirstBatch();
                    if ( cursor->advance() ) {
                        saveClientCursor = true;
                    }
                }
                break;
            }
        }
        
        if ( cursor ) {
            if ( pq.hasOption( QueryOption_CursorTailable ) && pq.getNumToReturn() != 1 ) {
                cursor->setTailable();
            }
            
            // If the tailing request succeeded.
            if ( cursor->tailable() ) {
                saveClientCursor = true;
            }
        }
        
        if ( ! shardingState.getVersion( ns ).isWriteCompatibleWith( shardingVersionAtStart ) ) {
            // if the version changed during the query
            // we might be missing some data
            // and its safe to send this as mongos can resend
            // at this point
            throw SendStaleConfigException(ns, "version changed during initial query",
                                           shardingVersionAtStart,
                                           shardingState.getVersion(ns));
        }
        
        parentPageFaultSection.reset(0);
        noPageFault.reset( new NoPageFaultsAllowed() );

        int nReturned = queryResponseBuilder->handoff( result );

        ccPointer.reset();
        long long cursorid = 0;
        if ( saveClientCursor ) {
            // Create a new ClientCursor, with a default timeout.
            ccPointer.reset( new ClientCursor( queryOptions, cursor, ns,
                                              jsobj.getOwned() ) );
            cursorid = ccPointer->cursorid();
            DEV { MONGO_TLOG(2) << "query has more, cursorid: " << cursorid << endl; }
            if ( cursor->supportYields() ) {
                ClientCursor::YieldData data;
                ccPointer->prepareToYield( data );
            }
            else {
                ccPointer->c()->noteLocation();
            }
            
            // Save slave's position in the oplog.
            if ( pq.hasOption( QueryOption_OplogReplay ) && !slaveReadTill.isNull() ) {
                ccPointer->slaveReadTill( slaveReadTill );
            }
            
            if ( !ccPointer->ok() && ccPointer->c()->tailable() ) {
                DEV {
                    MONGO_TLOG(0) << "query has no more but tailable, cursorid: " << cursorid <<
                        endl;
                }
            }
            
            if( queryOptions & QueryOption_Exhaust ) {
                curop.debug().exhaust = true;
            }
            
            // Set attributes for getMore.
            ccPointer->setCollMetadata( queryResponseBuilder->collMetadata() );
            ccPointer->setPos( nReturned );
            ccPointer->pq = pq_shared;
            ccPointer->fields = pq.getFieldPtr();

            // If the query had a time limit, remaining time is "rolled over" to the cursor (for
            // use by future getmore ops).
            ccPointer->setLeftoverMaxTimeMicros( curop.getRemainingMaxTimeMicros() );

            ccPointer.release();
        }
Пример #10
0
    UpdateResult _updateObjects( bool su,
                                 const char* ns,
                                 const BSONObj& updateobj,
                                 const BSONObj& patternOrig,
                                 bool upsert,
                                 bool multi,
                                 bool logop ,
                                 OpDebug& debug,
                                 RemoveSaver* rs,
                                 bool fromMigrate,
                                 const QueryPlanSelectionPolicy& planPolicy,
                                 bool forReplication ) {

        DEBUGUPDATE( "update: " << ns
                     << " update: " << updateobj
                     << " query: " << patternOrig
                     << " upsert: " << upsert << " multi: " << multi );

        Client& client = cc();

        debug.updateobj = updateobj;

        // The idea with these here it to make them loop invariant for
        // multi updates, and thus be a bit faster for that case.  The
        // pointers may be left invalid on a failed or terminal yield
        // recovery.
        NamespaceDetails* d = nsdetails(ns); // can be null if an upsert...
        NamespaceDetailsTransient* nsdt = &NamespaceDetailsTransient::get(ns);

        auto_ptr<ModSet> mods;
        bool isOperatorUpdate = updateobj.firstElementFieldName()[0] == '$';
        int modsIsIndexed = false; // really the # of indexes
        if ( isOperatorUpdate ) {
            mods.reset( new ModSet(updateobj, nsdt->indexKeys(), forReplication) );
            modsIsIndexed = mods->maxNumIndexUpdated();
        }

        if( planPolicy.permitOptimalIdPlan() && !multi && isSimpleIdQuery(patternOrig) && d &&
           !modsIsIndexed ) {
            int idxNo = d->findIdIndex();
            if( idxNo >= 0 ) {
                debug.idhack = true;

                UpdateResult result = _updateById( isOperatorUpdate,
                                                   idxNo,
                                                   mods.get(),
                                                   d,
                                                   nsdt,
                                                   su,
                                                   ns,
                                                   updateobj,
                                                   patternOrig,
                                                   logop,
                                                   debug,
                                                   fromMigrate);
                if ( result.existing || ! upsert ) {
                    return result;
                }
                else if ( upsert && ! isOperatorUpdate ) {
                    // this handles repl inserts
                    checkNoMods( updateobj );
                    debug.upsert = true;
                    BSONObj no = updateobj;
                    theDataFileMgr.insertWithObjMod(ns, no, false, su);
                    if ( logop )
                        logOp( "i", ns, no, 0, 0, fromMigrate, &no );

                    return UpdateResult( 0 , 0 , 1 , no );
                }
            }
        }

        int numModded = 0;
        debug.nscanned = 0;
        shared_ptr<Cursor> c = getOptimizedCursor( ns, patternOrig, BSONObj(), planPolicy );
        d = nsdetails(ns);
        nsdt = &NamespaceDetailsTransient::get(ns);
        bool autoDedup = c->autoDedup();

        if( c->ok() ) {
            set<DiskLoc> seenObjects;
            MatchDetails details;
            auto_ptr<ClientCursor> cc;
            do {

                if ( cc.get() == 0 &&
                     client.allowedToThrowPageFaultException() &&
                     ! c->currLoc().isNull() &&
                     ! c->currLoc().rec()->likelyInPhysicalMemory() ) {
                    throw PageFaultException( c->currLoc().rec() );
                }

                bool atomic = c->matcher() && c->matcher()->docMatcher().atomic();

                if ( ! atomic && debug.nscanned > 0 ) {
                    // we need to use a ClientCursor to yield
                    if ( cc.get() == 0 ) {
                        shared_ptr< Cursor > cPtr = c;
                        cc.reset( new ClientCursor( QueryOption_NoCursorTimeout , cPtr , ns ) );
                    }

                    bool didYield;
                    if ( ! cc->yieldSometimes( ClientCursor::WillNeed, &didYield ) ) {
                        cc.release();
                        break;
                    }
                    if ( !c->ok() ) {
                        break;
                    }

                    if ( didYield ) {
                        d = nsdetails(ns);
                        if ( ! d )
                            break;
                        nsdt = &NamespaceDetailsTransient::get(ns);
                        if ( mods.get() ) {
                            mods->setIndexedStatus( nsdt->indexKeys() );
                            modsIsIndexed = mods->maxNumIndexUpdated();
                        }

                    }

                } // end yielding block

                debug.nscanned++;

                if ( mods.get() && mods->hasDynamicArray() ) {
                    details.requestElemMatchKey();
                }

                if ( !c->currentMatches( &details ) ) {
                    c->advance();
                    continue;
                }

                Record* r = c->_current();
                DiskLoc loc = c->currLoc();

                if ( c->getsetdup( loc ) && autoDedup ) {
                    c->advance();
                    continue;
                }

                BSONObj js = BSONObj::make(r);

                BSONObj pattern = patternOrig;

                if ( logop ) {
                    BSONObjBuilder idPattern;
                    BSONElement id;
                    // NOTE: If the matching object lacks an id, we'll log
                    // with the original pattern.  This isn't replay-safe.
                    // It might make sense to suppress the log instead
                    // if there's no id.
                    if ( js.getObjectID( id ) ) {
                        idPattern.append( id );
                        pattern = idPattern.obj();
                    }
                    else {
                        uassert( 10157 ,  "multi-update requires all modified objects to have an _id" , ! multi );
                    }
                }

                /* look for $inc etc.  note as listed here, all fields to inc must be this type, you can't set some
                    regular ones at the moment. */
                if ( isOperatorUpdate ) {

                    if ( multi ) {
                        // go to next record in case this one moves
                        c->advance();

                        // Update operations are deduped for cursors that implement their own
                        // deduplication.  In particular, some geo cursors are excluded.
                        if ( autoDedup ) {

                            if ( seenObjects.count( loc ) ) {
                                continue;
                            }

                            // SERVER-5198 Advance past the document to be modified, provided
                            // deduplication is enabled, but see SERVER-5725.
                            while( c->ok() && loc == c->currLoc() ) {
                                c->advance();
                            }
                        }
                    }

                    const BSONObj& onDisk = loc.obj();

                    ModSet* useMods = mods.get();

                    auto_ptr<ModSet> mymodset;
                    if ( details.hasElemMatchKey() && mods->hasDynamicArray() ) {
                        useMods = mods->fixDynamicArray( details.elemMatchKey() );
                        mymodset.reset( useMods );
                    }

                    auto_ptr<ModSetState> mss = useMods->prepare( onDisk,
                                                                  false /* not an insertion */ );

                    bool willAdvanceCursor = multi && c->ok() && ( modsIsIndexed || ! mss->canApplyInPlace() );

                    if ( willAdvanceCursor ) {
                        if ( cc.get() ) {
                            cc->setDoingDeletes( true );
                        }
                        c->prepareToTouchEarlierIterate();
                    }

                    // If we've made it this far, "ns" must contain a valid collection name, and so
                    // is of the form "db.collection".  Therefore, the following expression must
                    // always be valid.  "system.users" updates must never be done in place, in
                    // order to ensure that they are validated inside DataFileMgr::updateRecord(.).
                    bool isSystemUsersMod = (NamespaceString(ns).coll == "system.users");

                    BSONObj newObj;
                    if ( !mss->isUpdateIndexed() && mss->canApplyInPlace() && !isSystemUsersMod ) {
                        mss->applyModsInPlace( true );// const_cast<BSONObj&>(onDisk) );

                        DEBUGUPDATE( "\t\t\t doing in place update" );
                        if ( !multi )
                            debug.fastmod = true;

                        if ( modsIsIndexed ) {
                            seenObjects.insert( loc );
                        }
                        newObj = loc.obj();
                        d->paddingFits();
                    }
                    else {
                        newObj = mss->createNewFromMods();
                        checkTooLarge(newObj);
                        DiskLoc newLoc = theDataFileMgr.updateRecord(ns,
                                                                     d,
                                                                     nsdt,
                                                                     r,
                                                                     loc,
                                                                     newObj.objdata(),
                                                                     newObj.objsize(),
                                                                     debug);

                        if ( newLoc != loc || modsIsIndexed ){
                            // log() << "Moved obj " << newLoc.obj()["_id"] << " from " << loc << " to " << newLoc << endl;
                            // object moved, need to make sure we don' get again
                            seenObjects.insert( newLoc );
                        }

                    }

                    if ( logop ) {
                        DEV verify( mods->size() );
                        BSONObj logObj = mss->getOpLogRewrite();
                        DEBUGUPDATE( "\t rewrite update: " << logObj );

                        // It is possible that the entire mod set was a no-op over this
                        // document.  We would have an empty log record in that case. If we
                        // call logOp, with an empty record, that would be replicated as "clear
                        // this record", which is not what we want. Therefore, to get a no-op
                        // in the replica, we simply don't log.
                        if ( logObj.nFields() ) {
                            logOp("u", ns, logObj , &pattern, 0, fromMigrate, &newObj );
                        }
                    }
                    numModded++;
                    if ( ! multi )
                        return UpdateResult( 1 , 1 , numModded , BSONObj() );
                    if ( willAdvanceCursor )
                        c->recoverFromTouchingEarlierIterate();

                    getDur().commitIfNeeded();

                    continue;
                }

                uassert( 10158 ,  "multi update only works with $ operators" , ! multi );

                BSONElementManipulator::lookForTimestamps( updateobj );
                checkNoMods( updateobj );
                theDataFileMgr.updateRecord(ns, d, nsdt, r, loc , updateobj.objdata(), updateobj.objsize(), debug, su);
                if ( logop ) {
                    DEV wassert( !su ); // super used doesn't get logged, this would be bad.
                    logOp("u", ns, updateobj, &pattern, 0, fromMigrate, &updateobj );
                }
                return UpdateResult( 1 , 0 , 1 , BSONObj() );
            } while ( c->ok() );
        } // endif

        if ( numModded )
            return UpdateResult( 1 , 1 , numModded , BSONObj() );

        if ( upsert ) {
            if ( updateobj.firstElementFieldName()[0] == '$' ) {
                // upsert of an $operation. build a default object
                BSONObj newObj = mods->createNewFromQuery( patternOrig );
                checkNoMods( newObj );
                debug.fastmodinsert = true;
                theDataFileMgr.insertWithObjMod(ns, newObj, false, su);
                if ( logop )
                    logOp( "i", ns, newObj, 0, 0, fromMigrate, &newObj );

                return UpdateResult( 0 , 1 , 1 , newObj );
            }
            uassert( 10159 ,  "multi update only works with $ operators" , ! multi );
            checkNoMods( updateobj );
            debug.upsert = true;
            BSONObj no = updateobj;
            theDataFileMgr.insertWithObjMod(ns, no, false, su);
            if ( logop )
                logOp( "i", ns, no, 0, 0, fromMigrate, &no );
            return UpdateResult( 0 , 0 , 1 , no );
        }

        return UpdateResult( 0 , isOperatorUpdate , 0 , BSONObj() );
    }
Пример #11
0
    UpdateResult update(const UpdateRequest& request, OpDebug* opDebug, UpdateDriver* driver) {

        LOG(3) << "processing update : " << request;
        const NamespaceString& nsString = request.getNamespaceString();

        validateUpdate( nsString.ns().c_str(), request.getUpdates(), request.getQuery() );

        NamespaceDetails* nsDetails = nsdetails( nsString.ns() );
        NamespaceDetailsTransient* nsDetailsTransient =
            &NamespaceDetailsTransient::get( nsString.ns().c_str() );

        // TODO: This seems a bit circuitious.
        opDebug->updateobj = request.getUpdates();

        driver->refreshIndexKeys( nsDetailsTransient->indexKeys() );

        shared_ptr<Cursor> cursor = getOptimizedCursor(
            nsString.ns(), request.getQuery(), BSONObj(), request.getQueryPlanSelectionPolicy() );

        // If the update was marked with '$isolated' (a.k.a '$atomic'), we are not allowed to
        // yield while evaluating the update loop below.
        //
        // TODO: Old code checks this repeatedly within the update loop. Is that necessary? It seems
        // that once atomic should be always atomic.
        const bool isolated =
            cursor->ok() &&
            cursor->matcher() &&
            cursor->matcher()->docMatcher().atomic();

        // The 'cursor' the optimizer gave us may contain query plans that generate duplicate
        // diskloc's. We set up here the mechanims that will prevent us from processing those
        // twice if we see them. We also set up a 'ClientCursor' so that we can support
        // yielding.
        //
        // TODO: Is it valid to call this on a non-ok cursor?
        const bool dedupHere = cursor->autoDedup();

        //
        // We'll start assuming we have one or more documents for this update. (Othwerwise,
        // we'll fallback to upserting.)
        //

        // We record that this will not be an upsert, in case a mod doesn't want to be applied
        // when in strict update mode.
        driver->setContext( ModifierInterface::ExecInfo::UPDATE_CONTEXT );

        // Let's fetch each of them and pipe them through the update expression, making sure to
        // keep track of the necessary stats. Recall that we'll be pulling documents out of
        // cursors and some of them do not deduplicate the entries they generate. We have
        // deduping logic in here, too -- for now.
        unordered_set<DiskLoc, DiskLoc::Hasher> seenLocs;
        int numMatched = 0;

        // Reset these counters on each call. We might re-enter this function to retry this
        // update if we throw a page fault exception below, and we rely on these counters
        // reflecting only the actions taken locally. In particlar, we must have the no-op
        // counter reset so that we can meaningfully comapre it with numMatched above.
        opDebug->nscanned = 0;
        opDebug->nupdateNoops = 0;

        Client& client = cc();

        mutablebson::Document doc;
        mutablebson::DamageVector damages;

        // If we are going to be yielding, we will need a ClientCursor scoped to this loop. We
        // only loop as long as the underlying cursor is OK.
        for ( auto_ptr<ClientCursor> clientCursor; cursor->ok(); ) {

            // If we haven't constructed a ClientCursor, and if the client allows us to throw
            // page faults, and if we are referring to a location that is likely not in
            // physical memory, then throw a PageFaultException. The entire operation will be
            // restarted.
            if ( clientCursor.get() == NULL &&
                 client.allowedToThrowPageFaultException() &&
                 !cursor->currLoc().isNull() &&
                 !cursor->currLoc().rec()->likelyInPhysicalMemory() ) {

                // We should never throw a PFE if we have already updated items. The numMatched
                // variable includes no-ops, which do not prevent us from raising a PFE, so if
                // numMatched is non-zero, we are still OK to throw as long all matched items
                // resulted in a no-op.
                dassert((numMatched == 0) || (numMatched == opDebug->nupdateNoops));

                throw PageFaultException( cursor->currLoc().rec() );
            }

            if ( !isolated && opDebug->nscanned != 0 ) {

                // We are permitted to yield. To do so we need a ClientCursor, so create one
                // now if we have not yet done so.
                if ( !clientCursor.get() )
                    clientCursor.reset(
                        new ClientCursor( QueryOption_NoCursorTimeout, cursor, nsString.ns() ) );

                // Ask the client cursor to yield. We get two bits of state back: whether or not
                // we yielded, and whether or not we correctly recovered from yielding.
                bool yielded = false;
                const bool recovered = clientCursor->yieldSometimes(
                    ClientCursor::WillNeed, &yielded );

                if ( !recovered ) {
                    // If we failed to recover from the yield, then the ClientCursor is already
                    // gone. Release it so we don't destroy it a second time.
                    clientCursor.release();
                    break;
                }

                if ( !cursor->ok() ) {
                    // If the cursor died while we were yielded, just get out of the update loop.
                    break;
                }

                if ( yielded ) {
                    // We yielded and recovered OK, and our cursor is still good. Details about
                    // our namespace may have changed while we were yielded, so we re-acquire
                    // them here. If we can't do so, escape the update loop. Otherwise, refresh
                    // the driver so that it knows about what is currently indexed.
                    nsDetails = nsdetails( nsString.ns() );
                    if ( !nsDetails )
                        break;
                    nsDetailsTransient = &NamespaceDetailsTransient::get( nsString.ns().c_str() );

                    // TODO: This copies the index keys, but it may not need to do so.
                    driver->refreshIndexKeys( nsDetailsTransient->indexKeys() );
                }

            }

            // Let's fetch the next candidate object for this update.
            Record* record = cursor->_current();
            DiskLoc loc = cursor->currLoc();
            const BSONObj oldObj = loc.obj();

            // We count how many documents we scanned even though we may skip those that are
            // deemed duplicated. The final 'numUpdated' and 'nscanned' numbers may differ for
            // that reason.
            opDebug->nscanned++;

            // Skips this document if it:
            // a) doesn't match the query portion of the update
            // b) was deemed duplicate by the underlying cursor machinery
            //
            // Now, if we are going to update the document,
            // c) we don't want to do so while the cursor is at it, as that may invalidate
            // the cursor. So, we advance to next document, before issuing the update.
            MatchDetails matchDetails;
            matchDetails.requestElemMatchKey();
            if ( !cursor->currentMatches( &matchDetails ) ) {
                // a)
                cursor->advance();
                continue;
            }
            else if ( cursor->getsetdup( loc ) && dedupHere ) {
                // b)
                cursor->advance();
                continue;
            }
            else if (!driver->isDocReplacement() && request.isMulti()) {
                // c)
                cursor->advance();
                if ( dedupHere ) {
                    if ( seenLocs.count( loc ) ) {
                        continue;
                    }
                }

                // There are certain kind of cursors that hold multiple pointers to data
                // underneath. $or cursors is one example. In a $or cursor, it may be the case
                // that when we did the last advance(), we finished consuming documents from
                // one of $or child and started consuming the next one. In that case, it is
                // possible that the last document of the previous child is the same as the
                // first document of the next (see SERVER-5198 and jstests/orp.js).
                //
                // So we advance the cursor here until we see a new diskloc.
                //
                // Note that we won't be yielding, and we may not do so for a while if we find
                // a particularly duplicated sequence of loc's. That is highly unlikely,
                // though.  (See SERVER-5725, if curious, but "stage" based $or will make that
                // ticket moot).
                while( cursor->ok() && loc == cursor->currLoc() ) {
                    cursor->advance();
                }
            }

            // For some (unfortunate) historical reasons, not all cursors would be valid after
            // a write simply because we advanced them to a document not affected by the write.
            // To protect in those cases, not only we engaged in the advance() logic above, but
            // we also tell the cursor we're about to write a document that we've just seen.
            // prepareToTouchEarlierIterate() requires calling later
            // recoverFromTouchingEarlierIterate(), so we make a note here to do so.
            bool touchPreviousDoc = request.isMulti() && cursor->ok();
            if ( touchPreviousDoc ) {
                if ( clientCursor.get() )
                    clientCursor->setDoingDeletes( true );
                cursor->prepareToTouchEarlierIterate();
            }

            // Found a matching document
            numMatched++;

            // Ask the driver to apply the mods. It may be that the driver can apply those "in
            // place", that is, some values of the old document just get adjusted without any
            // change to the binary layout on the bson layer. It may be that a whole new
            // document is needed to accomodate the new bson layout of the resulting document.
            doc.reset( oldObj, mutablebson::Document::kInPlaceEnabled );
            BSONObj logObj;

            // If there was a matched field, obtain it.
            string matchedField;
            if (matchDetails.hasElemMatchKey())
                matchedField = matchDetails.elemMatchKey();

            Status status = driver->update( matchedField, &doc, &logObj );
            if ( !status.isOK() ) {
                uasserted( 16837, status.reason() );
            }

            // If the driver applied the mods in place, we can ask the mutable for what
            // changed. We call those changes "damages". :) We use the damages to inform the
            // journal what was changed, and then apply them to the original document
            // ourselves. If, however, the driver applied the mods out of place, we ask it to
            // generate a new, modified document for us. In that case, the file manager will
            // take care of the journaling details for us.
            //
            // This code flow is admittedly odd. But, right now, journaling is baked in the file
            // manager. And if we aren't using the file manager, we have to do jounaling
            // ourselves.
            bool objectWasChanged = false;
            BSONObj newObj;
            const char* source = NULL;
            bool inPlace = doc.getInPlaceUpdates(&damages, &source);
            if ( inPlace && !driver->modsAffectIndices() ) {
                // If a set of modifiers were all no-ops, we are still 'in place', but there is
                // no work to do, in which case we want to consider the object unchanged.
                if (!damages.empty() ) {
                    nsDetails->paddingFits();

                    // All updates were in place. Apply them via durability and writing pointer.
                    mutablebson::DamageVector::const_iterator where = damages.begin();
                    const mutablebson::DamageVector::const_iterator end = damages.end();
                    for( ; where != end; ++where ) {
                        const char* sourcePtr = source + where->sourceOffset;
                        void* targetPtr = getDur().writingPtr(
                            const_cast<char*>(oldObj.objdata()) + where->targetOffset,
                            where->size);
                        std::memcpy(targetPtr, sourcePtr, where->size);
                    }
                    objectWasChanged = true;
                    opDebug->fastmod = true;
                }
                newObj = oldObj;
            }
            else {

                // The updates were not in place. Apply them through the file manager.
                newObj = doc.getObject();
                DiskLoc newLoc = theDataFileMgr.updateRecord(nsString.ns().c_str(),
                                                             nsDetails,
                                                             nsDetailsTransient,
                                                             record,
                                                             loc,
                                                             newObj.objdata(),
                                                             newObj.objsize(),
                                                             *opDebug);

                // If we've moved this object to a new location, make sure we don't apply
                // that update again if our traversal picks the objecta again.
                //
                // We also take note that the diskloc if the updates are affecting indices.
                // Chances are that we're traversing one of them and they may be multi key and
                // therefore duplicate disklocs.
                if ( newLoc != loc || driver->modsAffectIndices()  ) {
                    seenLocs.insert( newLoc );
                }

                objectWasChanged = true;
            }

            // Log Obj
            if ( request.shouldUpdateOpLog() ) {
                if ( driver->isDocReplacement() || !logObj.isEmpty() ) {
                    BSONObj idQuery = driver->makeOplogEntryQuery(newObj, request.isMulti());
                    logOp("u", nsString.ns().c_str(), logObj , &idQuery,
                          NULL, request.isFromMigration(), &newObj);
                }
            }

            // If it was noop since the document didn't change, record that.
            if (!objectWasChanged)
                opDebug->nupdateNoops++;

            if (!request.isMulti()) {
                break;
            }

            // If we used the cursor mechanism that prepares an earlier seen document for a
            // write we need to tell such mechanisms that the write is over.
            if ( touchPreviousDoc ) {
                cursor->recoverFromTouchingEarlierIterate();
            }

            getDur().commitIfNeeded();

        }

        // TODO: Can this be simplified?
        if ((numMatched > 0) || (numMatched == 0 && !request.isUpsert()) ) {
            opDebug->nupdated = numMatched;
            return UpdateResult( numMatched > 0 /* updated existing object(s) */,
                                 !driver->isDocReplacement() /* $mod or obj replacement */,
                                 numMatched /* # of docments update, even no-ops */,
                                 BSONObj() );
        }

        //
        // We haven't found any existing document so an insert is done
        // (upsert is true).
        //
        opDebug->upsert = true;

        // Since this is an insert (no docs found and upsert:true), we will be logging it
        // as an insert in the oplog. We don't need the driver's help to build the
        // oplog record, then. We also set the context of the update driver to the INSERT_CONTEXT.
        // Some mods may only work in that context (e.g. $setOnInsert).
        driver->setLogOp( false );
        driver->setContext( ModifierInterface::ExecInfo::INSERT_CONTEXT );

        BSONObj baseObj;

        // Reset the document we will be writing to
        doc.reset( baseObj, mutablebson::Document::kInPlaceDisabled );
        if ( request.getQuery().hasElement("_id") ) {
            uassertStatusOK(doc.root().appendElement(request.getQuery().getField("_id")));
        }


        // If this is a $mod base update, we need to generate a document by examining the
        // query and the mods. Otherwise, we can use the object replacement sent by the user
        // update command that was parsed by the driver before.
        // In the following block we handle the query part, and then do the regular mods after.
        if ( *request.getUpdates().firstElementFieldName() == '$' ) {
            uassertStatusOK(UpdateDriver::createFromQuery(request.getQuery(), doc));
            opDebug->fastmodinsert = true;
        }

        // Apply the update modifications and then log the update as an insert manually.
        Status status = driver->update( StringData(), &doc, NULL /* no oplog record */);
        if ( !status.isOK() ) {
            uasserted( 16836, status.reason() );
        }

        BSONObj newObj = doc.getObject();
        theDataFileMgr.insertWithObjMod( nsString.ns().c_str(), newObj, false, request.isGod() );
        if ( request.shouldUpdateOpLog() ) {
            logOp( "i", nsString.ns().c_str(), newObj,
                   NULL, NULL, request.isFromMigration(), &newObj );
        }

        opDebug->nupdated = 1;
        return UpdateResult( false /* updated a non existing document */,
                             !driver->isDocReplacement() /* $mod or obj replacement? */,
                             1 /* count of updated documents */,
                             newObj /* object that was upserted */ );
    }
Пример #12
0
    long long _deleteObjects(const char *ns, BSONObj pattern, bool justOne, bool logop) {
        Collection *cl = getCollection(ns);
        if (cl == NULL) {
            return 0;
        }

        uassert(10101, "can't remove from a capped collection", !cl->isCapped());

        BSONObj obj;
        BSONObj pk = cl->getSimplePKFromQuery(pattern);

        // Fast-path for simple primary key deletes.
        if (!pk.isEmpty()) {
            if (queryByPKHack(cl, pk, pattern, obj)) {
                if (logop) {
                    OpLogHelpers::logDelete(ns, obj, false);
                }
                deleteOneObject(cl, pk, obj);
                return 1;
            }
            return 0;
        }

        long long nDeleted = 0;
        for (shared_ptr<Cursor> c = getOptimizedCursor(ns, pattern); c->ok(); ) {
            pk = c->currPK();
            if (c->getsetdup(pk)) {
                c->advance();
                continue;
            }
            if (!c->currentMatches()) {
                c->advance();
                continue;
            }

            obj = c->current();

            // justOne deletes do not intend to advance, so there's
            // no reason to do so here and potentially overlock rows.
            if (!justOne) {
                // There may be interleaved query plans that utilize multiple
                // cursors, some of which point to the same PK. We advance
                // here while those cursors point the row to be deleted.
                // 
                // Make sure to get local copies of pk/obj before advancing.
                pk = pk.getOwned();
                obj = obj.getOwned();
                while (c->ok() && c->currPK() == pk) {
                    c->advance();
                }
            }

            if (logop) {
                OpLogHelpers::logDelete(ns, obj, false);
            }
            deleteOneObject(cl, pk, obj);
            nDeleted++;

            if (justOne) {
                break;
            }
        }
        return nDeleted;
    }
Пример #13
0
    UpdateResult _updateObjectsNEW( bool su,
                                    const char* ns,
                                    const BSONObj& updateobj,
                                    const BSONObj& patternOrig,
                                    bool upsert,
                                    bool multi,
                                    bool logop ,
                                    OpDebug& debug,
                                    RemoveSaver* rs,
                                    bool fromMigrate,
                                    const QueryPlanSelectionPolicy& planPolicy,
                                    bool forReplication ) {

        // TODO
        // + Separate UpdateParser from UpdateRunner (the latter should be "stage-y")
        //   + All the yield and deduplicate logic would move to the query stage
        //     portion of it
        //
        // + Replication related
        //   + fast path for update for query by _id
        //   + support for relaxing viable path constraint in replication
        //
        // + Field Management
        //   + Force all upsert to contain _id
        //   + Prevent changes to immutable fields (_id, and those mentioned by sharding)
        //
        // + Yiedling related
        //   + $atomic support (or better, support proper yielding if not)
        //   + page fault support

        debug.updateobj = updateobj;

        NamespaceDetails* d = nsdetails( ns );
        NamespaceDetailsTransient* nsdt = &NamespaceDetailsTransient::get( ns );

        UpdateDriver::Options opts;
        opts.multi = multi;
        opts.upsert = upsert;
        opts.logOp = logop;
        UpdateDriver driver( opts );
        Status status = driver.parse( nsdt->indexKeys(), updateobj );
        if ( !status.isOK() ) {
            uasserted( 16840, status.reason() );
        }

        shared_ptr<Cursor> cursor = getOptimizedCursor( ns, patternOrig, BSONObj(), planPolicy );

        // The 'cursor' the optimizer gave us may contain query plans that generate duplicate
        // diskloc's. We set up here the mechanims that will prevent us from processing those
        // twice if we see them. We also set up a 'ClientCursor' so that we can support
        // yielding.
        const bool dedupHere = cursor->autoDedup();
        shared_ptr<Cursor> cPtr = cursor;
        auto_ptr<ClientCursor> clientCursor( new ClientCursor( QueryOption_NoCursorTimeout,
                                                               cPtr,
                                                               ns ) );

        //
        // Upsert Logic
        //

        // We may or may not have documents for this update. If we don't, then try to upsert,
        // if allowed.
        if ( !cursor->ok() && upsert ) {

            // If this is a $mod base update, we need to generate a document by examining the
            // query and the mods. Otherwise, we can use the object replacement sent by the user
            // update command that was parsed by the driver before.
            BSONObj oldObj;
            if ( *updateobj.firstElementFieldName() == '$' ) {
                if ( !driver.createFromQuery( patternOrig, &oldObj ) ) {
                    uasserted( 16835, "cannot create object to update" );
                }
                debug.fastmodinsert = true;
            }
            else {
                debug.upsert = true;
            }

            // Since this is an upsert, we will be oplogging it as an insert. We don't
            // need the driver's help to build the oplog record, then. We also set the
            // context of the update driver to an "upsert". Some mods may only work in that
            // context (e.g. $setOnInsert).
            driver.setLogOp( false );
            driver.setContext( ModifierInterface::ExecInfo::INSERT_CONTEXT );

            mutablebson::Document doc( oldObj, mutablebson::Document::kInPlaceDisabled );
            status = driver.update( StringData(), &doc, NULL /* no oplog record */);
            if ( !status.isOK() ) {
                uasserted( 16836, status.reason() );
            }
            BSONObj newObj = doc.getObject();

            theDataFileMgr.insertWithObjMod( ns, newObj, false, su );

            if ( logop ) {
                logOp( "i", ns, newObj, 0, 0, fromMigrate, &newObj );
            }

            return UpdateResult( false /* updated a non existing document */,
                                 driver.dollarModMode() /* $mod or obj replacement? */,
                                 1 /* count of updated documents */,
                                 newObj /* object that was upserted */ );
        }

        //
        // We have one or more documents for this update.
        //

        // We record that this will not be an upsert, in case a mod doesn't want to be applied
        // when in strict update mode.
        driver.setContext( ModifierInterface::ExecInfo::UPDATE_CONTEXT );

        // Let's fetch each of them and pipe them through the update expression, making sure to
        // keep track of the necessary stats. Recall that we'll be pulling documents out of
        // cursors and some of them do not deduplicate the entries they generate. We have
        // deduping logic in here, too -- for now.
        unordered_set<DiskLoc, DiskLoc::Hasher> seenLocs;
        int numUpdated = 0;
        debug.nscanned = 0;
        while ( cursor->ok() ) {

            // Let's fetch the next candidate object for this update.
            Record* r = cursor->_current();
            DiskLoc loc = cursor->currLoc();
            const BSONObj oldObj = loc.obj();

            // We count how many documents we scanned even though we may skip those that are
            // deemed duplicated. The final 'numUpdated' and 'nscanned' numbers may differ for
            // that reason.
            debug.nscanned++;

            // Skips this document if it:
            // a) doesn't match the query portion of the update
            // b) was deemed duplicate by the underlying cursor machinery
            //
            // Now, if we are going to update the document,
            // c) we don't want to do so while the cursor is at it, as that may invalidate
            // the cursor. So, we advance to next document, before issuing the update.
            MatchDetails matchDetails;
            matchDetails.requestElemMatchKey();
            if ( !cursor->currentMatches( &matchDetails ) ) {
                // a)
                cursor->advance();
                continue;
            }
            else if ( cursor->getsetdup( loc ) && dedupHere ) {
                // b)
                cursor->advance();
                continue;
            }
            else if (driver.dollarModMode() && multi) {
                // c)
                cursor->advance();
                if ( dedupHere ) {
                    if ( seenLocs.count( loc ) ) {
                        continue;
                    }
                }

                // There are certain kind of cursors that hold multiple pointers to data
                // underneath. $or cursors is one example. In a $or cursor, it may be the case
                // that when we did the last advance(), we finished consuming documents from
                // one of $or child and started consuming the next one. In that case, it is
                // possible that the last document of the previous child is the same as the
                // first document of the next (see SERVER-5198 and jstests/orp.js).
                //
                // So we advance the cursor here until we see a new diskloc.
                //
                // Note that we won't be yielding, and we may not do so for a while if we find
                // a particularly duplicated sequence of loc's. That is highly unlikely,
                // though.  (See SERVER-5725, if curious, but "stage" based $or will make that
                // ticket moot).
                while( cursor->ok() && loc == cursor->currLoc() ) {
                    cursor->advance();
                }
            }

            // For some (unfortunate) historical reasons, not all cursors would be valid after
            // a write simply because we advanced them to a document not affected by the write.
            // To protect in those cases, not only we engaged in the advance() logic above, but
            // we also tell the cursor we're about to write a document that we've just seen.
            // prepareToTouchEarlierIterate() requires calling later
            // recoverFromTouchingEarlierIterate(), so we make a note here to do so.
            bool touchPreviousDoc = multi && cursor->ok();
            if ( touchPreviousDoc  ) {
                clientCursor->setDoingDeletes( true );
                cursor->prepareToTouchEarlierIterate();
            }

            // Ask the driver to apply the mods. It may be that the driver can apply those "in
            // place", that is, some values of the old document just get adjusted without any
            // change to the binary layout on the bson layer. It may be that a whole new
            // document is needed to accomodate the new bson layout of the resulting document.
            mutablebson::Document doc( oldObj, mutablebson::Document::kInPlaceEnabled );
            BSONObj logObj;
            StringData matchedField = matchDetails.hasElemMatchKey() ?
                                                    matchDetails.elemMatchKey():
                                                    StringData();
            status = driver.update( matchedField, &doc, &logObj );
            if ( !status.isOK() ) {
                uasserted( 16837, status.reason() );
            }

            // If the driver applied the mods in place, we can ask the mutable for what
            // changed. We call those changes "damages". :) We use the damages to inform the
            // journal what was changed, and then apply them to the original document
            // ourselves. If, however, the driver applied the mods out of place, we ask it to
            // generate a new, modified document for us. In that case, the file manager will
            // take care of the journaling details for us.
            //
            // This code flow is admittedly odd. But, right now, journaling is baked in the file
            // manager. And if we aren't using the file manager, we have to do jounaling
            // ourselves.
            BSONObj newObj;
            const char* source = NULL;
            mutablebson::DamageVector damages;
            bool inPlace = doc.getInPlaceUpdates(&damages, &source);
            if ( inPlace && !driver.modsAffectIndices() ) {

                // All updates were in place. Apply them via durability and writing pointer.
                mutablebson::DamageVector::const_iterator where = damages.begin();
                const mutablebson::DamageVector::const_iterator end = damages.end();
                for( ; where != end; ++where ) {
                    const char* sourcePtr = source + where->sourceOffset;
                    void* targetPtr = getDur().writingPtr(
                        const_cast<char*>(oldObj.objdata()) + where->targetOffset,
                        where->size);
                    std::memcpy(targetPtr, sourcePtr, where->size);
                }
                newObj = oldObj;
                debug.fastmod = true;
            }
            else {

                // The updates were not in place. Apply them through the file manager.
                newObj = doc.getObject();
                DiskLoc newLoc = theDataFileMgr.updateRecord(ns,
                                                             d,
                                                             nsdt,
                                                             r,
                                                             loc,
                                                             newObj.objdata(),
                                                             newObj.objsize(),
                                                             debug);

                // If we've moved this object to a new location, make sure we don't apply
                // that update again if our traversal picks the objecta again.
                //
                // We also take note that the diskloc if the updates are affecting indices.
                // Chances are that we're traversing one of them and they may be multi key and
                // therefore duplicate disklocs.
                if ( newLoc != loc || driver.modsAffectIndices()  ) {
                    seenLocs.insert( newLoc );
                }
            }

            // Log Obj
            if ( logop ) {
                if ( !logObj.isEmpty() ) {
                    BSONObj pattern = patternOrig;
                    logOp("u", ns, logObj , &pattern, 0, fromMigrate, &newObj );
                }
            }

            // One more document updated.
            numUpdated++;

            if (!multi) {
                break;
            }

            // If we used the cursor mechanism that prepares an earlier seen document for a
            // write we need to tell such mechanisms that the write is over.
            if ( touchPreviousDoc ) {
                cursor->recoverFromTouchingEarlierIterate();
            }

            getDur().commitIfNeeded();

        }

        return UpdateResult( true /* updated existing object(s) */,
                             driver.dollarModMode() /* $mod or obj replacement */,
                             numUpdated /* # of docments update */,
                             BSONObj() );
    }
Пример #14
0
        bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
            Timer t;
            string ns = dbname + '.' + cmdObj.firstElement().valuestr();

            string key = cmdObj["key"].valuestrsafe();
            BSONObj keyPattern = BSON( key << 1 );

            BSONObj query = getQuery( cmdObj );

            int bufSize = BSONObjMaxUserSize - 4096;
            BufBuilder bb( bufSize );
            char * start = bb.buf();

            BSONArrayBuilder arr( bb );
            BSONElementSet values;

            long long nscanned = 0; // locations looked at
            long long nscannedObjects = 0; // full objects looked at
            long long n = 0; // matches
            MatchDetails md;

            NamespaceDetails * d = nsdetails( ns );

            if ( ! d ) {
                result.appendArray( "values" , BSONObj() );
                result.append( "stats" , BSON( "n" << 0 << "nscanned" << 0 << "nscannedObjects" << 0 ) );
                return true;
            }

            shared_ptr<Cursor> cursor;
            if ( ! query.isEmpty() ) {
                cursor = getOptimizedCursor( ns.c_str(), query, BSONObj() );
            }
            else {

                // query is empty, so lets see if we can find an index
                // with the key so we don't have to hit the raw data
                NamespaceDetails::IndexIterator ii = d->ii();
                while ( ii.more() ) {
                    IndexDetails& idx = ii.next();

                    if ( d->isMultikey( ii.pos() - 1 ) )
                        continue;

                    if ( idx.inKeyPattern( key ) ) {
                        cursor = getBestGuessCursor( ns.c_str(), BSONObj(), idx.keyPattern() );
                        if( cursor.get() ) break;
                    }

                }

                if ( ! cursor.get() )
                    cursor = getOptimizedCursor(ns.c_str() , query , BSONObj() );

            }

            
            verify( cursor );
            string cursorName = cursor->toString();
            
            auto_ptr<ClientCursor> cc (new ClientCursor(QueryOption_NoCursorTimeout, cursor, ns));

            // map from indexed field to offset in key object
            map<string, int> indexedFields;  
            if (!cursor->modifiedKeys()) {
                // store index information so we can decide if we can
                // get something out of the index key rather than full object

                int x = 0;
                BSONObjIterator i( cursor->indexKeyPattern() );
                while ( i.more() ) {
                    BSONElement e = i.next();
                    if ( e.isNumber() ) {
                        // only want basic index fields, not "2d" etc
                        indexedFields[e.fieldName()] = x;
                    }
                    x++;
                }
            }

            while ( cursor->ok() ) {
                nscanned++;
                bool loadedRecord = false;

                if ( cursor->currentMatches( &md ) && !cursor->getsetdup( cursor->currLoc() ) ) {
                    n++;

                    BSONObj holder;
                    BSONElementSet temp;
                    // Try to get the record from the key fields.
                    loadedRecord = !getFieldsDotted(indexedFields, cursor, key, temp, holder);

                    for ( BSONElementSet::iterator i=temp.begin(); i!=temp.end(); ++i ) {
                        BSONElement e = *i;
                        if ( values.count( e ) )
                            continue;

                        int now = bb.len();

                        uassert(10044,  "distinct too big, 16mb cap", ( now + e.size() + 1024 ) < bufSize );

                        arr.append( e );
                        BSONElement x( start + now );

                        values.insert( x );
                    }
                }

                if ( loadedRecord || md.hasLoadedRecord() )
                    nscannedObjects++;

                cursor->advance();

                if (!cc->yieldSometimes( ClientCursor::MaybeCovered )) {
                    cc.release();
                    break;
                }

                RARELY killCurrentOp.checkForInterrupt();
            }

            verify( start == bb.buf() );

            result.appendArray( "values" , arr.done() );

            {
                BSONObjBuilder b;
                b.appendNumber( "n" , n );
                b.appendNumber( "nscanned" , nscanned );
                b.appendNumber( "nscannedObjects" , nscannedObjects );
                b.appendNumber( "timems" , t.millis() );
                b.append( "cursor" , cursorName );
                result.append( "stats" , b.obj() );
            }

            return true;
        }
Пример #15
0
    /**
     * Run a query with a cursor provided by the query optimizer, or FindingStartCursor.
     * @returns true if client cursor was saved, false if the query has completed.
     */
    bool queryWithQueryOptimizer( int queryOptions, const string& ns,
                                  const BSONObj &jsobj, CurOp& curop,
                                  const BSONObj &query, const BSONObj &order,
                                  const shared_ptr<ParsedQuery> &pq_shared,
                                  const ConfigVersion &shardingVersionAtStart,
                                  const bool getCachedExplainPlan,
                                  const bool inMultiStatementTxn,
                                  Message &result ) {

        const ParsedQuery &pq( *pq_shared );
        shared_ptr<Cursor> cursor;
        QueryPlanSummary queryPlan;

        const bool tailable = pq.hasOption( QueryOption_CursorTailable ) && pq.getNumToReturn() != 1;
        
        LOG(1) << "query beginning read-only transaction. tailable: " << tailable << endl;
        
        BSONObj oldPlan;
        if (getCachedExplainPlan) {
            scoped_ptr<MultiPlanScanner> mps( MultiPlanScanner::make( ns.c_str(), query, order ) );
            oldPlan = mps->cachedPlanExplainSummary();
        }
        
        cursor = getOptimizedCursor( ns.c_str(), query, order, QueryPlanSelectionPolicy::any(),
                                     pq_shared, false, &queryPlan );
        verify( cursor );

        // Tailable cursors must be marked as such before any use. This is so that
        // the implementation knows that uncommitted data cannot be returned.
        if ( tailable ) {
            cursor->setTailable();
        }

        scoped_ptr<QueryResponseBuilder> queryResponseBuilder
                ( QueryResponseBuilder::make( pq, cursor, queryPlan, oldPlan ) );
        bool saveClientCursor = false;
        int options = QueryOption_NoCursorTimeout;
        if (pq.hasOption( QueryOption_OplogReplay )) {
            options |= QueryOption_OplogReplay;
        }
        // create a client cursor that does not create a cursorID.
        // The cursor ID will be created if and only if we save
        // the client cursor further below
        ClientCursor::Holder ccPointer(
            new ClientCursor( options, cursor, ns, BSONObj(), false, false )
            );

        // for oplog cursors, we check if we are reading data that is too old and might
        // be stale.
        bool opChecked = false;
        bool slaveLocationUpdated = false;
        BSONObj last;
        bool lastBSONObjSet = false;
        for ( ; cursor->ok(); cursor->advance() ) {

            if ( pq.getMaxScan() && cursor->nscanned() > pq.getMaxScan() ) {
                break;
            }
            
            if ( !queryResponseBuilder->addMatch() ) {
                continue;
            }
            
            // Note slave's position in the oplog.
            if ( pq.hasOption( QueryOption_OplogReplay ) ) {
                BSONObj current = cursor->current();
                last = current.copy();
                lastBSONObjSet = true;
                
                // the first row returned is equal to the last element that
                // the slave has synced up to, so we might as well update
                // the slave location
                if (!slaveLocationUpdated) {
                    ccPointer->updateSlaveLocation(curop);
                    slaveLocationUpdated = true;
                }
                // check if data we are about to return may be too stale
                if (!opChecked) {
                    ccPointer->storeOpForSlave(current);
                    uassert(16785, "oplog cursor reading data that is too old", !ccPointer->lastOpForSlaveTooOld());
                    opChecked = true;
                }
            }
            
            if ( pq.isExplain() ) {
                if ( queryResponseBuilder->enoughTotalResults() ) {
                    break;
                }
            }
            else if ( queryResponseBuilder->enoughForFirstBatch() ) {
                // if only 1 requested, no cursor saved for efficiency...we assume it is findOne()
                if ( pq.wantMore() && pq.getNumToReturn() != 1 ) {
                    queryResponseBuilder->finishedFirstBatch();
                    if ( cursor->advance() ) {
                        saveClientCursor = true;
                    }
                }
                break;
            }
        }

        // If the tailing request succeeded
        if ( cursor->tailable() ) {
            saveClientCursor = true;
        }
        
        if ( ! shardingState.getVersion( ns ).isWriteCompatibleWith( shardingVersionAtStart ) ) {
            // if the version changed during the query
            // we might be missing some data
            // and its safe to send this as mongos can resend
            // at this point
            throw SendStaleConfigException( ns , "version changed during initial query", shardingVersionAtStart, shardingState.getVersion( ns ) );
        }
        
        int nReturned = queryResponseBuilder->handoff( result );

        ccPointer.reset();
        long long cursorid = 0;
        if ( saveClientCursor ) {
            // Create a new ClientCursor, with a default timeout.
            ccPointer.reset( new ClientCursor( queryOptions, cursor, ns,
                                               jsobj.getOwned(), inMultiStatementTxn ) );
            cursorid = ccPointer->cursorid();
            DEV tlog(2) << "query has more, cursorid: " << cursorid << endl;
            
            if ( !ccPointer->ok() && ccPointer->c()->tailable() ) {
                DEV tlog() << "query has no more but tailable, cursorid: " << cursorid << endl;
            }
            
            if( queryOptions & QueryOption_Exhaust ) {
                curop.debug().exhaust = true;
            }
            
            // Set attributes for getMore.
            ccPointer->setChunkManager( queryResponseBuilder->chunkManager() );
            ccPointer->setPos( nReturned );
            ccPointer->pq = pq_shared;
            ccPointer->fields = pq.getFieldPtr();
            if (pq.hasOption( QueryOption_OplogReplay ) && lastBSONObjSet) {
                ccPointer->storeOpForSlave(last);
            }
            if (!inMultiStatementTxn) {
                // This cursor is not part of a multi-statement transaction, so
                // we pass off the current client's transaction stack to the
                // cursor so that it may be live as long as the cursor.
                cc().swapTransactionStack(ccPointer->transactions);
                verify(!cc().hasTxn());
            }
            ccPointer.release();
        }

        QueryResult *qr = (QueryResult *) result.header();
        qr->cursorId = cursorid;
        curop.debug().cursorid = ( cursorid == 0 ? -1 : qr->cursorId );
        qr->setResultFlagsToOk();
        // qr->len is updated automatically by appendData()
        curop.debug().responseLength = qr->len;
        qr->setOperation(opReply);
        qr->startingFrom = 0;
        qr->nReturned = nReturned;

        curop.debug().nscanned = ( cursor ? cursor->nscanned() : 0LL );
        curop.debug().ntoskip = pq.getSkip();
        curop.debug().nreturned = nReturned;

        return saveClientCursor;
    }
Пример #16
0
    UpdateResult _updateObjectsNEW( bool su,
                                    const char* ns,
                                    const BSONObj& updateobj,
                                    const BSONObj& patternOrig,
                                    bool upsert,
                                    bool multi,
                                    bool logop ,
                                    OpDebug& debug,
                                    RemoveSaver* rs,
                                    bool fromMigrate,
                                    const QueryPlanSelectionPolicy& planPolicy,
                                    bool forReplication ) {

        // TODO
        // + Separate UpdateParser from UpdateRunner (the latter should be "stage-y")
        //   + All the yield and deduplicate logic would move to the query stage
        //     portion of it
        //
        // + Replication related
        //   + fast path for update for query by _id
        //   + support for relaxing viable path constraint in replication
        //
        // + Field Management
        //   + Force all upsert to contain _id
        //   + Prevent changes to immutable fields (_id, and those mentioned by sharding)
        //
        // + Yiedling related
        //   + $atomic support (or better, support proper yielding if not)
        //   + page fault support

        debug.updateobj = updateobj;

        NamespaceDetails* d = nsdetails( ns );
        NamespaceDetailsTransient* nsdt = &NamespaceDetailsTransient::get( ns );

        // TODO: Put this logic someplace central and check based on constants (maybe using the
        // list of actually excluded config collections, and not global for the config db).
        NamespaceString nsStr( ns );

        // Should the modifiers validdate their embedded docs via okForStorage
        bool shouldValidate = true;

        // Config db docs shouldn't get checked for valid field names since the shard key can have
        // a dot (".") in it. Therefore we disable validation for storage.
        if ( nsStr.db() == "config" ) {
            LOG(0) << "disabling okForStorage on config db";
            shouldValidate = false;
        }

        UpdateDriver::Options opts;
        opts.multi = multi;
        opts.upsert = upsert;
        opts.logOp = logop;
        opts.modOptions = ModifierInterface::Options( forReplication, shouldValidate );
        UpdateDriver driver( opts );

        // TODO: This copies the index keys, but we may not actually need to.
        Status status = driver.parse( nsdt->indexKeys(), updateobj );
        if ( !status.isOK() ) {
            uasserted( 16840, status.reason() );
        }

        shared_ptr<Cursor> cursor = getOptimizedCursor( ns, patternOrig, BSONObj(), planPolicy );

        // If the update was marked with '$isolated' (a.k.a '$atomic'), we are not allowed to
        // yield while evaluating the update loop below.
        //
        // TODO: Old code checks this repeatedly within the update loop. Is that necessary? It seems
        // that once atomic should be always atomic.
        const bool canYield =
            cursor->ok() &&
            cursor->matcher() &&
            cursor->matcher()->docMatcher().atomic();

        // The 'cursor' the optimizer gave us may contain query plans that generate duplicate
        // diskloc's. We set up here the mechanims that will prevent us from processing those
        // twice if we see them. We also set up a 'ClientCursor' so that we can support
        // yielding.
        //
        // TODO: Is it valid to call this on a non-ok cursor?
        const bool dedupHere = cursor->autoDedup();

        //
        // We'll start assuming we have one or more documents for this update. (Othwerwise,
        // we'll fallback to upserting.)
        //

        // We record that this will not be an upsert, in case a mod doesn't want to be applied
        // when in strict update mode.
        driver.setContext( ModifierInterface::ExecInfo::UPDATE_CONTEXT );

        // Let's fetch each of them and pipe them through the update expression, making sure to
        // keep track of the necessary stats. Recall that we'll be pulling documents out of
        // cursors and some of them do not deduplicate the entries they generate. We have
        // deduping logic in here, too -- for now.
        unordered_set<DiskLoc, DiskLoc::Hasher> seenLocs;
        int numUpdated = 0;
        debug.nscanned = 0;

        Client& client = cc();

        mutablebson::Document doc;

        // If we are going to be yielding, we will need a ClientCursor scoped to this loop. We
        // only loop as long as the underlying cursor is OK.
        for ( auto_ptr<ClientCursor> clientCursor; cursor->ok(); ) {

            // If we haven't constructed a ClientCursor, and if the client allows us to throw
            // page faults, and if we are referring to a location that is likely not in
            // physical memory, then throw a PageFaultException. The entire operation will be
            // restarted.
            if ( clientCursor.get() == NULL &&
                 client.allowedToThrowPageFaultException() &&
                 !cursor->currLoc().isNull() &&
                 !cursor->currLoc().rec()->likelyInPhysicalMemory() ) {
                // We should never throw a PFE if we have already updated items.
                dassert(numUpdated == 0);
                throw PageFaultException( cursor->currLoc().rec() );
            }

            if ( !canYield && debug.nscanned != 0 ) {

                // We are permitted to yield. To do so we need a ClientCursor, so create one
                // now if we have not yet done so.
                if ( !clientCursor.get() )
                    clientCursor.reset(
                        new ClientCursor( QueryOption_NoCursorTimeout, cursor, ns ) );

                // Ask the client cursor to yield. We get two bits of state back: whether or not
                // we yielded, and whether or not we correctly recovered from yielding.
                bool yielded = false;
                const bool recovered = clientCursor->yieldSometimes(
                    ClientCursor::WillNeed, &yielded );

                // If we couldn't recover from the yield, or if the cursor died while we were
                // yielded, get out of the update loop right away. We don't need to reset
                // 'clientCursor' since we are leaving the scope.
                if ( !recovered || !cursor->ok() )
                    break;

                if ( yielded ) {
                    // Details about our namespace may have changed while we were yielded, so
                    // we re-acquire them here. If we can't do so, escape the update
                    // loop. Otherwise, refresh the driver so that it knows about what is
                    // currently indexed.
                    d = nsdetails( ns );
                    if ( !d )
                        break;
                    nsdt = &NamespaceDetailsTransient::get( ns );

                    // TODO: This copies the index keys, but it may not need to do so.
                    driver.refreshIndexKeys( nsdt->indexKeys() );
                }

            }

            // Let's fetch the next candidate object for this update.
            Record* r = cursor->_current();
            DiskLoc loc = cursor->currLoc();
            const BSONObj oldObj = loc.obj();

            // We count how many documents we scanned even though we may skip those that are
            // deemed duplicated. The final 'numUpdated' and 'nscanned' numbers may differ for
            // that reason.
            debug.nscanned++;

            // Skips this document if it:
            // a) doesn't match the query portion of the update
            // b) was deemed duplicate by the underlying cursor machinery
            //
            // Now, if we are going to update the document,
            // c) we don't want to do so while the cursor is at it, as that may invalidate
            // the cursor. So, we advance to next document, before issuing the update.
            MatchDetails matchDetails;
            matchDetails.requestElemMatchKey();
            if ( !cursor->currentMatches( &matchDetails ) ) {
                // a)
                cursor->advance();
                continue;
            }
            else if ( cursor->getsetdup( loc ) && dedupHere ) {
                // b)
                cursor->advance();
                continue;
            }
            else if (driver.dollarModMode() && multi) {
                // c)
                cursor->advance();
                if ( dedupHere ) {
                    if ( seenLocs.count( loc ) ) {
                        continue;
                    }
                }

                // There are certain kind of cursors that hold multiple pointers to data
                // underneath. $or cursors is one example. In a $or cursor, it may be the case
                // that when we did the last advance(), we finished consuming documents from
                // one of $or child and started consuming the next one. In that case, it is
                // possible that the last document of the previous child is the same as the
                // first document of the next (see SERVER-5198 and jstests/orp.js).
                //
                // So we advance the cursor here until we see a new diskloc.
                //
                // Note that we won't be yielding, and we may not do so for a while if we find
                // a particularly duplicated sequence of loc's. That is highly unlikely,
                // though.  (See SERVER-5725, if curious, but "stage" based $or will make that
                // ticket moot).
                while( cursor->ok() && loc == cursor->currLoc() ) {
                    cursor->advance();
                }
            }

            // For some (unfortunate) historical reasons, not all cursors would be valid after
            // a write simply because we advanced them to a document not affected by the write.
            // To protect in those cases, not only we engaged in the advance() logic above, but
            // we also tell the cursor we're about to write a document that we've just seen.
            // prepareToTouchEarlierIterate() requires calling later
            // recoverFromTouchingEarlierIterate(), so we make a note here to do so.
            bool touchPreviousDoc = multi && cursor->ok();
            if ( touchPreviousDoc ) {
                if ( clientCursor.get() )
                    clientCursor->setDoingDeletes( true );
                cursor->prepareToTouchEarlierIterate();
            }

            // Ask the driver to apply the mods. It may be that the driver can apply those "in
            // place", that is, some values of the old document just get adjusted without any
            // change to the binary layout on the bson layer. It may be that a whole new
            // document is needed to accomodate the new bson layout of the resulting document.
            doc.reset( oldObj, mutablebson::Document::kInPlaceEnabled );
            BSONObj logObj;
            StringData matchedField = matchDetails.hasElemMatchKey() ?
                                                    matchDetails.elemMatchKey():
                                                    StringData();
            status = driver.update( matchedField, &doc, &logObj );
            if ( !status.isOK() ) {
                uasserted( 16837, status.reason() );
            }

            // If the driver applied the mods in place, we can ask the mutable for what
            // changed. We call those changes "damages". :) We use the damages to inform the
            // journal what was changed, and then apply them to the original document
            // ourselves. If, however, the driver applied the mods out of place, we ask it to
            // generate a new, modified document for us. In that case, the file manager will
            // take care of the journaling details for us.
            //
            // This code flow is admittedly odd. But, right now, journaling is baked in the file
            // manager. And if we aren't using the file manager, we have to do jounaling
            // ourselves.
            bool objectWasChanged = false;
            BSONObj newObj;
            const char* source = NULL;
            mutablebson::DamageVector damages;
            bool inPlace = doc.getInPlaceUpdates(&damages, &source);
            if ( inPlace && !damages.empty() && !driver.modsAffectIndices() ) {
                d->paddingFits();

                // All updates were in place. Apply them via durability and writing pointer.
                mutablebson::DamageVector::const_iterator where = damages.begin();
                const mutablebson::DamageVector::const_iterator end = damages.end();
                for( ; where != end; ++where ) {
                    const char* sourcePtr = source + where->sourceOffset;
                    void* targetPtr = getDur().writingPtr(
                        const_cast<char*>(oldObj.objdata()) + where->targetOffset,
                        where->size);
                    std::memcpy(targetPtr, sourcePtr, where->size);
                }
                newObj = oldObj;
                debug.fastmod = true;

                objectWasChanged = true;
            }
            else {

                // The updates were not in place. Apply them through the file manager.
                newObj = doc.getObject();
                DiskLoc newLoc = theDataFileMgr.updateRecord(ns,
                                                             d,
                                                             nsdt,
                                                             r,
                                                             loc,
                                                             newObj.objdata(),
                                                             newObj.objsize(),
                                                             debug);

                // If we've moved this object to a new location, make sure we don't apply
                // that update again if our traversal picks the objecta again.
                //
                // We also take note that the diskloc if the updates are affecting indices.
                // Chances are that we're traversing one of them and they may be multi key and
                // therefore duplicate disklocs.
                if ( newLoc != loc || driver.modsAffectIndices()  ) {
                    seenLocs.insert( newLoc );
                }

                objectWasChanged = true;
            }

            // Log Obj
            if ( logop ) {
                if ( !logObj.isEmpty() ) {
                    BSONObj idQuery = driver.makeOplogEntryQuery(newObj, multi);
                    logOp("u", ns, logObj , &idQuery, 0, fromMigrate, &newObj);
                }
            }

            // If we applied any in-place updates, or asked the DataFileMgr to write for us,
            // then count this as an update.
            if (objectWasChanged)
                numUpdated++;

            if (!multi) {
                break;
            }

            // If we used the cursor mechanism that prepares an earlier seen document for a
            // write we need to tell such mechanisms that the write is over.
            if ( touchPreviousDoc ) {
                cursor->recoverFromTouchingEarlierIterate();
            }

            getDur().commitIfNeeded();

        }

        if (numUpdated > 0) {
            return UpdateResult( true /* updated existing object(s) */,
                                 driver.dollarModMode() /* $mod or obj replacement */,
                                 numUpdated /* # of docments update */,
                                 BSONObj() );
        }
        else if (numUpdated == 0 && !upsert) {
            return UpdateResult( false /* no object updated */,
                                 driver.dollarModMode() /* $mod or obj replacement */,
                                 0 /* no updates */,
                                 BSONObj() );
        }

        //
        // We haven't succeeded updating any existing document but upserts are allowed.
        //

        // If this is a $mod base update, we need to generate a document by examining the
        // query and the mods. Otherwise, we can use the object replacement sent by the user
        // update command that was parsed by the driver before.
        BSONObj oldObj;
        if ( *updateobj.firstElementFieldName() == '$' ) {
            if ( !driver.createFromQuery( patternOrig, &oldObj ) ) {
                uasserted( 16835, "cannot create object to update" );
            }
            debug.fastmodinsert = true;
        }
        else {
            // Copy the _id
            if (patternOrig.hasElement("_id")) {
                oldObj = patternOrig.getField("_id").wrap();
            }
            debug.upsert = true;
        }

        // Since this is an upsert, we will be oplogging it as an insert. We don't
        // need the driver's help to build the oplog record, then. We also set the
        // context of the update driver to an "upsert". Some mods may only work in that
        // context (e.g. $setOnInsert).
        driver.setLogOp( false );
        driver.setContext( ModifierInterface::ExecInfo::INSERT_CONTEXT );

        doc.reset( oldObj, mutablebson::Document::kInPlaceDisabled );
        status = driver.update( StringData(), &doc, NULL /* no oplog record */);
        if ( !status.isOK() ) {
            uasserted( 16836, status.reason() );
        }
        BSONObj newObj = doc.getObject();

        theDataFileMgr.insertWithObjMod( ns, newObj, false, su );

        if ( logop ) {
            logOp( "i", ns, newObj, 0, 0, fromMigrate, &newObj );
        }

        return UpdateResult( false /* updated a non existing document */,
                             driver.dollarModMode() /* $mod or obj replacement? */,
                             1 /* count of updated documents */,
                             newObj /* object that was upserted */ );
    }