void insertObjects(const char *ns, const vector<BSONObj> &objs, bool keepGoing, uint64_t flags, bool logop ) { if (mongoutils::str::contains(ns, "system.")) { massert(16748, "need transaction to run insertObjects", cc().txnStackSize() > 0); uassert(10095, "attempt to insert in reserved database name 'system'", !mongoutils::str::startsWith(ns, "system.")); massert(16750, "attempted to insert multiple objects into a system namspace at once", objs.size() == 1); if (handle_system_collection_insert(ns, objs[0], logop) != 0) { return; } } NamespaceDetails *details = getAndMaybeCreateNS(ns, logop); NamespaceDetailsTransient *nsdt = &NamespaceDetailsTransient::get(ns); for (size_t i = 0; i < objs.size(); i++) { const BSONObj &obj = objs[i]; try { uassert( 10059 , "object to insert too large", obj.objsize() <= BSONObjMaxUserSize); BSONObjIterator i( obj ); while ( i.more() ) { BSONElement e = i.next(); uassert( 13511 , "document to insert can't have $ fields" , e.fieldName()[0] != '$' ); } uassert( 16440 , "_id cannot be an array", obj["_id"].type() != Array ); BSONObj objModified = obj; BSONElementManipulator::lookForTimestamps(objModified); if (details->isCapped() && logop) { // unfortunate hack we need for capped collections // we do this because the logic for generating the pk // and what subsequent rows to delete are buried in the // namespace details object. There is probably a nicer way // to do this, but this works. details->insertObjectIntoCappedAndLogOps(objModified, flags); if (nsdt != NULL) { nsdt->notifyOfWriteOp(); } } else { insertOneObject(details, nsdt, objModified, flags); // may add _id field if (logop) { OpLogHelpers::logInsert(ns, objModified, &cc().txn()); } } } catch (const UserException &) { if (!keepGoing || i == objs.size() - 1) { throw; } } } }
UpdateResult _updateObjects( bool su, const char* ns, const BSONObj& updateobj, const BSONObj& patternOrig, bool upsert, bool multi, bool logop , OpDebug& debug, RemoveSaver* rs, bool fromMigrate, const QueryPlanSelectionPolicy& planPolicy, bool forReplication ) { DEBUGUPDATE( "update: " << ns << " update: " << updateobj << " query: " << patternOrig << " upsert: " << upsert << " multi: " << multi ); Client& client = cc(); debug.updateobj = updateobj; // The idea with these here it to make them loop invariant for // multi updates, and thus be a bit faster for that case. The // pointers may be left invalid on a failed or terminal yield // recovery. NamespaceDetails* d = nsdetails(ns); // can be null if an upsert... NamespaceDetailsTransient* nsdt = &NamespaceDetailsTransient::get(ns); auto_ptr<ModSet> mods; bool isOperatorUpdate = updateobj.firstElementFieldName()[0] == '$'; int modsIsIndexed = false; // really the # of indexes if ( isOperatorUpdate ) { mods.reset( new ModSet(updateobj, nsdt->indexKeys(), forReplication) ); modsIsIndexed = mods->maxNumIndexUpdated(); } if( planPolicy.permitOptimalIdPlan() && !multi && isSimpleIdQuery(patternOrig) && d && !modsIsIndexed ) { int idxNo = d->findIdIndex(); if( idxNo >= 0 ) { debug.idhack = true; UpdateResult result = _updateById( isOperatorUpdate, idxNo, mods.get(), d, nsdt, su, ns, updateobj, patternOrig, logop, debug, fromMigrate); if ( result.existing || ! upsert ) { return result; } else if ( upsert && ! isOperatorUpdate ) { // this handles repl inserts checkNoMods( updateobj ); debug.upsert = true; BSONObj no = updateobj; theDataFileMgr.insertWithObjMod(ns, no, false, su); if ( logop ) logOp( "i", ns, no, 0, 0, fromMigrate, &no ); return UpdateResult( 0 , 0 , 1 , no ); } } } int numModded = 0; debug.nscanned = 0; shared_ptr<Cursor> c = getOptimizedCursor( ns, patternOrig, BSONObj(), planPolicy ); d = nsdetails(ns); nsdt = &NamespaceDetailsTransient::get(ns); bool autoDedup = c->autoDedup(); if( c->ok() ) { set<DiskLoc> seenObjects; MatchDetails details; auto_ptr<ClientCursor> cc; do { if ( cc.get() == 0 && client.allowedToThrowPageFaultException() && ! c->currLoc().isNull() && ! c->currLoc().rec()->likelyInPhysicalMemory() ) { throw PageFaultException( c->currLoc().rec() ); } bool atomic = c->matcher() && c->matcher()->docMatcher().atomic(); if ( ! atomic && debug.nscanned > 0 ) { // we need to use a ClientCursor to yield if ( cc.get() == 0 ) { shared_ptr< Cursor > cPtr = c; cc.reset( new ClientCursor( QueryOption_NoCursorTimeout , cPtr , ns ) ); } bool didYield; if ( ! cc->yieldSometimes( ClientCursor::WillNeed, &didYield ) ) { cc.release(); break; } if ( !c->ok() ) { break; } if ( didYield ) { d = nsdetails(ns); if ( ! d ) break; nsdt = &NamespaceDetailsTransient::get(ns); if ( mods.get() ) { mods->setIndexedStatus( nsdt->indexKeys() ); modsIsIndexed = mods->maxNumIndexUpdated(); } } } // end yielding block debug.nscanned++; if ( mods.get() && mods->hasDynamicArray() ) { details.requestElemMatchKey(); } if ( !c->currentMatches( &details ) ) { c->advance(); continue; } Record* r = c->_current(); DiskLoc loc = c->currLoc(); if ( c->getsetdup( loc ) && autoDedup ) { c->advance(); continue; } BSONObj js = BSONObj::make(r); BSONObj pattern = patternOrig; if ( logop ) { BSONObjBuilder idPattern; BSONElement id; // NOTE: If the matching object lacks an id, we'll log // with the original pattern. This isn't replay-safe. // It might make sense to suppress the log instead // if there's no id. if ( js.getObjectID( id ) ) { idPattern.append( id ); pattern = idPattern.obj(); } else { uassert( 10157 , "multi-update requires all modified objects to have an _id" , ! multi ); } } /* look for $inc etc. note as listed here, all fields to inc must be this type, you can't set some regular ones at the moment. */ if ( isOperatorUpdate ) { if ( multi ) { // go to next record in case this one moves c->advance(); // Update operations are deduped for cursors that implement their own // deduplication. In particular, some geo cursors are excluded. if ( autoDedup ) { if ( seenObjects.count( loc ) ) { continue; } // SERVER-5198 Advance past the document to be modified, provided // deduplication is enabled, but see SERVER-5725. while( c->ok() && loc == c->currLoc() ) { c->advance(); } } } const BSONObj& onDisk = loc.obj(); ModSet* useMods = mods.get(); auto_ptr<ModSet> mymodset; if ( details.hasElemMatchKey() && mods->hasDynamicArray() ) { useMods = mods->fixDynamicArray( details.elemMatchKey() ); mymodset.reset( useMods ); } auto_ptr<ModSetState> mss = useMods->prepare( onDisk, false /* not an insertion */ ); bool willAdvanceCursor = multi && c->ok() && ( modsIsIndexed || ! mss->canApplyInPlace() ); if ( willAdvanceCursor ) { if ( cc.get() ) { cc->setDoingDeletes( true ); } c->prepareToTouchEarlierIterate(); } // If we've made it this far, "ns" must contain a valid collection name, and so // is of the form "db.collection". Therefore, the following expression must // always be valid. "system.users" updates must never be done in place, in // order to ensure that they are validated inside DataFileMgr::updateRecord(.). bool isSystemUsersMod = (NamespaceString(ns).coll == "system.users"); BSONObj newObj; if ( !mss->isUpdateIndexed() && mss->canApplyInPlace() && !isSystemUsersMod ) { mss->applyModsInPlace( true );// const_cast<BSONObj&>(onDisk) ); DEBUGUPDATE( "\t\t\t doing in place update" ); if ( !multi ) debug.fastmod = true; if ( modsIsIndexed ) { seenObjects.insert( loc ); } newObj = loc.obj(); d->paddingFits(); } else { newObj = mss->createNewFromMods(); checkTooLarge(newObj); DiskLoc newLoc = theDataFileMgr.updateRecord(ns, d, nsdt, r, loc, newObj.objdata(), newObj.objsize(), debug); if ( newLoc != loc || modsIsIndexed ){ // log() << "Moved obj " << newLoc.obj()["_id"] << " from " << loc << " to " << newLoc << endl; // object moved, need to make sure we don' get again seenObjects.insert( newLoc ); } } if ( logop ) { DEV verify( mods->size() ); BSONObj logObj = mss->getOpLogRewrite(); DEBUGUPDATE( "\t rewrite update: " << logObj ); // It is possible that the entire mod set was a no-op over this // document. We would have an empty log record in that case. If we // call logOp, with an empty record, that would be replicated as "clear // this record", which is not what we want. Therefore, to get a no-op // in the replica, we simply don't log. if ( logObj.nFields() ) { logOp("u", ns, logObj , &pattern, 0, fromMigrate, &newObj ); } } numModded++; if ( ! multi ) return UpdateResult( 1 , 1 , numModded , BSONObj() ); if ( willAdvanceCursor ) c->recoverFromTouchingEarlierIterate(); getDur().commitIfNeeded(); continue; } uassert( 10158 , "multi update only works with $ operators" , ! multi ); BSONElementManipulator::lookForTimestamps( updateobj ); checkNoMods( updateobj ); theDataFileMgr.updateRecord(ns, d, nsdt, r, loc , updateobj.objdata(), updateobj.objsize(), debug, su); if ( logop ) { DEV wassert( !su ); // super used doesn't get logged, this would be bad. logOp("u", ns, updateobj, &pattern, 0, fromMigrate, &updateobj ); } return UpdateResult( 1 , 0 , 1 , BSONObj() ); } while ( c->ok() ); } // endif if ( numModded ) return UpdateResult( 1 , 1 , numModded , BSONObj() ); if ( upsert ) { if ( updateobj.firstElementFieldName()[0] == '$' ) { // upsert of an $operation. build a default object BSONObj newObj = mods->createNewFromQuery( patternOrig ); checkNoMods( newObj ); debug.fastmodinsert = true; theDataFileMgr.insertWithObjMod(ns, newObj, false, su); if ( logop ) logOp( "i", ns, newObj, 0, 0, fromMigrate, &newObj ); return UpdateResult( 0 , 1 , 1 , newObj ); } uassert( 10159 , "multi update only works with $ operators" , ! multi ); checkNoMods( updateobj ); debug.upsert = true; BSONObj no = updateobj; theDataFileMgr.insertWithObjMod(ns, no, false, su); if ( logop ) logOp( "i", ns, no, 0, 0, fromMigrate, &no ); return UpdateResult( 0 , 0 , 1 , no ); } return UpdateResult( 0 , isOperatorUpdate , 0 , BSONObj() ); }
UpdateResult _updateObjectsNEW( bool su, const char* ns, const BSONObj& updateobj, const BSONObj& patternOrig, bool upsert, bool multi, bool logop , OpDebug& debug, RemoveSaver* rs, bool fromMigrate, const QueryPlanSelectionPolicy& planPolicy, bool forReplication ) { // TODO // + Separate UpdateParser from UpdateRunner (the latter should be "stage-y") // + All the yield and deduplicate logic would move to the query stage // portion of it // // + Replication related // + fast path for update for query by _id // + support for relaxing viable path constraint in replication // // + Field Management // + Force all upsert to contain _id // + Prevent changes to immutable fields (_id, and those mentioned by sharding) // // + Yiedling related // + $atomic support (or better, support proper yielding if not) // + page fault support debug.updateobj = updateobj; NamespaceDetails* d = nsdetails( ns ); NamespaceDetailsTransient* nsdt = &NamespaceDetailsTransient::get( ns ); UpdateDriver::Options opts; opts.multi = multi; opts.upsert = upsert; opts.logOp = logop; UpdateDriver driver( opts ); Status status = driver.parse( nsdt->indexKeys(), updateobj ); if ( !status.isOK() ) { uasserted( 16840, status.reason() ); } shared_ptr<Cursor> cursor = getOptimizedCursor( ns, patternOrig, BSONObj(), planPolicy ); // The 'cursor' the optimizer gave us may contain query plans that generate duplicate // diskloc's. We set up here the mechanims that will prevent us from processing those // twice if we see them. We also set up a 'ClientCursor' so that we can support // yielding. const bool dedupHere = cursor->autoDedup(); shared_ptr<Cursor> cPtr = cursor; auto_ptr<ClientCursor> clientCursor( new ClientCursor( QueryOption_NoCursorTimeout, cPtr, ns ) ); // // Upsert Logic // // We may or may not have documents for this update. If we don't, then try to upsert, // if allowed. if ( !cursor->ok() && upsert ) { // If this is a $mod base update, we need to generate a document by examining the // query and the mods. Otherwise, we can use the object replacement sent by the user // update command that was parsed by the driver before. BSONObj oldObj; if ( *updateobj.firstElementFieldName() == '$' ) { if ( !driver.createFromQuery( patternOrig, &oldObj ) ) { uasserted( 16835, "cannot create object to update" ); } debug.fastmodinsert = true; } else { debug.upsert = true; } // Since this is an upsert, we will be oplogging it as an insert. We don't // need the driver's help to build the oplog record, then. We also set the // context of the update driver to an "upsert". Some mods may only work in that // context (e.g. $setOnInsert). driver.setLogOp( false ); driver.setContext( ModifierInterface::ExecInfo::INSERT_CONTEXT ); mutablebson::Document doc( oldObj, mutablebson::Document::kInPlaceDisabled ); status = driver.update( StringData(), &doc, NULL /* no oplog record */); if ( !status.isOK() ) { uasserted( 16836, status.reason() ); } BSONObj newObj = doc.getObject(); theDataFileMgr.insertWithObjMod( ns, newObj, false, su ); if ( logop ) { logOp( "i", ns, newObj, 0, 0, fromMigrate, &newObj ); } return UpdateResult( false /* updated a non existing document */, driver.dollarModMode() /* $mod or obj replacement? */, 1 /* count of updated documents */, newObj /* object that was upserted */ ); } // // We have one or more documents for this update. // // We record that this will not be an upsert, in case a mod doesn't want to be applied // when in strict update mode. driver.setContext( ModifierInterface::ExecInfo::UPDATE_CONTEXT ); // Let's fetch each of them and pipe them through the update expression, making sure to // keep track of the necessary stats. Recall that we'll be pulling documents out of // cursors and some of them do not deduplicate the entries they generate. We have // deduping logic in here, too -- for now. unordered_set<DiskLoc, DiskLoc::Hasher> seenLocs; int numUpdated = 0; debug.nscanned = 0; while ( cursor->ok() ) { // Let's fetch the next candidate object for this update. Record* r = cursor->_current(); DiskLoc loc = cursor->currLoc(); const BSONObj oldObj = loc.obj(); // We count how many documents we scanned even though we may skip those that are // deemed duplicated. The final 'numUpdated' and 'nscanned' numbers may differ for // that reason. debug.nscanned++; // Skips this document if it: // a) doesn't match the query portion of the update // b) was deemed duplicate by the underlying cursor machinery // // Now, if we are going to update the document, // c) we don't want to do so while the cursor is at it, as that may invalidate // the cursor. So, we advance to next document, before issuing the update. MatchDetails matchDetails; matchDetails.requestElemMatchKey(); if ( !cursor->currentMatches( &matchDetails ) ) { // a) cursor->advance(); continue; } else if ( cursor->getsetdup( loc ) && dedupHere ) { // b) cursor->advance(); continue; } else if (driver.dollarModMode() && multi) { // c) cursor->advance(); if ( dedupHere ) { if ( seenLocs.count( loc ) ) { continue; } } // There are certain kind of cursors that hold multiple pointers to data // underneath. $or cursors is one example. In a $or cursor, it may be the case // that when we did the last advance(), we finished consuming documents from // one of $or child and started consuming the next one. In that case, it is // possible that the last document of the previous child is the same as the // first document of the next (see SERVER-5198 and jstests/orp.js). // // So we advance the cursor here until we see a new diskloc. // // Note that we won't be yielding, and we may not do so for a while if we find // a particularly duplicated sequence of loc's. That is highly unlikely, // though. (See SERVER-5725, if curious, but "stage" based $or will make that // ticket moot). while( cursor->ok() && loc == cursor->currLoc() ) { cursor->advance(); } } // For some (unfortunate) historical reasons, not all cursors would be valid after // a write simply because we advanced them to a document not affected by the write. // To protect in those cases, not only we engaged in the advance() logic above, but // we also tell the cursor we're about to write a document that we've just seen. // prepareToTouchEarlierIterate() requires calling later // recoverFromTouchingEarlierIterate(), so we make a note here to do so. bool touchPreviousDoc = multi && cursor->ok(); if ( touchPreviousDoc ) { clientCursor->setDoingDeletes( true ); cursor->prepareToTouchEarlierIterate(); } // Ask the driver to apply the mods. It may be that the driver can apply those "in // place", that is, some values of the old document just get adjusted without any // change to the binary layout on the bson layer. It may be that a whole new // document is needed to accomodate the new bson layout of the resulting document. mutablebson::Document doc( oldObj, mutablebson::Document::kInPlaceEnabled ); BSONObj logObj; StringData matchedField = matchDetails.hasElemMatchKey() ? matchDetails.elemMatchKey(): StringData(); status = driver.update( matchedField, &doc, &logObj ); if ( !status.isOK() ) { uasserted( 16837, status.reason() ); } // If the driver applied the mods in place, we can ask the mutable for what // changed. We call those changes "damages". :) We use the damages to inform the // journal what was changed, and then apply them to the original document // ourselves. If, however, the driver applied the mods out of place, we ask it to // generate a new, modified document for us. In that case, the file manager will // take care of the journaling details for us. // // This code flow is admittedly odd. But, right now, journaling is baked in the file // manager. And if we aren't using the file manager, we have to do jounaling // ourselves. BSONObj newObj; const char* source = NULL; mutablebson::DamageVector damages; bool inPlace = doc.getInPlaceUpdates(&damages, &source); if ( inPlace && !driver.modsAffectIndices() ) { // All updates were in place. Apply them via durability and writing pointer. mutablebson::DamageVector::const_iterator where = damages.begin(); const mutablebson::DamageVector::const_iterator end = damages.end(); for( ; where != end; ++where ) { const char* sourcePtr = source + where->sourceOffset; void* targetPtr = getDur().writingPtr( const_cast<char*>(oldObj.objdata()) + where->targetOffset, where->size); std::memcpy(targetPtr, sourcePtr, where->size); } newObj = oldObj; debug.fastmod = true; } else { // The updates were not in place. Apply them through the file manager. newObj = doc.getObject(); DiskLoc newLoc = theDataFileMgr.updateRecord(ns, d, nsdt, r, loc, newObj.objdata(), newObj.objsize(), debug); // If we've moved this object to a new location, make sure we don't apply // that update again if our traversal picks the objecta again. // // We also take note that the diskloc if the updates are affecting indices. // Chances are that we're traversing one of them and they may be multi key and // therefore duplicate disklocs. if ( newLoc != loc || driver.modsAffectIndices() ) { seenLocs.insert( newLoc ); } } // Log Obj if ( logop ) { if ( !logObj.isEmpty() ) { BSONObj pattern = patternOrig; logOp("u", ns, logObj , &pattern, 0, fromMigrate, &newObj ); } } // One more document updated. numUpdated++; if (!multi) { break; } // If we used the cursor mechanism that prepares an earlier seen document for a // write we need to tell such mechanisms that the write is over. if ( touchPreviousDoc ) { cursor->recoverFromTouchingEarlierIterate(); } getDur().commitIfNeeded(); } return UpdateResult( true /* updated existing object(s) */, driver.dollarModMode() /* $mod or obj replacement */, numUpdated /* # of docments update */, BSONObj() ); }
UpdateResult update(const UpdateRequest& request, OpDebug* opDebug, UpdateDriver* driver) { LOG(3) << "processing update : " << request; const NamespaceString& nsString = request.getNamespaceString(); validateUpdate( nsString.ns().c_str(), request.getUpdates(), request.getQuery() ); NamespaceDetails* nsDetails = nsdetails( nsString.ns() ); NamespaceDetailsTransient* nsDetailsTransient = &NamespaceDetailsTransient::get( nsString.ns().c_str() ); // TODO: This seems a bit circuitious. opDebug->updateobj = request.getUpdates(); driver->refreshIndexKeys( nsDetailsTransient->indexKeys() ); shared_ptr<Cursor> cursor = getOptimizedCursor( nsString.ns(), request.getQuery(), BSONObj(), request.getQueryPlanSelectionPolicy() ); // If the update was marked with '$isolated' (a.k.a '$atomic'), we are not allowed to // yield while evaluating the update loop below. // // TODO: Old code checks this repeatedly within the update loop. Is that necessary? It seems // that once atomic should be always atomic. const bool isolated = cursor->ok() && cursor->matcher() && cursor->matcher()->docMatcher().atomic(); // The 'cursor' the optimizer gave us may contain query plans that generate duplicate // diskloc's. We set up here the mechanims that will prevent us from processing those // twice if we see them. We also set up a 'ClientCursor' so that we can support // yielding. // // TODO: Is it valid to call this on a non-ok cursor? const bool dedupHere = cursor->autoDedup(); // // We'll start assuming we have one or more documents for this update. (Othwerwise, // we'll fallback to upserting.) // // We record that this will not be an upsert, in case a mod doesn't want to be applied // when in strict update mode. driver->setContext( ModifierInterface::ExecInfo::UPDATE_CONTEXT ); // Let's fetch each of them and pipe them through the update expression, making sure to // keep track of the necessary stats. Recall that we'll be pulling documents out of // cursors and some of them do not deduplicate the entries they generate. We have // deduping logic in here, too -- for now. unordered_set<DiskLoc, DiskLoc::Hasher> seenLocs; int numMatched = 0; // Reset these counters on each call. We might re-enter this function to retry this // update if we throw a page fault exception below, and we rely on these counters // reflecting only the actions taken locally. In particlar, we must have the no-op // counter reset so that we can meaningfully comapre it with numMatched above. opDebug->nscanned = 0; opDebug->nupdateNoops = 0; Client& client = cc(); mutablebson::Document doc; mutablebson::DamageVector damages; // If we are going to be yielding, we will need a ClientCursor scoped to this loop. We // only loop as long as the underlying cursor is OK. for ( auto_ptr<ClientCursor> clientCursor; cursor->ok(); ) { // If we haven't constructed a ClientCursor, and if the client allows us to throw // page faults, and if we are referring to a location that is likely not in // physical memory, then throw a PageFaultException. The entire operation will be // restarted. if ( clientCursor.get() == NULL && client.allowedToThrowPageFaultException() && !cursor->currLoc().isNull() && !cursor->currLoc().rec()->likelyInPhysicalMemory() ) { // We should never throw a PFE if we have already updated items. The numMatched // variable includes no-ops, which do not prevent us from raising a PFE, so if // numMatched is non-zero, we are still OK to throw as long all matched items // resulted in a no-op. dassert((numMatched == 0) || (numMatched == opDebug->nupdateNoops)); throw PageFaultException( cursor->currLoc().rec() ); } if ( !isolated && opDebug->nscanned != 0 ) { // We are permitted to yield. To do so we need a ClientCursor, so create one // now if we have not yet done so. if ( !clientCursor.get() ) clientCursor.reset( new ClientCursor( QueryOption_NoCursorTimeout, cursor, nsString.ns() ) ); // Ask the client cursor to yield. We get two bits of state back: whether or not // we yielded, and whether or not we correctly recovered from yielding. bool yielded = false; const bool recovered = clientCursor->yieldSometimes( ClientCursor::WillNeed, &yielded ); if ( !recovered ) { // If we failed to recover from the yield, then the ClientCursor is already // gone. Release it so we don't destroy it a second time. clientCursor.release(); break; } if ( !cursor->ok() ) { // If the cursor died while we were yielded, just get out of the update loop. break; } if ( yielded ) { // We yielded and recovered OK, and our cursor is still good. Details about // our namespace may have changed while we were yielded, so we re-acquire // them here. If we can't do so, escape the update loop. Otherwise, refresh // the driver so that it knows about what is currently indexed. nsDetails = nsdetails( nsString.ns() ); if ( !nsDetails ) break; nsDetailsTransient = &NamespaceDetailsTransient::get( nsString.ns().c_str() ); // TODO: This copies the index keys, but it may not need to do so. driver->refreshIndexKeys( nsDetailsTransient->indexKeys() ); } } // Let's fetch the next candidate object for this update. Record* record = cursor->_current(); DiskLoc loc = cursor->currLoc(); const BSONObj oldObj = loc.obj(); // We count how many documents we scanned even though we may skip those that are // deemed duplicated. The final 'numUpdated' and 'nscanned' numbers may differ for // that reason. opDebug->nscanned++; // Skips this document if it: // a) doesn't match the query portion of the update // b) was deemed duplicate by the underlying cursor machinery // // Now, if we are going to update the document, // c) we don't want to do so while the cursor is at it, as that may invalidate // the cursor. So, we advance to next document, before issuing the update. MatchDetails matchDetails; matchDetails.requestElemMatchKey(); if ( !cursor->currentMatches( &matchDetails ) ) { // a) cursor->advance(); continue; } else if ( cursor->getsetdup( loc ) && dedupHere ) { // b) cursor->advance(); continue; } else if (!driver->isDocReplacement() && request.isMulti()) { // c) cursor->advance(); if ( dedupHere ) { if ( seenLocs.count( loc ) ) { continue; } } // There are certain kind of cursors that hold multiple pointers to data // underneath. $or cursors is one example. In a $or cursor, it may be the case // that when we did the last advance(), we finished consuming documents from // one of $or child and started consuming the next one. In that case, it is // possible that the last document of the previous child is the same as the // first document of the next (see SERVER-5198 and jstests/orp.js). // // So we advance the cursor here until we see a new diskloc. // // Note that we won't be yielding, and we may not do so for a while if we find // a particularly duplicated sequence of loc's. That is highly unlikely, // though. (See SERVER-5725, if curious, but "stage" based $or will make that // ticket moot). while( cursor->ok() && loc == cursor->currLoc() ) { cursor->advance(); } } // For some (unfortunate) historical reasons, not all cursors would be valid after // a write simply because we advanced them to a document not affected by the write. // To protect in those cases, not only we engaged in the advance() logic above, but // we also tell the cursor we're about to write a document that we've just seen. // prepareToTouchEarlierIterate() requires calling later // recoverFromTouchingEarlierIterate(), so we make a note here to do so. bool touchPreviousDoc = request.isMulti() && cursor->ok(); if ( touchPreviousDoc ) { if ( clientCursor.get() ) clientCursor->setDoingDeletes( true ); cursor->prepareToTouchEarlierIterate(); } // Found a matching document numMatched++; // Ask the driver to apply the mods. It may be that the driver can apply those "in // place", that is, some values of the old document just get adjusted without any // change to the binary layout on the bson layer. It may be that a whole new // document is needed to accomodate the new bson layout of the resulting document. doc.reset( oldObj, mutablebson::Document::kInPlaceEnabled ); BSONObj logObj; // If there was a matched field, obtain it. string matchedField; if (matchDetails.hasElemMatchKey()) matchedField = matchDetails.elemMatchKey(); Status status = driver->update( matchedField, &doc, &logObj ); if ( !status.isOK() ) { uasserted( 16837, status.reason() ); } // If the driver applied the mods in place, we can ask the mutable for what // changed. We call those changes "damages". :) We use the damages to inform the // journal what was changed, and then apply them to the original document // ourselves. If, however, the driver applied the mods out of place, we ask it to // generate a new, modified document for us. In that case, the file manager will // take care of the journaling details for us. // // This code flow is admittedly odd. But, right now, journaling is baked in the file // manager. And if we aren't using the file manager, we have to do jounaling // ourselves. bool objectWasChanged = false; BSONObj newObj; const char* source = NULL; bool inPlace = doc.getInPlaceUpdates(&damages, &source); if ( inPlace && !driver->modsAffectIndices() ) { // If a set of modifiers were all no-ops, we are still 'in place', but there is // no work to do, in which case we want to consider the object unchanged. if (!damages.empty() ) { nsDetails->paddingFits(); // All updates were in place. Apply them via durability and writing pointer. mutablebson::DamageVector::const_iterator where = damages.begin(); const mutablebson::DamageVector::const_iterator end = damages.end(); for( ; where != end; ++where ) { const char* sourcePtr = source + where->sourceOffset; void* targetPtr = getDur().writingPtr( const_cast<char*>(oldObj.objdata()) + where->targetOffset, where->size); std::memcpy(targetPtr, sourcePtr, where->size); } objectWasChanged = true; opDebug->fastmod = true; } newObj = oldObj; } else { // The updates were not in place. Apply them through the file manager. newObj = doc.getObject(); DiskLoc newLoc = theDataFileMgr.updateRecord(nsString.ns().c_str(), nsDetails, nsDetailsTransient, record, loc, newObj.objdata(), newObj.objsize(), *opDebug); // If we've moved this object to a new location, make sure we don't apply // that update again if our traversal picks the objecta again. // // We also take note that the diskloc if the updates are affecting indices. // Chances are that we're traversing one of them and they may be multi key and // therefore duplicate disklocs. if ( newLoc != loc || driver->modsAffectIndices() ) { seenLocs.insert( newLoc ); } objectWasChanged = true; } // Log Obj if ( request.shouldUpdateOpLog() ) { if ( driver->isDocReplacement() || !logObj.isEmpty() ) { BSONObj idQuery = driver->makeOplogEntryQuery(newObj, request.isMulti()); logOp("u", nsString.ns().c_str(), logObj , &idQuery, NULL, request.isFromMigration(), &newObj); } } // If it was noop since the document didn't change, record that. if (!objectWasChanged) opDebug->nupdateNoops++; if (!request.isMulti()) { break; } // If we used the cursor mechanism that prepares an earlier seen document for a // write we need to tell such mechanisms that the write is over. if ( touchPreviousDoc ) { cursor->recoverFromTouchingEarlierIterate(); } getDur().commitIfNeeded(); } // TODO: Can this be simplified? if ((numMatched > 0) || (numMatched == 0 && !request.isUpsert()) ) { opDebug->nupdated = numMatched; return UpdateResult( numMatched > 0 /* updated existing object(s) */, !driver->isDocReplacement() /* $mod or obj replacement */, numMatched /* # of docments update, even no-ops */, BSONObj() ); } // // We haven't found any existing document so an insert is done // (upsert is true). // opDebug->upsert = true; // Since this is an insert (no docs found and upsert:true), we will be logging it // as an insert in the oplog. We don't need the driver's help to build the // oplog record, then. We also set the context of the update driver to the INSERT_CONTEXT. // Some mods may only work in that context (e.g. $setOnInsert). driver->setLogOp( false ); driver->setContext( ModifierInterface::ExecInfo::INSERT_CONTEXT ); BSONObj baseObj; // Reset the document we will be writing to doc.reset( baseObj, mutablebson::Document::kInPlaceDisabled ); if ( request.getQuery().hasElement("_id") ) { uassertStatusOK(doc.root().appendElement(request.getQuery().getField("_id"))); } // If this is a $mod base update, we need to generate a document by examining the // query and the mods. Otherwise, we can use the object replacement sent by the user // update command that was parsed by the driver before. // In the following block we handle the query part, and then do the regular mods after. if ( *request.getUpdates().firstElementFieldName() == '$' ) { uassertStatusOK(UpdateDriver::createFromQuery(request.getQuery(), doc)); opDebug->fastmodinsert = true; } // Apply the update modifications and then log the update as an insert manually. Status status = driver->update( StringData(), &doc, NULL /* no oplog record */); if ( !status.isOK() ) { uasserted( 16836, status.reason() ); } BSONObj newObj = doc.getObject(); theDataFileMgr.insertWithObjMod( nsString.ns().c_str(), newObj, false, request.isGod() ); if ( request.shouldUpdateOpLog() ) { logOp( "i", nsString.ns().c_str(), newObj, NULL, NULL, request.isFromMigration(), &newObj ); } opDebug->nupdated = 1; return UpdateResult( false /* updated a non existing document */, !driver->isDocReplacement() /* $mod or obj replacement? */, 1 /* count of updated documents */, newObj /* object that was upserted */ ); }
UpdateResult _updateObjectsNEW( bool su, const char* ns, const BSONObj& updateobj, const BSONObj& patternOrig, bool upsert, bool multi, bool logop , OpDebug& debug, RemoveSaver* rs, bool fromMigrate, const QueryPlanSelectionPolicy& planPolicy, bool forReplication ) { // TODO // + Separate UpdateParser from UpdateRunner (the latter should be "stage-y") // + All the yield and deduplicate logic would move to the query stage // portion of it // // + Replication related // + fast path for update for query by _id // + support for relaxing viable path constraint in replication // // + Field Management // + Force all upsert to contain _id // + Prevent changes to immutable fields (_id, and those mentioned by sharding) // // + Yiedling related // + $atomic support (or better, support proper yielding if not) // + page fault support debug.updateobj = updateobj; NamespaceDetails* d = nsdetails( ns ); NamespaceDetailsTransient* nsdt = &NamespaceDetailsTransient::get( ns ); // TODO: Put this logic someplace central and check based on constants (maybe using the // list of actually excluded config collections, and not global for the config db). NamespaceString nsStr( ns ); // Should the modifiers validdate their embedded docs via okForStorage bool shouldValidate = true; // Config db docs shouldn't get checked for valid field names since the shard key can have // a dot (".") in it. Therefore we disable validation for storage. if ( nsStr.db() == "config" ) { LOG(0) << "disabling okForStorage on config db"; shouldValidate = false; } UpdateDriver::Options opts; opts.multi = multi; opts.upsert = upsert; opts.logOp = logop; opts.modOptions = ModifierInterface::Options( forReplication, shouldValidate ); UpdateDriver driver( opts ); // TODO: This copies the index keys, but we may not actually need to. Status status = driver.parse( nsdt->indexKeys(), updateobj ); if ( !status.isOK() ) { uasserted( 16840, status.reason() ); } shared_ptr<Cursor> cursor = getOptimizedCursor( ns, patternOrig, BSONObj(), planPolicy ); // If the update was marked with '$isolated' (a.k.a '$atomic'), we are not allowed to // yield while evaluating the update loop below. // // TODO: Old code checks this repeatedly within the update loop. Is that necessary? It seems // that once atomic should be always atomic. const bool canYield = cursor->ok() && cursor->matcher() && cursor->matcher()->docMatcher().atomic(); // The 'cursor' the optimizer gave us may contain query plans that generate duplicate // diskloc's. We set up here the mechanims that will prevent us from processing those // twice if we see them. We also set up a 'ClientCursor' so that we can support // yielding. // // TODO: Is it valid to call this on a non-ok cursor? const bool dedupHere = cursor->autoDedup(); // // We'll start assuming we have one or more documents for this update. (Othwerwise, // we'll fallback to upserting.) // // We record that this will not be an upsert, in case a mod doesn't want to be applied // when in strict update mode. driver.setContext( ModifierInterface::ExecInfo::UPDATE_CONTEXT ); // Let's fetch each of them and pipe them through the update expression, making sure to // keep track of the necessary stats. Recall that we'll be pulling documents out of // cursors and some of them do not deduplicate the entries they generate. We have // deduping logic in here, too -- for now. unordered_set<DiskLoc, DiskLoc::Hasher> seenLocs; int numUpdated = 0; debug.nscanned = 0; Client& client = cc(); mutablebson::Document doc; // If we are going to be yielding, we will need a ClientCursor scoped to this loop. We // only loop as long as the underlying cursor is OK. for ( auto_ptr<ClientCursor> clientCursor; cursor->ok(); ) { // If we haven't constructed a ClientCursor, and if the client allows us to throw // page faults, and if we are referring to a location that is likely not in // physical memory, then throw a PageFaultException. The entire operation will be // restarted. if ( clientCursor.get() == NULL && client.allowedToThrowPageFaultException() && !cursor->currLoc().isNull() && !cursor->currLoc().rec()->likelyInPhysicalMemory() ) { // We should never throw a PFE if we have already updated items. dassert(numUpdated == 0); throw PageFaultException( cursor->currLoc().rec() ); } if ( !canYield && debug.nscanned != 0 ) { // We are permitted to yield. To do so we need a ClientCursor, so create one // now if we have not yet done so. if ( !clientCursor.get() ) clientCursor.reset( new ClientCursor( QueryOption_NoCursorTimeout, cursor, ns ) ); // Ask the client cursor to yield. We get two bits of state back: whether or not // we yielded, and whether or not we correctly recovered from yielding. bool yielded = false; const bool recovered = clientCursor->yieldSometimes( ClientCursor::WillNeed, &yielded ); // If we couldn't recover from the yield, or if the cursor died while we were // yielded, get out of the update loop right away. We don't need to reset // 'clientCursor' since we are leaving the scope. if ( !recovered || !cursor->ok() ) break; if ( yielded ) { // Details about our namespace may have changed while we were yielded, so // we re-acquire them here. If we can't do so, escape the update // loop. Otherwise, refresh the driver so that it knows about what is // currently indexed. d = nsdetails( ns ); if ( !d ) break; nsdt = &NamespaceDetailsTransient::get( ns ); // TODO: This copies the index keys, but it may not need to do so. driver.refreshIndexKeys( nsdt->indexKeys() ); } } // Let's fetch the next candidate object for this update. Record* r = cursor->_current(); DiskLoc loc = cursor->currLoc(); const BSONObj oldObj = loc.obj(); // We count how many documents we scanned even though we may skip those that are // deemed duplicated. The final 'numUpdated' and 'nscanned' numbers may differ for // that reason. debug.nscanned++; // Skips this document if it: // a) doesn't match the query portion of the update // b) was deemed duplicate by the underlying cursor machinery // // Now, if we are going to update the document, // c) we don't want to do so while the cursor is at it, as that may invalidate // the cursor. So, we advance to next document, before issuing the update. MatchDetails matchDetails; matchDetails.requestElemMatchKey(); if ( !cursor->currentMatches( &matchDetails ) ) { // a) cursor->advance(); continue; } else if ( cursor->getsetdup( loc ) && dedupHere ) { // b) cursor->advance(); continue; } else if (driver.dollarModMode() && multi) { // c) cursor->advance(); if ( dedupHere ) { if ( seenLocs.count( loc ) ) { continue; } } // There are certain kind of cursors that hold multiple pointers to data // underneath. $or cursors is one example. In a $or cursor, it may be the case // that when we did the last advance(), we finished consuming documents from // one of $or child and started consuming the next one. In that case, it is // possible that the last document of the previous child is the same as the // first document of the next (see SERVER-5198 and jstests/orp.js). // // So we advance the cursor here until we see a new diskloc. // // Note that we won't be yielding, and we may not do so for a while if we find // a particularly duplicated sequence of loc's. That is highly unlikely, // though. (See SERVER-5725, if curious, but "stage" based $or will make that // ticket moot). while( cursor->ok() && loc == cursor->currLoc() ) { cursor->advance(); } } // For some (unfortunate) historical reasons, not all cursors would be valid after // a write simply because we advanced them to a document not affected by the write. // To protect in those cases, not only we engaged in the advance() logic above, but // we also tell the cursor we're about to write a document that we've just seen. // prepareToTouchEarlierIterate() requires calling later // recoverFromTouchingEarlierIterate(), so we make a note here to do so. bool touchPreviousDoc = multi && cursor->ok(); if ( touchPreviousDoc ) { if ( clientCursor.get() ) clientCursor->setDoingDeletes( true ); cursor->prepareToTouchEarlierIterate(); } // Ask the driver to apply the mods. It may be that the driver can apply those "in // place", that is, some values of the old document just get adjusted without any // change to the binary layout on the bson layer. It may be that a whole new // document is needed to accomodate the new bson layout of the resulting document. doc.reset( oldObj, mutablebson::Document::kInPlaceEnabled ); BSONObj logObj; StringData matchedField = matchDetails.hasElemMatchKey() ? matchDetails.elemMatchKey(): StringData(); status = driver.update( matchedField, &doc, &logObj ); if ( !status.isOK() ) { uasserted( 16837, status.reason() ); } // If the driver applied the mods in place, we can ask the mutable for what // changed. We call those changes "damages". :) We use the damages to inform the // journal what was changed, and then apply them to the original document // ourselves. If, however, the driver applied the mods out of place, we ask it to // generate a new, modified document for us. In that case, the file manager will // take care of the journaling details for us. // // This code flow is admittedly odd. But, right now, journaling is baked in the file // manager. And if we aren't using the file manager, we have to do jounaling // ourselves. bool objectWasChanged = false; BSONObj newObj; const char* source = NULL; mutablebson::DamageVector damages; bool inPlace = doc.getInPlaceUpdates(&damages, &source); if ( inPlace && !damages.empty() && !driver.modsAffectIndices() ) { d->paddingFits(); // All updates were in place. Apply them via durability and writing pointer. mutablebson::DamageVector::const_iterator where = damages.begin(); const mutablebson::DamageVector::const_iterator end = damages.end(); for( ; where != end; ++where ) { const char* sourcePtr = source + where->sourceOffset; void* targetPtr = getDur().writingPtr( const_cast<char*>(oldObj.objdata()) + where->targetOffset, where->size); std::memcpy(targetPtr, sourcePtr, where->size); } newObj = oldObj; debug.fastmod = true; objectWasChanged = true; } else { // The updates were not in place. Apply them through the file manager. newObj = doc.getObject(); DiskLoc newLoc = theDataFileMgr.updateRecord(ns, d, nsdt, r, loc, newObj.objdata(), newObj.objsize(), debug); // If we've moved this object to a new location, make sure we don't apply // that update again if our traversal picks the objecta again. // // We also take note that the diskloc if the updates are affecting indices. // Chances are that we're traversing one of them and they may be multi key and // therefore duplicate disklocs. if ( newLoc != loc || driver.modsAffectIndices() ) { seenLocs.insert( newLoc ); } objectWasChanged = true; } // Log Obj if ( logop ) { if ( !logObj.isEmpty() ) { BSONObj idQuery = driver.makeOplogEntryQuery(newObj, multi); logOp("u", ns, logObj , &idQuery, 0, fromMigrate, &newObj); } } // If we applied any in-place updates, or asked the DataFileMgr to write for us, // then count this as an update. if (objectWasChanged) numUpdated++; if (!multi) { break; } // If we used the cursor mechanism that prepares an earlier seen document for a // write we need to tell such mechanisms that the write is over. if ( touchPreviousDoc ) { cursor->recoverFromTouchingEarlierIterate(); } getDur().commitIfNeeded(); } if (numUpdated > 0) { return UpdateResult( true /* updated existing object(s) */, driver.dollarModMode() /* $mod or obj replacement */, numUpdated /* # of docments update */, BSONObj() ); } else if (numUpdated == 0 && !upsert) { return UpdateResult( false /* no object updated */, driver.dollarModMode() /* $mod or obj replacement */, 0 /* no updates */, BSONObj() ); } // // We haven't succeeded updating any existing document but upserts are allowed. // // If this is a $mod base update, we need to generate a document by examining the // query and the mods. Otherwise, we can use the object replacement sent by the user // update command that was parsed by the driver before. BSONObj oldObj; if ( *updateobj.firstElementFieldName() == '$' ) { if ( !driver.createFromQuery( patternOrig, &oldObj ) ) { uasserted( 16835, "cannot create object to update" ); } debug.fastmodinsert = true; } else { // Copy the _id if (patternOrig.hasElement("_id")) { oldObj = patternOrig.getField("_id").wrap(); } debug.upsert = true; } // Since this is an upsert, we will be oplogging it as an insert. We don't // need the driver's help to build the oplog record, then. We also set the // context of the update driver to an "upsert". Some mods may only work in that // context (e.g. $setOnInsert). driver.setLogOp( false ); driver.setContext( ModifierInterface::ExecInfo::INSERT_CONTEXT ); doc.reset( oldObj, mutablebson::Document::kInPlaceDisabled ); status = driver.update( StringData(), &doc, NULL /* no oplog record */); if ( !status.isOK() ) { uasserted( 16836, status.reason() ); } BSONObj newObj = doc.getObject(); theDataFileMgr.insertWithObjMod( ns, newObj, false, su ); if ( logop ) { logOp( "i", ns, newObj, 0, 0, fromMigrate, &newObj ); } return UpdateResult( false /* updated a non existing document */, driver.dollarModMode() /* $mod or obj replacement? */, 1 /* count of updated documents */, newObj /* object that was upserted */ ); }