bool CoveredIndexMatcher::matches( const BSONObj& key, const DiskLoc& recLoc,
                                   MatchDetails* details, bool keyUsable ) const {

    LOG(5) << "CoveredIndexMatcher::matches() " << key.toString() << ' '
           << recLoc.toString() << ' ' << keyUsable << endl;

    dassert( key.isValid() );

    if ( details )
        details->resetOutput();

    if ( keyUsable ) {
        if ( !_keyMatcher.matches(key, details ) ) {
            return false;
        }
        bool needRecordForDetails = details && details->needRecord();
        if ( !_needRecord && !needRecordForDetails ) {
            return true;
        }
    }

    if ( details )
        details->setLoadedRecord( true );

    BSONObj obj = recLoc.obj();
    bool res = _docMatcher->matches( obj, details ) && !isOrClauseDup( obj );
    LOG(5) << "CoveredIndexMatcher _docMatcher->matches() returns " << res << endl;
    return res;
}
bool Helpers::findById(Client& c, const char* ns, BSONObj query, BSONObj& result,
                       bool* nsFound, bool* indexFound) {
    Lock::assertAtLeastReadLocked(ns);
    Database* database = c.database();
    verify( database );

    NamespaceDetails* d = database->namespaceIndex.details(ns);
    if ( ! d )
        return false;
    if ( nsFound )
        *nsFound = true;

    int idxNo = d->findIdIndex();
    if ( idxNo < 0 )
        return false;
    if ( indexFound )
        *indexFound = true;

    IndexDetails& i = d->idx( idxNo );

    BSONObj key = i.getKeyFromQuery( query );

    DiskLoc loc = QueryRunner::fastFindSingle(i, key);
    if ( loc.isNull() )
        return false;
    result = loc.obj();
    return true;
}
DiskLoc _repairExtent( Database* db, string ns, bool forward, DiskLoc eLoc ) {
    LogIndentLevel lil;

    if ( eLoc.getOfs() <= 0 ) {
        error() << "invalid extent ofs: " << eLoc.getOfs() << endl;
        return DiskLoc();
    }

    MongoDataFile* mdf = db->getFile( eLoc.a() );

    Extent* e = mdf->debug_getExtent( eLoc );
    if ( ! e->isOk() ) {
        warning() << "Extent not ok magic: " << e->magic << " going to try to continue" << endl;
    }

    log() << "length:" << e->length << endl;

    LogIndentLevel lil2;

    DiskLoc loc = forward ? e->firstRecord : e->lastRecord;
    while ( ! loc.isNull() ) {
        if ( loc.getOfs() <= 0 ) {
            error() << "offset is 0 for record which should be impossible" << endl;
            break;
        }
        log() << loc << endl;
        Record* rec = loc.rec();
        log() << loc.obj() << endl;
        loc = forward ? rec->getNext( loc ) : rec->getPrev( loc );
    }

    return forward ? e->xnext : e->xprev;
}
// Consider the point in loc, and keep it if it's within _maxDistance (and we have space
// for it).
void consider(const DiskLoc& loc) {
    if (limitReached()) return;
    Point p(loc.obj().getFieldDotted(_geoField));
    if (distance(_near, p) > _maxDistance) return;
    _locs.push_back(loc);
}
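// --- Illustrative sketch (not MongoDB source) ------------------------------------------
// A minimal standalone illustration of the pattern in consider() above: accumulate
// candidate points, discard anything farther than a maximum distance, and stop once a
// result limit is reached. Point, distance(), and the flat 2-D metric here are
// hypothetical stand-ins for the real geo types.
#include <cmath>
#include <cstddef>
#include <vector>

namespace sketch {

    struct Point { double x, y; };

    inline double distance(const Point& a, const Point& b) {
        return std::sqrt((a.x - b.x) * (a.x - b.x) + (a.y - b.y) * (a.y - b.y));
    }

    class NearCollector {
    public:
        NearCollector(const Point& near, double maxDistance, std::size_t limit)
            : _near(near), _maxDistance(maxDistance), _limit(limit) {}

        bool limitReached() const { return _hits.size() >= _limit; }

        // Mirrors consider(): reject when full, reject when too far, otherwise keep.
        void consider(const Point& p) {
            if (limitReached()) return;
            if (distance(_near, p) > _maxDistance) return;
            _hits.push_back(p);
        }

        const std::vector<Point>& hits() const { return _hits; }

    private:
        Point _near;
        double _maxDistance;
        std::size_t _limit;
        std::vector<Point> _hits;
    };

} // namespace sketch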
/* fetch a single object from collection ns that matches query
   set your db SavedContext first
*/
bool Helpers::findOne(Collection* collection, const BSONObj& query, BSONObj& result,
                      bool requireIndex) {
    DiskLoc loc = findOne( collection, query, requireIndex );
    if ( loc.isNull() )
        return false;
    result = loc.obj();
    return true;
}
PlanStage::StageState TextStage::returnResults(WorkingSetID* out) {
    if (_scoreIterator == _scores.end()) {
        _internalState = DONE;
        return PlanStage::IS_EOF;
    }

    // Filter for phrases and negative terms, score and truncate.
    DiskLoc loc = _scoreIterator->first;
    double score = _scoreIterator->second;
    _scoreIterator++;

    // Ignore non-matched documents.
    if (score < 0) {
        return PlanStage::NEED_TIME;
    }

    // Filter for phrases and negated terms.
    if (_params.query.hasNonTermPieces()) {
        if (!_ftsMatcher.matchesNonTerm(loc.obj())) {
            return PlanStage::NEED_TIME;
        }
    }

    *out = _ws->allocate();
    WorkingSetMember* member = _ws->get(*out);
    member->loc = loc;
    member->obj = member->loc.obj();
    member->state = WorkingSetMember::LOC_AND_UNOWNED_OBJ;
    member->addComputed(new TextScoreComputedData(score));
    return PlanStage::ADVANCED;
}
/* fetch a single object from collection ns that matches query
   set your db SavedContext first
*/
bool Helpers::findOne(const StringData& ns, const BSONObj& query, BSONObj& result,
                      bool requireIndex) {
    DiskLoc loc = findOne( ns, query, requireIndex );
    if ( loc.isNull() )
        return false;
    result = loc.obj();
    return true;
}
long long Helpers::removeRange( const string& ns, const BSONObj& min, const BSONObj& max,
                                bool yield, bool maxInclusive,
                                RemoveCallback* callback, bool fromMigrate ) {
    BSONObj keya, keyb;
    BSONObj minClean = toKeyFormat( min, keya );
    BSONObj maxClean = toKeyFormat( max, keyb );
    verify( keya == keyb );

    Client::Context ctx(ns);

    shared_ptr<Cursor> c;
    auto_ptr<ClientCursor> cc;
    {
        NamespaceDetails* nsd = nsdetails( ns.c_str() );
        if ( ! nsd )
            return 0;

        int ii = nsd->findIndexByKeyPattern( keya );
        verify( ii >= 0 );

        IndexDetails& i = nsd->idx( ii );

        c.reset( BtreeCursor::make( nsd, ii, i, minClean, maxClean, maxInclusive, 1 ) );
        cc.reset( new ClientCursor( QueryOption_NoCursorTimeout, c, ns ) );
        cc->setDoingDeletes( true );
    }

    long long num = 0;

    while ( cc->ok() ) {
        if ( yield && ! cc->yieldSometimes( ClientCursor::WillNeed ) ) {
            // cursor got finished by someone else, so we're done
            cc.release(); // if the collection/db is dropped, cc may be deleted
            break;
        }

        if ( ! cc->ok() )
            break;

        DiskLoc rloc = cc->currLoc();

        if ( callback )
            callback->goingToDelete( cc->current() );

        cc->advance();
        c->prepareToTouchEarlierIterate();

        logOp( "d", ns.c_str(), rloc.obj()["_id"].wrap(), 0, 0, fromMigrate );
        theDataFileMgr.deleteRecord(ns.c_str(), rloc.rec(), rloc);
        num++;

        c->recoverFromTouchingEarlierIterate();

        getDur().commitIfNeeded();
    }

    return num;
}
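// --- Illustrative sketch (not MongoDB source) ------------------------------------------
// The delete loop above advances the cursor past the current record *before* deleting
// it, so the deletion cannot invalidate the cursor's position. Below is a minimal
// standalone version of that advance-before-delete pattern, with a std::list standing
// in for the btree cursor over records.
#include <list>

// Deletes every element equal to 'value', returning how many were removed.
template <typename T>
long long removeMatching(std::list<T>& records, const T& value) {
    long long num = 0;
    typename std::list<T>::iterator it = records.begin();
    while (it != records.end()) {
        typename std::list<T>::iterator cur = it++; // advance first...
        if (*cur == value) {
            records.erase(cur);                     // ...then delete behind ourselves
            ++num;
        }
    }
    return num;
}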
void IndexSpec::reset( const DiskLoc& loc ) {
    info = loc.obj();
    keyPattern = info["key"].embeddedObjectUserCheck();
    if ( keyPattern.objsize() == 0 ) {
        out() << info.toString() << endl;
        assert(false);
    }
    _init();
}
void ClientCursor::storeOpForSlave( DiskLoc last ) {
    if ( ! ( _queryOptions & QueryOption_OplogReplay ) )
        return;

    if ( last.isNull() )
        return;

    BSONElement e = last.obj()["ts"];
    if ( e.type() == Date || e.type() == Timestamp )
        _slaveReadTill = e._opTime();
}
/**
 * analyzeDiskStorage helper which processes a single record.
 */
void processRecord(const DiskLoc& dl, const DiskLoc& prevDl, const Record* r,
                   int extentOfs, const AnalyzeParams& params,
                   vector<DiskStorageData>& sliceData,
                   BSONArrayBuilder* recordsArrayBuilder) {
    killCurrentOp.checkForInterrupt();

    BSONObj obj = dl.obj();
    int recBytes = r->lengthWithHeaders();
    double characteristicFieldValue = 0;
    bool hasCharacteristicField =
        extractCharacteristicFieldValue(obj, params, characteristicFieldValue);
    bool isLocatedBeforePrevious = dl.a() < prevDl.a();

    RecPos pos = RecPos::from(dl.getOfs(), recBytes, extentOfs, params);
    bool spansRequestedArea = false;
    for (RecPos::SliceIterator it = pos.iterateSlices(); !it.end(); ++it) {
        spansRequestedArea = true;
        DiskStorageData& slice = sliceData[it->sliceNum];
        slice.numEntries += it->ratioHere;
        slice.recBytes += it->sizeHere;
        slice.bsonBytes += static_cast<long long>(it->ratioHere * obj.objsize());
        if (hasCharacteristicField) {
            slice.characteristicCount += it->ratioHere;
            slice.characteristicSum += it->ratioHere * characteristicFieldValue;
        }
        if (isLocatedBeforePrevious) {
            slice.outOfOrderRecs += it->ratioHere;
        }
    }

    if (recordsArrayBuilder != NULL && spansRequestedArea) {
        DEV {
            int startsAt = dl.getOfs() - extentOfs;
            int endsAt = startsAt + recBytes;
            verify((startsAt < params.startOfs && endsAt > params.startOfs) ||
                   (startsAt < params.endOfs && endsAt >= params.endOfs) ||
                   (startsAt >= params.startOfs && endsAt < params.endOfs));
        }
        BSONObjBuilder recordBuilder(recordsArrayBuilder->subobjStart());
        recordBuilder.append("ofs", dl.getOfs() - extentOfs);
        recordBuilder.append("recBytes", recBytes);
        recordBuilder.append("bsonBytes", obj.objsize());
        recordBuilder.append("_id", obj["_id"]);
        if (hasCharacteristicField) {
            recordBuilder.append("characteristic", characteristicFieldValue);
        }
        recordBuilder.doneFast();
    }
}
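// --- Illustrative sketch (not MongoDB source) ------------------------------------------
// A minimal standalone version of the slice accumulation done in processRecord() above:
// a record occupying [recStart, recStart + recBytes) is apportioned across fixed-width
// slices, each slice receiving the fraction of the record's bytes (and of its entry
// count) that overlaps it. The names here are illustrative; the real code drives this
// through RecPos::SliceIterator.
#include <algorithm>
#include <cstddef>
#include <vector>

struct SliceTotals {
    double numEntries;  // fractional entry count attributed to this slice
    long long recBytes; // record bytes attributed to this slice
    SliceTotals() : numEntries(0), recBytes(0) {}
};

void accumulateRecord(int recStart, int recBytes, int sliceWidth,
                      std::vector<SliceTotals>& slices) {
    const int recEnd = recStart + recBytes;
    for (std::size_t i = 0; i < slices.size(); ++i) {
        const int sliceStart = static_cast<int>(i) * sliceWidth;
        const int sliceEnd = sliceStart + sliceWidth;
        const int overlap =
            std::max(0, std::min(recEnd, sliceEnd) - std::max(recStart, sliceStart));
        if (overlap == 0)
            continue;
        slices[i].recBytes += overlap;
        slices[i].numEntries += static_cast<double>(overlap) / recBytes;
    }
}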
long long Helpers::removeRange( const string& ns, const BSONObj& min, const BSONObj& max,
                                bool yield, bool maxInclusive, RemoveCallback* callback ) {
    BSONObj keya, keyb;
    BSONObj minClean = toKeyFormat( min, keya );
    BSONObj maxClean = toKeyFormat( max, keyb );
    assert( keya == keyb );

    Client::Context ctx(ns);

    NamespaceDetails* nsd = nsdetails( ns.c_str() );
    if ( ! nsd )
        return 0;

    int ii = nsd->findIndexByKeyPattern( keya );
    assert( ii >= 0 );

    long long num = 0;

    IndexDetails& i = nsd->idx( ii );

    shared_ptr<Cursor> c( new BtreeCursor( nsd, ii, i, minClean, maxClean, maxInclusive, 1 ) );
    auto_ptr<ClientCursor> cc( new ClientCursor( QueryOption_NoCursorTimeout, c, ns ) );
    cc->setDoingDeletes( true );

    while ( c->ok() ) {
        DiskLoc rloc = c->currLoc();
        BSONObj key = c->currKey();

        if ( callback )
            callback->goingToDelete( c->current() );

        c->advance();
        c->noteLocation();

        logOp( "d", ns.c_str(), rloc.obj()["_id"].wrap() );
        theDataFileMgr.deleteRecord(ns.c_str(), rloc.rec(), rloc);
        num++;

        c->checkLocation();

        if ( yield && ! cc->yieldSometimes() ) {
            // cursor got finished by someone else, so we're done
            cc.release(); // if the collection/db is dropped, cc may be deleted
            break;
        }
    }

    return num;
}
bool BtreeBasedAccessMethod::removeOneKey(const BSONObj& key, const DiskLoc& loc) {
    bool ret = false;

    try {
        ret = _newInterface->unindex(key, loc);
    } catch (AssertionException& e) {
        problem() << "Assertion failure: _unindex failed "
                  << _descriptor->indexNamespace() << endl;
        out() << "Assertion failure: _unindex failed: " << e.what() << '\n';
        out() << "  obj:" << loc.obj().toString() << '\n';
        out() << "  key:" << key.toString() << '\n';
        out() << "  dl:" << loc.toString() << endl;
        logContext();
    }

    return ret;
}
/**
 * @param loc the location in system.indexes where the index spec is
 */
void NOINLINE_DECL insert_makeIndex(Collection* collectionToIndex,
                                    const DiskLoc& loc,
                                    bool mayInterrupt) {
    uassert(13143,
            "can't create index on system.indexes",
            collectionToIndex->ns().coll() != "system.indexes");

    BSONObj info = loc.obj();
    std::string idxName = info["name"].valuestr();

    // Set curop description before setting indexBuildInProg, so that there's something
    // commands can find and kill as soon as indexBuildInProg is set. Only set this if it's a
    // killable index, so we don't overwrite commands in currentOp.
    if (mayInterrupt) {
        cc().curop()->setQuery(info);
    }

    IndexCatalog::IndexBuildBlock indexBuildBlock( collectionToIndex->getIndexCatalog(), idxName, loc );
    verify( indexBuildBlock.indexDetails() );

    try {
        buildAnIndex( collectionToIndex->ns(),
                      collectionToIndex->details(),
                      *indexBuildBlock.indexDetails(),
                      mayInterrupt);
        indexBuildBlock.success();
    }
    catch (DBException& e) {
        // save our error msg string as an exception or dropIndexes will overwrite our message
        LastError* le = lastError.get();
        int savecode = 0;
        string saveerrmsg;
        if ( le ) {
            savecode = le->code;
            saveerrmsg = le->msg;
        }
        else {
            savecode = e.getCode();
            saveerrmsg = e.what();
        }

        verify(le && !saveerrmsg.empty());
        setLastError(savecode, saveerrmsg.c_str());
        throw;
    }
}
// Remove the provided doc from the index.
Status BtreeBasedAccessMethod::remove(const BSONObj& obj, const DiskLoc& loc,
                                      const InsertDeleteOptions& options,
                                      int64_t* numDeleted) {
    BSONObjSet keys;
    getKeys(obj, &keys);
    *numDeleted = 0;

    for (BSONObjSet::const_iterator i = keys.begin(); i != keys.end(); ++i) {
        bool thisKeyOK = removeOneKey(*i, loc);

        if (thisKeyOK) {
            ++*numDeleted;
        } else if (options.logIfError) {
            log() << "unindex failed (key too big?) " << _descriptor->indexNamespace()
                  << " key: " << *i << " " << loc.obj()["_id"] << endl;
        }
    }

    return Status::OK();
}
bool CoveredIndexMatcher::matches(const BSONObj& key, const DiskLoc& recLoc,
                                  MatchDetails* details, bool keyUsable) {
    if ( details )
        details->reset();

    if ( keyUsable ) {
        if ( !_keyMatcher.matches(key, details ) ) {
            return false;
        }
        if ( ! _needRecord ) {
            return true;
        }
    }

    if ( details )
        details->loadedObject = true;

    return _docMatcher->matches(recLoc.obj(), details );
}
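// --- Illustrative sketch (not MongoDB source) ------------------------------------------
// A minimal standalone version of the covered-matching idea above: try to satisfy the
// predicate from the index key alone, and load the full document only when the key is
// unusable or the predicate needs fields the key does not carry. The callables below
// are hypothetical stand-ins for _keyMatcher / _docMatcher, with int standing in for
// both key and document.
#include <functional>

struct CoveredMatcherSketch {
    std::function<bool(int)> keyMatcher; // sees only the indexed value
    std::function<bool(int)> docMatcher; // sees the whole (stand-in) document
    bool needRecord;                     // predicate touches non-indexed fields

    bool matches(int key, const std::function<int()>& loadDoc, bool keyUsable) const {
        if (keyUsable) {
            if (!keyMatcher(key))
                return false;         // cheap rejection: document never loaded
            if (!needRecord)
                return true;          // fully covered: document never loaded
        }
        return docMatcher(loadDoc()); // only now do we pay for the fetch
    }
};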
/* ns:      namespace, e.g. <database>.<collection>
   pattern: the "where" clause / criteria
   justOne: stop after 1 match
   god:     allow access to system namespaces, and don't yield
*/
long long deleteObjects(const char* ns, BSONObj pattern, bool justOneOrig,
                        bool logop, bool god, RemoveSaver* rs) {
    if ( !god ) {
        if ( strstr(ns, ".system.") ) {
            /* note a delete from system.indexes would corrupt the db if done here,
               as there are pointers into those objects in NamespaceDetails.
            */
            uassert(12050, "cannot delete from system namespace", legalClientSystemNS( ns, true ) );
        }

        if ( strchr( ns, '$' ) ) {
            log() << "cannot delete from collection with reserved $ in name: " << ns << endl;
            uassert( 10100, "cannot delete from collection with reserved $ in name",
                     strchr(ns, '$') == 0 );
        }
    }

    {
        NamespaceDetails* d = nsdetails( ns );
        if ( ! d )
            return 0;
        uassert( 10101, "can't remove from a capped collection", ! d->capped );
    }

    long long nDeleted = 0;

    shared_ptr<Cursor> creal = NamespaceDetailsTransient::getCursor( ns, pattern, BSONObj(), false, 0 );

    if ( !creal->ok() )
        return nDeleted;

    shared_ptr<Cursor> cPtr = creal;
    auto_ptr<ClientCursor> cc( new ClientCursor( QueryOption_NoCursorTimeout, cPtr, ns ) );
    cc->setDoingDeletes( true );

    CursorId id = cc->cursorid();

    bool justOne = justOneOrig;
    bool canYield = !god && !(creal->matcher() && creal->matcher()->docMatcher().atomic());

    do {
        // TODO: we can generalize this, I believe
        //
        bool willNeedRecord = (creal->matcher() && creal->matcher()->needRecord()) ||
                              pattern.isEmpty() ||
                              isSimpleIdQuery( pattern );

        if ( ! willNeedRecord ) {
            // TODO: this is a total hack right now
            // check if the index fully encompasses the query
            if ( pattern.nFields() == 1 &&
                 str::equals( pattern.firstElement().fieldName(),
                              creal->indexKeyPattern().firstElement().fieldName() ) )
                willNeedRecord = true;
        }

        if ( canYield &&
             ! cc->yieldSometimes( willNeedRecord ? ClientCursor::WillNeed : ClientCursor::MaybeCovered ) ) {
            cc.release(); // has already been deleted elsewhere
            // TODO should we assert or something?
            break;
        }

        if ( !cc->ok() ) {
            break; // if we yielded, could have hit the end
        }

        // this way we can avoid calling updateLocation() every time (expensive)
        // as well as some other nuances handled
        cc->setDoingDeletes( true );

        DiskLoc rloc = cc->currLoc();
        BSONObj key = cc->currKey();

        bool match = creal->currentMatches();
        bool dup = cc->c()->getsetdup(rloc);

        if ( ! cc->advance() )
            justOne = true;

        if ( ! match )
            continue;

        assert( !dup ); // can't be a dup, we deleted it!

        if ( !justOne ) {
            /* NOTE: this is SLOW.  this is not good, noteLocation() was designed to be
               called across getMore blocks.  here we might call it millions of times,
               which would be bad.
            */
            cc->c()->prepareToTouchEarlierIterate();
        }

        if ( logop ) {
            BSONElement e;
            if ( BSONObj( rloc.rec() ).getObjectID( e ) ) {
                BSONObjBuilder b;
                b.append( e );
                bool replJustOne = true;
                logOp( "d", ns, b.done(), 0, &replJustOne );
            }
            else {
                problem() << "deleted object without id, not logging" << endl;
            }
        }

        if ( rs )
            rs->goingToDelete( rloc.obj() /* cc->c->current() */ );

        theDataFileMgr.deleteRecord(ns, rloc.rec(), rloc);
        nDeleted++;

        if ( justOne ) {
            break;
        }

        cc->c()->recoverFromTouchingEarlierIterate();

        if ( !god )
            getDur().commitIfNeeded();

        if ( debug && god && nDeleted == 100 )
            log() << "warning high number of deletes with god=true "
                  << "which could use significant memory" << endl;
    }
    while ( cc->ok() );

    if ( cc.get() && ClientCursor::find( id, false ) == 0 ) {
        // TODO: remove this and the id declaration above if this doesn't trigger
        // if it does, then i'm very confused (ERH 06/2011)
        error() << "this should be impossible" << endl;
        printStackTrace();
        cc.release();
    }

    return nDeleted;
}
DiskLoc _repairExtent( Database* db, string ns, bool forward, DiskLoc eLoc, Writer& w ) {
    LogIndentLevel lil;

    if ( eLoc.getOfs() <= 0 ) {
        error() << "invalid extent ofs: " << eLoc.getOfs() << endl;
        return DiskLoc();
    }

    MongoDataFile* mdf = db->getFile( eLoc.a() );

    Extent* e = mdf->debug_getExtent( eLoc );
    if ( ! e->isOk() ) {
        warning() << "Extent not ok magic: " << e->magic << " going to try to continue" << endl;
    }

    log() << "length:" << e->length << endl;

    LogIndentLevel lil2;

    set<DiskLoc> seen;

    DiskLoc loc = forward ? e->firstRecord : e->lastRecord;
    while ( ! loc.isNull() ) {
        if ( ! seen.insert( loc ).second ) {
            error() << "infinite loop in extent, seen: " << loc << " before" << endl;
            break;
        }

        if ( loc.getOfs() <= 0 ) {
            error() << "offset is 0 for record which should be impossible" << endl;
            break;
        }

        log(1) << loc << endl;

        Record* rec = loc.rec();
        BSONObj obj;
        try {
            obj = loc.obj();
            assert( obj.valid() );
            LOG(1) << obj << endl;
            w( obj );
        }
        catch ( std::exception& e ) {
            log() << "found invalid document @ " << loc << " " << e.what() << endl;
            if ( ! obj.isEmpty() ) {
                try {
                    BSONElement e = obj.firstElement();
                    stringstream ss;
                    ss << "first element: " << e;
                    log() << ss.str();
                }
                catch ( std::exception& ) {
                }
            }
        }

        loc = forward ? rec->getNext( loc ) : rec->getPrev( loc );
    }

    return forward ? e->xnext : e->xprev;
}
PlanStage::StageState TextStage::fillOutResults() {
    Database* db = cc().database();
    Collection* collection = db->getCollection( _params.ns );
    if (NULL == collection) {
        warning() << "TextStage params namespace error";
        return PlanStage::FAILURE;
    }
    vector<IndexDescriptor*> idxMatches;
    collection->getIndexCatalog()->findIndexByType("text", idxMatches);
    if (1 != idxMatches.size()) {
        warning() << "Expected exactly one text index";
        return PlanStage::FAILURE;
    }

    // Get all the index scans for each term in our query.
    OwnedPointerVector<PlanStage> scanners;
    for (size_t i = 0; i < _params.query.getTerms().size(); i++) {
        const string& term = _params.query.getTerms()[i];
        IndexScanParams params;
        params.bounds.startKey = FTSIndexFormat::getIndexKey(MAX_WEIGHT, term,
                                                             _params.indexPrefix);
        params.bounds.endKey = FTSIndexFormat::getIndexKey(0, term, _params.indexPrefix);
        params.bounds.endKeyInclusive = true;
        params.bounds.isSimpleRange = true;
        params.descriptor = idxMatches[0];
        params.direction = -1;
        IndexScan* ixscan = new IndexScan(params, _ws, NULL);
        scanners.mutableVector().push_back(ixscan);
    }

    // Map: diskloc -> aggregate score for doc.
    typedef unordered_map<DiskLoc, double, DiskLoc::Hasher> ScoreMap;
    ScoreMap scores;

    // For each index scan, read all results and store scores.
    size_t currentIndexScanner = 0;
    while (currentIndexScanner < scanners.size()) {
        BSONObj keyObj;
        DiskLoc loc;

        WorkingSetID id;
        PlanStage::StageState state = scanners.vector()[currentIndexScanner]->work(&id);

        if (PlanStage::ADVANCED == state) {
            WorkingSetMember* wsm = _ws->get(id);
            IndexKeyDatum& keyDatum = wsm->keyData.back();
            filterAndScore(keyDatum.keyData, wsm->loc, &scores[wsm->loc]);
            _ws->free(id);
        }
        else if (PlanStage::IS_EOF == state) {
            // Done with this scan.
            ++currentIndexScanner;
        }
        else if (PlanStage::NEED_FETCH == state) {
            // We're calling work() on ixscans and they have no way to return a fetch.
            verify(false);
        }
        else if (PlanStage::NEED_TIME == state) {
            // We are a blocking stage, so ignore scanner's request for more time.
        }
        else {
            verify(PlanStage::FAILURE == state);
            warning() << "error from index scan during text stage: invalid FAILURE state";
            return PlanStage::FAILURE;
        }
    }

    // Filter for phrases and negative terms, score and truncate.
    for (ScoreMap::iterator i = scores.begin(); i != scores.end(); ++i) {
        DiskLoc loc = i->first;
        double score = i->second;

        // Ignore non-matched documents.
        if (score < 0) {
            continue;
        }

        // Filter for phrases and negated terms.
        if (_params.query.hasNonTermPieces()) {
            if (!_ftsMatcher.matchesNonTerm(loc.obj())) {
                continue;
            }
        }

        // Add results to working set as LOC_AND_UNOWNED_OBJ initially.
        // On invalidation, we copy the object and change the state to OWNED_OBJ.
        // Fill out a WSM.
        WorkingSetID id = _ws->allocate();
        WorkingSetMember* member = _ws->get(id);
        member->loc = loc;
        member->obj = member->loc.obj();
        member->state = WorkingSetMember::LOC_AND_UNOWNED_OBJ;
        member->addComputed(new TextScoreComputedData(score));
        _results.push_back(id);
        _wsidByDiskLoc[member->loc] = id;
    }

    _filledOutResults = true;

    if (_results.size() == 0) {
        return PlanStage::IS_EOF;
    }

    return PlanStage::NEED_TIME;
}
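// --- Illustrative sketch (not MongoDB source) ------------------------------------------
// A minimal standalone version of the score aggregation in fillOutResults() above:
// several per-term index scans each contribute a partial score for a document, keyed by
// its location, and a negative score acts as a tombstone meaning "filtered out, skip at
// result time". Integer keys are hypothetical stand-ins for DiskLoc.
#include <unordered_map>
#include <vector>

typedef std::unordered_map<int, double> SketchScoreMap;

void addTermScore(SketchScoreMap& scores, int loc, double termScore, bool filteredOut) {
    double& slot = scores[loc]; // default-constructs to 0.0 on first sight
    if (filteredOut) {
        slot = -1;              // tombstone: never resurrected by later terms
    }
    else if (slot >= 0) {
        slot += termScore;      // aggregate across the per-term scans
    }
}

std::vector<int> matchedLocs(const SketchScoreMap& scores) {
    std::vector<int> out;
    for (SketchScoreMap::const_iterator it = scores.begin(); it != scores.end(); ++it) {
        if (it->second >= 0)
            out.push_back(it->first);
    }
    return out;
}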
/* note: this is only (as-is) called for
         - not multi
         - not mods is indexed
         - not upsert
*/
static UpdateResult _updateById(bool isOperatorUpdate,
                                int idIdxNo,
                                ModSet* mods,
                                NamespaceDetails* d,
                                NamespaceDetailsTransient* nsdt,
                                bool su,
                                const char* ns,
                                const BSONObj& updateobj,
                                BSONObj patternOrig,
                                bool logop,
                                OpDebug& debug,
                                bool fromMigrate = false) {
    DiskLoc loc;
    {
        IndexDetails& i = d->idx(idIdxNo);
        BSONObj key = i.getKeyFromQuery( patternOrig );
        loc = QueryRunner::fastFindSingle(i, key);
        if ( loc.isNull() ) {
            // no upsert support in _updateById yet, so we are done.
            return UpdateResult( 0, 0, 0, BSONObj() );
        }
    }
    Record* r = loc.rec();

    if ( cc().allowedToThrowPageFaultException() && ! r->likelyInPhysicalMemory() ) {
        throw PageFaultException( r );
    }

    /* look for $inc etc.  note as listed here, all fields to inc must be this type,
       you can't set some regular ones at the moment. */
    BSONObj newObj;
    if ( isOperatorUpdate ) {
        const BSONObj& onDisk = loc.obj();
        auto_ptr<ModSetState> mss = mods->prepare( onDisk, false /* not an insertion */ );

        if ( mss->canApplyInPlace() ) {
            mss->applyModsInPlace(true);
            debug.fastmod = true;
            DEBUGUPDATE( "\t\t\t updateById doing in place update" );

            newObj = onDisk;
        }
        else {
            newObj = mss->createNewFromMods();
            checkTooLarge(newObj);
            verify(nsdt);
            theDataFileMgr.updateRecord(ns, d, nsdt, r, loc, newObj.objdata(),
                                        newObj.objsize(), debug);
        }

        if ( logop ) {
            DEV verify( mods->size() );
            BSONObj pattern = patternOrig;
            BSONObj logObj = mss->getOpLogRewrite();
            DEBUGUPDATE( "\t rewrite update: " << logObj );

            // It is possible that the entire mod set was a no-op over this document.  We
            // would have an empty log record in that case.  If we call logOp, with an empty
            // record, that would be replicated as "clear this record", which is not what
            // we want.  Therefore, to get a no-op in the replica, we simply don't log.
            if ( logObj.nFields() ) {
                logOp("u", ns, logObj, &pattern, 0, fromMigrate, &newObj );
            }
        }
        return UpdateResult( 1, 1, 1, BSONObj() );
    } // end $operator update

    // regular update
    BSONElementManipulator::lookForTimestamps( updateobj );
    checkNoMods( updateobj );
    verify(nsdt);
    theDataFileMgr.updateRecord(ns, d, nsdt, r, loc, updateobj.objdata(),
                                updateobj.objsize(), debug );
    if ( logop ) {
        logOp("u", ns, updateobj, &patternOrig, 0, fromMigrate, &updateobj );
    }
    return UpdateResult( 1, 0, 1, BSONObj() );
}
UpdateResult update(const UpdateRequest& request, OpDebug* opDebug, UpdateDriver* driver) {

    LOG(3) << "processing update : " << request;

    const NamespaceString& nsString = request.getNamespaceString();

    validateUpdate( nsString.ns().c_str(), request.getUpdates(), request.getQuery() );

    NamespaceDetails* nsDetails = nsdetails( nsString.ns() );
    NamespaceDetailsTransient* nsDetailsTransient =
        &NamespaceDetailsTransient::get( nsString.ns().c_str() );

    // TODO: This seems a bit circuitous.
    opDebug->updateobj = request.getUpdates();

    driver->refreshIndexKeys( nsDetailsTransient->indexKeys() );

    shared_ptr<Cursor> cursor = getOptimizedCursor(
        nsString.ns(), request.getQuery(), BSONObj(), request.getQueryPlanSelectionPolicy() );

    // If the update was marked with '$isolated' (a.k.a '$atomic'), we are not allowed to
    // yield while evaluating the update loop below.
    //
    // TODO: Old code checks this repeatedly within the update loop. Is that necessary? It
    // seems that once atomic should be always atomic.
    const bool isolated =
        cursor->ok() &&
        cursor->matcher() &&
        cursor->matcher()->docMatcher().atomic();

    // The 'cursor' the optimizer gave us may contain query plans that generate duplicate
    // diskloc's. We set up here the mechanisms that will prevent us from processing those
    // twice if we see them. We also set up a 'ClientCursor' so that we can support
    // yielding.
    //
    // TODO: Is it valid to call this on a non-ok cursor?
    const bool dedupHere = cursor->autoDedup();

    //
    // We'll start assuming we have one or more documents for this update. (Otherwise,
    // we'll fall back to upserting.)
    //

    // We record that this will not be an upsert, in case a mod doesn't want to be applied
    // when in strict update mode.
    driver->setContext( ModifierInterface::ExecInfo::UPDATE_CONTEXT );

    // Let's fetch each of them and pipe them through the update expression, making sure to
    // keep track of the necessary stats. Recall that we'll be pulling documents out of
    // cursors and some of them do not deduplicate the entries they generate. We have
    // deduping logic in here, too -- for now.
    unordered_set<DiskLoc, DiskLoc::Hasher> seenLocs;
    int numMatched = 0;

    // Reset these counters on each call. We might re-enter this function to retry this
    // update if we throw a page fault exception below, and we rely on these counters
    // reflecting only the actions taken locally. In particular, we must have the no-op
    // counter reset so that we can meaningfully compare it with numMatched above.
    opDebug->nscanned = 0;
    opDebug->nupdateNoops = 0;

    Client& client = cc();

    mutablebson::Document doc;
    mutablebson::DamageVector damages;

    // If we are going to be yielding, we will need a ClientCursor scoped to this loop. We
    // only loop as long as the underlying cursor is OK.
    for ( auto_ptr<ClientCursor> clientCursor; cursor->ok(); ) {

        // If we haven't constructed a ClientCursor, and if the client allows us to throw
        // page faults, and if we are referring to a location that is likely not in
        // physical memory, then throw a PageFaultException. The entire operation will be
        // restarted.
        if ( clientCursor.get() == NULL &&
             client.allowedToThrowPageFaultException() &&
             !cursor->currLoc().isNull() &&
             !cursor->currLoc().rec()->likelyInPhysicalMemory() ) {

            // We should never throw a PFE if we have already updated items. The numMatched
            // variable includes no-ops, which do not prevent us from raising a PFE, so if
            // numMatched is non-zero, we are still OK to throw as long as all matched items
            // resulted in a no-op.
            dassert((numMatched == 0) || (numMatched == opDebug->nupdateNoops));

            throw PageFaultException( cursor->currLoc().rec() );
        }

        if ( !isolated && opDebug->nscanned != 0 ) {

            // We are permitted to yield. To do so we need a ClientCursor, so create one
            // now if we have not yet done so.
            if ( !clientCursor.get() )
                clientCursor.reset(
                    new ClientCursor( QueryOption_NoCursorTimeout, cursor, nsString.ns() ) );

            // Ask the client cursor to yield. We get two bits of state back: whether or not
            // we yielded, and whether or not we correctly recovered from yielding.
            bool yielded = false;
            const bool recovered = clientCursor->yieldSometimes(
                ClientCursor::WillNeed, &yielded );

            if ( !recovered ) {
                // If we failed to recover from the yield, then the ClientCursor is already
                // gone. Release it so we don't destroy it a second time.
                clientCursor.release();
                break;
            }

            if ( !cursor->ok() ) {
                // If the cursor died while we were yielded, just get out of the update loop.
                break;
            }

            if ( yielded ) {
                // We yielded and recovered OK, and our cursor is still good. Details about
                // our namespace may have changed while we were yielded, so we re-acquire
                // them here. If we can't do so, escape the update loop. Otherwise, refresh
                // the driver so that it knows about what is currently indexed.
                nsDetails = nsdetails( nsString.ns() );
                if ( !nsDetails )
                    break;
                nsDetailsTransient = &NamespaceDetailsTransient::get( nsString.ns().c_str() );

                // TODO: This copies the index keys, but it may not need to do so.
                driver->refreshIndexKeys( nsDetailsTransient->indexKeys() );
            }
        }

        // Let's fetch the next candidate object for this update.
        Record* record = cursor->_current();
        DiskLoc loc = cursor->currLoc();
        const BSONObj oldObj = loc.obj();

        // We count how many documents we scanned even though we may skip those that are
        // deemed duplicated. The final 'numUpdated' and 'nscanned' numbers may differ for
        // that reason.
        opDebug->nscanned++;

        // Skips this document if it:
        // a) doesn't match the query portion of the update
        // b) was deemed duplicate by the underlying cursor machinery
        //
        // Now, if we are going to update the document,
        // c) we don't want to do so while the cursor is at it, as that may invalidate
        // the cursor. So, we advance to next document, before issuing the update.
        MatchDetails matchDetails;
        matchDetails.requestElemMatchKey();

        if ( !cursor->currentMatches( &matchDetails ) ) {
            // a)
            cursor->advance();
            continue;
        }
        else if ( cursor->getsetdup( loc ) && dedupHere ) {
            // b)
            cursor->advance();
            continue;
        }
        else if ( !driver->isDocReplacement() && request.isMulti() ) {
            // c)
            cursor->advance();
            if ( dedupHere ) {
                if ( seenLocs.count( loc ) ) {
                    continue;
                }
            }

            // There are certain kind of cursors that hold multiple pointers to data
            // underneath. $or cursors is one example. In a $or cursor, it may be the case
            // that when we did the last advance(), we finished consuming documents from
            // one of $or child and started consuming the next one. In that case, it is
            // possible that the last document of the previous child is the same as the
            // first document of the next (see SERVER-5198 and jstests/orp.js).
            //
            // So we advance the cursor here until we see a new diskloc.
            //
            // Note that we won't be yielding, and we may not do so for a while if we find
            // a particularly duplicated sequence of loc's. That is highly unlikely,
            // though. (See SERVER-5725, if curious, but "stage" based $or will make that
            // ticket moot).
            while( cursor->ok() && loc == cursor->currLoc() ) {
                cursor->advance();
            }
        }

        // For some (unfortunate) historical reasons, not all cursors would be valid after
        // a write simply because we advanced them to a document not affected by the write.
        // To protect in those cases, not only we engaged in the advance() logic above, but
        // we also tell the cursor we're about to write a document that we've just seen.
        // prepareToTouchEarlierIterate() requires calling later
        // recoverFromTouchingEarlierIterate(), so we make a note here to do so.
        bool touchPreviousDoc = request.isMulti() && cursor->ok();
        if ( touchPreviousDoc ) {
            if ( clientCursor.get() )
                clientCursor->setDoingDeletes( true );
            cursor->prepareToTouchEarlierIterate();
        }

        // Found a matching document.
        numMatched++;

        // Ask the driver to apply the mods. It may be that the driver can apply those "in
        // place", that is, some values of the old document just get adjusted without any
        // change to the binary layout on the bson layer. It may be that a whole new
        // document is needed to accommodate the new bson layout of the resulting document.
        doc.reset( oldObj, mutablebson::Document::kInPlaceEnabled );
        BSONObj logObj;

        // If there was a matched field, obtain it.
        string matchedField;
        if (matchDetails.hasElemMatchKey())
            matchedField = matchDetails.elemMatchKey();

        Status status = driver->update( matchedField, &doc, &logObj );
        if ( !status.isOK() ) {
            uasserted( 16837, status.reason() );
        }

        // If the driver applied the mods in place, we can ask the mutable for what
        // changed. We call those changes "damages". :) We use the damages to inform the
        // journal what was changed, and then apply them to the original document
        // ourselves. If, however, the driver applied the mods out of place, we ask it to
        // generate a new, modified document for us. In that case, the file manager will
        // take care of the journaling details for us.
        //
        // This code flow is admittedly odd. But, right now, journaling is baked in the
        // file manager. And if we aren't using the file manager, we have to do journaling
        // ourselves.
        bool objectWasChanged = false;
        BSONObj newObj;
        const char* source = NULL;
        bool inPlace = doc.getInPlaceUpdates(&damages, &source);
        if ( inPlace && !driver->modsAffectIndices() ) {
            // If a set of modifiers were all no-ops, we are still 'in place', but there is
            // no work to do, in which case we want to consider the object unchanged.
            if ( !damages.empty() ) {
                nsDetails->paddingFits();

                // All updates were in place. Apply them via durability and writing pointer.
                mutablebson::DamageVector::const_iterator where = damages.begin();
                const mutablebson::DamageVector::const_iterator end = damages.end();
                for( ; where != end; ++where ) {
                    const char* sourcePtr = source + where->sourceOffset;
                    void* targetPtr = getDur().writingPtr(
                        const_cast<char*>(oldObj.objdata()) + where->targetOffset,
                        where->size);
                    std::memcpy(targetPtr, sourcePtr, where->size);
                }
                objectWasChanged = true;
                opDebug->fastmod = true;
            }
            newObj = oldObj;
        }
        else {
            // The updates were not in place. Apply them through the file manager.
            newObj = doc.getObject();
            DiskLoc newLoc = theDataFileMgr.updateRecord(nsString.ns().c_str(),
                                                         nsDetails,
                                                         nsDetailsTransient,
                                                         record,
                                                         loc,
                                                         newObj.objdata(),
                                                         newObj.objsize(),
                                                         *opDebug);

            // If we've moved this object to a new location, make sure we don't apply
            // that update again if our traversal picks the object again.
            //
            // We also take note of the diskloc if the updates are affecting indices.
            // Chances are that we're traversing one of them and they may be multi key and
            // therefore duplicate disklocs.
            if ( newLoc != loc || driver->modsAffectIndices() ) {
                seenLocs.insert( newLoc );
            }

            objectWasChanged = true;
        }

        // Log Obj
        if ( request.shouldUpdateOpLog() ) {
            if ( driver->isDocReplacement() || !logObj.isEmpty() ) {
                BSONObj idQuery = driver->makeOplogEntryQuery(newObj, request.isMulti());
                logOp("u", nsString.ns().c_str(), logObj, &idQuery,
                      NULL, request.isFromMigration(), &newObj);
            }
        }

        // If it was a no-op because the document didn't change, record that.
        if (!objectWasChanged)
            opDebug->nupdateNoops++;

        if (!request.isMulti()) {
            break;
        }

        // If we used the cursor mechanism that prepares an earlier seen document for a
        // write we need to tell such mechanisms that the write is over.
        if ( touchPreviousDoc ) {
            cursor->recoverFromTouchingEarlierIterate();
        }

        getDur().commitIfNeeded();
    }

    // TODO: Can this be simplified?
    if ((numMatched > 0) || (numMatched == 0 && !request.isUpsert()) ) {
        opDebug->nupdated = numMatched;
        return UpdateResult( numMatched > 0 /* updated existing object(s) */,
                             !driver->isDocReplacement() /* $mod or obj replacement */,
                             numMatched /* # of documents updated, even no-ops */,
                             BSONObj() );
    }

    //
    // We haven't found any existing document so an insert is done
    // (upsert is true).
    //
    opDebug->upsert = true;

    // Since this is an insert (no docs found and upsert:true), we will be logging it
    // as an insert in the oplog. We don't need the driver's help to build the
    // oplog record, then. We also set the context of the update driver to the
    // INSERT_CONTEXT. Some mods may only work in that context (e.g. $setOnInsert).
    driver->setLogOp( false );
    driver->setContext( ModifierInterface::ExecInfo::INSERT_CONTEXT );

    BSONObj baseObj;

    // Reset the document we will be writing to.
    doc.reset( baseObj, mutablebson::Document::kInPlaceDisabled );
    if ( request.getQuery().hasElement("_id") ) {
        uassertStatusOK(doc.root().appendElement(request.getQuery().getField("_id")));
    }

    // If this is a $mod base update, we need to generate a document by examining the
    // query and the mods. Otherwise, we can use the object replacement sent by the user
    // update command that was parsed by the driver before.
    // In the following block we handle the query part, and then do the regular mods after.
    if ( *request.getUpdates().firstElementFieldName() == '$' ) {
        uassertStatusOK(UpdateDriver::createFromQuery(request.getQuery(), doc));
        opDebug->fastmodinsert = true;
    }

    // Apply the update modifications and then log the update as an insert manually.
    Status status = driver->update( StringData(), &doc, NULL /* no oplog record */ );
    if ( !status.isOK() ) {
        uasserted( 16836, status.reason() );
    }

    BSONObj newObj = doc.getObject();
    theDataFileMgr.insertWithObjMod( nsString.ns().c_str(), newObj, false, request.isGod() );

    if ( request.shouldUpdateOpLog() ) {
        logOp( "i", nsString.ns().c_str(), newObj,
               NULL, NULL, request.isFromMigration(), &newObj );
    }

    opDebug->nupdated = 1;
    return UpdateResult( false /* updated a non-existing document */,
                         !driver->isDocReplacement() /* $mod or obj replacement? */,
                         1 /* count of updated documents */,
                         newObj /* object that was upserted */ );
}
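// --- Illustrative sketch (not MongoDB source) ------------------------------------------
// A minimal standalone version of the in-place "damages" application in the update loop
// above: each damage names a span of bytes in a freshly computed buffer (source) that
// must be copied over the corresponding span of the on-disk image (target). The struct
// layout here is illustrative; the real DamageVector lives in mutablebson and the
// target writes go through the durability layer (getDur().writingPtr()).
#include <cstddef>
#include <cstring>
#include <vector>

struct DamageSketch {
    std::size_t sourceOffset;
    std::size_t targetOffset;
    std::size_t size;
};

void applyDamages(const std::vector<DamageSketch>& damages,
                  const char* source, char* target) {
    for (std::vector<DamageSketch>::const_iterator it = damages.begin();
         it != damages.end(); ++it) {
        std::memcpy(target + it->targetOffset, source + it->sourceOffset, it->size);
    }
}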
DiskLoc _repairExtent( Database* db, string ns, bool forward, DiskLoc eLoc, Writer& w ) {
    LogIndentLevel lil;

    if ( eLoc.getOfs() <= 0 ) {
        toolError() << "invalid extent ofs: " << eLoc.getOfs() << std::endl;
        return DiskLoc();
    }

    const ExtentManager& extentManager = db->getExtentManager();

    Extent* e = extentManager.getExtent( eLoc, false );
    if ( ! e->isOk() ) {
        toolError() << "Extent not ok magic: " << e->magic << " going to try to continue"
                    << std::endl;
    }

    toolInfoLog() << "length:" << e->length << std::endl;

    LogIndentLevel lil2;

    set<DiskLoc> seen;

    DiskLoc loc = forward ? e->firstRecord : e->lastRecord;
    while ( ! loc.isNull() ) {
        if ( ! seen.insert( loc ).second ) {
            toolError() << "infinite loop in extent, seen: " << loc << " before" << std::endl;
            break;
        }

        if ( loc.getOfs() <= 0 ) {
            toolError() << "offset is 0 for record which should be impossible" << std::endl;
            break;
        }

        if (logger::globalLogDomain()->shouldLog(logger::LogSeverity::Debug(1))) {
            toolInfoLog() << loc << std::endl;
        }

        BSONObj obj;
        try {
            obj = loc.obj();
            verify( obj.valid() );
            if (logger::globalLogDomain()->shouldLog(logger::LogSeverity::Debug(1))) {
                toolInfoLog() << obj << std::endl;
            }
            w( obj );
        }
        catch ( std::exception& e ) {
            toolError() << "found invalid document @ " << loc << " " << e.what() << std::endl;
            if ( ! obj.isEmpty() ) {
                try {
                    BSONElement e = obj.firstElement();
                    stringstream ss;
                    ss << "first element: " << e;
                    toolError() << ss.str() << std::endl;
                }
                catch ( std::exception& ) {
                    toolError() << "unable to log invalid document @ " << loc << std::endl;
                }
            }
        }

        loc = forward ? extentManager.getNextRecordInExtent( loc )
                      : extentManager.getPrevRecordInExtent( loc );

        // break when new loc is outside current extent boundary
        if ( loc.isNull() ) {
            break;
        }
    }

    toolInfoLog() << "wrote " << seen.size() << " documents" << std::endl;

    return forward ? e->xnext : e->xprev;
}
Runner::RunnerState IDHackRunner::getNext(BSONObj* objOut, DiskLoc* dlOut) {
    if (_killed) { return Runner::RUNNER_DEAD; }
    if (_done) { return Runner::RUNNER_EOF; }

    // Use the index catalog to get the id index.
    const IndexCatalog* catalog = _collection->getIndexCatalog();

    // Find the index we use.
    IndexDescriptor* idDesc = catalog->findIdIndex();
    if (NULL == idDesc) {
        _done = true;
        return Runner::RUNNER_EOF;
    }

    // This may not be valid always.  See SERVER-12397.
    const BtreeBasedAccessMethod* accessMethod =
        static_cast<const BtreeBasedAccessMethod*>(catalog->getIndex(idDesc));

    // Look up the key by going directly to the Btree.
    DiskLoc loc = accessMethod->findSingle( _key );

    _done = true;

    // Key not found.
    if (loc.isNull()) {
        return Runner::RUNNER_EOF;
    }

    _nscanned++;

    // Set out parameters and note that we're done w/lookup.
    if (NULL == objOut) {
        // No object requested - nothing to do.
    }
    else if (hasIDProjection(_query.get())) {
        // Covered query on _id field only.
        // Set object to search key.
        // Search key is retrieved from the canonical query at
        // construction and always contains the _id field name.
        // It is possible to construct the ID hack runner with just the collection
        // and the key object (which could be {"": my_obj_id}) but _query would be null
        // in that case and the query would never be seen as covered.
        *objOut = _key.getOwned();
    }
    else {
        invariant(!hasIDProjection(_query.get()));

        // Fetch object from storage.
        Record* record = loc.rec();

        _nscannedObjects++;

        // If the record isn't in memory...
        if (!Record::likelyInPhysicalMemory(record->dataNoThrowing())) {
            // And we're allowed to yield ourselves...
            if (Runner::YIELD_AUTO == _policy) {
                // Note what we're yielding to fetch so that we don't crash if the loc is
                // deleted during a yield.
                _locFetching = loc;
                // Yield.  TODO: Do we want to bother yielding if micros < 0?
                int micros = ClientCursor::suggestYieldMicros();
                ClientCursor::staticYield(micros, "", record);
                // This can happen when we're yielded for various reasons (e.g. db/idx dropped).
                if (_killed) {
                    return Runner::RUNNER_DEAD;
                }
            }
        }

        // Either the data was in memory or we paged it in.
        *objOut = loc.obj();

        // If we're sharded make sure the key belongs to us.  We need the object to do this.
        if (shardingState.needCollectionMetadata(_collection->ns().ns())) {
            CollectionMetadataPtr m = shardingState.getCollectionMetadata(_collection->ns().ns());
            if (m) {
                KeyPattern kp(m->getKeyPattern());
                if (!m->keyBelongsToMe( kp.extractSingleKey(*objOut))) {
                    // We have something with a matching _id but it doesn't belong to me.
                    return Runner::RUNNER_EOF;
                }
            }
        }
    }

    // Return the DiskLoc if the caller wants it.
    if (NULL != dlOut) {
        *dlOut = loc;
    }

    return Runner::RUNNER_ADVANCED;
}
Runner::RunnerState IDHackRunner::getNext(BSONObj* objOut, DiskLoc* dlOut) {
    if (_killed) { return Runner::RUNNER_DEAD; }
    if (_done) { return Runner::RUNNER_EOF; }

    // Use the index catalog to get the id index.
    IndexCatalog* catalog = _collection->getIndexCatalog();

    // Find the index we use.
    const IndexDescriptor* idDesc = catalog->findIdIndex();
    if (NULL == idDesc) {
        _done = true;
        return Runner::RUNNER_EOF;
    }

    BtreeBasedAccessMethod* accessMethod = catalog->getBtreeBasedIndex( idDesc );

    BSONObj key = _query->getQueryObj()["_id"].wrap();

    // Look up the key by going directly to the Btree.
    DiskLoc loc = accessMethod->findSingle( key );

    _done = true;

    // Key not found.
    if (loc.isNull()) {
        return Runner::RUNNER_EOF;
    }

    // Set out parameters and note that we're done w/lookup.
    if (NULL != objOut) {
        Record* record = loc.rec();

        // If the record isn't in memory...
        if (!Record::likelyInPhysicalMemory(record->dataNoThrowing())) {
            // And we're allowed to yield ourselves...
            if (Runner::YIELD_AUTO == _policy) {
                // Note what we're yielding to fetch so that we don't crash if the loc is
                // deleted during a yield.
                _locFetching = loc;
                // Yield.  TODO: Do we want to bother yielding if micros < 0?
                int micros = ClientCursor::suggestYieldMicros();
                ClientCursor::staticYield(micros, "", record);
                // This can happen when we're yielded for various reasons (e.g. db/idx dropped).
                if (_killed) {
                    return Runner::RUNNER_DEAD;
                }
            }
        }

        // Either the data was in memory or we paged it in.
        *objOut = loc.obj();

        // If we're sharded make sure the key belongs to us.  We need the object to do this.
        if (shardingState.needCollectionMetadata(_query->ns())) {
            CollectionMetadataPtr m = shardingState.getCollectionMetadata(_query->ns());
            if (m) {
                KeyPattern kp(m->getKeyPattern());
                if (!m->keyBelongsToMe( kp.extractSingleKey(*objOut))) {
                    // We have something with a matching _id but it doesn't belong to me.
                    return Runner::RUNNER_EOF;
                }
            }
        }

        // If there is a projection...
        if (NULL != _query->getProj()) {
            // Create something to execute it.
            auto_ptr<ProjectionExec> projExec(new ProjectionExec(_query->getParsed().getProj(),
                                                                 _query->root()));
            projExec->transform(*objOut, objOut);
        }
    }

    // Return the DiskLoc if the caller wants it.
    if (NULL != dlOut) {
        *dlOut = loc;
    }

    return Runner::RUNNER_ADVANCED;
}
long long Helpers::removeRange( const string& ns,
                                const BSONObj& min,
                                const BSONObj& max,
                                const BSONObj& keyPattern,
                                bool maxInclusive,
                                bool secondaryThrottle,
                                RemoveCallback* callback,
                                bool fromMigrate ) {
    Client& c = cc();

    long long numDeleted = 0;
    PageFaultRetryableSection pgrs;

    long long millisWaitingForReplication = 0;

    while ( 1 ) {
        try {

            Client::WriteContext ctx(ns);

            scoped_ptr<Cursor> c;

            {
                NamespaceDetails* nsd = nsdetails( ns.c_str() );
                if ( ! nsd )
                    break;

                int ii = nsd->findIndexByKeyPattern( keyPattern );
                verify( ii >= 0 );

                IndexDetails& i = nsd->idx( ii );

                // Extend min to get (min, MinKey, MinKey, ....)
                BSONObj newMin = Helpers::modifiedRangeBound( min, keyPattern, -1 );
                // If upper bound is included, extend max to get (max, MaxKey, MaxKey, ...)
                // If not included, extend max to get (max, MinKey, MinKey, ....)
                int minOrMax = maxInclusive ? 1 : -1;
                BSONObj newMax = Helpers::modifiedRangeBound( max, keyPattern, minOrMax );

                c.reset( BtreeCursor::make( nsd, ii, i, newMin, newMax, maxInclusive, 1 ) );
            }

            if ( ! c->ok() ) {
                // we're done
                break;
            }

            DiskLoc rloc = c->currLoc();
            BSONObj obj = c->current();

            // this is so that we don't have to handle this cursor in the delete code
            c.reset(0);

            if ( callback )
                callback->goingToDelete( obj );

            logOp( "d", ns.c_str(), rloc.obj()["_id"].wrap(), 0, 0, fromMigrate );
            theDataFileMgr.deleteRecord(ns.c_str(), rloc.rec(), rloc);
            numDeleted++;
        }
        catch( PageFaultException& e ) {
            e.touch();
            continue;
        }

        Timer secondaryThrottleTime;

        if ( secondaryThrottle ) {
            if ( ! waitForReplication( c.getLastOp(), 2, 60 /* seconds to wait */ ) ) {
                warning() << "replication to secondaries for removeRange "
                          << "at least 60 seconds behind" << endl;
            }
            millisWaitingForReplication += secondaryThrottleTime.millis();
        }

        if ( ! Lock::isLocked() ) {
            int micros = ( 2 * Client::recommendedYieldMicros() ) - secondaryThrottleTime.micros();
            if ( micros > 0 ) {
                LOG(1) << "Helpers::removeRangeUnlocked going to sleep for "
                       << micros << " micros" << endl;
                sleepmicros( micros );
            }
        }

    }

    if ( secondaryThrottle )
        log() << "Helpers::removeRangeUnlocked time spent waiting for replication: "
              << millisWaitingForReplication << "ms" << endl;

    return numDeleted;
}
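// --- Illustrative sketch (not MongoDB source) ------------------------------------------
// A minimal standalone version of the bound extension commented above: a user-supplied
// bound on a prefix of the key pattern is padded out to the full key width with
// sentinels, so the btree range covers every key sharing that prefix. MIN_SENTINEL and
// MAX_SENTINEL are hypothetical stand-ins for MinKey/MaxKey, and int vectors stand in
// for BSON keys.
#include <climits>
#include <cstddef>
#include <vector>

const int MIN_SENTINEL = INT_MIN; // sorts before every real value, like MinKey
const int MAX_SENTINEL = INT_MAX; // sorts after every real value, like MaxKey

// direction < 0 pads with MIN_SENTINEL, direction > 0 with MAX_SENTINEL, mirroring the
// minOrMax argument of Helpers::modifiedRangeBound above.
std::vector<int> extendBound(const std::vector<int>& bound, std::size_t keyWidth,
                             int direction) {
    std::vector<int> out(bound);
    out.resize(keyWidth, direction < 0 ? MIN_SENTINEL : MAX_SENTINEL);
    return out;
}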
UpdateResult _updateObjects( bool su,
                             const char* ns,
                             const BSONObj& updateobj,
                             const BSONObj& patternOrig,
                             bool upsert,
                             bool multi,
                             bool logop,
                             OpDebug& debug,
                             RemoveSaver* rs,
                             bool fromMigrate,
                             const QueryPlanSelectionPolicy& planPolicy,
                             bool forReplication ) {

    DEBUGUPDATE( "update: " << ns
                 << " update: " << updateobj
                 << " query: " << patternOrig
                 << " upsert: " << upsert << " multi: " << multi );

    Client& client = cc();

    debug.updateobj = updateobj;

    // The idea with these here is to make them loop invariant for
    // multi updates, and thus be a bit faster for that case. The
    // pointers may be left invalid on a failed or terminal yield
    // recovery.
    NamespaceDetails* d = nsdetails(ns); // can be null if an upsert...
    NamespaceDetailsTransient* nsdt = &NamespaceDetailsTransient::get(ns);

    auto_ptr<ModSet> mods;
    bool isOperatorUpdate = updateobj.firstElementFieldName()[0] == '$';
    int modsIsIndexed = false; // really the # of indexes
    if ( isOperatorUpdate ) {
        mods.reset( new ModSet(updateobj, nsdt->indexKeys(), forReplication) );
        modsIsIndexed = mods->maxNumIndexUpdated();
    }

    if ( planPolicy.permitOptimalIdPlan() && !multi && isSimpleIdQuery(patternOrig) && d &&
         !modsIsIndexed ) {
        int idxNo = d->findIdIndex();
        if ( idxNo >= 0 ) {
            debug.idhack = true;

            UpdateResult result = _updateById( isOperatorUpdate,
                                               idxNo,
                                               mods.get(),
                                               d,
                                               nsdt,
                                               su,
                                               ns,
                                               updateobj,
                                               patternOrig,
                                               logop,
                                               debug,
                                               fromMigrate);
            if ( result.existing || ! upsert ) {
                return result;
            }
            else if ( upsert && ! isOperatorUpdate ) {
                // this handles repl inserts
                checkNoMods( updateobj );
                debug.upsert = true;
                BSONObj no = updateobj;
                theDataFileMgr.insertWithObjMod(ns, no, false, su);
                if ( logop )
                    logOp( "i", ns, no, 0, 0, fromMigrate, &no );

                return UpdateResult( 0, 0, 1, no );
            }
        }
    }

    int numModded = 0;
    debug.nscanned = 0;
    shared_ptr<Cursor> c = getOptimizedCursor( ns, patternOrig, BSONObj(), planPolicy );
    d = nsdetails(ns);
    nsdt = &NamespaceDetailsTransient::get(ns);
    bool autoDedup = c->autoDedup();

    if ( c->ok() ) {
        set<DiskLoc> seenObjects;
        MatchDetails details;
        auto_ptr<ClientCursor> cc;
        do {

            if ( cc.get() == 0 &&
                 client.allowedToThrowPageFaultException() &&
                 ! c->currLoc().isNull() &&
                 ! c->currLoc().rec()->likelyInPhysicalMemory() ) {
                throw PageFaultException( c->currLoc().rec() );
            }

            bool atomic = c->matcher() && c->matcher()->docMatcher().atomic();

            if ( ! atomic && debug.nscanned > 0 ) {
                // we need to use a ClientCursor to yield
                if ( cc.get() == 0 ) {
                    shared_ptr<Cursor> cPtr = c;
                    cc.reset( new ClientCursor( QueryOption_NoCursorTimeout, cPtr, ns ) );
                }

                bool didYield;
                if ( ! cc->yieldSometimes( ClientCursor::WillNeed, &didYield ) ) {
                    cc.release();
                    break;
                }
                if ( !c->ok() ) {
                    break;
                }

                if ( didYield ) {
                    d = nsdetails(ns);
                    if ( ! d )
                        break;
                    nsdt = &NamespaceDetailsTransient::get(ns);
                    if ( mods.get() ) {
                        mods->setIndexedStatus( nsdt->indexKeys() );
                        modsIsIndexed = mods->maxNumIndexUpdated();
                    }
                }

            } // end yielding block

            debug.nscanned++;

            if ( mods.get() && mods->hasDynamicArray() ) {
                details.requestElemMatchKey();
            }

            if ( !c->currentMatches( &details ) ) {
                c->advance();
                continue;
            }

            Record* r = c->_current();
            DiskLoc loc = c->currLoc();

            if ( c->getsetdup( loc ) && autoDedup ) {
                c->advance();
                continue;
            }

            BSONObj js = BSONObj::make(r);

            BSONObj pattern = patternOrig;

            if ( logop ) {
                BSONObjBuilder idPattern;
                BSONElement id;
                // NOTE: If the matching object lacks an id, we'll log
                // with the original pattern.  This isn't replay-safe.
                // It might make sense to suppress the log instead
                // if there's no id.
                if ( js.getObjectID( id ) ) {
                    idPattern.append( id );
                    pattern = idPattern.obj();
                }
                else {
                    uassert( 10157,
                             "multi-update requires all modified objects to have an _id",
                             ! multi );
                }
            }

            /* look for $inc etc.  note as listed here, all fields to inc must be this type,
               you can't set some regular ones at the moment. */
            if ( isOperatorUpdate ) {

                if ( multi ) {
                    // go to next record in case this one moves
                    c->advance();

                    // Update operations are deduped for cursors that implement their own
                    // deduplication.  In particular, some geo cursors are excluded.
                    if ( autoDedup ) {

                        if ( seenObjects.count( loc ) ) {
                            continue;
                        }

                        // SERVER-5198 Advance past the document to be modified, provided
                        // deduplication is enabled, but see SERVER-5725.
                        while( c->ok() && loc == c->currLoc() ) {
                            c->advance();
                        }
                    }
                }

                const BSONObj& onDisk = loc.obj();

                ModSet* useMods = mods.get();

                auto_ptr<ModSet> mymodset;
                if ( details.hasElemMatchKey() && mods->hasDynamicArray() ) {
                    useMods = mods->fixDynamicArray( details.elemMatchKey() );
                    mymodset.reset( useMods );
                }

                auto_ptr<ModSetState> mss = useMods->prepare( onDisk,
                                                              false /* not an insertion */ );

                bool willAdvanceCursor = multi && c->ok() &&
                                         ( modsIsIndexed || ! mss->canApplyInPlace() );

                if ( willAdvanceCursor ) {
                    if ( cc.get() ) {
                        cc->setDoingDeletes( true );
                    }
                    c->prepareToTouchEarlierIterate();
                }

                // If we've made it this far, "ns" must contain a valid collection name, and
                // so is of the form "db.collection".  Therefore, the following expression
                // must always be valid.  "system.users" updates must never be done in place,
                // in order to ensure that they are validated inside
                // DataFileMgr::updateRecord(.).
                bool isSystemUsersMod = (NamespaceString(ns).coll == "system.users");

                BSONObj newObj;
                if ( !mss->isUpdateIndexed() && mss->canApplyInPlace() && !isSystemUsersMod ) {
                    mss->applyModsInPlace( true ); // const_cast<BSONObj&>(onDisk) );
                    DEBUGUPDATE( "\t\t\t doing in place update" );
                    if ( !multi )
                        debug.fastmod = true;

                    if ( modsIsIndexed ) {
                        seenObjects.insert( loc );
                    }
                    newObj = loc.obj();
                    d->paddingFits();
                }
                else {
                    newObj = mss->createNewFromMods();
                    checkTooLarge(newObj);
                    DiskLoc newLoc = theDataFileMgr.updateRecord(ns,
                                                                 d,
                                                                 nsdt,
                                                                 r,
                                                                 loc,
                                                                 newObj.objdata(),
                                                                 newObj.objsize(),
                                                                 debug);

                    if ( newLoc != loc || modsIsIndexed ) {
                        // log() << "Moved obj " << newLoc.obj()["_id"] << " from " << loc
                        //       << " to " << newLoc << endl;
                        // object moved, need to make sure we don't get it again
                        seenObjects.insert( newLoc );
                    }
                }

                if ( logop ) {
                    DEV verify( mods->size() );
                    BSONObj logObj = mss->getOpLogRewrite();
                    DEBUGUPDATE( "\t rewrite update: " << logObj );

                    // It is possible that the entire mod set was a no-op over this
                    // document.  We would have an empty log record in that case.  If we
                    // call logOp, with an empty record, that would be replicated as "clear
                    // this record", which is not what we want.  Therefore, to get a no-op
                    // in the replica, we simply don't log.
                    if ( logObj.nFields() ) {
                        logOp("u", ns, logObj, &pattern, 0, fromMigrate, &newObj );
                    }
                }
                numModded++;
                if ( ! multi )
                    return UpdateResult( 1, 1, numModded, BSONObj() );
                if ( willAdvanceCursor )
                    c->recoverFromTouchingEarlierIterate();

                getDur().commitIfNeeded();

                continue;
            }

            uassert( 10158, "multi update only works with $ operators", ! multi );

            BSONElementManipulator::lookForTimestamps( updateobj );
            checkNoMods( updateobj );
            theDataFileMgr.updateRecord(ns, d, nsdt, r, loc, updateobj.objdata(),
                                        updateobj.objsize(), debug, su);
            if ( logop ) {
                DEV wassert( !su ); // super user doesn't get logged, this would be bad.
                logOp("u", ns, updateobj, &pattern, 0, fromMigrate, &updateobj );
            }
            return UpdateResult( 1, 0, 1, BSONObj() );
        } while ( c->ok() );
    } // endif

    if ( numModded )
        return UpdateResult( 1, 1, numModded, BSONObj() );

    if ( upsert ) {
        if ( updateobj.firstElementFieldName()[0] == '$' ) {
            // upsert of an $operation. build a default object
            BSONObj newObj = mods->createNewFromQuery( patternOrig );
            checkNoMods( newObj );
            debug.fastmodinsert = true;
            theDataFileMgr.insertWithObjMod(ns, newObj, false, su);
            if ( logop )
                logOp( "i", ns, newObj, 0, 0, fromMigrate, &newObj );

            return UpdateResult( 0, 1, 1, newObj );
        }
        uassert( 10159, "multi update only works with $ operators", ! multi );
        checkNoMods( updateobj );
        debug.upsert = true;
        BSONObj no = updateobj;
        theDataFileMgr.insertWithObjMod(ns, no, false, su);
        if ( logop )
            logOp( "i", ns, no, 0, 0, fromMigrate, &no );
        return UpdateResult( 0, 0, 1, no );
    }

    return UpdateResult( 0, isOperatorUpdate, 0, BSONObj() );
}
void got( const DiskLoc& loc ) {
    Point p( loc.obj().getFieldDotted( _geoField ) );
    if ( _near.distance( p ) > _maxDistance )
        return;
    _locs.push_back( loc );
}
UpdateResult _updateObjectsNEW( bool su,
                                const char* ns,
                                const BSONObj& updateobj,
                                const BSONObj& patternOrig,
                                bool upsert,
                                bool multi,
                                bool logop,
                                OpDebug& debug,
                                RemoveSaver* rs,
                                bool fromMigrate,
                                const QueryPlanSelectionPolicy& planPolicy,
                                bool forReplication ) {

    // TODO
    //  + Separate UpdateParser from UpdateRunner (the latter should be "stage-y")
    //    + All the yield and deduplicate logic would move to the query stage
    //      portion of it
    //
    //  + Replication related
    //    + fast path for update for query by _id
    //    + support for relaxing viable path constraint in replication
    //
    //  + Field Management
    //    + Force all upserts to contain _id
    //    + Prevent changes to immutable fields (_id, and those mentioned by sharding)
    //
    //  + Yielding related
    //    + $atomic support (or better, support proper yielding if not)
    //    + page fault support

    debug.updateobj = updateobj;

    NamespaceDetails* d = nsdetails( ns );
    NamespaceDetailsTransient* nsdt = &NamespaceDetailsTransient::get( ns );

    // TODO: Put this logic someplace central and check based on constants (maybe using the
    // list of actually excluded config collections, and not globally for the config db).
    NamespaceString nsStr( ns );

    // Should the modifiers validate their embedded docs via okForStorage?
    bool shouldValidate = true;

    // Config db docs shouldn't get checked for valid field names since the shard key can
    // have a dot (".") in it. Therefore we disable validation for storage.
    if ( nsStr.db() == "config" ) {
        LOG(0) << "disabling okForStorage on config db";
        shouldValidate = false;
    }

    UpdateDriver::Options opts;
    opts.multi = multi;
    opts.upsert = upsert;
    opts.logOp = logop;
    opts.modOptions = ModifierInterface::Options( forReplication, shouldValidate );
    UpdateDriver driver( opts );

    // TODO: This copies the index keys, but we may not actually need to.
    Status status = driver.parse( nsdt->indexKeys(), updateobj );
    if ( !status.isOK() ) {
        uasserted( 16840, status.reason() );
    }

    shared_ptr<Cursor> cursor = getOptimizedCursor( ns, patternOrig, BSONObj(), planPolicy );

    // If the update was marked with '$isolated' (a.k.a '$atomic'), we are not allowed to
    // yield while evaluating the update loop below.
    //
    // TODO: Old code checks this repeatedly within the update loop. Is that necessary? It
    // seems that once atomic it should always be atomic.
    const bool isolated =
        cursor->ok() &&
        cursor->matcher() &&
        cursor->matcher()->docMatcher().atomic();

    // The 'cursor' the optimizer gave us may contain query plans that generate duplicate
    // disklocs. We set up here the mechanisms that will prevent us from processing those
    // twice if we see them. We also set up a 'ClientCursor' so that we can support
    // yielding.
    //
    // TODO: Is it valid to call this on a non-ok cursor?
    const bool dedupHere = cursor->autoDedup();

    //
    // We'll start assuming we have one or more documents for this update. (Otherwise,
    // we'll fall back to upserting.)
    //

    // We record that this will not be an upsert, in case a mod doesn't want to be applied
    // when in strict update mode.
    driver.setContext( ModifierInterface::ExecInfo::UPDATE_CONTEXT );

    // Let's fetch each of them and pipe them through the update expression, making sure to
    // keep track of the necessary stats. Recall that we'll be pulling documents out of
    // cursors and some of them do not deduplicate the entries they generate. We have
    // deduping logic in here, too -- for now.
    unordered_set<DiskLoc, DiskLoc::Hasher> seenLocs;
    int numUpdated = 0;
    debug.nscanned = 0;

    Client& client = cc();

    mutablebson::Document doc;

    // If we are going to be yielding, we will need a ClientCursor scoped to this loop. We
    // only loop as long as the underlying cursor is OK.
    for ( auto_ptr<ClientCursor> clientCursor; cursor->ok(); ) {

        // If we haven't constructed a ClientCursor, and if the client allows us to throw
        // page faults, and if we are referring to a location that is likely not in
        // physical memory, then throw a PageFaultException. The entire operation will be
        // restarted.
        if ( clientCursor.get() == NULL &&
             client.allowedToThrowPageFaultException() &&
             !cursor->currLoc().isNull() &&
             !cursor->currLoc().rec()->likelyInPhysicalMemory() ) {
            // We should never throw a PFE if we have already updated items.
            dassert( numUpdated == 0 );
            throw PageFaultException( cursor->currLoc().rec() );
        }

        if ( !isolated && debug.nscanned != 0 ) {

            // We are permitted to yield. To do so we need a ClientCursor, so create one
            // now if we have not yet done so.
            if ( !clientCursor.get() )
                clientCursor.reset(
                    new ClientCursor( QueryOption_NoCursorTimeout, cursor, ns ) );

            // Ask the client cursor to yield. We get two bits of state back: whether or
            // not we yielded, and whether or not we correctly recovered from yielding.
            bool yielded = false;
            const bool recovered = clientCursor->yieldSometimes(
                ClientCursor::WillNeed, &yielded );

            // If we couldn't recover from the yield, or if the cursor died while we were
            // yielded, get out of the update loop right away. We don't need to reset
            // 'clientCursor' since we are leaving the scope.
            if ( !recovered || !cursor->ok() )
                break;

            if ( yielded ) {
                // Details about our namespace may have changed while we were yielded, so
                // we re-acquire them here. If we can't do so, escape the update
                // loop. Otherwise, refresh the driver so that it knows about what is
                // currently indexed.
                d = nsdetails( ns );
                if ( !d )
                    break;
                nsdt = &NamespaceDetailsTransient::get( ns );

                // TODO: This copies the index keys, but it may not need to do so.
                driver.refreshIndexKeys( nsdt->indexKeys() );
            }
        }

        // Let's fetch the next candidate object for this update.
        Record* r = cursor->_current();
        DiskLoc loc = cursor->currLoc();
        const BSONObj oldObj = loc.obj();

        // We count how many documents we scanned even though we may skip those that are
        // deemed duplicates. The final 'numUpdated' and 'nscanned' numbers may differ for
        // that reason.
        debug.nscanned++;

        // Skip this document if it:
        // a) doesn't match the query portion of the update
        // b) was deemed a duplicate by the underlying cursor machinery
        //
        // Now, if we are going to update the document,
        // c) we don't want to do so while the cursor is at it, as that may invalidate
        // the cursor. So, we advance to the next document before issuing the update.
        MatchDetails matchDetails;
        matchDetails.requestElemMatchKey();
        if ( !cursor->currentMatches( &matchDetails ) ) {
            // a)
            cursor->advance();
            continue;
        }
        else if ( cursor->getsetdup( loc ) && dedupHere ) {
            // b)
            cursor->advance();
            continue;
        }
        else if ( driver.dollarModMode() && multi ) {
            // c)
            cursor->advance();
            if ( dedupHere ) {
                if ( seenLocs.count( loc ) ) {
                    continue;
                }
            }

            // There are certain kinds of cursors that hold multiple pointers to data
            // underneath. $or cursors are one example. In a $or cursor, it may be the
            // case that when we did the last advance(), we finished consuming documents
            // from one $or child and started consuming the next one. In that case, it is
            // possible that the last document of the previous child is the same as the
            // first document of the next (see SERVER-5198 and jstests/orp.js).
            //
            // So we advance the cursor here until we see a new diskloc.
            //
            // Note that we won't be yielding, and we may not do so for a while if we find
            // a particularly duplicated sequence of locs. That is highly unlikely,
            // though. (See SERVER-5725, if curious, but "stage"-based $or will make that
            // ticket moot).
            while ( cursor->ok() && loc == cursor->currLoc() ) {
                cursor->advance();
            }
        }

        // For some (unfortunate) historical reasons, not all cursors would be valid after
        // a write simply because we advanced them to a document not affected by the write.
        // To protect in those cases, not only do we engage in the advance() logic above,
        // but we also tell the cursor we're about to write a document that we've just
        // seen. prepareToTouchEarlierIterate() requires calling
        // recoverFromTouchingEarlierIterate() later, so we make a note here to do so.
        bool touchPreviousDoc = multi && cursor->ok();
        if ( touchPreviousDoc ) {
            if ( clientCursor.get() )
                clientCursor->setDoingDeletes( true );
            cursor->prepareToTouchEarlierIterate();
        }

        // Ask the driver to apply the mods. It may be that the driver can apply those "in
        // place", that is, some values of the old document just get adjusted without any
        // change to the binary layout on the bson layer. It may be that a whole new
        // document is needed to accommodate the new bson layout of the resulting document.
        doc.reset( oldObj, mutablebson::Document::kInPlaceEnabled );
        BSONObj logObj;
        StringData matchedField = matchDetails.hasElemMatchKey() ?
                                  matchDetails.elemMatchKey() : StringData();
        status = driver.update( matchedField, &doc, &logObj );
        if ( !status.isOK() ) {
            uasserted( 16837, status.reason() );
        }

        // If the driver applied the mods in place, we can ask the mutable document for
        // what changed. We call those changes "damages". :) We use the damages to inform
        // the journal what was changed, and then apply them to the original document
        // ourselves. If, however, the driver applied the mods out of place, we ask it to
        // generate a new, modified document for us. In that case, the file manager will
        // take care of the journaling details for us.
        //
        // This code flow is admittedly odd. But, right now, journaling is baked into the
        // file manager. And if we aren't using the file manager, we have to do journaling
        // ourselves.
        bool objectWasChanged = false;
        BSONObj newObj;
        const char* source = NULL;
        mutablebson::DamageVector damages;
        bool inPlace = doc.getInPlaceUpdates( &damages, &source );
        if ( inPlace && !damages.empty() && !driver.modsAffectIndices() ) {
            d->paddingFits();

            // All updates were in place. Apply them via durability and writing pointer.
            mutablebson::DamageVector::const_iterator where = damages.begin();
            const mutablebson::DamageVector::const_iterator end = damages.end();
            for ( ; where != end; ++where ) {
                const char* sourcePtr = source + where->sourceOffset;
                void* targetPtr = getDur().writingPtr(
                    const_cast<char*>( oldObj.objdata() ) + where->targetOffset,
                    where->size );
                std::memcpy( targetPtr, sourcePtr, where->size );
            }
            newObj = oldObj;
            debug.fastmod = true;
            objectWasChanged = true;
        }
        else {
            // The updates were not in place. Apply them through the file manager.
            newObj = doc.getObject();
            DiskLoc newLoc = theDataFileMgr.updateRecord( ns, d, nsdt, r, loc,
                                                          newObj.objdata(),
                                                          newObj.objsize(),
                                                          debug );

            // If we've moved this object to a new location, make sure we don't apply
            // that update again if our traversal picks the object again.
            //
            // We also take note of the diskloc if the updates are affecting indices.
            // Chances are that we're traversing one of them and they may be multikey and
            // therefore hold duplicate disklocs.
            if ( newLoc != loc || driver.modsAffectIndices() ) {
                seenLocs.insert( newLoc );
            }

            objectWasChanged = true;
        }

        // Log Obj
        if ( logop ) {
            if ( !logObj.isEmpty() ) {
                BSONObj idQuery = driver.makeOplogEntryQuery( newObj, multi );
                logOp( "u", ns, logObj, &idQuery, 0, fromMigrate, &newObj );
            }
        }

        // If we applied any in-place updates, or asked the DataFileMgr to write for us,
        // then count this as an update.
        if ( objectWasChanged )
            numUpdated++;

        if ( !multi ) {
            break;
        }

        // If we used the cursor mechanism that prepares an earlier seen document for a
        // write, we need to tell that mechanism that the write is over.
        if ( touchPreviousDoc ) {
            cursor->recoverFromTouchingEarlierIterate();
        }

        getDur().commitIfNeeded();
    }

    if ( numUpdated > 0 ) {
        return UpdateResult( true /* updated existing object(s) */,
                             driver.dollarModMode() /* $mod or obj replacement */,
                             numUpdated /* # of documents updated */,
                             BSONObj() );
    }
    else if ( numUpdated == 0 && !upsert ) {
        return UpdateResult( false /* no object updated */,
                             driver.dollarModMode() /* $mod or obj replacement */,
                             0 /* no updates */,
                             BSONObj() );
    }

    //
    // We haven't succeeded in updating any existing document, but upserts are allowed.
    //

    // If this is a $mod-based update, we need to generate a document by examining the
    // query and the mods. Otherwise, we can use the object replacement sent by the user
    // update command that was parsed by the driver before.
    BSONObj oldObj;
    if ( *updateobj.firstElementFieldName() == '$' ) {
        if ( !driver.createFromQuery( patternOrig, &oldObj ) ) {
            uasserted( 16835, "cannot create object to update" );
        }
        debug.fastmodinsert = true;
    }
    else {
        // Copy the _id
        if ( patternOrig.hasElement( "_id" ) ) {
            oldObj = patternOrig.getField( "_id" ).wrap();
        }
        debug.upsert = true;
    }

    // Since this is an upsert, we will be oplogging it as an insert. We don't
    // need the driver's help to build the oplog record, then. We also set the
    // context of the update driver to an "upsert". Some mods may only work in that
    // context (e.g. $setOnInsert).
    driver.setLogOp( false );
    driver.setContext( ModifierInterface::ExecInfo::INSERT_CONTEXT );

    doc.reset( oldObj, mutablebson::Document::kInPlaceDisabled );
    status = driver.update( StringData(), &doc, NULL /* no oplog record */ );
    if ( !status.isOK() ) {
        uasserted( 16836, status.reason() );
    }

    BSONObj newObj = doc.getObject();
    theDataFileMgr.insertWithObjMod( ns, newObj, false, su );
    if ( logop ) {
        logOp( "i", ns, newObj, 0, 0, fromMigrate, &newObj );
    }

    return UpdateResult( false /* updated a non-existing document */,
                         driver.dollarModMode() /* $mod or obj replacement? */,
                         1 /* count of updated documents */,
                         newObj /* object that was upserted */ );
}
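// The in-place path above hinges on the "damage vector" idea: the update driver reports
// each changed byte span as a (sourceOffset, targetOffset, size) triple, and the caller
// patches the on-disk object span by span. The following is a minimal standalone sketch
// of that mechanism, not code from the tree: 'Damage' and 'applyDamages' are hypothetical
// stand-ins for mutablebson::DamageEvent and the loop above, and the getDur().writingPtr()
// durability hook is deliberately elided.

#include <cstddef>
#include <cstring>
#include <vector>

// One contiguous span of bytes in the target buffer that must be overwritten with
// bytes from the source buffer produced by the update driver.
struct Damage {
    size_t sourceOffset;  // where the new bytes start in 'source'
    size_t targetOffset;  // where they land in the on-disk object
    size_t size;          // how many bytes to copy
};

// Apply each damage with a plain memcpy. The real loop routes the target pointer
// through the journal first so durability learns about the change.
void applyDamages( char* target,
                   const char* source,
                   const std::vector<Damage>& damages ) {
    for ( std::vector<Damage>::const_iterator it = damages.begin();
          it != damages.end(); ++it ) {
        std::memcpy( target + it->targetOffset, source + it->sourceOffset, it->size );
    }
}

// Usage sketch: patching {x: 1} to {x: 2} would yield a single Damage covering the
// 4-byte int32 payload of 'x'; nothing else in the buffer is touched, which is why
// this path can skip rewriting (and re-padding) the whole record.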
UpdateResult _updateObjectsNEW( bool su,
                                const char* ns,
                                const BSONObj& updateobj,
                                const BSONObj& patternOrig,
                                bool upsert,
                                bool multi,
                                bool logop,
                                OpDebug& debug,
                                RemoveSaver* rs,
                                bool fromMigrate,
                                const QueryPlanSelectionPolicy& planPolicy,
                                bool forReplication ) {

    // TODO
    //  + Separate UpdateParser from UpdateRunner (the latter should be "stage-y")
    //    + All the yield and deduplicate logic would move to the query stage
    //      portion of it
    //
    //  + Replication related
    //    + fast path for update for query by _id
    //    + support for relaxing viable path constraint in replication
    //
    //  + Field Management
    //    + Force all upserts to contain _id
    //    + Prevent changes to immutable fields (_id, and those mentioned by sharding)
    //
    //  + Yielding related
    //    + $atomic support (or better, support proper yielding if not)
    //    + page fault support

    debug.updateobj = updateobj;

    NamespaceDetails* d = nsdetails( ns );
    NamespaceDetailsTransient* nsdt = &NamespaceDetailsTransient::get( ns );

    UpdateDriver::Options opts;
    opts.multi = multi;
    opts.upsert = upsert;
    opts.logOp = logop;
    UpdateDriver driver( opts );
    Status status = driver.parse( nsdt->indexKeys(), updateobj );
    if ( !status.isOK() ) {
        uasserted( 16840, status.reason() );
    }

    shared_ptr<Cursor> cursor = getOptimizedCursor( ns, patternOrig, BSONObj(), planPolicy );

    // The 'cursor' the optimizer gave us may contain query plans that generate duplicate
    // disklocs. We set up here the mechanisms that will prevent us from processing those
    // twice if we see them. We also set up a 'ClientCursor' so that we can support
    // yielding.
    const bool dedupHere = cursor->autoDedup();
    shared_ptr<Cursor> cPtr = cursor;
    auto_ptr<ClientCursor> clientCursor( new ClientCursor( QueryOption_NoCursorTimeout,
                                                           cPtr,
                                                           ns ) );

    //
    // Upsert Logic
    //

    // We may or may not have documents for this update. If we don't, then try to upsert,
    // if allowed.
    if ( !cursor->ok() && upsert ) {

        // If this is a $mod-based update, we need to generate a document by examining the
        // query and the mods. Otherwise, we can use the object replacement sent by the
        // user update command that was parsed by the driver before.
        BSONObj oldObj;
        if ( *updateobj.firstElementFieldName() == '$' ) {
            if ( !driver.createFromQuery( patternOrig, &oldObj ) ) {
                uasserted( 16835, "cannot create object to update" );
            }
            debug.fastmodinsert = true;
        }
        else {
            debug.upsert = true;
        }

        // Since this is an upsert, we will be oplogging it as an insert. We don't
        // need the driver's help to build the oplog record, then. We also set the
        // context of the update driver to an "upsert". Some mods may only work in that
        // context (e.g. $setOnInsert).
        driver.setLogOp( false );
        driver.setContext( ModifierInterface::ExecInfo::INSERT_CONTEXT );

        mutablebson::Document doc( oldObj, mutablebson::Document::kInPlaceDisabled );
        status = driver.update( StringData(), &doc, NULL /* no oplog record */ );
        if ( !status.isOK() ) {
            uasserted( 16836, status.reason() );
        }

        BSONObj newObj = doc.getObject();
        theDataFileMgr.insertWithObjMod( ns, newObj, false, su );
        if ( logop ) {
            logOp( "i", ns, newObj, 0, 0, fromMigrate, &newObj );
        }

        return UpdateResult( false /* updated a non-existing document */,
                             driver.dollarModMode() /* $mod or obj replacement? */,
                             1 /* count of updated documents */,
                             newObj /* object that was upserted */ );
    }

    //
    // We have one or more documents for this update.
    //

    // We record that this will not be an upsert, in case a mod doesn't want to be applied
    // when in strict update mode.
    driver.setContext( ModifierInterface::ExecInfo::UPDATE_CONTEXT );

    // Let's fetch each of them and pipe them through the update expression, making sure to
    // keep track of the necessary stats. Recall that we'll be pulling documents out of
    // cursors and some of them do not deduplicate the entries they generate. We have
    // deduping logic in here, too -- for now.
    unordered_set<DiskLoc, DiskLoc::Hasher> seenLocs;
    int numUpdated = 0;
    debug.nscanned = 0;
    while ( cursor->ok() ) {

        // Let's fetch the next candidate object for this update.
        Record* r = cursor->_current();
        DiskLoc loc = cursor->currLoc();
        const BSONObj oldObj = loc.obj();

        // We count how many documents we scanned even though we may skip those that are
        // deemed duplicates. The final 'numUpdated' and 'nscanned' numbers may differ for
        // that reason.
        debug.nscanned++;

        // Skip this document if it:
        // a) doesn't match the query portion of the update
        // b) was deemed a duplicate by the underlying cursor machinery
        //
        // Now, if we are going to update the document,
        // c) we don't want to do so while the cursor is at it, as that may invalidate
        // the cursor. So, we advance to the next document before issuing the update.
        MatchDetails matchDetails;
        matchDetails.requestElemMatchKey();
        if ( !cursor->currentMatches( &matchDetails ) ) {
            // a)
            cursor->advance();
            continue;
        }
        else if ( cursor->getsetdup( loc ) && dedupHere ) {
            // b)
            cursor->advance();
            continue;
        }
        else if ( driver.dollarModMode() && multi ) {
            // c)
            cursor->advance();
            if ( dedupHere ) {
                if ( seenLocs.count( loc ) ) {
                    continue;
                }
            }

            // There are certain kinds of cursors that hold multiple pointers to data
            // underneath. $or cursors are one example. In a $or cursor, it may be the
            // case that when we did the last advance(), we finished consuming documents
            // from one $or child and started consuming the next one. In that case, it is
            // possible that the last document of the previous child is the same as the
            // first document of the next (see SERVER-5198 and jstests/orp.js).
            //
            // So we advance the cursor here until we see a new diskloc.
            //
            // Note that we won't be yielding, and we may not do so for a while if we find
            // a particularly duplicated sequence of locs. That is highly unlikely,
            // though. (See SERVER-5725, if curious, but "stage"-based $or will make that
            // ticket moot).
            while ( cursor->ok() && loc == cursor->currLoc() ) {
                cursor->advance();
            }
        }

        // For some (unfortunate) historical reasons, not all cursors would be valid after
        // a write simply because we advanced them to a document not affected by the write.
        // To protect in those cases, not only do we engage in the advance() logic above,
        // but we also tell the cursor we're about to write a document that we've just
        // seen. prepareToTouchEarlierIterate() requires calling
        // recoverFromTouchingEarlierIterate() later, so we make a note here to do so.
        bool touchPreviousDoc = multi && cursor->ok();
        if ( touchPreviousDoc ) {
            clientCursor->setDoingDeletes( true );
            cursor->prepareToTouchEarlierIterate();
        }

        // Ask the driver to apply the mods. It may be that the driver can apply those "in
        // place", that is, some values of the old document just get adjusted without any
        // change to the binary layout on the bson layer. It may be that a whole new
        // document is needed to accommodate the new bson layout of the resulting document.
        mutablebson::Document doc( oldObj, mutablebson::Document::kInPlaceEnabled );
        BSONObj logObj;
        StringData matchedField = matchDetails.hasElemMatchKey() ?
                                  matchDetails.elemMatchKey() : StringData();
        status = driver.update( matchedField, &doc, &logObj );
        if ( !status.isOK() ) {
            uasserted( 16837, status.reason() );
        }

        // If the driver applied the mods in place, we can ask the mutable document for
        // what changed. We call those changes "damages". :) We use the damages to inform
        // the journal what was changed, and then apply them to the original document
        // ourselves. If, however, the driver applied the mods out of place, we ask it to
        // generate a new, modified document for us. In that case, the file manager will
        // take care of the journaling details for us.
        //
        // This code flow is admittedly odd. But, right now, journaling is baked into the
        // file manager. And if we aren't using the file manager, we have to do journaling
        // ourselves.
        BSONObj newObj;
        const char* source = NULL;
        mutablebson::DamageVector damages;
        bool inPlace = doc.getInPlaceUpdates( &damages, &source );
        if ( inPlace && !driver.modsAffectIndices() ) {
            // All updates were in place. Apply them via durability and writing pointer.
            mutablebson::DamageVector::const_iterator where = damages.begin();
            const mutablebson::DamageVector::const_iterator end = damages.end();
            for ( ; where != end; ++where ) {
                const char* sourcePtr = source + where->sourceOffset;
                void* targetPtr = getDur().writingPtr(
                    const_cast<char*>( oldObj.objdata() ) + where->targetOffset,
                    where->size );
                std::memcpy( targetPtr, sourcePtr, where->size );
            }
            newObj = oldObj;
            debug.fastmod = true;
        }
        else {
            // The updates were not in place. Apply them through the file manager.
            newObj = doc.getObject();
            DiskLoc newLoc = theDataFileMgr.updateRecord( ns, d, nsdt, r, loc,
                                                          newObj.objdata(),
                                                          newObj.objsize(),
                                                          debug );

            // If we've moved this object to a new location, make sure we don't apply
            // that update again if our traversal picks the object again.
            //
            // We also take note of the diskloc if the updates are affecting indices.
            // Chances are that we're traversing one of them and they may be multikey and
            // therefore hold duplicate disklocs.
            if ( newLoc != loc || driver.modsAffectIndices() ) {
                seenLocs.insert( newLoc );
            }
        }

        // Log Obj
        if ( logop ) {
            if ( !logObj.isEmpty() ) {
                BSONObj pattern = patternOrig;
                logOp( "u", ns, logObj, &pattern, 0, fromMigrate, &newObj );
            }
        }

        // One more document updated.
        numUpdated++;

        if ( !multi ) {
            break;
        }

        // If we used the cursor mechanism that prepares an earlier seen document for a
        // write, we need to tell that mechanism that the write is over.
        if ( touchPreviousDoc ) {
            cursor->recoverFromTouchingEarlierIterate();
        }

        getDur().commitIfNeeded();
    }

    return UpdateResult( true /* updated existing object(s) */,
                         driver.dollarModMode() /* $mod or obj replacement */,
                         numUpdated /* # of documents updated */,
                         BSONObj() );
}
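// Both variants above lean on 'seenLocs' to avoid re-updating a record that a multikey
// index scan (or a moved record) surfaces twice. The sketch below shows that dedup idea
// in isolation; it is illustrative only. 'Loc' and 'LocHasher' are hypothetical
// stand-ins for DiskLoc and DiskLoc::Hasher, and std::unordered_set (C++11) stands in
// for the tree's own unordered_set.

#include <cstdint>
#include <functional>
#include <unordered_set>

// A record location: a data-file number plus a byte offset within that file.
struct Loc {
    int file;
    int ofs;
    bool operator==( const Loc& other ) const {
        return file == other.file && ofs == other.ofs;
    }
};

// Pack both fields into one 64-bit value and hash that, so two locations
// collide only if both file and offset match.
struct LocHasher {
    size_t operator()( const Loc& l ) const {
        const int64_t packed =
            ( static_cast<int64_t>( l.file ) << 32 ) | static_cast<uint32_t>( l.ofs );
        return std::hash<int64_t>()( packed );
    }
};

// Returns true the first time a location is seen and false on any repeat --
// the same test the update loops perform with seenLocs.count(loc) before
// applying mods, and with seenLocs.insert(newLoc) after a record moves.
bool noteLocation( std::unordered_set<Loc, LocHasher>& seen, const Loc& loc ) {
    return seen.insert( loc ).second;
}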
/* note: this is only (as-is) called for
         - not multi
         - mods not indexed
         - not upsert
*/
static UpdateResult _updateById( bool isOperatorUpdate,
                                 int idIdxNo,
                                 ModSet* mods,
                                 int profile,
                                 NamespaceDetails* d,
                                 NamespaceDetailsTransient* nsdt,
                                 bool su,
                                 const char* ns,
                                 const BSONObj& updateobj,
                                 BSONObj patternOrig,
                                 bool logop,
                                 OpDebug& debug,
                                 bool fromMigrate = false ) {

    DiskLoc loc;
    {
        IndexDetails& i = d->idx( idIdxNo );
        BSONObj key = i.getKeyFromQuery( patternOrig );
        loc = i.idxInterface().findSingle( i, i.head, key );
        if ( loc.isNull() ) {
            // No upsert support in _updateById yet, so we are done.
            return UpdateResult( 0, 0, 0, BSONObj() );
        }
    }
    Record* r = loc.rec();

    if ( cc().allowedToThrowPageFaultException() && !r->likelyInPhysicalMemory() ) {
        throw PageFaultException( r );
    }

    /* look for $inc etc. note: as listed here, all fields to inc must be of this type;
       you can't set some regular ones at the moment. */
    if ( isOperatorUpdate ) {
        const BSONObj& onDisk = loc.obj();
        auto_ptr<ModSetState> mss = mods->prepare( onDisk );

        if ( mss->canApplyInPlace() ) {
            mss->applyModsInPlace( true );
            DEBUGUPDATE( "\t\t\t updateById doing in place update" );
        }
        else {
            BSONObj newObj = mss->createNewFromMods();
            checkTooLarge( newObj );
            verify( nsdt );
            theDataFileMgr.updateRecord( ns, d, nsdt, r, loc,
                                         newObj.objdata(), newObj.objsize(), debug );
        }

        if ( logop ) {
            DEV verify( mods->size() );
            BSONObj pattern = patternOrig;
            if ( mss->haveArrayDepMod() ) {
                BSONObjBuilder patternBuilder;
                patternBuilder.appendElements( pattern );
                mss->appendSizeSpecForArrayDepMods( patternBuilder );
                pattern = patternBuilder.obj();
            }

            if ( mss->needOpLogRewrite() ) {
                DEBUGUPDATE( "\t rewrite update: " << mss->getOpLogRewrite() );
                logOp( "u", ns, mss->getOpLogRewrite(), &pattern, 0, fromMigrate );
            }
            else {
                logOp( "u", ns, updateobj, &pattern, 0, fromMigrate );
            }
        }
        return UpdateResult( 1, 1, 1, BSONObj() );
    } // end $operator update

    // regular update
    BSONElementManipulator::lookForTimestamps( updateobj );
    checkNoMods( updateobj );
    verify( nsdt );
    theDataFileMgr.updateRecord( ns, d, nsdt, r, loc,
                                 updateobj.objdata(), updateobj.objsize(), debug );
    if ( logop ) {
        logOp( "u", ns, updateobj, &patternOrig, 0, fromMigrate );
    }
    return UpdateResult( 1, 0, 1, BSONObj() );
}
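// _updateById branches on 'isOperatorUpdate', and both _updateObjectsNEW variants make
// the equivalent decision by peeking at the first field name of the update object:
// a leading '$' marks a modifier-style update ($set, $inc, ...), anything else is a
// whole-document replacement. A minimal sketch of that test follows; it uses a plain
// string where the real code inspects *updateobj.firstElementFieldName(), so the
// helper name and signature are illustrative, not from the tree.

#include <cassert>
#include <string>

// True when the update document's first field name indicates a modifier-style
// update rather than a full object replacement.
bool isModifierUpdate( const std::string& firstFieldName ) {
    return !firstFieldName.empty() && firstFieldName[0] == '$';
}

// Usage sketch:
//   isModifierUpdate( "$set" )  -> true   ({$set: {x: 1}} applies mods to the old doc)
//   isModifierUpdate( "x" )     -> false  ({x: 1} replaces the matched doc wholesale)
// This is also why the upsert paths above call driver.createFromQuery() only in the
// '$' case: a replacement-style upsert already carries the full document to insert.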