/**
 * Builds the single index described by objs[0] (lock reason: "building hot index").
 *
 * Sequence, as performed below:
 *   1. Take a Lock::DBWrite on ns and open a DB_SERIALIZABLE transaction.
 *   2. Get or create the collection named by the spec's "ns" field. If an index
 *      with the same key pattern already exists, commit and return quietly.
 *   3. Otherwise insert the spec via _insertObjects(), create an indexer and
 *      prepare() it.
 *   4. Run indexer->build() while a Lock::DBWrite::Downgrade is in scope.
 *   5. Commit the indexer, then the transaction.
 * isMasterNs(ns) is asserted before each phase (codes 16902, 16906, 16907).
 *
 * @param ns    namespace used for locking, the master check, Client::Context
 *              and _insertObjects()
 * @param m     original message; only consulted inside the DEV block
 * @param objs  must contain exactly one index spec (uassert 16905)
 *
 * NOTE(review): objs is taken by value, so the vector is copied on every call;
 * a const reference would presumably suffice -- confirm no caller relies on the copy.
 */
static void _buildHotIndex(const char *ns, Message &m, const vector<BSONObj> objs) {
    uassert(16905, "Can only build one index at a time.", objs.size() == 1);

    DEV {
        // System.indexes cannot be sharded.
        Client::ShardedOperationScope sc;
        verify(!sc.handlePossibleShardedMessage(m, 0));
    }

    LOCK_REASON(lockReason, "building hot index");
    // Heap-allocated so the lock can be handed to Lock::DBWrite::Downgrade below.
    scoped_ptr<Lock::DBWrite> lk(new Lock::DBWrite(ns, lockReason));

    uassert(16902, "not master", isMasterNs(ns));

    const BSONObj &info = objs[0];
    const StringData &coll = info["ns"].Stringdata();

    scoped_ptr<Client::Transaction> transaction(new Client::Transaction(DB_SERIALIZABLE));
    shared_ptr<Collection::Indexer> indexer;

    // Prepare the index build. Performs index validation and marks
    // the collection as having an index build in progress.
    {
        Client::Context ctx(ns);
        Collection *cl = getOrCreateCollection(coll, true);
        if (cl->findIndexByKeyPattern(info["key"].Obj()) >= 0) {
            // No error or action if the index already exists. We need to commit
            // the transaction in case this is an ensure index on the _id field
            // and the ns was created by getOrCreateCollection()
            transaction->commit();
            return;
        }

        _insertObjects(ns, objs, false, 0, true);
        indexer = cl->newIndexer(info, true);
        indexer->prepare();
    }

    // Perform the index build
    {
        Lock::DBWrite::Downgrade dg(lk);
        // Re-check primary status: it may have changed since the indexer was set up.
        uassert(16906, "not master: after indexer setup but before build", isMasterNs(ns));

        Client::Context ctx(ns);
        indexer->build();
    }

    // The Downgrade object has gone out of scope here; check primary status again.
    uassert(16907, "not master: after indexer build but before commit", isMasterNs(ns));

    // Commit the index build
    {
        Client::Context ctx(ns);
        indexer->commit();
    }
    transaction->commit();
}
/**
 * Handles an update message: decodes the flags, selector and update document
 * from m, applies the update under the global write lock, and records the
 * outcome in lastError.
 *
 * @param m   incoming message holding flags, selector and update document
 * @param op  current operation; receives the namespace and query for diagnostics
 */
void receivedUpdate(Message& m, CurOp& op) {
    DbMessage dbMsg(m);
    const char* const ns = dbMsg.getns();
    op.debug().ns = ns;

    const int flags = dbMsg.pullInt();

    // The selector comes first and must be followed by the update document.
    BSONObj selector = dbMsg.nextJsObj();
    assert( dbMsg.moreJSObjs() );
    assert( selector.objsize() < m.header()->dataLen() );

    BSONObj updateSpec = dbMsg.nextJsObj();
    uassert( 10055 , "update object too large", updateSpec.objsize() <= BSONObjMaxUserSize );
    assert( updateSpec.objsize() < m.header()->dataLen() );
    assert( selector.objsize() + updateSpec.objsize() < m.header()->dataLen() );

    const bool upsert    = ( flags & UpdateOption_Upsert ) != 0;
    const bool multi     = ( flags & UpdateOption_Multi ) != 0;
    const bool broadcast = ( flags & UpdateOption_Broadcast ) != 0;

    op.debug().query = selector;
    op.setQuery(selector);

    // writelock is used to synchronize stepdowns w/ writes
    writelock lk;
    uassert( 10054 , "not master", isMasterNs( ns ) );

    // if this ever moves to outside of lock, need to adjust check Client::Context::_finishInit
    if ( !broadcast ) {
        if ( handlePossibleShardedMessage( m , 0 ) )
            return;
    }

    Client::Context ctx( ns );

    const UpdateResult outcome =
        updateObjects( ns, updateSpec, selector, upsert, multi, true, op.debug() );

    // for getlasterror
    lastError.getSafe()->recordUpdate( outcome.existing , outcome.num , outcome.upserted );
}
/**
 * Handles a delete message: decodes the flags and the match pattern from m,
 * removes the matching documents under a write lock on ns, and records the
 * number removed in lastError.
 *
 * @param m   incoming message holding flags and the delete pattern
 * @param op  current operation; receives the namespace and query for diagnostics
 *
 * Raises (via uassert 10056) "not master" when isMasterNs(ns) is false.
 */
void receivedDelete(Message& m, CurOp& op) {
    DbMessage d(m);
    const char *ns = d.getns();
    assert(*ns);
    // NOTE(review): this check runs before the write lock is taken; sibling
    // handlers perform it under the lock -- confirm whether that matters here.
    uassert( 10056 , "not master", isMasterNs( ns ) );
    op.debug().str << ns << ' ';

    int flags = d.pullInt();
    bool justOne = flags & RemoveOption_JustOne;
    bool broadcast = flags & RemoveOption_Broadcast;

    assert( d.moreJSObjs() );
    BSONObj pattern = d.nextJsObj();
    {
        string s = pattern.toString();
        op.debug().str << " query: " << s;
        op.setQuery(pattern);
    }

    writelock lk(ns);

    // if this ever moves to outside of lock, need to adjust check Client::Context::_finishInit
    //
    // Logical && (not bitwise &) so that a broadcast delete short-circuits and
    // never invokes the sharded-message handler.
    if ( ! broadcast && handlePossibleShardedMessage( m , 0 ) )
        return;

    Client::Context ctx(ns);

    long long n = deleteObjects(ns, pattern, justOne, true);
    lastError.getSafe()->recordDelete( n );
}
/**
 * Handles an update message (stringstream-logging variant): decodes flags,
 * selector and update document from m, applies the update, and records the
 * result for getlasterror.
 *
 * @param m   incoming message
 * @param ss  diagnostic stream; receives the namespace and the query text
 */
void receivedUpdate(Message& m, stringstream& ss) {
    DbMessage dbMsg(m);
    const char* const ns = dbMsg.getns();
    assert(*ns);
    uassert( "not master", isMasterNs( ns ) );

    setClient(ns);
    Client& self = cc();
    self.top.setWrite();
    ss << ns << ' ';

    const int flags = dbMsg.pullInt();

    // The selector comes first and must be followed by the update document.
    BSONObj selector = dbMsg.nextJsObj();
    assert( dbMsg.moreJSObjs() );
    assert( selector.objsize() < m.data->dataLen() );

    BSONObj updateSpec = dbMsg.nextJsObj();
    uassert("update object too large", updateSpec.objsize() <= MaxBSONObjectSize);
    assert( updateSpec.objsize() < m.data->dataLen() );
    assert( selector.objsize() + updateSpec.objsize() < m.data->dataLen() );

    const bool upsert = ( flags & Option_Upsert ) != 0;
    const bool multi  = ( flags & Option_Multi ) != 0;

    /* todo: we shouldn't do all this ss stuff when we don't need it, it will slow us down. */
    const string selectorText = selector.toString();
    ss << " query: " << selectorText;

    // Copy the query text into the current op's fixed-size buffer,
    // leaving the final two bytes untouched.
    CurOp& running = *self.curop();
    strncpy(running.query, selectorText.c_str(), sizeof(running.query)-2);

    const UpdateResult outcome =
        updateObjects(ns, updateSpec, selector, upsert, multi, ss, true);

    // for getlasterror
    recordUpdate( outcome.existing , outcome.num );
}
static bool checkIsMasterForCollection(const NamespaceString& ns, WriteErrorDetail** error) { if (!isMasterNs(ns.ns().c_str())) { WriteErrorDetail* errorDetail = *error = new WriteErrorDetail; errorDetail->setErrCode(ErrorCodes::NotMaster); errorDetail->setErrMessage(std::string(mongoutils::str::stream() << "Not primary while writing to " << ns.ns())); return false; } return true; }
static bool checkIsMasterForCollection(const std::string& ns, WriteOpResult* result) { if (!isMasterNs(ns.c_str())) { WriteErrorDetail* errorDetail = new WriteErrorDetail; result->setError(errorDetail); errorDetail->setErrCode(ErrorCodes::NotMaster); errorDetail->setErrMessage("Not primary while writing to " + ns); return false; } return true; }
/**
 * Handles an insert message carrying one or more documents.
 *
 * Authorization is checked up front for every collection except
 * "system.indexes". The insert itself runs under a DB write lock inside a
 * loop that retries after a PageFaultException.
 *
 * @param m   incoming message
 * @param op  current operation; receives the namespace and insert count
 */
void receivedInsert(Message& m, CurOp& op) {
    DbMessage dbMsg(m);
    const char* const ns = dbMsg.getns();
    op.debug().ns = ns;

    // Auth checking for index writes happens later.
    if (NamespaceString(ns).coll != "system.indexes") {
        Status authStatus = cc().getAuthorizationManager()->checkAuthForInsert(ns);
        uassert(16544, authStatus.reason(), authStatus.isOK());
    }

    if ( !dbMsg.moreJSObjs() ) {
        // strange.  should we complain?
        return;
    }

    BSONObj first = dbMsg.nextJsObj();

    // 'batch' stays empty for a single-document insert; otherwise it holds
    // every document of the message, starting with 'first'.
    vector<BSONObj> batch;
    if ( dbMsg.moreJSObjs() ) {
        batch.push_back(first);
        do {
            batch.push_back( dbMsg.nextJsObj() );
        } while ( dbMsg.moreJSObjs() );
    }

    PageFaultRetryableSection s;
    for (;;) {
        try {
            Lock::DBWrite lk(ns);

            // CONCURRENCY TODO: is being read locked in big log sufficient here?
            // writelock is used to synchronize stepdowns w/ writes
            uassert( 10058 , "not master", isMasterNs(ns) );

            if ( handlePossibleShardedMessage( m , 0 ) )
                return;

            Client::Context ctx(ns);

            if ( batch.empty() ) {
                checkAndInsert(ns, first);
                globalOpCounters.incInsertInWriteLock(1);
                op.debug().ninserted = 1;
            }
            else {
                const bool keepGoing =
                    dbMsg.reservedField() & InsertOption_ContinueOnError;
                insertMulti(keepGoing, ns, batch, op);
            }
            return;
        }
        catch ( PageFaultException& e ) {
            // Touch the faulting page, then retry the whole attempt.
            e.touch();
        }
    }
}
static void lockedReceivedUpdate(const char *ns, Message &m, CurOp &op, const BSONObj &updateobj, const BSONObj &query, const bool upsert, const bool multi) { // void ReplSetImpl::relinquish() uses big write lock so // this is thus synchronized given our lock above. uassert(10054, "not master", isMasterNs(ns)); Client::Context ctx(ns); scoped_ptr<Client::AlternateTransactionStack> altStack(opNeedsAltTxn(ns) ? new Client::AlternateTransactionStack : NULL); Client::Transaction transaction(DB_SERIALIZABLE); UpdateResult res = updateObjects(ns, updateobj, query, upsert, multi, true); transaction.commit(); lastError.getSafe()->recordUpdate( res.existing , res.num , res.upserted ); // for getlasterror }
static void lockedReceivedInsert(const char *ns, Message &m, const vector<BSONObj> &objs, CurOp &op, const bool keepGoing) { // writelock is used to synchronize stepdowns w/ writes uassert(10058, "not master", isMasterNs(ns)); Client::Context ctx(ns); scoped_ptr<Client::AlternateTransactionStack> altStack(opNeedsAltTxn(ns) ? new Client::AlternateTransactionStack : NULL); Client::Transaction transaction(DB_SERIALIZABLE); insertObjects(ns, objs, keepGoing, 0, true); transaction.commit(); size_t n = objs.size(); globalOpCounters.gotInsert(n); op.debug().ninserted = n; }
/** we allow queries to SimpleSlave's */ void replVerifyReadsOk(const std::string& ns, const LiteParsedQuery* pq) { if( replSet ) { // todo: speed up the secondary case. as written here there are 2 mutex entries, it // can b 1. if (isMasterNs(ns.c_str())) return; if ( cc().isGod() ) return; uassert(NotMasterNoSlaveOkCode, "not master and slaveOk=false", !pq || pq->hasOption(QueryOption_SlaveOk) || pq->hasReadPref()); uassert(NotMasterOrSecondaryCode, "not master or secondary; cannot currently read from this replSet member", theReplSet && theReplSet->isSecondary() ); } else { // master/slave uassert(NotMaster, "not master", isMasterNs(ns.c_str()) || pq == NULL || pq->hasOption(QueryOption_SlaveOk) || replSettings.slave == SimpleSlave ); } }
/**
 * Handles an insert message (stringstream-logging variant): inserts every
 * document carried by m into ns and logs each one with logOp("i", ...).
 *
 * @param m   incoming message
 * @param ss  diagnostic stream; receives the namespace
 */
void receivedInsert(Message& m, stringstream& ss) {
    DbMessage dbMsg(m);
    const char* const ns = dbMsg.getns();
    assert(*ns);
    uassert( "not master", isMasterNs( ns ) );

    setClient(ns);
    cc().top.setWrite();
    ss << ns;

    for ( ; dbMsg.moreJSObjs(); ) {
        BSONObj doc = dbMsg.nextJsObj();
        uassert("object to insert too large", doc.objsize() <= MaxBSONObjectSize);
        theDataFileMgr.insert(ns, doc, false);
        logOp("i", ns, doc);
    }
}
/**
 * Handles an insert message carrying one or more documents. The insert runs
 * under a DB write lock inside a loop that retries after a PageFaultException.
 *
 * @param m   incoming message
 * @param op  current operation; receives the namespace for diagnostics
 */
void receivedInsert(Message& m, CurOp& op) {
    DbMessage dbMsg(m);
    const char* const ns = dbMsg.getns();
    op.debug().ns = ns;

    if ( !dbMsg.moreJSObjs() ) {
        // strange.  should we complain?
        return;
    }

    BSONObj first = dbMsg.nextJsObj();

    // 'batch' stays empty for a single-document insert; otherwise it holds
    // every document of the message, starting with 'first'.
    vector<BSONObj> batch;
    if ( dbMsg.moreJSObjs() ) {
        batch.push_back(first);
        do {
            batch.push_back( dbMsg.nextJsObj() );
        } while ( dbMsg.moreJSObjs() );
    }

    PageFaultRetryableSection s;
    for (;;) {
        try {
            Lock::DBWrite lk(ns);

            // CONCURRENCY TODO: is being read locked in big log sufficient here?
            // writelock is used to synchronize stepdowns w/ writes
            uassert( 10058 , "not master", isMasterNs(ns) );

            if ( handlePossibleShardedMessage( m , 0 ) )
                return;

            Client::Context ctx(ns);

            if ( batch.empty() ) {
                checkAndInsert(ns, first);
                globalOpCounters.incInsertInWriteLock(1);
            }
            else {
                const bool keepGoing =
                    dbMsg.reservedField() & InsertOption_ContinueOnError;
                insertMulti(keepGoing, ns, batch);
            }
            return;
        }
        catch ( PageFaultException& e ) {
            // Touch the faulting page, then retry the whole attempt.
            e.touch();
        }
    }
}
/**
 * Handles an update message: decodes flags, selector and update document,
 * checks authorization, then applies the update under a DB write lock inside
 * a loop that retries after a PageFaultException. The outcome is recorded in
 * lastError.
 *
 * @param m   incoming message
 * @param op  current operation; receives the namespace and query for diagnostics
 */
void receivedUpdate(Message& m, CurOp& op) {
    DbMessage dbMsg(m);
    const char* const ns = dbMsg.getns();
    op.debug().ns = ns;

    const int flags = dbMsg.pullInt();

    // The selector comes first and must be followed by the update document.
    BSONObj selector = dbMsg.nextJsObj();
    verify( dbMsg.moreJSObjs() );
    verify( selector.objsize() < m.header()->dataLen() );

    BSONObj updateSpec = dbMsg.nextJsObj();
    uassert( 10055 , "update object too large", updateSpec.objsize() <= BSONObjMaxUserSize );
    verify( updateSpec.objsize() < m.header()->dataLen() );
    verify( selector.objsize() + updateSpec.objsize() < m.header()->dataLen() );

    const bool upsert    = ( flags & UpdateOption_Upsert ) != 0;
    const bool multi     = ( flags & UpdateOption_Multi ) != 0;
    const bool broadcast = ( flags & UpdateOption_Broadcast ) != 0;

    Status authStatus = cc().getAuthorizationManager()->checkAuthForUpdate(ns, upsert);
    uassert(16538, authStatus.reason(), authStatus.isOK());

    op.debug().query = selector;
    op.setQuery(selector);

    PageFaultRetryableSection s;
    for (;;) {
        try {
            Lock::DBWrite lk(ns);

            // void ReplSetImpl::relinquish() uses big write lock so
            // this is thus synchronized given our lock above.
            uassert( 10054 , "not master", isMasterNs( ns ) );

            // if this ever moves to outside of lock, need to adjust check Client::Context::_finishInit
            if ( !broadcast ) {
                if ( handlePossibleShardedMessage( m , 0 ) )
                    return;
            }

            Client::Context ctx( ns );

            const UpdateResult outcome =
                updateObjects( ns, updateSpec, selector, upsert, multi, true, op.debug() );

            // for getlasterror
            lastError.getSafe()->recordUpdate( outcome.existing , outcome.num , outcome.upserted );
            break;
        }
        catch ( PageFaultException& e ) {
            // Touch the faulting page, then retry the whole attempt.
            e.touch();
        }
    }
}
/**
 * Handles a delete message: checks authorization, decodes flags and pattern,
 * configures the per-operation settings, and removes the matching documents
 * inside a DB_SERIALIZABLE transaction. The count is recorded in lastError
 * and in the operation's debug info.
 *
 * @param m   incoming message
 * @param op  current operation; receives namespace, query and deleted count
 */
void receivedDelete(Message& m, CurOp& op) {
    DbMessage dbMsg(m);
    const char* const ns = dbMsg.getns();

    Status authStatus = cc().getAuthorizationManager()->checkAuthForDelete(ns);
    uassert(16542, authStatus.reason(), authStatus.isOK());

    op.debug().ns = ns;

    const int flags = dbMsg.pullInt();
    verify(dbMsg.moreJSObjs());
    BSONObj pattern = dbMsg.nextJsObj();

    op.debug().query = pattern;
    op.setQuery(pattern);

    const bool justOne   = ( flags & RemoveOption_JustOne ) != 0;
    const bool broadcast = ( flags & RemoveOption_Broadcast ) != 0;

    // Per-operation settings consulted further down the delete path.
    OpSettings opSettings;
    opSettings.setQueryCursorMode(WRITE_LOCK_CURSOR);
    opSettings.setJustOne(justOne);
    cc().setOpSettings(opSettings);

    Client::ShardedOperationScope sc;
    if (!broadcast) {
        if (sc.handlePossibleShardedMessage(m, 0)) {
            return;
        }
    }

    LOCK_REASON(lockReason, "delete");
    // NOTE(review): the comment that used to sit here said a write lock
    // synchronizes stepdowns with writes, but the lock taken is Lock::DBRead --
    // confirm which is intended.
    Lock::DBRead lk(ns, lockReason);

    uassert(10056, "not master", isMasterNs(ns));

    Client::Context ctx(ns);

    // An alternate transaction stack is only set up when the namespace needs one.
    scoped_ptr<Client::AlternateTransactionStack> altStack;
    if (opNeedsAltTxn(ns)) {
        altStack.reset(new Client::AlternateTransactionStack);
    }

    Client::Transaction txn(DB_SERIALIZABLE);
    const long long removed = deleteObjects(ns, pattern, justOne, true);
    txn.commit();

    lastError.getSafe()->recordDelete( removed );
    op.debug().ndeleted = removed;
}
/**
 * Handles a delete message: checks authorization, decodes flags and pattern,
 * then removes the matching documents under a DB write lock inside a loop
 * that retries after a PageFaultException. The count is recorded in lastError
 * and in the operation's debug info.
 *
 * @param m   incoming message
 * @param op  current operation; receives namespace, query and deleted count
 */
void receivedDelete(Message& m, CurOp& op) {
    DbMessage dbMsg(m);
    const char* const ns = dbMsg.getns();

    Status authStatus = cc().getAuthorizationManager()->checkAuthForDelete(ns);
    uassert(16542, authStatus.reason(), authStatus.isOK());

    op.debug().ns = ns;

    const int flags = dbMsg.pullInt();
    const bool justOne   = ( flags & RemoveOption_JustOne ) != 0;
    const bool broadcast = ( flags & RemoveOption_Broadcast ) != 0;

    verify( dbMsg.moreJSObjs() );
    BSONObj pattern = dbMsg.nextJsObj();

    op.debug().query = pattern;
    op.setQuery(pattern);

    PageFaultRetryableSection s;
    for (;;) {
        try {
            Lock::DBWrite lk(ns);

            // writelock is used to synchronize stepdowns w/ writes
            uassert( 10056 , "not master", isMasterNs( ns ) );

            // if this ever moves to outside of lock, need to adjust check Client::Context::_finishInit
            if ( !broadcast ) {
                if ( handlePossibleShardedMessage( m , 0 ) )
                    return;
            }

            Client::Context ctx(ns);

            const long long removed = deleteObjects(ns, pattern, justOne, true);
            lastError.getSafe()->recordDelete( removed );
            op.debug().ndeleted = removed;
            break;
        }
        catch ( PageFaultException& e ) {
            LOG(2) << "recordDelete got a PageFaultException" << endl;
            // Touch the faulting page, then retry the whole attempt.
            e.touch();
        }
    }
}
/**
 * Handles an update message: decodes flags, selector and update document from
 * m, applies the update under the global write lock, and records the outcome
 * in lastError. The namespace and query text are appended to op.debug().str.
 *
 * @param m   incoming message
 * @param op  current operation; receives diagnostics
 */
void receivedUpdate(Message& m, CurOp& op) {
    DbMessage dbMsg(m);
    const char* const ns = dbMsg.getns();
    assert(*ns);
    op.debug().str << ns << ' ';

    const int flags = dbMsg.pullInt();

    // The selector comes first and must be followed by the update document.
    BSONObj selector = dbMsg.nextJsObj();
    assert( dbMsg.moreJSObjs() );
    assert( selector.objsize() < m.header()->dataLen() );

    BSONObj updateSpec = dbMsg.nextJsObj();
    uassert( 10055 , "update object too large", updateSpec.objsize() <= BSONObjMaxUserSize );
    assert( updateSpec.objsize() < m.header()->dataLen() );
    assert( selector.objsize() + updateSpec.objsize() < m.header()->dataLen() );

    const bool upsert    = ( flags & UpdateOption_Upsert ) != 0;
    const bool multi     = ( flags & UpdateOption_Multi ) != 0;
    const bool broadcast = ( flags & UpdateOption_Broadcast ) != 0;

    /* todo: we shouldn't do all this ss stuff when we don't need it, it will slow us down.
       instead, let's just story the query BSON in the debug object, and it can toString()
       lazily
    */
    const string selectorText = selector.toString();
    op.debug().str << " query: " << selectorText;
    op.setQuery(selector);

    // writelock is used to synchronize stepdowns w/ writes
    writelock lk;
    uassert( 10054 , "not master", isMasterNs( ns ) );

    // if this ever moves to outside of lock, need to adjust check Client::Context::_finishInit
    if ( !broadcast ) {
        if ( handlePossibleShardedMessage( m , 0 ) )
            return;
    }

    Client::Context ctx( ns );

    const UpdateResult outcome =
        updateObjects( ns, updateSpec, selector, upsert, multi, true, op.debug() );

    // for getlasterror
    lastError.getSafe()->recordUpdate( outcome.existing , outcome.num , outcome.upserted );
}
/**
 * Handles a delete message (stringstream-logging variant): decodes the flags
 * and pattern, removes the matching documents and records the count for
 * getlasterror.
 *
 * @param m   incoming message
 * @param ss  diagnostic stream; receives the query text
 */
void receivedDelete(Message& m, stringstream &ss) {
    DbMessage dbMsg(m);
    const char* const ns = dbMsg.getns();
    assert(*ns);
    uassert( "not master", isMasterNs( ns ) );

    setClient(ns);
    Client& self = cc();
    self.top.setWrite();

    const int flags = dbMsg.pullInt();
    // Bit 0 of the flags selects "remove just one".
    const bool justOne = ( flags & 1 ) != 0;

    assert( dbMsg.moreJSObjs() );
    BSONObj pattern = dbMsg.nextJsObj();

    const string patternText = pattern.toString();
    ss << " query: " << patternText;

    // Copy the query text into the current op's fixed-size buffer,
    // leaving the final two bytes untouched.
    CurOp& running = *self.curop();
    strncpy(running.query, patternText.c_str(), sizeof(running.query)-2);

    const int removed = deleteObjects(ns, pattern, justOne, true);
    recordDelete( removed );
}
/**
 * Builds the index described by btreeState over the existing records of collection.
 *
 * Flow, as performed below:
 *   - An empty collection just gets an empty index and the function returns.
 *   - If the index spec has a true "background" field and inDBRepair is not
 *     set, the build is marked as a background operation and no bulk builder
 *     is used.
 *   - Otherwise a bulk builder is obtained via initiateBulk(), fed by
 *     addExistingToIndex(), and committed with commitBulk(); any record
 *     reported in dupsToDrop is deleted afterwards (and logged with logOp when
 *     this node is master for ns).
 *
 * @param txn           operation context passed to the storage calls
 * @param collection    collection being indexed
 * @param btreeState    catalog entry of the index being built
 * @param mayInterrupt  forwarded to commitBulk(); also enables the periodic
 *                      checkForInterrupt() in the dup-dropping loop
 *
 * throws DBException
 */
void buildAnIndex( OperationContext* txn, Collection* collection, IndexCatalogEntry* btreeState, bool mayInterrupt ) {
    string ns = collection->ns().ns(); // our copy
    const IndexDescriptor* idx = btreeState->descriptor();
    const BSONObj& idxInfo = idx->infoObj();

    MONGO_TLOG(0) << "build index on: " << ns << " properties: " << idx->toString() << endl;
    audit::logCreateIndex( currentClient.get(), &idxInfo, idx->indexName(), ns );

    // Measures the whole build; reported in the final log line.
    Timer t;

    verify( Lock::isWriteLocked( ns ) );

    // this is so that people know there are more keys to look at when doing
    // things like in place updates, etc...
    collection->infoCache()->addedIndex();

    // Fast path: nothing to scan, so an empty index is already complete.
    if ( collection->numRecords() == 0 ) {
        Status status = btreeState->accessMethod()->initializeAsEmpty(txn);
        massert( 17343, str::stream() << "IndexAccessMethod::initializeAsEmpty failed" << status.toString(), status.isOK() );
        MONGO_TLOG(0) << "\t added index to empty collection";
        return;
    }

    scoped_ptr<BackgroundOperation> backgroundOperation;
    bool doInBackground = false;

    if ( idxInfo["background"].trueValue() && !inDBRepair ) {
        doInBackground = true;
        backgroundOperation.reset( new BackgroundOperation(ns) );
        uassert( 13130, "can't start bg index b/c in recursive lock (db.eval?)", !Lock::nested() );
        log() << "\t building index in background";
    }

    Status status = btreeState->accessMethod()->initializeAsEmpty(txn);
    massert( 17342, str::stream() << "IndexAccessMethod::initializeAsEmpty failed" << status.toString(), status.isOK() );

    // Background builds write through the regular access method; foreground
    // builds go through a bulk builder, which bulkHolder owns and frees.
    IndexAccessMethod* bulk = doInBackground ? NULL : btreeState->accessMethod()->initiateBulk(txn, collection->numRecords());
    scoped_ptr<IndexAccessMethod> bulkHolder(bulk);
    IndexAccessMethod* iam = bulk ? bulk : btreeState->accessMethod();

    if ( bulk )
        log() << "\t building index using bulk method";

    unsigned long long n = addExistingToIndex( txn, collection, btreeState->descriptor(), iam, doInBackground );

    if ( bulk ) {
        LOG(1) << "\t bulk commit starting";
        std::set<DiskLoc> dupsToDrop;

        // NOTE: this 'status' shadows the outer one declared above.
        Status status = btreeState->accessMethod()->commitBulk( bulk, mayInterrupt, &dupsToDrop );

        // Code above us expects a uassert in case of dupkey errors.
        if (ErrorCodes::DuplicateKey == status.code()) {
            uassertStatusOK(status);
        }

        // Any other errors are probably bad and deserve a massert.
        massert( 17398, str::stream() << "commitBulk failed: " << status.toString(), status.isOK() );

        if ( dupsToDrop.size() )
            log() << "\t bulk dropping " << dupsToDrop.size() << " dups";

        for( set<DiskLoc>::const_iterator i = dupsToDrop.begin(); i != dupsToDrop.end(); ++i ) {
            BSONObj toDelete;
            collection->deleteDocument( txn, *i, false /* cappedOk */, true /* noWarn */, &toDelete );
            // Only a master writes the delete to the oplog.
            if (isMasterNs(ns.c_str())) {
                logOp( txn, "d", ns.c_str(), toDelete );
            }

            txn->recoveryUnit()->commitIfNeeded();

            RARELY if ( mayInterrupt ) {
                txn->checkForInterrupt();
            }
        }
    }

    verify( !btreeState->head().isNull() );
    MONGO_TLOG(0) << "build index done. scanned " << n << " total records. " << t.millis() / 1000.0 << " secs" << endl;

    // this one is so people know that the index is finished
    collection->infoCache()->addedIndex();
}
long long DeleteExecutor::execute() { uassertStatusOK(prepare()); uassert(17417, mongoutils::str::stream() << "DeleteExecutor::prepare() failed to parse query " << _request->getQuery(), _isQueryParsed); const bool logop = _request->shouldCallLogOp(); const NamespaceString& ns(_request->getNamespaceString()); if (!_request->isGod()) { if (ns.isSystem()) { uassert(12050, "cannot delete from system namespace", legalClientSystemNS(ns.ns(), true)); } if (ns.ns().find('$') != string::npos) { log() << "cannot delete from collection with reserved $ in name: " << ns << endl; uasserted( 10100, "cannot delete from collection with reserved $ in name" ); } } massert(17418, mongoutils::str::stream() << "dbname = " << currentClient.get()->database()->name() << "; ns = " << ns.ns(), currentClient.get()->database()->name() == nsToDatabaseSubstring(ns.ns())); Collection* collection = currentClient.get()->database()->getCollection(ns.ns()); if (NULL == collection) { return 0; } uassert(10101, str::stream() << "cannot remove from a capped collection: " << ns.ns(), !collection->isCapped()); uassert(ErrorCodes::NotMaster, str::stream() << "Not primary while removing from " << ns.ns(), !logop || isMasterNs(ns.ns().c_str())); long long nDeleted = 0; const bool canYield = !_request->isGod() && ( _canonicalQuery.get() ? 
!QueryPlannerCommon::hasNode(_canonicalQuery->root(), MatchExpression::ATOMIC) : LiteParsedQuery::isQueryIsolated(_request->getQuery())); Runner* rawRunner; if (_canonicalQuery.get()) { uassertStatusOK(getRunner(collection, _canonicalQuery.release(), &rawRunner)); } else { CanonicalQuery* ignored; uassertStatusOK(getRunner(collection, ns.ns(), _request->getQuery(), &rawRunner, &ignored)); } auto_ptr<Runner> runner(rawRunner); auto_ptr<ScopedRunnerRegistration> safety; if (canYield) { safety.reset(new ScopedRunnerRegistration(runner.get())); runner->setYieldPolicy(Runner::YIELD_AUTO); } DiskLoc rloc; Runner::RunnerState state; CurOp* curOp = cc().curop(); int oldYieldCount = curOp->numYields(); while (Runner::RUNNER_ADVANCED == (state = runner->getNext(NULL, &rloc))) { if (oldYieldCount != curOp->numYields()) { uassert(ErrorCodes::NotMaster, str::stream() << "No longer primary while removing from " << ns.ns(), !logop || isMasterNs(ns.ns().c_str())); oldYieldCount = curOp->numYields(); } BSONObj toDelete; // TODO: do we want to buffer docs and delete them in a group rather than // saving/restoring state repeatedly? runner->saveState(); collection->deleteDocument(rloc, false, false, logop ? &toDelete : NULL ); runner->restoreState(); nDeleted++; if (logop) { if ( toDelete.isEmpty() ) { problem() << "deleted object without id, not logging" << endl; } else { bool replJustOne = true; logOp("d", ns.ns().c_str(), toDelete, 0, &replJustOne); } } if (!_request->isMulti()) { break; } if (!_request->isGod()) { getDur().commitIfNeeded(); } if (debug && _request->isGod() && nDeleted == 100) { log() << "warning high number of deletes with god=true " << " which could use significant memory b/c we don't commit journal"; } } return nDeleted; }
/**
 * Executes the update described by request against db.
 *
 * Phase 1 iterates a Runner over the matching documents and applies the
 * driver's modifications to each one, in place when possible and through
 * Collection::updateDocument() otherwise, calling logOp("u", ...) when
 * requested. Phase 2 runs only when nothing matched and request.isUpsert() is
 * true: a new document is built from the query plus the modifications,
 * validated, inserted, and logged as an insert.
 *
 * @param txn      operation context passed to the storage and oplog calls
 * @param db       database holding (or about to hold) the target collection
 * @param request  the update request (namespace, query, updates, flags)
 * @param opDebug  receives counters and flags describing what happened
 * @param driver   update driver that applies the modifications
 * @param cq       optional canonical query; wrapped in cqHolder below and
 *                 released into getRunner() when non-NULL
 * @return UpdateResult describing matched/modified/upserted documents
 */
UpdateResult update( OperationContext* txn, Database* db, const UpdateRequest& request, OpDebug* opDebug, UpdateDriver* driver, CanonicalQuery* cq) {

    LOG(3) << "processing update : " << request;

    std::auto_ptr<CanonicalQuery> cqHolder(cq);
    const NamespaceString& nsString = request.getNamespaceString();
    UpdateLifecycle* lifecycle = request.getLifecycle();

    Collection* collection = db->getCollection(nsString.ns());

    validateUpdate(nsString.ns().c_str(), request.getUpdates(), request.getQuery());

    // TODO: This seems a bit circuitous.
    opDebug->updateobj = request.getUpdates();

    if (lifecycle) {
        lifecycle->setCollection(collection);
        driver->refreshIndexKeys(lifecycle->getIndexKeys());
    }

    Runner* rawRunner;
    Status status = cq ? getRunner(collection, cqHolder.release(), &rawRunner) : getRunner(collection, nsString.ns(), request.getQuery(), &rawRunner, &cq);
    uassert(17243, "could not get runner " + request.getQuery().toString() + "; " + causedBy(status), status.isOK());

    // Create the runner and setup all deps.
    auto_ptr<Runner> runner(rawRunner);

    // Register Runner with ClientCursor
    const ScopedRunnerRegistration safety(runner.get());

    //
    // We'll start assuming we have one or more documents for this update. (Otherwise,
    // we'll fall-back to insert case (if upsert is true).)
    //

    // We are an update until we fall into the insert case below.
    driver->setContext(ModifierInterface::ExecInfo::UPDATE_CONTEXT);

    int numMatched = 0;

    // If the update was in-place, we may see it again.  This only matters if we're doing
    // a multi-update; if we're not doing a multi-update we stop after one update and we
    // won't see any more docs.
    //
    // For example: If we're scanning an index {x:1} and performing {$inc:{x:5}}, we'll keep
    // moving the document forward and it will continue to reappear in our index scan.
    // Unless the index is multikey, the underlying query machinery won't de-dup.
    //
    // If the update wasn't in-place we may see it again.  Our query may return the new
    // document and we wouldn't want to update that.
    //
    // So, no matter what, we keep track of where the doc wound up.
    typedef unordered_set<DiskLoc, DiskLoc::Hasher> DiskLocSet;
    const scoped_ptr<DiskLocSet> updatedLocs(request.isMulti() ? new DiskLocSet : NULL);

    // Reset these counters on each call. We might re-enter this function to retry this
    // update if we throw a page fault exception below, and we rely on these counters
    // reflecting only the actions taken locally. In particular, we must have the no-op
    // counter reset so that we can meaningfully compare it with numMatched above.
    opDebug->nscanned = 0;
    opDebug->nscannedObjects = 0;
    opDebug->nModified = 0;

    // Get the cached document from the update driver.
    mutablebson::Document& doc = driver->getDocument();
    mutablebson::DamageVector damages;

    // Used during iteration of docs
    BSONObj oldObj;

    // Runner state; overwritten by every getNext() call in the loop below.
    Runner::RunnerState state = Runner::RUNNER_ADVANCED;

    // Refuse to start a logged update when this node is not master.
    uassert(ErrorCodes::NotMaster, mongoutils::str::stream() << "Not primary while updating " << nsString.ns(), !request.shouldCallLogOp() || isMasterNs(nsString.ns().c_str()));

    while (true) {
        // Get next doc, and location
        DiskLoc loc;
        state = runner->getNext(&oldObj, &loc);

        if (state != Runner::RUNNER_ADVANCED) {
            if (state == Runner::RUNNER_EOF) {
                // We have reached the logical end of the loop, so do yielding recovery
                break;
            }
            else {
                uassertStatusOK(Status(ErrorCodes::InternalError, str::stream() << " Update query failed -- " << Runner::statestr(state)));
            }
        }

        // We fill this with the new locs of moved doc so we don't double-update.
        if (updatedLocs && updatedLocs->count(loc) > 0) {
            continue;
        }

        // We count how many documents we scanned even though we may skip those that are
        // deemed duplicated. The final 'numMatched' and 'nscanned' numbers may differ for
        // that reason.
        // TODO: Do we want to pull this out of the underlying query plan?
        opDebug->nscanned++;

        // Found a matching document
        opDebug->nscannedObjects++;
        numMatched++;

        // Ask the driver to apply the mods. It may be that the driver can apply those "in
        // place", that is, some values of the old document just get adjusted without any
        // change to the binary layout on the bson layer. It may be that a whole new
        // document is needed to accommodate the new bson layout of the resulting document.
        doc.reset(oldObj, mutablebson::Document::kInPlaceEnabled);
        BSONObj logObj;

        FieldRefSet updatedFields;

        // NOTE: this 'status' shadows the function-level one declared above.
        Status status = Status::OK();
        if (!driver->needMatchDetails()) {
            // If we don't need match details, avoid doing the rematch
            status = driver->update(StringData(), &doc, &logObj, &updatedFields);
        }
        else {
            // If there was a matched field, obtain it.
            MatchDetails matchDetails;
            matchDetails.requestElemMatchKey();

            dassert(cq);
            verify(cq->root()->matchesBSON(oldObj, &matchDetails));

            string matchedField;
            if (matchDetails.hasElemMatchKey())
                matchedField = matchDetails.elemMatchKey();

            // TODO: Right now, each mod checks in 'prepare' that if it needs positional
            // data, that a non-empty StringData() was provided. In principle, we could do
            // that check here in an else clause to the above conditional and remove the
            // checks from the mods.

            status = driver->update(matchedField, &doc, &logObj, &updatedFields);
        }

        if (!status.isOK()) {
            uasserted(16837, status.reason());
        }

        // Ensure _id exists and is first
        uassertStatusOK(ensureIdAndFirst(doc));

        // If the driver applied the mods in place, we can ask the mutable for what
        // changed. We call those changes "damages". :) We use the damages to inform the
        // journal what was changed, and then apply them to the original document
        // ourselves. If, however, the driver applied the mods out of place, we ask it to
        // generate a new, modified document for us. In that case, the file manager will
        // take care of the journaling details for us.
        //
        // This code flow is admittedly odd. But, right now, journaling is baked in the file
        // manager. And if we aren't using the file manager, we have to do journaling
        // ourselves.
        bool docWasModified = false;
        BSONObj newObj;
        const char* source = NULL;
        bool inPlace = doc.getInPlaceUpdates(&damages, &source);

        // If something changed in the document, verify that no immutable fields were changed
        // and data is valid for storage.
        if ((!inPlace || !damages.empty()) ) {
            if (!(request.isFromReplication() || request.isFromMigration())) {
                const std::vector<FieldRef*>* immutableFields = NULL;
                if (lifecycle)
                    immutableFields = lifecycle->getImmutableFields();

                uassertStatusOK(validate(oldObj, updatedFields, doc, immutableFields, driver->modOptions()) );
            }
        }

        // Save state before making changes
        runner->saveState();

        if (inPlace && !driver->modsAffectIndices()) {
            // If a set of modifiers were all no-ops, we are still 'in place', but there is
            // no work to do, in which case we want to consider the object unchanged.
            if (!damages.empty() ) {
                collection->updateDocumentWithDamages( txn, loc, source, damages );
                docWasModified = true;
                opDebug->fastmod = true;
            }

            newObj = oldObj;
        }
        else {
            // The updates were not in place. Apply them through the file manager.

            newObj = doc.getObject();
            uassert(17419, str::stream() << "Resulting document after update is larger than " << BSONObjMaxUserSize, newObj.objsize() <= BSONObjMaxUserSize);
            StatusWith<DiskLoc> res = collection->updateDocument(txn, loc, newObj, true, opDebug);
            uassertStatusOK(res.getStatus());
            DiskLoc newLoc = res.getValue();
            docWasModified = true;

            // If the document moved, we might see it again in a collection scan (maybe it's
            // a document after our current document).
            //
            // If the document is indexed and the mod changes an indexed value, we might see it
            // again.  For an example, see the comment above near declaration of updatedLocs.
            if (updatedLocs && (newLoc != loc || driver->modsAffectIndices())) {
                updatedLocs->insert(newLoc);
            }
        }

        // Restore state after modification
        uassert(17278, "Update could not restore runner state after updating a document.", runner->restoreState());

        // Call logOp if requested.
        if (request.shouldCallLogOp() && !logObj.isEmpty()) {
            BSONObj idQuery = driver->makeOplogEntryQuery(newObj, request.isMulti());
            logOp(txn, "u", nsString.ns().c_str(), logObj , &idQuery, NULL, request.isFromMigration());
        }

        // Only record doc modifications if they wrote (exclude no-ops)
        if (docWasModified)
            opDebug->nModified++;

        if (!request.isMulti()) {
            break;
        }

        // Opportunity for journaling to write during the update.
        txn->recoveryUnit()->commitIfNeeded();
    }

    // TODO: Can this be simplified?
    if ((numMatched > 0) || (numMatched == 0 && !request.isUpsert()) ) {
        opDebug->nMatched = numMatched;
        return UpdateResult(numMatched > 0 /* updated existing object(s) */, !driver->isDocReplacement() /* $mod or obj replacement */, opDebug->nModified /* number of modified docs, no no-ops */, numMatched /* # of docs matched/updated, even no-ops */, BSONObj());
    }

    //
    // We haven't found any existing document so an insert is done
    // (upsert is true).
    //
    opDebug->upsert = true;

    // Since this is an insert (no docs found and upsert:true), we will be logging it
    // as an insert in the oplog. We don't need the driver's help to build the
    // oplog record, then. We also set the context of the update driver to the INSERT_CONTEXT.
    // Some mods may only work in that context (e.g. $setOnInsert).
    driver->setLogOp(false);
    driver->setContext(ModifierInterface::ExecInfo::INSERT_CONTEXT);

    // Reset the document we will be writing to
    doc.reset();

    // This remains the empty object in the case of an object replacement, but in the case
    // of an upsert where we are creating a base object from the query and applying mods,
    // we capture the query as the original so that we can detect immutable field mutations.
    BSONObj original = BSONObj();

    // Calling createFromQuery will populate the 'doc' with fields from the query which
    // creates the base of the update for the inserted doc (because upsert was true)
    if (cq) {
        uassertStatusOK(driver->populateDocumentWithQueryFields(cq, doc));
        // Validate the base doc, as taken from the query -- no fields means validate all.
        FieldRefSet noFields;
        uassertStatusOK(validate(BSONObj(), noFields, doc, NULL, driver->modOptions()));
        if (!driver->isDocReplacement()) {
            opDebug->fastmodinsert = true;
            // We need all the fields from the query to compare against for validation below.
            original = doc.getObject();
        }
        else {
            original = request.getQuery();
        }
    }
    else {
        // Without a canonical query the request must be a simple _id query;
        // seed the new document with just that _id element.
        fassert(17354, CanonicalQuery::isSimpleIdQuery(request.getQuery()));
        BSONElement idElt = request.getQuery()["_id"];
        original = idElt.wrap();
        fassert(17352, doc.root().appendElement(idElt));
    }

    // Apply the update modifications and then log the update as an insert manually.
    FieldRefSet updatedFields;
    status = driver->update(StringData(), &doc, NULL, &updatedFields);
    if (!status.isOK()) {
        uasserted(16836, status.reason());
    }

    // Ensure _id exists and is first
    uassertStatusOK(ensureIdAndFirst(doc));

    // Validate that the object replacement or modifiers resulted in a document
    // that contains all the immutable keys and can be stored.
    if (!(request.isFromReplication() || request.isFromMigration())){
        const std::vector<FieldRef*>* immutableFields = NULL;
        if (lifecycle)
            immutableFields = lifecycle->getImmutableFields();

        // This will only validate the modified fields if not a replacement.
        uassertStatusOK(validate(original, updatedFields, doc, immutableFields, driver->modOptions()) );
    }

    // Only create the collection if the doc will be inserted.
    if (!collection) {
        collection = db->getCollection(request.getNamespaceString().ns());
        if (!collection) {
            collection = db->createCollection(txn, request.getNamespaceString().ns());
        }
    }

    // Insert the doc
    BSONObj newObj = doc.getObject();
    uassert(17420, str::stream() << "Document to upsert is larger than " << BSONObjMaxUserSize, newObj.objsize() <= BSONObjMaxUserSize);

    StatusWith<DiskLoc> newLoc = collection->insertDocument(txn, newObj, !request.isGod() /*enforceQuota*/);
    uassertStatusOK(newLoc.getStatus());
    if (request.shouldCallLogOp()) {
        logOp(txn, "i", nsString.ns().c_str(), newObj, NULL, NULL, request.isFromMigration());
    }

    opDebug->nMatched = 1;
    return UpdateResult(false /* updated a non existing document */, !driver->isDocReplacement() /* $mod or obj replacement? */, 1 /* docs written*/, 1 /* count of updated documents */, newObj /* object that was upserted */ );
}
void doTTLForDB( const string& dbName ) { //check isMaster before becoming god bool isMaster = isMasterNs( dbName.c_str() ); Client::GodScope god; vector<BSONObj> indexes; { auto_ptr<DBClientCursor> cursor = db.query( dbName + ".system.indexes" , BSON( secondsExpireField << BSON( "$exists" << true ) ) , 0 , /* default nToReturn */ 0 , /* default nToSkip */ 0 , /* default fieldsToReturn */ QueryOption_SlaveOk ); /* perform on secondaries too */ if ( cursor.get() ) { while ( cursor->more() ) { indexes.push_back( cursor->next().getOwned() ); } } } for ( unsigned i=0; i<indexes.size(); i++ ) { BSONObj idx = indexes[i]; BSONObj key = idx["key"].Obj(); if ( key.nFields() != 1 ) { error() << "key for ttl index can only have 1 field" << endl; continue; } BSONObj query; { BSONObjBuilder b; b.appendDate( "$lt" , curTimeMillis64() - ( 1000 * idx[secondsExpireField].numberLong() ) ); query = BSON( key.firstElement().fieldName() << b.obj() ); } LOG(1) << "TTL: " << key << " \t " << query << endl; long long n = 0; { string ns = idx["ns"].String(); Client::WriteContext ctx( ns ); NamespaceDetails* nsd = nsdetails( ns ); if ( ! nsd ) { // collection was dropped continue; } if ( nsd->setUserFlag( NamespaceDetails::Flag_UsePowerOf2Sizes ) ) { nsd->syncUserFlags( ns ); } // only do deletes if on master if ( ! isMaster ) { continue; } n = deleteObjects( ns.c_str() , query , false , true ); ttlDeletedDocuments.increment( n ); } LOG(1) << "\tTTL deleted: " << n << endl; } }
/** * @ return true if not in sharded mode or if version for this client is ok */ bool shardVersionOk( const string& ns , string& errmsg, ConfigVersion& received, ConfigVersion& wanted ) { if ( ! shardingState.enabled() ) return true; if ( ! isMasterNs( ns.c_str() ) ) { // right now connections to secondaries aren't versioned at all return true; } ShardedConnectionInfo* info = ShardedConnectionInfo::get( false ); if ( ! info ) { // this means the client has nothing sharded // so this allows direct connections to do whatever they want // which i think is the correct behavior return true; } if ( info->inForceVersionOkMode() ) { return true; } // TODO // all collections at some point, be sharded or not, will have a version (and a ShardChunkManager) // for now, we remove the sharding state of dropped collection // so delayed request may come in. This has to be fixed. ConfigVersion clientVersion = info->getVersion(ns); ConfigVersion version; if ( ! shardingState.hasVersion( ns , version ) && ! clientVersion.isSet() ) { return true; } // The versions we're going to compare, saved for future use received = clientVersion; wanted = version; if ( ! version.isSet() && clientVersion.isSet() ) { stringstream ss; ss << "collection was dropped or this shard no longer valid version"; errmsg = ss.str(); return false; } if ( clientVersion >= version ) return true; if ( ! clientVersion.isSet() ) { stringstream ss; ss << "client in sharded mode, but doesn't have version set for this collection"; errmsg = ss.str(); return false; } if ( version.majorVersion() == clientVersion.majorVersion() ) { // this means there was just a split // since on a split w/o a migrate this server is ok // going to accept return true; } stringstream ss; ss << "your version is too old"; errmsg = ss.str(); return false; }
/**
 * Processes an insert message.
 *
 * Records the target namespace on `op`, performs authorization (a plain
 * insert check, or a per-document ensureIndex check when the target
 * collection is "system.indexes"), gathers every object carried by the
 * message, and then inserts them under a database write lock. The locked
 * section is retried whenever a PageFaultException is raised.
 *
 * @param m   the incoming message
 * @param op  current operation; its debug info receives the namespace and,
 *            for a single-document insert, the inserted count
 */
void receivedInsert(Message& m, CurOp& op) {
    DbMessage d(m);
    const char *ns = d.getns();
    op.debug().ns = ns;

    const bool targetsSystemIndexes = NamespaceString(ns).coll == "system.indexes";

    // Auth checking for index writes happens further down in this function.
    if (!targetsSystemIndexes) {
        Status status = cc().getAuthorizationManager()->checkAuthForInsert(ns);
        uassert(16544, status.reason(), status.isOK());
    }

    if( !d.moreJSObjs() ) {
        // strange.  should we complain?
        return;
    }

    // Pull every document out of the message up front; index specs are
    // authorized against the namespace they name as they are read.
    vector<BSONObj> docs;
    while (d.moreJSObjs()) {
        BSONObj doc = d.nextJsObj();
        docs.push_back(doc);

        if (!targetsSystemIndexes) {
            continue;
        }

        string indexNS = doc.getStringField("ns");
        uassert(16548,
                mongoutils::str::stream() << "not authorized to create index on " << indexNS,
                cc().getAuthorizationManager()->checkAuthorization(
                        indexNS, ActionType::ensureIndex));
    }

    PageFaultRetryableSection s;
    for (;;) {
        try {
            Lock::DBWrite lk(ns);

            // CONCURRENCY TODO: is being read locked in big log sufficient here?
            // writelock is used to synchronize stepdowns w/ writes
            uassert( 10058 , "not master", isMasterNs(ns) );

            if ( handlePossibleShardedMessage( m , 0 ) )
                return;

            Client::Context ctx(ns);

            if (docs.size() <= 1) {
                // Exactly one document: take the single-insert path.
                checkAndInsert(ns, docs[0]);
                globalOpCounters.incInsertInWriteLock(1);
                op.debug().ninserted = 1;
            }
            else {
                const bool keepGoing = d.reservedField() & InsertOption_ContinueOnError;
                insertMulti(keepGoing, ns, docs, op);
            }
            return;
        }
        catch ( PageFaultException& e ) {
            // Touch the faulting page, then go around and retry the insert.
            e.touch();
        }
    }
}
/** * @ return true if not in sharded mode or if version for this client is ok */ bool shardVersionOk( const string& ns , string& errmsg, ChunkVersion& received, ChunkVersion& wanted ) { if ( ! shardingState.enabled() ) return true; if ( ! isMasterNs( ns.c_str() ) ) { // right now connections to secondaries aren't versioned at all return true; } ShardedConnectionInfo* info = ShardedConnectionInfo::get( false ); if ( ! info ) { // this means the client has nothing sharded // so this allows direct connections to do whatever they want // which i think is the correct behavior return true; } if ( info->inForceVersionOkMode() ) { return true; } // TODO : all collections at some point, be sharded or not, will have a version // (and a CollectionMetadata) received = info->getVersion( ns ); wanted = shardingState.getVersion( ns ); if( received.isWriteCompatibleWith( wanted ) ) return true; // // Figure out exactly why not compatible, send appropriate error message // The versions themselves are returned in the error, so not needed in messages here // // Check epoch first, to send more meaningful message, since other parameters probably // won't match either if( ! wanted.hasCompatibleEpoch( received ) ){ errmsg = str::stream() << "version epoch mismatch detected for " << ns << ", " << "the collection may have been dropped and recreated"; return false; } if( ! wanted.isSet() && received.isSet() ){ errmsg = str::stream() << "this shard no longer contains chunks for " << ns << ", " << "the collection may have been dropped"; return false; } if( wanted.isSet() && ! 
received.isSet() ){ errmsg = str::stream() << "this shard contains versioned chunks for " << ns << ", " << "but no version set in request"; return false; } if( wanted.majorVersion() != received.majorVersion() ){ // // Could be > or < - wanted is > if this is the source of a migration, // wanted < if this is the target of a migration // errmsg = str::stream() << "version mismatch detected for " << ns << ", " << "stored major version " << wanted.majorVersion() << " does not match received " << received.majorVersion(); return false; } // Those are all the reasons the versions can mismatch verify( false ); return false; }