/**
 * Delete the record at 'dl' from 'ns' via Collection::deleteDocument and,
 * if that call hands back a non-empty object, write a "d" entry to the oplog.
 *
 * @param d         namespace details; used here only for the capped check
 * @param ns        namespace the record lives in
 * @param todelete  record being removed; must be the record at 'dl' (dassert)
 * @param dl        disk location of the record
 * @param cappedOK  when false, a capped namespace makes this uassert (10089)
 * @param noWarn    forwarded unchanged to Collection::deleteDocument
 * @param doLog     when true, deleteDocument is given an output object and
 *                  that object is logged if it comes back non-empty
 */
void DataFileMgr::deleteRecord(NamespaceDetails* d, const StringData& ns, Record *todelete, const DiskLoc& dl, bool cappedOK, bool noWarn, bool doLog ) {
    dassert( todelete == dl.rec() );

    if ( d->isCapped() && !cappedOK ) {
        out() << "failing remove on a capped ns " << ns << endl;
        uassert( 10089 , "can't remove from a capped collection" , 0 );
        return;
    }

    // NOTE(review): 'obj' is never read below. It is kept because
    // BSONObj::make may validate the record as a side effect - confirm, then remove.
    BSONObj obj = BSONObj::make( todelete );

    Collection* collection = cc().database()->getCollection( ns );
    verify( collection );

    BSONObj toDelete;
    collection->deleteDocument( dl, cappedOK, noWarn, doLog ? &toDelete : NULL );

    if ( ! toDelete.isEmpty() ) {
        // logOp takes a NUL-terminated C string, but 'ns' is a StringData whose
        // buffer need not be terminated at ns.size(). Calling strlen() on
        // rawData() to detect that could itself read past the buffer, so always
        // go through an owned, terminated copy.
        // TODO: make logOp accept a StringData so this copy can go away.
        const string nsString = ns.toString();
        logOp( "d", nsString.c_str(), toDelete );
    }
}
/** * Perform a single insert into a collection. Requires the insert be preprocessed and the * collection already has been created. * * Might fault or error, otherwise populates the result. */ static void singleInsert( const BatchItemRef& insertItem, const BSONObj& normalInsert, Collection* collection, WriteOpResult* result ) { const string& insertNS = insertItem.getRequest()->getNS(); Lock::assertWriteLocked( insertNS ); try { // XXX - are we 100% sure that all !OK statuses do not write a document? StatusWith<DiskLoc> status = collection->insertDocument( normalInsert, true ); if ( !status.isOK() ) { result->error = toWriteError( status.getStatus() ); } else { logOp( "i", insertNS.c_str(), normalInsert ); getDur().commitIfNeeded(); result->stats.n = 1; } } catch ( const PageFaultException& ex ) { // TODO: An actual data structure that's not an exception for this result->fault = new PageFaultException( ex ); } catch ( const DBException& ex ) { result->error = toWriteError( ex.toStatus() ); } }
virtual bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { if ( cmdObj.firstElement().type() != Array ){ errmsg = "ops has to be an array"; return false; } BSONObj ops = cmdObj.firstElement().Obj(); { // check input BSONObjIterator i( ops ); while ( i.more() ){ BSONElement e = i.next(); if ( e.type() == Object ) continue; errmsg = "op not an object: "; errmsg += e.fieldName(); return false; } } if ( cmdObj["preCondition"].type() == Array ){ BSONObjIterator i( cmdObj["preCondition"].Obj() ); while ( i.more() ){ BSONObj f = i.next().Obj(); BSONObj realres = db.findOne( f["ns"].String() , f["q"].Obj() ); Matcher m( f["res"].Obj() ); if ( ! m.matches( realres ) ){ result.append( "got" , realres ); result.append( "whatFailed" , f ); errmsg = "pre-condition failed"; return false; } } } // apply int num = 0; BSONObjIterator i( ops ); while ( i.more() ){ BSONElement e = i.next(); applyOperation_inlock( e.Obj() , false ); num++; } result.append( "applied" , num ); if ( ! fromRepl ){ // We want this applied atomically on slaves // so we re-wrap without the pre-condition for speed string tempNS = str::stream() << dbname << ".$cmd"; logOp( "c" , tempNS.c_str() , cmdObj.firstElement().wrap() ); } return true; }
/** * Perform a single index insert into a collection. Requires the index descriptor be * preprocessed and the collection already has been created. * * Might fault or error, otherwise populates the result. */ static void singleCreateIndex( const BatchItemRef& insertItem, const BSONObj& normalIndexDesc, Collection* collection, WriteOpResult* result ) { const string& indexNS = insertItem.getRequest()->getNS(); Lock::assertWriteLocked( indexNS ); try { Status status = collection->getIndexCatalog()->createIndex( normalIndexDesc, true ); if ( status.code() == ErrorCodes::IndexAlreadyExists ) { result->stats.n = 0; } else if ( !status.isOK() ) { result->error = toWriteError( status ); } else { logOp( "i", indexNS.c_str(), normalIndexDesc ); result->stats.n = 1; } } catch ( const PageFaultException& ex ) { // TODO: An actual data structure that's not an exception for this result->fault = new PageFaultException( ex ); } catch ( const DBException& ex ) { result->error = toWriteError( ex.toStatus() ); } }
long long Helpers::removeRange( const string& ns , const BSONObj& min , const BSONObj& max , bool yield , bool maxInclusive , RemoveCallback * callback, bool fromMigrate ) { BSONObj keya , keyb; BSONObj minClean = toKeyFormat( min , keya ); BSONObj maxClean = toKeyFormat( max , keyb ); verify( keya == keyb ); Client::Context ctx(ns); shared_ptr<Cursor> c; auto_ptr<ClientCursor> cc; { NamespaceDetails* nsd = nsdetails( ns.c_str() ); if ( ! nsd ) return 0; int ii = nsd->findIndexByKeyPattern( keya ); verify( ii >= 0 ); IndexDetails& i = nsd->idx( ii ); c.reset( BtreeCursor::make( nsd , ii , i , minClean , maxClean , maxInclusive, 1 ) ); cc.reset( new ClientCursor( QueryOption_NoCursorTimeout , c , ns ) ); cc->setDoingDeletes( true ); } long long num = 0; while ( cc->ok() ) { if ( yield && ! cc->yieldSometimes( ClientCursor::WillNeed) ) { // cursor got finished by someone else, so we're done cc.release(); // if the collection/db is dropped, cc may be deleted break; } if ( ! cc->ok() ) break; DiskLoc rloc = cc->currLoc(); if ( callback ) callback->goingToDelete( cc->current() ); cc->advance(); c->prepareToTouchEarlierIterate(); logOp( "d" , ns.c_str() , rloc.obj()["_id"].wrap() , 0 , 0 , fromMigrate ); theDataFileMgr.deleteRecord(ns.c_str() , rloc.rec(), rloc); num++; c->recoverFromTouchingEarlierIterate(); getDur().commitIfNeeded(); } return num; }
virtual bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { Lock::GlobalWrite globalWriteLock; DurTransaction txn; bool ok = wrappedRun(&txn, dbname, cmdObj, errmsg, result, fromRepl); if (ok && !fromRepl) logOp(&txn, "c",(dbname + ".$cmd").c_str(), cmdObj); return ok; }
void DataFileMgr::deleteRecord(NamespaceDetails* d, const StringData& ns, Record *todelete, const DiskLoc& dl, bool cappedOK, bool noWarn, bool doLog ) { dassert( todelete == dl.rec() ); if ( d->isCapped() && !cappedOK ) { out() << "failing remove on a capped ns " << ns << endl; uassert( 10089 , "can't remove from a capped collection" , 0 ); return; } BSONObj obj = BSONObj::make( todelete ); BSONObj toDelete; if ( doLog ) { BSONElement e = obj["_id"]; if ( e.type() ) { toDelete = e.wrap(); } } Collection* collection = cc().database()->getCollection( ns ); verify( collection ); /* check if any cursors point to us. if so, advance them. */ ClientCursor::aboutToDelete(ns, d, dl); collection->getIndexCatalog()->unindexRecord( obj, dl, noWarn ); _deleteRecord(d, ns, todelete, dl); collection->infoCache()->notifyOfWriteOp(); if ( ! toDelete.isEmpty() ) { // TODO: this is crazy, need to fix logOp const char* raw = ns.rawData(); if ( strlen(raw) == ns.size() ) { logOp( "d", raw, toDelete ); } else { string temp = ns.toString(); logOp( "d", temp.c_str(), toDelete ); } } }
void checkAndInsert(const char *ns, /*modifies*/BSONObj& js) { uassert( 10059 , "object to insert too large", js.objsize() <= BSONObjMaxUserSize); { // check no $ modifiers. note we only check top level. (scanning deep would be quite expensive) BSONObjIterator i( js ); while ( i.more() ) { BSONElement e = i.next(); uassert( 13511 , "document to insert can't have $ fields" , e.fieldName()[0] != '$' ); } } theDataFileMgr.insertWithObjMod(ns, js, false); // js may be modified in the call to add an _id field. logOp("i", ns, js); }
/**
 * Delete every document whose location is listed in 'dupsToDrop'.
 *
 * Each deletion is followed by a durability commit check; the delete is
 * written to the oplog only while this node is master for the namespace.
 *
 * @param mayInterrupt  its negation is passed to the (occasional) interrupt check
 */
void BtreeBasedBuilder::doDropDups(Collection* collection,
                                   const set<DiskLoc>& dupsToDrop,
                                   bool mayInterrupt) {
    string ns = collection->ns().ns();

    for( set<DiskLoc>::const_iterator dup = dupsToDrop.begin(); dup != dupsToDrop.end(); ++dup ) {
        RARELY killCurrentOp.checkForInterrupt( !mayInterrupt );

        BSONObj removed;
        collection->deleteDocument( *dup,
                                    false /* cappedOk */,
                                    true /* noWarn */,
                                    &removed );
        getDur().commitIfNeeded();

        if ( isMaster( ns.c_str() ) ) {
            logOp( "d", ns.c_str(), removed );
        }
    }
}
/**
 * Handle an insert message: insert every document it carries into the
 * message's namespace and log each insert.
 *
 * uasserts when this node is not master for the namespace or when a document
 * exceeds MaxBSONObjectSize. The namespace is appended to 'ss'.
 */
void receivedInsert(Message& m, stringstream& ss) {
    DbMessage request(m);

    const char *ns = request.getns();
    assert(*ns);
    uassert( "not master", isMasterNs( ns ) );

    setClient(ns);
    cc().top.setWrite();
    ss << ns;

    while ( request.moreJSObjs() ) {
        BSONObj doc = request.nextJsObj();
        uassert("object to insert too large", doc.objsize() <= MaxBSONObjectSize);

        theDataFileMgr.insert(ns, doc, false);
        logOp("i", ns, doc);
    }
}
long long Helpers::removeRange( const string& ns , const BSONObj& min , const BSONObj& max , bool yield , bool maxInclusive , RemoveCallback * callback ) { BSONObj keya , keyb; BSONObj minClean = toKeyFormat( min , keya ); BSONObj maxClean = toKeyFormat( max , keyb ); assert( keya == keyb ); Client::Context ctx(ns); NamespaceDetails* nsd = nsdetails( ns.c_str() ); if ( ! nsd ) return 0; int ii = nsd->findIndexByKeyPattern( keya ); assert( ii >= 0 ); long long num = 0; IndexDetails& i = nsd->idx( ii ); shared_ptr<Cursor> c( new BtreeCursor( nsd , ii , i , minClean , maxClean , maxInclusive, 1 ) ); auto_ptr<ClientCursor> cc( new ClientCursor( QueryOption_NoCursorTimeout , c , ns ) ); cc->setDoingDeletes( true ); while ( c->ok() ) { DiskLoc rloc = c->currLoc(); BSONObj key = c->currKey(); if ( callback ) callback->goingToDelete( c->current() ); c->advance(); c->noteLocation(); logOp( "d" , ns.c_str() , rloc.obj()["_id"].wrap() ); theDataFileMgr.deleteRecord(ns.c_str() , rloc.rec(), rloc); num++; c->checkLocation(); if ( yield && ! cc->yieldSometimes() ) { // cursor got finished by someone else, so we're done cc.release(); // if the collection/db is dropped, cc may be deleted break; } } return num; }
/* copy the specified collection isindex - if true, this is system.indexes collection, in which we do some transformation when copying. */ void Cloner::copy(const char *from_collection, const char *to_collection, bool isindex, bool logForRepl, bool masterSameProcess, bool slaveOk, BSONObj query) { auto_ptr<DBClientCursor> c; { dbtemprelease r; c = conn->query( from_collection, query, 0, 0, 0, slaveOk ? Option_SlaveOk : 0 ); } assert( c.get() ); long long n = 0; time_t saveLast = time( 0 ); while ( 1 ) { { dbtemprelease r; if ( !c->more() ) break; } BSONObj tmp = c->next(); /* assure object is valid. note this will slow us down a good bit. */ if ( !tmp.valid() ) { out() << "skipping corrupt object from " << from_collection << '\n'; continue; } ++n; BSONObj js = tmp; if ( isindex ) { assert( strstr(from_collection, "system.indexes") ); js = fixindex(tmp); } try { theDataFileMgr.insert(to_collection, js); if ( logForRepl ) logOp("i", to_collection, js); } catch( UserException& e ) { log() << "warning: exception cloning object in " << from_collection << ' ' << e.what() << " obj:" << js.toString() << '\n'; } RARELY if ( time( 0 ) - saveLast > 60 ) { log() << n << " objects cloned so far from collection " << from_collection << endl; saveLast = time( 0 ); } } }
/** { ..., capped: true, size: ..., max: ... } @param deferIdIndex - if not not, defers id index creation. sets the bool value to true if we wanted to create the id index. @return true if successful */ bool userCreateNS(const char *ns, BSONObj options, string& err, bool logForReplication, bool *deferIdIndex) { const char *coll = strchr( ns, '.' ) + 1; massert(10356 , str::stream() << "invalid ns: " << ns, NamespaceString::validCollectionComponent(ns)); bool ok = _userCreateNS(ns, options, err, deferIdIndex); if ( logForReplication && ok ) { if ( options.getField( "create" ).eoo() ) { BSONObjBuilder b; b << "create" << coll; b.appendElements( options ); options = b.obj(); } string logNs = nsToDatabase(ns) + ".$cmd"; logOp("c", logNs.c_str(), options); } return ok; }
/** * Perform a single insert into a collection. Requires the insert be preprocessed and the * collection already has been created. * * Might fault or error, otherwise populates the result. */ static void singleInsert( const BSONObj& docToInsert, Collection* collection, WriteOpResult* result ) { const string& insertNS = collection->ns().ns(); Lock::assertWriteLocked( insertNS ); StatusWith<DiskLoc> status = collection->insertDocument( docToInsert, true ); if ( !status.isOK() ) { result->setError(toWriteError(status.getStatus())); } else { logOp( "i", insertNS.c_str(), docToInsert ); getDur().commitIfNeeded(); result->getStats().n = 1; } }
/**
 * Create collection 'ns' in 'db' and optionally log the creation.
 *
 * @param options               { ..., capped: true, size: ..., max: ... }
 * @param logForReplication     when true, a "c" op is logged against <db>.$cmd;
 *                              a "create" field naming the collection is
 *                              prepended to the options if they lack one
 * @param createDefaultIndexes  if false, defers id (and other) index creation
 * @return InvalidNamespace for a bad ns, NamespaceExists if the collection is
 *         already there, the parse error for bad options, otherwise OK
 */
Status userCreateNS( OperationContext* txn,
                     Database* db,
                     const StringData& ns,
                     BSONObj options,
                     bool logForReplication,
                     bool createDefaultIndexes ) {

    invariant( db );

    LOG(1) << "create collection " << ns << ' ' << options;

    if ( !NamespaceString::validCollectionComponent(ns) )
        return Status( ErrorCodes::InvalidNamespace,
                       str::stream() << "invalid ns: " << ns );

    Collection* existing = db->getCollection( ns );
    if ( existing )
        return Status( ErrorCodes::NamespaceExists,
                       "collection already exists" );

    CollectionOptions parsedOptions;
    Status parseStatus = parsedOptions.parse( options );
    if ( !parseStatus.isOK() )
        return parseStatus;

    invariant( db->createCollection( txn, ns, parsedOptions, true, createDefaultIndexes ) );

    if ( !logForReplication )
        return Status::OK();

    if ( options.getField( "create" ).eoo() ) {
        BSONObjBuilder withCreate;
        withCreate << "create" << nsToCollectionSubstring( ns );
        withCreate.appendElements( options );
        options = withCreate.obj();
    }

    string logNs = nsToDatabase(ns) + ".$cmd";
    logOp(txn, "c", logNs.c_str(), options);

    return Status::OK();
}
/** * Perform a single index insert into a collection. Requires the index descriptor be * preprocessed and the collection already has been created. * * Might fault or error, otherwise populates the result. */ static void singleCreateIndex( const BSONObj& indexDesc, Collection* collection, WriteOpResult* result ) { const string indexNS = collection->ns().getSystemIndexesCollection(); Lock::assertWriteLocked( indexNS ); Status status = collection->getIndexCatalog()->createIndex( indexDesc, true ); if ( status.code() == ErrorCodes::IndexAlreadyExists ) { result->getStats().n = 0; } else if ( !status.isOK() ) { result->setError(toWriteError(status)); } else { logOp( "i", indexNS.c_str(), indexDesc ); result->getStats().n = 1; } }
void checkAndInsert(const char *ns, /*modifies*/BSONObj& js) { uassert( 10059 , "object to insert too large", js.objsize() <= BSONObjMaxUserSize); { // check no $ modifiers. note we only check top level. (scanning deep would be quite expensive) BSONObjIterator i( js ); while ( i.more() ) { BSONElement e = i.next(); uassert( 13511 , "document to insert can't have $ fields" , e.fieldName()[0] != '$' ); } } theDataFileMgr.insertWithObjMod(ns, // May be modified in the call to add an _id field. js, // Only permit interrupting an (index build) insert if the // insert comes from a socket client request rather than a // parent operation using the client interface. The parent // operation might not support interrupts. cc().curop()->parent() == NULL, false); logOp("i", ns, js); }
virtual bool run(OperationContext* txn, const string& dbname , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { string coll = cmdObj[ "emptycapped" ].valuestrsafe(); uassert( 13428, "emptycapped must specify a collection", !coll.empty() ); NamespaceString nss( dbname, coll ); Client::WriteContext ctx( nss.ns() ); Database* db = ctx.ctx().db(); Collection* collection = db->getCollection( nss.ns() ); massert( 13429, "emptycapped no such collection", collection ); std::vector<BSONObj> indexes = stopIndexBuilds(db, cmdObj); Status status = collection->truncate(txn); if ( !status.isOK() ) return appendCommandStatus( result, status ); IndexBuilder::restoreIndexes(indexes); if (!fromRepl) logOp(txn, "c",(dbname + ".$cmd").c_str(), cmdObj); return true; }
/**
 * Create a new capped collection <db>.<shortTo> of 'size' bytes and copy the
 * documents of <db>.<shortFrom> into it with a forward collection scan.
 *
 * Leading documents are skipped while an estimate of the data that would not
 * fit anyway is still positive.
 *
 * @param temp               when true, the new collection is created with temp: true
 * @param logForReplication  forwarded to userCreateNS and, when true, each
 *                           copied document is logged as an insert
 * @return NamespaceNotFound if the source is missing, NamespaceExists if the
 *         target already exists, the userCreateNS error if creation fails,
 *         InternalError if the scan dies or errors, otherwise OK
 */
Status cloneCollectionAsCapped( Database* db,
                                const string& shortFrom,
                                const string& shortTo,
                                double size,
                                bool temp,
                                bool logForReplication ) {

    string fromNs = db->name() + "." + shortFrom;
    string toNs = db->name() + "." + shortTo;

    Collection* fromCollection = db->getCollection( fromNs );
    if ( !fromCollection )
        return Status( ErrorCodes::NamespaceNotFound,
                       str::stream() << "source collection " << fromNs << " does not exist" );

    if ( db->getCollection( toNs ) )
        return Status( ErrorCodes::NamespaceExists, "to collection already exists" );

    // Create the capped target.
    {
        Client::Context ctx( toNs );

        BSONObjBuilder spec;
        spec.appendBool( "capped", true );
        spec.append( "size", size );
        if ( temp )
            spec.appendBool( "temp", true );

        Status created = userCreateNS( ctx.db(), toNs, spec.done(), logForReplication );
        if ( !created.isOK() )
            return created;
    }

    Collection* toCollection = db->getCollection( toNs );
    invariant( toCollection ); // we created above

    // How much data to skip because it will not fit anyway. dataSize and
    // storageSize cannot be compared exactly, so the target size is padded.
    long long excessSize =
        static_cast<long long>( fromCollection->dataSize() - ( toCollection->storageSize() * 2 ) );

    scoped_ptr<Runner> runner( InternalPlanner::collectionScan(fromNs,
                                                               fromCollection,
                                                               InternalPlanner::FORWARD ) );

    for ( ;; ) {
        BSONObj doc;
        Runner::RunnerState state = runner->getNext(&doc, NULL);

        if ( state == Runner::RUNNER_EOF )
            return Status::OK();

        if ( state == Runner::RUNNER_DEAD ) {
            db->dropCollection( toNs );
            return Status( ErrorCodes::InternalError, "runner turned dead while iterating" );
        }

        if ( state == Runner::RUNNER_ERROR )
            return Status( ErrorCodes::InternalError, "runner error while iterating" );

        if ( state != Runner::RUNNER_ADVANCED )
            continue;

        if ( excessSize > 0 ) {
            excessSize -= ( 4 * doc.objsize() ); // 4x is for padding, power of 2, etc...
            continue;
        }

        toCollection->insertDocument( doc, true );
        if ( logForReplication )
            logOp( "i", toNs.c_str(), doc );
        getDur().commitIfNeeded();
    }

    invariant( false ); // unreachable
}
// Take the next queued command (and any directly following groupable commands),
// execute them, commit, run their post-commit hooks and signal completion.
// Reschedules itself while commands remain outstanding.
void DatabaseWorker::doWork() {
    /*
        Run the dbcmd. Only inside a transaction if the cmd does mutates.

        If the cmd is modifying local content (ie source->isLocal()) then
        log to the database oplog for replication to peers.
     */

#ifdef DEBUG_TIMING
    QTime timer;
    timer.start();
#endif

    // Every command executed in this pass; used afterwards for postCommit()
    // and emitFinished().
    QList< QSharedPointer<DatabaseCommand> > cmdGroup;
    QSharedPointer<DatabaseCommand> cmd;
    {
        // NOTE(review): takeFirst() assumes m_commands is non-empty here -
        // confirm that callers only schedule doWork() with work queued.
        QMutexLocker lock( &m_mut );
        cmd = m_commands.takeFirst();
    }

    // A transaction is opened based on the FIRST command of the group only.
    if ( cmd->doesMutates() ) {
        bool transok = m_dbimpl->database().transaction();
        Q_ASSERT( transok );
        Q_UNUSED( transok );
    }

    // Number of commands executed in this pass; subtracted from m_outstanding below.
    unsigned int completed = 0;
    try {
        bool finished = false;
        {
            while ( !finished ) {
                completed++;
                cmd->_exec( m_dbimpl ); // runs actual SQL stuff

                if ( cmd->loggable() ) {
                    // We only save our own ops to the oplog, since incoming ops from peers
                    // are applied immediately.
                    //
                    // Crazy idea: if peers had keypairs and could sign ops/msgs, in theory it
                    // would be safe to sync ops for friend A from friend B's cache, if he saved them,
                    // which would mean you could get updates even if a peer was offline.
                    if ( cmd->source()->isLocal() && !cmd->localOnly() ) {
                        // save to op-log
                        DatabaseCommandLoggable* command = (DatabaseCommandLoggable*)cmd.data();
                        logOp( command );
                    } else {
                        // Make a note of the last guid we applied for this source
                        // so we can always request just the newer ops in future.
                        //
                        if ( !cmd->singletonCmd() ) {
                            TomahawkSqlQuery query = m_dbimpl->newquery();
                            query.prepare( "UPDATE source SET lastop = ? WHERE id = ?" );
                            query.addBindValue( cmd->guid() );
                            query.addBindValue( cmd->source()->id() );
                            if ( !query.exec() ) {
                                // Caught by the const char* handler below.
                                throw "Failed to set lastop";
                            }
                        }
                    }
                }

                cmdGroup << cmd;

                // Keep going only while both this command and the next queued
                // one are groupable.
                // NOTE(review): m_commands.isEmpty() is evaluated before m_mut
                // is taken - confirm this unlocked read is acceptable.
                if ( cmd->groupable() && !m_commands.isEmpty() ) {
                    QMutexLocker lock( &m_mut );
                    if ( m_commands.first()->groupable() ) {
                        cmd = m_commands.takeFirst();
                    } else {
                        finished = true;
                    }
                } else
                    finished = true;
            }

            // 'cmd' is now the LAST command of the group; the commit decision
            // is made on it.
            if ( cmd->doesMutates() ) {
                qDebug() << "Committing" << cmd->commandname() << cmd->guid();
                if ( !m_dbimpl->database().commit() ) {
                    tDebug() << "FAILED TO COMMIT TRANSACTION*";
                    throw "commit failed";
                }
            }

#ifdef DEBUG_TIMING
            uint duration = timer.elapsed();
            tDebug() << "DBCmd Duration:" << duration << "ms, now running postcommit for" << cmd->commandname();
#endif

            foreach ( QSharedPointer<DatabaseCommand> c, cmdGroup )
                c->postCommit();

#ifdef DEBUG_TIMING
            tDebug() << "Post commit finished in" << timer.elapsed() - duration << "ms for" << cmd->commandname();
#endif
        }
    }
    catch( const char * msg ) {
        // String-literal throws from above: log, roll back if the current
        // command mutates, and continue on to emitFinished() (not rethrown).
        tLog() << endl
               << "*ERROR* processing databasecommand:"
               << cmd->commandname()
               << msg
               << m_dbimpl->database().lastError().databaseText()
               << m_dbimpl->database().lastError().driverText()
               << endl;

        if ( cmd->doesMutates() )
            m_dbimpl->database().rollback();

        Q_ASSERT( false );
    }
    catch(...) {
        // Anything else: roll back if the current command mutates, then rethrow.
        qDebug() << "Uncaught exception processing dbcmd";
        if ( cmd->doesMutates() )
            m_dbimpl->database().rollback();

        Q_ASSERT( false );
        throw;
    }

    foreach ( QSharedPointer<DatabaseCommand> c, cmdGroup )
        c->emitFinished();

    // Account for the work done; if anything is still outstanding, queue
    // another doWork() call through a zero-delay single-shot timer.
    QMutexLocker lock( &m_mut );
    m_outstanding -= completed;
    if ( m_outstanding > 0 )
        QTimer::singleShot( 0, this, SLOT( doWork() ) );
}
/**
 * Scan 'collection' and add the keys of every document to the index behind
 * 'accessMethod'.
 *
 * When the descriptor has dropDups set, a document whose key insertion raises
 * an AssertionException is deleted (and the delete logged) instead of failing
 * the build; otherwise the exception is logged and rethrown.
 *
 * @param canBeKilled  when true, the operation message is tagged "(background)"
 *                     and an interrupt check runs after every document
 * @return number of documents the scan visited (deleted duplicates included)
 */
unsigned long long addExistingToIndex( OperationContext* txn,
                                       Collection* collection,
                                       const IndexDescriptor* descriptor,
                                       IndexAccessMethod* accessMethod,
                                       bool canBeKilled ) {

    string ns = collection->ns().ns(); // our copy for sanity

    bool dupsAllowed = !descriptor->unique();
    bool dropDups = descriptor->dropDups();

    // Build the progress message shown for the current operation.
    string curopMessage;
    {
        stringstream ss;
        ss << "Index Build";
        if ( canBeKilled )
            ss << "(background)";
        curopMessage = ss.str();
    }

    ProgressMeter& progress =
        cc().curop()->setMessage( curopMessage.c_str(),
                                  curopMessage,
                                  collection->numRecords() );

    unsigned long long n = 0;          // documents visited
    unsigned long long numDropped = 0; // duplicates deleted (dropDups only)

    auto_ptr<Runner> runner(InternalPlanner::collectionScan(ns,collection));

    // NOTE(review): idxName is not read anywhere below in this function.
    std::string idxName = descriptor->indexName();

    // After this yields in the loop, idx may point at a different index (if indexes get
    // flipped, see insert_makeIndex) or even an empty IndexDetails, so nothing below should
    // depend on idx. idxNo should be recalculated after each yield.

    BSONObj js;
    DiskLoc loc;
    while (Runner::RUNNER_ADVANCED == runner->getNext(&js, &loc)) {
        try {
            if ( !dupsAllowed && dropDups ) {
                // Unique index with dropDups: lastError is disabled for the
                // duration of the key insertion.
                LastError::Disabled led( lastError.get() );
                addKeysToIndex(txn, collection, descriptor, accessMethod, js, loc);
            }
            else {
                addKeysToIndex(txn, collection, descriptor, accessMethod, js, loc);
            }
        }
        catch( AssertionException& e ) {
            if (ErrorCodes::isInterruption(DBException::convertExceptionCode(e.getCode()))) {
                txn->checkForInterrupt();
            }

            // TODO: Does exception really imply dropDups exception?
            if (dropDups) {
                // Remember whether the scan was already finished, then save
                // the runner's state around the delete.
                bool runnerEOF = runner->isEOF();
                runner->saveState();
                BSONObj toDelete;
                collection->deleteDocument( txn, loc, false, true, &toDelete );
                logOp( txn, "d", ns.c_str(), toDelete );

                if (!runner->restoreState(txn)) {
                    // Runner got killed somehow.  This probably shouldn't happen.
                    if (runnerEOF) {
                        // Quote: "We were already at the end.  Normal.
                        // TODO: Why is this normal?
                    }
                    else {
                        uasserted(ErrorCodes::CursorNotFound,
                                  "cursor gone during bg index; dropDups");
                    }
                    // Leaves the scan loop; note that n and numDropped are not
                    // incremented for this document.
                    break;
                }
                // We deleted a record, but we didn't actually yield the dblock.
                // TODO: Why did the old code assume we yielded the lock?
                numDropped++;
            }
            else {
                log() << "background addExistingToIndex exception " << e.what() << endl;
                throw;
            }
        }

        n++;
        progress.hit();

        txn->recoveryUnit()->commitIfNeeded();

        if (canBeKilled) {
            // Checking for interrupt here is necessary because the bg index
            // interruptors can only interrupt this index build while they hold
            // a write lock, and yieldAndCheckIfOK only checks for
            // interrupt prior to yielding our write lock. We need to check the kill flag
            // here before another iteration of the loop.
            txn->checkForInterrupt();
        }

        // The collection's record count is re-read on every iteration and fed
        // to the progress meter as its new total.
        progress.setTotalWhileRunning( collection->numRecords() );
    }

    progress.finished();
    if ( dropDups && numDropped )
        log() << "\t index build dropped: " << numDropped << " dups";
    return n;
}
void createOplog() { dblock lk; const char * ns = "local.oplog.$main"; Client::Context ctx(ns); NamespaceDetails * nsd = nsdetails( ns ); if ( nsd ) { if ( cmdLine.oplogSize != 0 ){ int o = (int)(nsd->storageSize() / ( 1024 * 1024 ) ); int n = (int)(cmdLine.oplogSize / ( 1024 * 1024 ) ); if ( n != o ){ stringstream ss; ss << "cmdline oplogsize (" << n << ") different than existing (" << o << ") see: http://dochub.mongodb.org/core/increase-oplog"; log() << ss.str() << endl; throw UserException( 13257 , ss.str() ); } } DBDirectClient c; BSONObj lastOp = c.findOne( ns, Query().sort( BSON( "$natural" << -1 ) ) ); if ( !lastOp.isEmpty() ) { OpTime::setLast( lastOp[ "ts" ].date() ); } return; } /* create an oplog collection, if it doesn't yet exist. */ BSONObjBuilder b; double sz; if ( cmdLine.oplogSize != 0 ) sz = (double)cmdLine.oplogSize; else { /* not specified. pick a default size */ sz = 50.0 * 1000 * 1000; if ( sizeof(int *) >= 8 ) { #if defined(__APPLE__) // typically these are desktops (dev machines), so keep it smallish sz = (256-64) * 1000 * 1000; #else sz = 990.0 * 1000 * 1000; boost::intmax_t free = freeSpace(); //-1 if call not supported. double fivePct = free * 0.05; if ( fivePct > sz ) sz = fivePct; #endif } } log() << "******\n"; log() << "creating replication oplog of size: " << (int)( sz / ( 1024 * 1024 ) ) << "MB (use --oplogSize to change)\n"; log() << "******" << endl; b.append("size", sz); b.appendBool("capped", 1); b.appendBool("autoIndexId", false); string err; BSONObj o = b.done(); userCreateNS(ns, o, err, false); logOp( "n", "dummy", BSONObj() ); }
// throws DBException
//
// Build the index described by 'btreeState' over the existing documents of
// 'collection'. An empty collection only gets the index initialized. Otherwise
// the index is filled by addExistingToIndex(): directly when the index spec asks
// for a background build (and we are not in a repair), through a bulk builder
// when it does not. After a bulk build, duplicates reported by commitBulk are
// deleted from the collection.
//
// 'mayInterrupt' is passed to commitBulk and enables the occasional interrupt
// check while duplicates are being dropped.
void buildAnIndex( OperationContext* txn,
                   Collection* collection,
                   IndexCatalogEntry* btreeState,
                   bool mayInterrupt ) {

    string ns = collection->ns().ns(); // our copy
    const IndexDescriptor* idx = btreeState->descriptor();
    const BSONObj& idxInfo = idx->infoObj();

    MONGO_TLOG(0) << "build index on: " << ns
                  << " properties: " << idx->toString() << endl;
    audit::logCreateIndex( currentClient.get(), &idxInfo, idx->indexName(), ns );

    Timer t;

    verify( Lock::isWriteLocked( ns ) );
    // this is so that people know there are more keys to look at when doing
    // things like in place updates, etc...
    collection->infoCache()->addedIndex();

    // Shortcut: nothing to scan, so just initialize the index and stop.
    if ( collection->numRecords() == 0 ) {
        Status status = btreeState->accessMethod()->initializeAsEmpty(txn);
        massert( 17343,
                 str::stream() << "IndexAccessMethod::initializeAsEmpty failed" << status.toString(),
                 status.isOK() );
        MONGO_TLOG(0) << "\t added index to empty collection";
        return;
    }

    // Background builds are requested through the index spec and are not
    // honoured while a repair is in progress.
    scoped_ptr<BackgroundOperation> backgroundOperation;
    bool doInBackground = false;

    if ( idxInfo["background"].trueValue() && !inDBRepair ) {
        doInBackground = true;
        backgroundOperation.reset( new BackgroundOperation(ns) );
        uassert( 13130,
                 "can't start bg index b/c in recursive lock (db.eval?)",
                 !Lock::nested() );
        log() << "\t building index in background";
    }

    Status status = btreeState->accessMethod()->initializeAsEmpty(txn);
    massert( 17342,
             str::stream() << "IndexAccessMethod::initializeAsEmpty failed" << status.toString(),
             status.isOK() );

    // Foreground builds go through a bulk access method; background builds
    // write straight into the index's own access method ('bulk' stays NULL).
    IndexAccessMethod* bulk = doInBackground ?
        NULL : btreeState->accessMethod()->initiateBulk(txn,
                                                        collection->numRecords());
    // Owns 'bulk' (possibly NULL) for the rest of the function.
    scoped_ptr<IndexAccessMethod> bulkHolder(bulk);
    IndexAccessMethod* iam = bulk ? bulk : btreeState->accessMethod();

    if ( bulk )
        log() << "\t building index using bulk method";

    unsigned long long n = addExistingToIndex( txn,
                                               collection,
                                               btreeState->descriptor(),
                                               iam,
                                               doInBackground );

    if ( bulk ) {
        LOG(1) << "\t bulk commit starting";
        std::set<DiskLoc> dupsToDrop;

        // Note: this 'status' shadows the one declared above.
        Status status = btreeState->accessMethod()->commitBulk( bulk,
                                                                mayInterrupt,
                                                                &dupsToDrop );

        // Code above us expects a uassert in case of dupkey errors.
        if (ErrorCodes::DuplicateKey == status.code()) {
            uassertStatusOK(status);
        }

        // Any other errors are probably bad and deserve a massert.
        massert( 17398,
                 str::stream() << "commitBulk failed: " << status.toString(),
                 status.isOK() );

        if ( dupsToDrop.size() )
            log() << "\t bulk dropping " << dupsToDrop.size() << " dups";

        // Delete each reported duplicate; the delete is logged only while
        // isMasterNs() holds for the namespace.
        for( set<DiskLoc>::const_iterator i = dupsToDrop.begin(); i != dupsToDrop.end(); ++i ) {
            BSONObj toDelete;
            collection->deleteDocument( txn,
                                        *i,
                                        false /* cappedOk */,
                                        true /* noWarn */,
                                        &toDelete );
            if (isMasterNs(ns.c_str())) {
                logOp( txn, "d", ns.c_str(), toDelete );
            }

            txn->recoveryUnit()->commitIfNeeded();

            RARELY if ( mayInterrupt ) {
                txn->checkForInterrupt();
            }
        }
    }

    verify( !btreeState->head().isNull() );
    MONGO_TLOG(0) << "build index done.  scanned " << n << " total records. "
                  << t.millis() / 1000.0 << " secs" << endl;

    // this one is so people know that the index is finished
    collection->infoCache()->addedIndex();
}
/* copy the specified collection isindex - if true, this is system.indexes collection, in which we do some transformation when copying. */ void Cloner::copy(const char *from_collection, const char *to_collection, bool isindex, bool logForRepl, bool masterSameProcess, bool slaveOk, Query query) { auto_ptr<DBClientCursor> c; { dbtemprelease r; c = conn->query( from_collection, query, 0, 0, 0, Option_NoCursorTimeout | ( slaveOk ? Option_SlaveOk : 0 ) ); } list<BSONObj> storedForLater; assert( c.get() ); long long n = 0; time_t saveLast = time( 0 ); while ( 1 ) { { dbtemprelease r; if ( !c->more() ) break; } BSONObj tmp = c->next(); /* assure object is valid. note this will slow us down a little. */ if ( !tmp.valid() ) { stringstream ss; ss << "skipping corrupt object from " << from_collection; BSONElement e = tmp.firstElement(); try { e.validate(); ss << " firstElement: " << e; } catch( ... ){ ss << " firstElement corrupt"; } out() << ss.str() << endl; continue; } ++n; BSONObj js = tmp; if ( isindex ) { assert( strstr(from_collection, "system.indexes") ); js = fixindex(tmp); storedForLater.push_back( js.getOwned() ); continue; } try { theDataFileMgr.insert(to_collection, js); if ( logForRepl ) logOp("i", to_collection, js); } catch( UserException& e ) { log() << "warning: exception cloning object in " << from_collection << ' ' << e.what() << " obj:" << js.toString() << '\n'; } RARELY if ( time( 0 ) - saveLast > 60 ) { log() << n << " objects cloned so far from collection " << from_collection << endl; saveLast = time( 0 ); } } if ( storedForLater.size() ){ for ( list<BSONObj>::iterator i = storedForLater.begin(); i!=storedForLater.end(); i++ ){ BSONObj js = *i; try { theDataFileMgr.insert(to_collection, js); if ( logForRepl ) logOp("i", to_collection, js); } catch( UserException& e ) { log() << "warning: exception cloning object in " << from_collection << ' ' << e.what() << " obj:" << js.toString() << '\n'; } } } }
/**
 * Consume the current batch of a remote cursor while holding the write lock.
 *
 * Each valid document is inserted into 'to_collection' (and logged when
 * logForRepl is set). In the isindex case documents are instead passed through
 * fixindex() and appended to 'storedForLater'. Invalid documents are skipped
 * with a message. Every 128th document, progress may be logged, interruption
 * is checked, and the lock is temporarily released if yielding is permitted.
 */
void operator()( DBClientCursorBatchIterator &i ) {
    mongolock l( true );
    if ( context ) {
        context->relocked();
    }

    while( i.moreInCurrentBatch() ) {

        if ( n % 128 == 127 /*yield some*/ ) {
            time_t now = time(0);
            if( now - lastLog >= 60 ) {
                // report progress
                if( lastLog )
                    log() << "clone " << to_collection << ' ' << n << endl;
                lastLog = now;
            }
            mayInterrupt( _mayBeInterrupted );
            dbtempreleaseif t( _mayYield );
        }

        BSONObj fetched = i.nextSafe();

        /* make sure the object is valid.  note this slows us down a little. */
        if ( !fetched.valid() ) {
            stringstream ss;
            ss << "Cloner: skipping corrupt object from " << from_collection;
            BSONElement first = fetched.firstElement();
            try {
                first.validate();
                ss << " firstElement: " << first;
            }
            catch( ... ) {
                ss << " firstElement corrupt";
            }
            out() << ss.str() << endl;
            continue;
        }

        ++n;

        BSONObj js = fetched;
        if ( isindex ) {
            verify( strstr(from_collection, "system.indexes") );
            js = fixindex(fetched);
            storedForLater->push_back( js.getOwned() );
            continue;
        }

        try {
            theDataFileMgr.insertWithObjMod(to_collection, js);
            if ( logForRepl )
                logOp("i", to_collection, js);

            getDur().commitIfNeeded();
        }
        catch( UserException& e ) {
            log() << "warning: exception cloning object in " << from_collection << ' ' << e.what() << " obj:" << js.toString() << '\n';
        }

        RARELY if ( time( 0 ) - saveLast > 60 ) {
            log() << n << " objects cloned so far from collection " << from_collection << endl;
            saveLast = time( 0 );
        }
    }
}
/* Delete the documents of 'ns' that match 'pattern'.

   ns:          namespace, e.g. <database>.<collection>
   pattern:     the "where" clause / criteria
   justOneOrig: stop after 1 match
   logop:       when true, each delete is logged by _id (documents without an
                _id are deleted but not logged)
   god:         allow access to system namespaces, and don't yield
   rs:          if non-NULL, told about each document before it is deleted

   Returns the number of documents deleted; 0 if the namespace does not exist.
   uasserts for disallowed system namespaces, namespaces containing '$' (both
   only when !god) and capped collections.
*/
long long deleteObjects(const char *ns, BSONObj pattern, bool justOneOrig, bool logop, bool god, RemoveSaver * rs ) {
    if( !god ) {
        if ( strstr(ns, ".system.") ) {
            /* note a delete from system.indexes would corrupt the db
            if done here, as there are pointers into those objects in
            NamespaceDetails.
            */
            uassert(12050, "cannot delete from system namespace", legalClientSystemNS( ns , true ) );
        }
        if ( strchr( ns , '$' ) ) {
            log() << "cannot delete from collection with reserved $ in name: " << ns << endl;
            // Always fails here: we only get into this branch when a '$' was found.
            uassert( 10100 ,  "cannot delete from collection with reserved $ in name", strchr(ns, '$') == 0 );
        }
    }

    {
        NamespaceDetails *d = nsdetails( ns );
        if ( ! d )
            return 0;
        uassert( 10101 ,  "can't remove from a capped collection" , ! d->capped );
    }

    long long nDeleted = 0;

    shared_ptr< Cursor > creal = NamespaceDetailsTransient::getCursor( ns, pattern, BSONObj(), false, 0 );

    if( !creal->ok() )
        return nDeleted;

    shared_ptr< Cursor > cPtr = creal;
    auto_ptr<ClientCursor> cc( new ClientCursor( QueryOption_NoCursorTimeout, cPtr, ns) );
    cc->setDoingDeletes( true );

    // Remembered so the sanity check after the loop can look the cursor up again.
    CursorId id = cc->cursorid();

    bool justOne = justOneOrig;
    // Yielding is off in god mode and when the query's matcher is atomic.
    bool canYield = !god && !(creal->matcher() && creal->matcher()->docMatcher().atomic());

    do {
        // TODO: we can generalize this I believe
        //
        bool willNeedRecord = (creal->matcher() && creal->matcher()->needRecord()) || pattern.isEmpty() || isSimpleIdQuery( pattern );
        if ( ! willNeedRecord ) {
            // TODO: this is a total hack right now
            // check if the index full encompasses query

            if ( pattern.nFields() == 1 &&
                    str::equals( pattern.firstElement().fieldName() , creal->indexKeyPattern().firstElement().fieldName() ) )
                willNeedRecord = true;
        }

        if ( canYield && ! cc->yieldSometimes( willNeedRecord ? ClientCursor::WillNeed : ClientCursor::MaybeCovered ) ) {
            cc.release(); // has already been deleted elsewhere
            // TODO should we assert or something?
            break;
        }
        if ( !cc->ok() ) {
            break; // if we yielded, could have hit the end
        }

        // this way we can avoid calling updateLocation() every time (expensive)
        // as well as some other nuances handled
        cc->setDoingDeletes( true );

        DiskLoc rloc = cc->currLoc();
        // NOTE(review): 'key' is not read anywhere below in this function.
        BSONObj key = cc->currKey();

        bool match = creal->currentMatches();
        bool dup = cc->c()->getsetdup(rloc);

        // Advance past the current document before (possibly) deleting it; if
        // there is nothing after it, treat this as the final delete.
        if ( ! cc->advance() )
            justOne = true;

        // 'continue' in a do/while jumps to the cc->ok() test at the bottom.
        if ( ! match )
            continue;

        assert( !dup ); // can't be a dup, we deleted it!

        if ( !justOne ) {
            /* NOTE: this is SLOW.  this is not good, noteLocation() was designed to be called across getMore
                blocks.  here we might call millions of times which would be bad.
                */
            cc->c()->prepareToTouchEarlierIterate();
        }

        if ( logop ) {
            BSONElement e;
            if( BSONObj( rloc.rec() ).getObjectID( e ) ) {
                BSONObjBuilder b;
                b.append( e );
                bool replJustOne = true;
                logOp( "d", ns, b.done(), 0, &replJustOne );
            }
            else {
                problem() << "deleted object without id, not logging" << endl;
            }
        }

        if ( rs )
            rs->goingToDelete( rloc.obj() /*cc->c->current()*/ );

        theDataFileMgr.deleteRecord(ns, rloc.rec(), rloc);
        nDeleted++;
        if ( justOne ) {
            break;
        }
        cc->c()->recoverFromTouchingEarlierIterate();

        if( !god )
            getDur().commitIfNeeded();

        if( debug && god && nDeleted == 100 )
            log() << "warning high number of deletes with god=true which could use significant memory" << endl;
    }
    while ( cc->ok() );

    // Sanity check: we still hold the ClientCursor, yet it can no longer be
    // found by id.
    if ( cc.get() && ClientCursor::find( id , false ) == 0 ) {
        // TODO: remove this and the id declaration above if this doesn't trigger
        //       if it does, then i'm very confused (ERH 06/2011)
        error() << "this should be impossible" << endl;
        printStackTrace();
        cc.release();
    }

    return nDeleted;
}
void createOplog() { Lock::GlobalWrite lk; const char * ns = "local.oplog.$main"; bool rs = !cmdLine._replSet.empty(); if( rs ) ns = rsoplog; Client::Context ctx(ns); NamespaceDetails * nsd = nsdetails( ns ); if ( nsd ) { if ( cmdLine.oplogSize != 0 ) { int o = (int)(nsd->storageSize() / ( 1024 * 1024 ) ); int n = (int)(cmdLine.oplogSize / ( 1024 * 1024 ) ); if ( n != o ) { stringstream ss; ss << "cmdline oplogsize (" << n << ") different than existing (" << o << ") see: http://dochub.mongodb.org/core/increase-oplog"; log() << ss.str() << endl; throw UserException( 13257 , ss.str() ); } } if( rs ) return; DBDirectClient c; BSONObj lastOp = c.findOne( ns, Query().sort(reverseNaturalObj) ); if ( !lastOp.isEmpty() ) { OpTime::setLast( lastOp[ "ts" ].date() ); } return; } /* create an oplog collection, if it doesn't yet exist. */ BSONObjBuilder b; double sz; if ( cmdLine.oplogSize != 0 ) sz = (double)cmdLine.oplogSize; else { /* not specified. pick a default size */ sz = 50.0 * 1000 * 1000; if ( sizeof(int *) >= 8 ) { #if defined(__APPLE__) // typically these are desktops (dev machines), so keep it smallish sz = (256-64) * 1000 * 1000; #else sz = 990.0 * 1000 * 1000; boost::intmax_t free = File::freeSpace(dbpath); //-1 if call not supported. double fivePct = free * 0.05; if ( fivePct > sz ) sz = fivePct; #endif } } log() << "******" << endl; log() << "creating replication oplog of size: " << (int)( sz / ( 1024 * 1024 ) ) << "MB..." << endl; b.append("size", sz); b.appendBool("capped", 1); b.appendBool("autoIndexId", false); string err; BSONObj o = b.done(); userCreateNS(ns, o, err, false); if( !rs ) logOp( "n", "", BSONObj() ); /* sync here so we don't get any surprising lag later when we try to sync */ MemoryMappedFile::flushAll(true); log() << "******" << endl; }
/**
 * Apply an update to the document(s) of `ns` that match `patternOrig`.
 *
 * Flow, as visible in this function:
 *   1. _id fast path: when the plan policy permits it, the update is not multi, the query is
 *      a simple _id query, the collection exists and modsIsIndexed is zero, the work is
 *      delegated to _updateById().  If that finds nothing and this is a non-operator upsert,
 *      `updateobj` itself is inserted.
 *   2. General path: iterate the cursor from getOptimizedCursor().  After the first scanned
 *      document, non-atomic queries go through a ClientCursor so the operation can yield.
 *      Each matching document gets either the $-operator mods (in place when possible,
 *      otherwise via updateRecord()) or a whole-document replacement.
 *   3. Upsert: if nothing was modified and `upsert` is set, insert either an object built
 *      from the query plus mods ($-operator update) or `updateobj` itself.
 *
 * su            forwarded to insertWithObjMod() and to the replacement updateRecord() call
 * ns            namespace being updated
 * updateobj     the update: $-operators if its first field name starts with '$', otherwise
 *               a replacement document
 * patternOrig   the query
 * upsert        insert when nothing matched
 * multi         update all matches; only permitted with $-operators (uasserts 10158/10159)
 * logop         write oplog entries via logOp()
 * debug         receives updateobj, nscanned and the idhack/fastmod/upsert/fastmodinsert flags
 * rs            not referenced anywhere in this function body
 * fromMigrate   passed through to logOp()
 * planPolicy    passed to getOptimizedCursor(); also gates the _id fast path
 * forReplication passed to the ModSet constructor
 *
 * May throw PageFaultException before a ClientCursor has been created, if the client allows
 * it and the current record is not likely in physical memory.
 */
UpdateResult _updateObjects( bool su, const char* ns, const BSONObj& updateobj, const BSONObj& patternOrig, bool upsert, bool multi, bool logop , OpDebug& debug, RemoveSaver* rs, bool fromMigrate, const QueryPlanSelectionPolicy& planPolicy, bool forReplication ) {
    DEBUGUPDATE( "update: " << ns << " update: " << updateobj << " query: " << patternOrig << " upsert: " << upsert << " multi: " << multi );
    Client& client = cc();

    debug.updateobj = updateobj;

    // The idea with these here is to make them loop invariant for
    // multi updates, and thus be a bit faster for that case.  The
    // pointers may be left invalid on a failed or terminal yield
    // recovery.
    NamespaceDetails* d = nsdetails(ns); // can be null if an upsert...
    NamespaceDetailsTransient* nsdt = &NamespaceDetailsTransient::get(ns);

    // mods stays null for replacement-style updates; it is only built for $-operator updates.
    auto_ptr<ModSet> mods;
    bool isOperatorUpdate = updateobj.firstElementFieldName()[0] == '$';
    int modsIsIndexed = false; // really the # of indexes

    if ( isOperatorUpdate ) {
        mods.reset( new ModSet(updateobj, nsdt->indexKeys(), forReplication) );
        modsIsIndexed = mods->maxNumIndexUpdated();
    }

    // ---- _id fast path ----
    if( planPolicy.permitOptimalIdPlan() && !multi && isSimpleIdQuery(patternOrig) && d && !modsIsIndexed ) {
        int idxNo = d->findIdIndex();
        if( idxNo >= 0 ) {
            debug.idhack = true;

            UpdateResult result = _updateById( isOperatorUpdate, idxNo, mods.get(), d, nsdt, su, ns, updateobj, patternOrig, logop, debug, fromMigrate);
            if ( result.existing || ! upsert ) {
                return result;
            }
            else if ( upsert && ! isOperatorUpdate ) {
                // this handles repl inserts
                checkNoMods( updateobj );
                debug.upsert = true;
                BSONObj no = updateobj;
                theDataFileMgr.insertWithObjMod(ns, no, false, su);
                if ( logop ) logOp( "i", ns, no, 0, 0, fromMigrate, &no );
                return UpdateResult( 0 , 0 , 1 , no );
            }
            // An operator upsert that found nothing falls through to the general path below.
        }
    }

    // ---- general cursor-driven path ----
    int numModded = 0;
    debug.nscanned = 0;
    shared_ptr<Cursor> c = getOptimizedCursor( ns, patternOrig, BSONObj(), planPolicy );
    // Re-read the namespace pointers after obtaining the cursor.
    d = nsdetails(ns);
    nsdt = &NamespaceDetailsTransient::get(ns);
    bool autoDedup = c->autoDedup();

    if( c->ok() ) {
        // Locations already updated (or moved to) during this multi-update; consulted below
        // so the same document is not modified twice.
        set<DiskLoc> seenObjects;
        MatchDetails details;
        // Created lazily, only once yielding becomes necessary.
        auto_ptr<ClientCursor> cc;
        do {

            // Page-fault exceptions are only raised while no ClientCursor exists yet.
            if ( cc.get() == 0 && client.allowedToThrowPageFaultException() && ! c->currLoc().isNull() && ! c->currLoc().rec()->likelyInPhysicalMemory() ) {
                throw PageFaultException( c->currLoc().rec() );
            }

            bool atomic = c->matcher() && c->matcher()->docMatcher().atomic();

            if ( ! atomic && debug.nscanned > 0 ) {
                // we need to use a ClientCursor to yield
                if ( cc.get() == 0 ) {
                    shared_ptr< Cursor > cPtr = c;
                    cc.reset( new ClientCursor( QueryOption_NoCursorTimeout , cPtr , ns ) );
                }

                bool didYield;
                if ( ! cc->yieldSometimes( ClientCursor::WillNeed, &didYield ) ) {
                    // Yield recovery failed: give up ownership of the ClientCursor without
                    // deleting it, and stop iterating.
                    cc.release();
                    break;
                }
                if ( !c->ok() ) {
                    break;
                }

                if ( didYield ) {
                    // Anything cached before the yield must be refreshed.
                    d = nsdetails(ns);
                    if ( ! d )
                        break;
                    nsdt = &NamespaceDetailsTransient::get(ns);
                    if ( mods.get() ) {
                        mods->setIndexedStatus( nsdt->indexKeys() );
                        modsIsIndexed = mods->maxNumIndexUpdated();
                    }

                }

            } // end yielding block

            debug.nscanned++;

            if ( mods.get() && mods->hasDynamicArray() ) {
                details.requestElemMatchKey();
            }

            if ( !c->currentMatches( &details ) ) {
                c->advance();
                continue;
            }

            Record* r = c->_current();
            DiskLoc loc = c->currLoc();

            // Note getsetdup() is called unconditionally; the skip only happens when the
            // cursor reports autoDedup.
            if ( c->getsetdup( loc ) && autoDedup ) {
                c->advance();
                continue;
            }

            BSONObj js = BSONObj::make(r);

            // Pattern used in the oplog entry: narrowed to the document's _id when logging.
            BSONObj pattern = patternOrig;

            if ( logop ) {
                BSONObjBuilder idPattern;
                BSONElement id;
                // NOTE: If the matching object lacks an id, we'll log
                // with the original pattern.  This isn't replay-safe.
                // It might make sense to suppress the log instead
                // if there's no id.
                if ( js.getObjectID( id ) ) {
                    idPattern.append( id );
                    pattern = idPattern.obj();
                }
                else {
                    uassert( 10157 , "multi-update requires all modified objects to have an _id" , ! multi );
                }
            }

            /* look for $inc etc.  note as listed here, all fields to inc must be this type, you can't set some
               regular ones at the moment. */
            if ( isOperatorUpdate ) {

                if ( multi ) {
                    // go to next record in case this one moves
                    c->advance();

                    // Update operations are deduped for cursors that implement their own
                    // deduplication.  In particular, some geo cursors are excluded.
                    if ( autoDedup ) {

                        if ( seenObjects.count( loc ) ) {
                            continue;
                        }

                        // SERVER-5198 Advance past the document to be modified, provided
                        // deduplication is enabled, but see SERVER-5725.
                        while( c->ok() && loc == c->currLoc() ) {
                            c->advance();
                        }
                    }
                }

                const BSONObj& onDisk = loc.obj();

                ModSet* useMods = mods.get();

                // If a dynamic-array ModSet is produced below, mymodset takes ownership of it
                // so it is freed when this iteration's scope ends.
                auto_ptr<ModSet> mymodset;
                if ( details.hasElemMatchKey() && mods->hasDynamicArray() ) {
                    useMods = mods->fixDynamicArray( details.elemMatchKey() );
                    mymodset.reset( useMods );
                }

                auto_ptr<ModSetState> mss = useMods->prepare( onDisk, false /* not an insertion */ );

                // True when this update is not an in-place one (or touches indexes) while the
                // cursor still has documents to visit; the cursor is then bracketed with
                // prepareToTouchEarlierIterate() / recoverFromTouchingEarlierIterate().
                bool willAdvanceCursor = multi && c->ok() && ( modsIsIndexed || ! mss->canApplyInPlace() );

                if ( willAdvanceCursor ) {
                    if ( cc.get() ) {
                        cc->setDoingDeletes( true );
                    }
                    c->prepareToTouchEarlierIterate();
                }

                // If we've made it this far, "ns" must contain a valid collection name, and so
                // is of the form "db.collection".  Therefore, the following expression must
                // always be valid.  "system.users" updates must never be done in place, in
                // order to ensure that they are validated inside DataFileMgr::updateRecord(.).
                bool isSystemUsersMod = (NamespaceString(ns).coll == "system.users");

                BSONObj newObj;
                if ( !mss->isUpdateIndexed() && mss->canApplyInPlace() && !isSystemUsersMod ) {
                    mss->applyModsInPlace( true );// const_cast<BSONObj&>(onDisk) );

                    DEBUGUPDATE( "\t\t\t doing in place update" );
                    if ( !multi )
                        debug.fastmod = true;

                    if ( modsIsIndexed ) {
                        seenObjects.insert( loc );
                    }
                    // Re-read the record so newObj reflects the mods just applied in place.
                    newObj = loc.obj();
                    d->paddingFits();
                }
                else {
                    newObj = mss->createNewFromMods();
                    checkTooLarge(newObj);
                    DiskLoc newLoc = theDataFileMgr.updateRecord(ns, d, nsdt, r, loc, newObj.objdata(), newObj.objsize(), debug);

                    if ( newLoc != loc || modsIsIndexed ){
                        // log() << "Moved obj " << newLoc.obj()["_id"] << " from " << loc << " to " << newLoc << endl;
                        // object moved, need to make sure we don't get it again
                        seenObjects.insert( newLoc );
                    }

                }

                if ( logop ) {
                    DEV verify( mods->size() );
                    BSONObj logObj = mss->getOpLogRewrite();
                    DEBUGUPDATE( "\t rewrite update: " << logObj );

                    // It is possible that the entire mod set was a no-op over this
                    // document.  We would have an empty log record in that case.  If we
                    // call logOp, with an empty record, that would be replicated as "clear
                    // this record", which is not what we want.  Therefore, to get a no-op
                    // in the replica, we simply don't log.
                    if ( logObj.nFields() ) {
                        logOp("u", ns, logObj , &pattern, 0, fromMigrate, &newObj );
                    }
                }
                numModded++;
                // A single (non-multi) operator update is finished after its first match.
                if ( ! multi )
                    return UpdateResult( 1 , 1 , numModded , BSONObj() );
                if ( willAdvanceCursor )
                    c->recoverFromTouchingEarlierIterate();

                getDur().commitIfNeeded();

                continue;
            }

            // ---- replacement-style (non-operator) update of the first match ----
            uassert( 10158 , "multi update only works with $ operators" , ! multi );

            BSONElementManipulator::lookForTimestamps( updateobj );
            checkNoMods( updateobj );
            theDataFileMgr.updateRecord(ns, d, nsdt, r, loc , updateobj.objdata(), updateobj.objsize(), debug, su);
            if ( logop ) {
                DEV wassert( !su ); // super user doesn't get logged, this would be bad.
                logOp("u", ns, updateobj, &pattern, 0, fromMigrate, &updateobj );
            }
            return UpdateResult( 1 , 0 , 1 , BSONObj() );
        }
        while ( c->ok() );
    } // endif

    // Reached when the cursor is exhausted (multi updates) or nothing matched.
    if ( numModded )
        return UpdateResult( 1 , 1 , numModded , BSONObj() );

    // ---- upsert: nothing matched, so insert ----
    if ( upsert ) {
        if ( updateobj.firstElementFieldName()[0] == '$' ) {
            // upsert of an $operation. build a default object
            BSONObj newObj = mods->createNewFromQuery( patternOrig );
            checkNoMods( newObj );
            debug.fastmodinsert = true;
            theDataFileMgr.insertWithObjMod(ns, newObj, false, su);
            if ( logop )
                logOp( "i", ns, newObj, 0, 0, fromMigrate, &newObj );

            return UpdateResult( 0 , 1 , 1 , newObj );
        }
        uassert( 10159 , "multi update only works with $ operators" , ! multi );
        checkNoMods( updateobj );
        debug.upsert = true;
        BSONObj no = updateobj;
        theDataFileMgr.insertWithObjMod(ns, no, false, su);
        if ( logop )
            logOp( "i", ns, no, 0, 0, fromMigrate, &no );
        return UpdateResult( 0 , 0 , 1 , no );
    }

    // No match and no upsert.
    return UpdateResult( 0 , isOperatorUpdate , 0 , BSONObj() );
}
void DataFileMgr::insertAndLog( const char *ns, const BSONObj &o, bool god, bool fromMigrate ) { BSONObj tmp = o; insertWithObjMod( ns, tmp, false, god ); logOp( "i", ns, tmp, 0, 0, fromMigrate ); }
/**
 * Update the single document selected by an _id query, looking it up directly through the
 * _id index.
 *
 * note: this is only (as-is) called for
 *   - not multi
 *   - not mods is indexed
 *   - not upsert
 *
 *  isOperatorUpdate: true for a $-operator update (uses `mods`), false for a replacement
 *  idIdxNo:          index number of the _id index in `d`
 *  mods:             the parsed $-operators; only dereferenced when isOperatorUpdate
 *  d, nsdt:          namespace details for `ns`; nsdt is verify()'d before updateRecord()
 *  updateobj:        the replacement document for a non-operator update
 *  patternOrig:      the _id query; also used as the oplog pattern
 *  logop:            write a "u" oplog entry
 *  debug:            fastmod is set when the mods are applied in place
 *  fromMigrate:      forwarded to logOp()
 *
 * NOTE(review): `su` is accepted but never used in this body, whereas the replacement path
 * in _updateObjects() forwards it to updateRecord() -- confirm this difference is intended.
 *
 * Throws PageFaultException if the client allows it and the record is not likely in
 * physical memory.
 */
static UpdateResult _updateById(bool isOperatorUpdate, int idIdxNo, ModSet* mods, NamespaceDetails* d, NamespaceDetailsTransient *nsdt, bool su, const char* ns, const BSONObj& updateobj, BSONObj patternOrig, bool logop, OpDebug& debug, bool fromMigrate = false) {

    // Locate the target through the _id index.
    IndexDetails& idIndex = d->idx(idIdxNo);
    BSONObj idKey = idIndex.getKeyFromQuery( patternOrig );
    DiskLoc loc = QueryRunner::fastFindSingle(idIndex, idKey);
    if ( loc.isNull() ) {
        // no upsert support in _updateById yet, so we are done.
        return UpdateResult( 0 , 0 , 0 , BSONObj() );
    }

    Record* rec = loc.rec();
    if ( cc().allowedToThrowPageFaultException() && ! rec->likelyInPhysicalMemory() ) {
        throw PageFaultException( rec );
    }

    if ( ! isOperatorUpdate ) {
        // regular update: the stored document is replaced by updateobj
        BSONElementManipulator::lookForTimestamps( updateobj );
        checkNoMods( updateobj );
        verify(nsdt);
        theDataFileMgr.updateRecord(ns, d, nsdt, rec, loc , updateobj.objdata(), updateobj.objsize(), debug );

        if ( logop ) {
            logOp("u", ns, updateobj, &patternOrig, 0, fromMigrate, &updateobj );
        }
        return UpdateResult( 1 , 0 , 1 , BSONObj() );
    }

    /* $operator update: look for $inc etc.  note as listed here, all fields to inc must be
       this type, you can't set some regular ones at the moment. */
    const BSONObj& onDisk = loc.obj();
    auto_ptr<ModSetState> modState = mods->prepare( onDisk, false /* not an insertion */ );

    BSONObj newObj;
    if ( ! modState->canApplyInPlace() ) {
        // Build the post-image and write it through the data file manager.
        newObj = modState->createNewFromMods();
        checkTooLarge(newObj);
        verify(nsdt);
        theDataFileMgr.updateRecord(ns, d, nsdt, rec, loc , newObj.objdata(), newObj.objsize(), debug);
    }
    else {
        modState->applyModsInPlace(true);
        debug.fastmod = true;
        DEBUGUPDATE( "\t\t\t updateById doing in place update" );
        newObj = onDisk;
    }

    if ( logop ) {
        DEV verify( mods->size() );
        BSONObj idPattern = patternOrig;
        BSONObj oplogUpdate = modState->getOpLogRewrite();
        DEBUGUPDATE( "\t rewrite update: " << oplogUpdate );

        // It is possible that the entire mod set was a no-op over this document.  We
        // would have an empty log record in that case.  If we call logOp with an empty
        // record, that would be replicated as "clear this record", which is not what
        // we want.  Therefore, to get a no-op in the replica, we simply don't log.
        if ( oplogUpdate.nFields() ) {
            logOp("u", ns, oplogUpdate, &idPattern, 0, fromMigrate, &newObj );
        }
    }
    return UpdateResult( 1 , 1 , 1 , BSONObj() );
}