bool Sync::shouldRetry(const BSONObj& o) { // should already have write lock const char *ns = o.getStringField("ns"); Client::Context ctx(ns); OperationContextImpl txn; // we don't have the object yet, which is possible on initial sync. get it. log() << "replication info adding missing object" << endl; // rare enough we can log BSONObj missingObj = getMissingDoc(ctx.db(), o); if( missingObj.isEmpty() ) { log() << "replication missing object not found on source. presumably deleted later in oplog" << endl; log() << "replication o2: " << o.getObjectField("o2").toString() << endl; log() << "replication o firstfield: " << o.getObjectField("o").firstElementFieldName() << endl; return false; } else { Collection* collection = ctx.db()->getOrCreateCollection( ns ); verify( collection ); // should never happen StatusWith<DiskLoc> result = collection->insertDocument( &txn, missingObj, true ); uassert(15917, str::stream() << "failed to insert missing doc: " << result.toString(), result.isOK() ); LOG(1) << "replication inserted missing doc: " << missingObj.toString() << endl; return true; } }
bool Sync::shouldRetry(OperationContext* txn, const BSONObj& o) {
    const NamespaceString nss(o.getStringField("ns"));

    // Take an X lock on the database in order to preclude other
    // modifications. Also, the database might not exist yet, so create it.
    AutoGetOrCreateDb autoDb(txn, nss.db(), MODE_X);
    Database* const db = autoDb.getDb();

    // We don't have the object locally yet, which is possible on initial
    // sync — fetch it from the sync source.
    log() << "replication info adding missing object" << endl; // rare enough we can log
    const BSONObj fetchedDoc = getMissingDoc(txn, db, o);

    if (fetchedDoc.isEmpty()) {
        // The source no longer has the document; presumably a later oplog
        // entry deleted it, so there is nothing to retry against.
        log() << "replication missing object not found on source. presumably deleted later in oplog" << endl;
        log() << "replication o2: " << o.getObjectField("o2").toString() << endl;
        log() << "replication o firstfield: " << o.getObjectField("o").firstElementFieldName() << endl;
        return false;
    }

    // Insert the fetched document inside a unit of work so the write is
    // committed atomically.
    WriteUnitOfWork wunit(txn);
    Collection* const coll = db->getOrCreateCollection(txn, nss.toString());
    invariant(coll);

    StatusWith<RecordId> insertResult = coll->insertDocument(txn, fetchedDoc, true);
    uassert(15917,
            str::stream() << "failed to insert missing doc: " << insertResult.toString(),
            insertResult.isOK());

    LOG(1) << "replication inserted missing doc: " << fetchedDoc.toString() << endl;
    wunit.commit();
    return true;
}
// Main loop of the balancer background thread: repeatedly pings the config
// server, checks whether balancing is enabled, acquires the distributed
// balancer lock, and moves candidate chunks. Runs until shutdown.
void Balancer::run() {
    // this is the body of a BackgroundJob so if we throw here we're basically ending the balancer thread prematurely
    while ( ! inShutdown() ) {
        // Keep retrying initialization every minute until it succeeds.
        if ( ! _init() ) {
            log() << "will retry to initialize balancer in one minute" << endl;
            sleepsecs( 60 );
            continue;
        }
        break;
    }

    // Seconds to sleep between rounds (shortened after a productive round, below).
    int sleepTime = 10;

    // getConnectionString and the dist lock constructor do not throw, which is
    // what we expect while on the balancer thread
    ConnectionString config = configServer.getConnectionString();
    DistributedLock balanceLock( config , "balancer" );

    while ( ! inShutdown() ) {
        try {
            ScopedDbConnection conn(config.toString(), 30);

            // ping has to be first so we keep things in the config server in sync
            _ping();

            // use fresh shard state
            Shard::reloadShardInfo();

            // refresh chunk size (even though another balancer might be active)
            Chunk::refreshChunkSize();

            // Load the balancer settings document; bail out of the thread
            // entirely if it cannot be read.
            SettingsType balancerConfig;
            string errMsg;
            if (!grid.getBalancerSettings(&balancerConfig, &errMsg)) {
                warning() << errMsg;
                return ;
            }

            // now make sure we should even be running
            if ((balancerConfig.isKeySet() && // balancer config doc exists
                    !grid.shouldBalance(balancerConfig)) ||
                    MONGO_FAIL_POINT(skipBalanceRound)) {
                LOG(1) << "skipping balancing round because balancing is disabled" << endl;

                // Ping again so scripts can determine if we're active without waiting
                _ping( true );

                conn.done();
                sleepsecs( sleepTime );
                continue;
            }

            uassert( 13258 , "oids broken after resetting!" , _checkOIDs() );

            {
                // Scope of the distributed balancer lock: only one balancer
                // may do a round at a time across the cluster.
                dist_lock_try lk( &balanceLock , "doing balance round" );
                if ( ! lk.got() ) {
                    LOG(1) << "skipping balancing round because another balancer is active" << endl;

                    // Ping again so scripts can determine if we're active without waiting
                    _ping( true );

                    conn.done();
                    sleepsecs( sleepTime ); // no need to wake up soon
                    continue;
                }

                // Skip the round rather than balance on top of inconsistent
                // config server data.
                if ( !isConfigServerConsistent() ) {
                    conn.done();
                    warning() << "Skipping balancing round because data inconsistency"
                              << " was detected amongst the config servers."
                              << endl;
                    sleepsecs( sleepTime );
                    continue;
                }

                // Default to not waiting for migration deletes when the
                // setting is absent.
                const bool waitForDelete = (balancerConfig.isWaitForDeleteSet() ?
                        balancerConfig.getWaitForDelete() : false);

                // Optional write concern for migrations, taken from the
                // balancer settings doc when present.
                scoped_ptr<WriteConcernOptions> writeConcern;
                if (balancerConfig.isKeySet()) { // if balancer doc exists.
                    StatusWith<WriteConcernOptions*> extractStatus =
                            balancerConfig.extractWriteConcern();
                    if (extractStatus.isOK()) {
                        writeConcern.reset(extractStatus.getValue());
                    }
                    else {
                        // Bad write concern in the doc: warn and fall back to default.
                        warning() << extractStatus.toString();
                    }
                }

                LOG(1) << "*** start balancing round. "
                       << "waitForDelete: " << waitForDelete
                       << ", secondaryThrottle: "
                       << (writeConcern.get() ? writeConcern->toBSON().toString() : "default")
                       << endl;

                // Compute which chunks (if any) should move this round.
                vector<CandidateChunkPtr> candidateChunks;
                _doBalanceRound( conn.conn() , &candidateChunks );

                if ( candidateChunks.size() == 0 ) {
                    LOG(1) << "no need to move any chunk" << endl;
                    _balancedLastTime = 0;
                }
                else {
                    // Remember how many chunks moved so the next sleep can be shortened.
                    _balancedLastTime = _moveChunks(&candidateChunks, writeConcern.get(), waitForDelete );
                }

                LOG(1) << "*** end of balancing round" << endl;
            }

            // Ping again so scripts can determine if we're active without waiting
            _ping( true );

            conn.done();

            // Sleep less when the last round actually moved chunks — there is
            // likely more work to do.
            sleepsecs( _balancedLastTime ? sleepTime / 10 : sleepTime );
        }
        catch ( std::exception& e ) {
            log() << "caught exception while doing balance: " << e.what() << endl;

            // Just to match the opening statement if in log level 1
            LOG(1) << "*** End of balancing round" << endl;

            sleepsecs( sleepTime ); // sleep a fair amount b/c of error
            continue;
        }
    }
}
// Consume one batch of documents from the source cursor and insert them into
// the destination collection (or, for system.indexes, queue index specs for a
// later build). Runs under the global write lock; periodically yields.
// NOTE(review): relies on members of the enclosing functor (context, numSeen,
// lastLog, isindex, txn, to_collection, from_collection, indexesToBuild,
// logForRepl, saveLast, _mayYield, _mayBeInterrupted) declared outside this view.
void operator()( DBClientCursorBatchIterator &i ) {
    Lock::GlobalWrite lk;
    // Re-validate the cached context after (re)acquiring the lock.
    context.relocked();

    bool createdCollection = false;
    Collection* collection = NULL;

    while( i.moreInCurrentBatch() ) {
        // Every 128 documents, consider yielding the lock and reporting progress.
        if ( numSeen % 128 == 127 /*yield some*/ ) {
            // The Collection* may be invalidated across a yield — force a re-lookup below.
            collection = NULL;
            time_t now = time(0);
            if( now - lastLog >= 60 ) {
                // report progress
                if( lastLog )
                    log() << "clone " << to_collection << ' ' << numSeen << endl;
                lastLog = now;
            }
            mayInterrupt( _mayBeInterrupted );
            // Temporarily releases the lock for the rest of this iteration if yielding is allowed.
            dbtempreleaseif t( _mayYield );
        }

        // Look up (or create) the destination collection; skipped for index
        // cloning, which only collects specs.
        if ( isindex == false && collection == NULL ) {
            collection = context.db()->getCollection( to_collection );
            if ( !collection ) {
                // If we created it earlier and it is now gone, someone dropped
                // it mid-clone — abort rather than silently recreating.
                massert( 17321,
                         str::stream() << "collection dropped during clone [" << to_collection << "]",
                         !createdCollection );
                createdCollection = true;
                collection = context.db()->createCollection( txn, to_collection );
                verify( collection );
            }
        }

        BSONObj tmp = i.nextSafe();

        /* assure object is valid.  note this will slow us down a little. */
        const Status status = validateBSON(tmp.objdata(), tmp.objsize());
        if (!status.isOK()) {
            // Corrupt source document: log and skip it rather than failing the clone.
            out() << "Cloner: skipping corrupt object from " << from_collection
                  << ": " << status.reason();
            continue;
        }

        ++numSeen;

        BSONObj js = tmp;
        if ( isindex ) {
            // Index-spec mode: rewrite the spec for the destination db and
            // queue it; actual index builds happen elsewhere.
            verify(nsToCollectionSubstring(from_collection) == "system.indexes");
            js = fixindex(context.db()->name(), tmp);
            indexesToBuild->push_back( js.getOwned() );
            continue;
        }

        verify(nsToCollectionSubstring(from_collection) != "system.indexes");

        StatusWith<DiskLoc> loc = collection->insertDocument( txn, js, true );
        if ( !loc.isOK() ) {
            error() << "error: exception cloning object in " << from_collection
                    << ' ' << loc.toString() << " obj:" << js;
        }
        // Throws (aborting the clone) if the insert failed.
        uassertStatusOK( loc.getStatus() );
        if ( logForRepl )
            logOp(txn, "i", to_collection, js);

        // Let the journal flush if it needs to.
        getDur().commitIfNeeded();

        // Occasionally report cumulative progress (at most about once a minute).
        RARELY if ( time( 0 ) - saveLast > 60 ) {
            log() << numSeen << " objects cloned so far from collection " << from_collection;
            saveLast = time( 0 );
        }
    }
}