virtual BSONObj generateSection( const BSONElement& configElement ) const { BSONObjBuilder t; t.append( "totalTime" , (long long)(1000 * ( curTimeMillis64() - _started ) ) ); t.append( "lockTime" , Lock::globalLockStat()->getTimeLocked( 'W' ) ); { BSONObjBuilder ttt( t.subobjStart( "currentQueue" ) ); int w=0, r=0; Client::recommendedYieldMicros( &w , &r, true ); ttt.append( "total" , w + r ); ttt.append( "readers" , r ); ttt.append( "writers" , w ); ttt.done(); } { BSONObjBuilder ttt( t.subobjStart( "activeClients" ) ); int w=0, r=0; Client::getActiveClientCount( w , r ); ttt.append( "total" , w + r ); ttt.append( "readers" , r ); ttt.append( "writers" , w ); ttt.done(); } return t.obj(); }
// This function is meant to only be called on a primary,
// it assumes that we are fully up to date and are the ones
// getting GTIDs for transactions that will be applying
// new data to the replica set.
//
// returns a GTID that is an increment of _lastGTID
// also notes that GTID has been handed out
void GTIDManager::getGTIDForPrimary(GTID* gtid, uint64_t* timestamp, uint64_t* hash) {
    // it is ok for this to be racy. It is used for heuristic purposes
    *timestamp = curTimeMillis64();

    boost::unique_lock<boost::mutex> lock(_lock);
    dassert(GTID::cmp(_lastLiveGTID, _lastUnappliedGTID) == 0);
    if (_incPrimary) {
        _incPrimary = false;
        _lastLiveGTID.inc_primary();
    }
    else {
        _lastLiveGTID.inc();
    }

    if (_liveGTIDs.size() == 0) {
        _minLiveGTID = _lastLiveGTID;
    }
    _lastUnappliedGTID = _lastLiveGTID;
    *gtid = _lastLiveGTID;
    _liveGTIDs.insert(*gtid);
    _lastTimestamp = *timestamp;
    *hash = (_lastHash * 131 + *timestamp) * 17 + _selfID;
    _lastHash = *hash;
}
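// Illustrative sketch (not part of the original source): the rolling hash above mixes the
// previous hash, the wall-clock timestamp, and a node-local _selfID so that two nodes handing
// out the same GTID are very unlikely to produce the same hash. A minimal, self-contained
// version of that update; the selfID value here is made up.
#include <cstdint>
#include <iostream>

uint64_t nextOplogHash(uint64_t prevHash, uint64_t timestampMillis, uint64_t selfID) {
    // same mixing scheme as GTIDManager::getGTIDForPrimary
    return (prevHash * 131 + timestampMillis) * 17 + selfID;
}

int main() {
    uint64_t hash = 0;
    const uint64_t selfID = 42;  // hypothetical member id
    for (uint64_t ts = 1000; ts <= 3000; ts += 1000) {
        hash = nextOplogHash(hash, ts, selfID);
        std::cout << "ts=" << ts << " hash=" << hash << "\n";
    }
    return 0;
}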
void logStartup() { BSONObjBuilder toLog; stringstream id; id << getHostNameCached() << "-" << jsTime(); toLog.append( "_id", id.str() ); toLog.append( "hostname", getHostNameCached() ); toLog.appendTimeT( "startTime", time(0) ); toLog.append( "startTimeLocal", dateToCtimeString(curTimeMillis64()) ); toLog.append("cmdLine", serverGlobalParams.parsedOpts); toLog.append( "pid", ProcessId::getCurrent().asLongLong() ); BSONObjBuilder buildinfo( toLog.subobjStart("buildinfo")); appendBuildInfo(buildinfo); buildinfo.doneFast(); BSONObj o = toLog.obj(); Lock::GlobalWrite lk; DBDirectClient c; const char* name = "local.startup_log"; c.createCollection( name, 10 * 1024 * 1024, true ); c.insert( name, o); }
void UserCacheInvalidator::run() {
    Client::initThread("UserCacheInvalidatorThread");
    lastInvalidationTime = Date_t(curTimeMillis64());

    while (true) {
        boost::unique_lock<boost::mutex> lock(invalidationIntervalMutex);
        Date_t sleepUntil = Date_t(
                lastInvalidationTime.millis + userCacheInvalidationIntervalSecs * 1000);
        Date_t now(curTimeMillis64());
        while (now.millis < sleepUntil.millis) {
            invalidationIntervalChangedCondition.timed_wait(lock, Milliseconds(sleepUntil - now));
            sleepUntil = Date_t(
                    lastInvalidationTime.millis + (userCacheInvalidationIntervalSecs * 1000));
            now = Date_t(curTimeMillis64());
        }
        lastInvalidationTime = now;
        lock.unlock();

        if (inShutdown()) {
            break;
        }

        StatusWith<OID> currentGeneration = getCurrentCacheGeneration();
        if (!currentGeneration.isOK()) {
            if (currentGeneration.getStatus().code() == ErrorCodes::CommandNotFound) {
                warning() << "_getUserCacheGeneration command not found on config server(s), "
                        "this most likely means you are running an outdated version of mongod "
                        "on the config servers" << std::endl;
            } else {
                warning() << "An error occurred while fetching current user cache generation "
                        "to check if user cache needs invalidation: " <<
                        currentGeneration.getStatus() << std::endl;
            }
            // When in doubt, invalidate the cache
            _authzManager->invalidateUserCache();
            // the generation is unknown, so don't fall through and call getValue() below
            continue;
        }

        if (currentGeneration.getValue() != _previousCacheGeneration) {
            log() << "User cache generation changed from " << _previousCacheGeneration <<
                    " to " << currentGeneration.getValue() << "; invalidating user cache" <<
                    std::endl;
            _authzManager->invalidateUserCache();
            _previousCacheGeneration = currentGeneration.getValue();
        }
    }
}
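// Illustrative sketch (an assumption, not the MongoDB implementation) of the wait pattern used
// above: sleep until a deadline derived from a configurable interval, and recompute the deadline
// whenever the interval-changed condition variable fires. All names here are hypothetical.
#include <chrono>
#include <condition_variable>
#include <mutex>

std::mutex intervalMutex;
std::condition_variable intervalChanged;
int invalidationIntervalSecs = 30;  // hypothetical setting, may be changed by another thread

void waitForNextInvalidation(std::chrono::steady_clock::time_point lastRun) {
    std::unique_lock<std::mutex> lock(intervalMutex);
    auto deadline = lastRun + std::chrono::seconds(invalidationIntervalSecs);
    while (std::chrono::steady_clock::now() < deadline) {
        // wait_until returns early if intervalChanged is notified; recompute the deadline
        // so a shortened interval takes effect immediately.
        intervalChanged.wait_until(lock, deadline);
        deadline = lastRun + std::chrono::seconds(invalidationIntervalSecs);
    }
}

int main() {
    // returns immediately because the computed deadline is already in the past
    waitForNextInvalidation(std::chrono::steady_clock::now() - std::chrono::seconds(60));
    return 0;
}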
Status SyncSourceFeedback::updateUpstream(OperationContext* txn) { ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator(); if (replCoord->getMemberState().primary()) { // primary has no one to update to return Status::OK(); } BSONObjBuilder cmd; { boost::unique_lock<boost::mutex> lock(_mtx); if (_handshakeNeeded) { // Don't send updates if there are nodes that haven't yet been handshaked return Status(ErrorCodes::NodeNotFound, "Need to send handshake before updating position upstream"); } // the command could not be created, likely because the node was removed from the set if (!replCoord->prepareReplSetUpdatePositionCommand(&cmd)) { return Status::OK(); } } BSONObj res; LOG(2) << "Sending slave oplog progress to upstream updater: " << cmd.done(); try { _connection->runCommand("admin", cmd.obj(), res); } catch (const DBException& e) { log() << "SyncSourceFeedback error sending update: " << e.what() << endl; // blacklist sync target for .5 seconds and find a new one replCoord->blacklistSyncSource(_syncTarget, Date_t(curTimeMillis64() + 500)); BackgroundSync::get()->clearSyncTarget(); _resetConnection(); return e.toStatus(); } Status status = Command::getStatusFromCommandResult(res); if (!status.isOK()) { log() << "SyncSourceFeedback error sending update, response: " << res.toString() <<endl; // blacklist sync target for .5 seconds and find a new one replCoord->blacklistSyncSource(_syncTarget, Date_t(curTimeMillis64() + 500)); BackgroundSync::get()->clearSyncTarget(); _resetConnection(); } return status; }
HybridReplicationCoordinator::HybridReplicationCoordinator(const ReplSettings& settings) : _legacy(settings), _impl(settings, new ReplicationCoordinatorExternalStateImpl, new NetworkInterfaceImpl, new TopologyCoordinatorImpl(Seconds(maxSyncSourceLagSecs)), static_cast<int64_t>(curTimeMillis64())) { getGlobalEnvironment()->registerKillOpListener(&_impl); }
bool opForSlaveTooOld(uint64_t ts) { const uint64_t expireMillis = expireOplogMilliseconds(); if (ts && expireMillis) { const uint64_t minTime = curTimeMillis64() - expireMillis; if (ts < minTime) { return true; } } return false; }
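// Illustrative, self-contained restatement (not the original source) of the staleness test
// above: an op is too old for a slave when the oplog has a finite expiration window and the
// op's timestamp falls before now minus that window.
#include <cassert>
#include <cstdint>

bool opTooOld(uint64_t opMillis, uint64_t nowMillis, uint64_t expireWindowMillis) {
    if (opMillis == 0 || expireWindowMillis == 0) {
        return false;  // unset timestamp, or no expiration configured
    }
    return opMillis < nowMillis - expireWindowMillis;
}

int main() {
    const uint64_t now = 1000000;
    assert(!opTooOld(999000, now, 5000));  // inside the 5 second window
    assert(opTooOld(990000, now, 5000));   // older than the window
    assert(!opTooOld(0, now, 5000));       // unset timestamp never expires
    return 0;
}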
void BackgroundSync::handleSlaveDelay(uint64_t opTimestamp) {
    dassert(_opSyncRunning);
    uint64_t slaveDelayMillis = theReplSet->myConfig().slaveDelay * 1000;
    uint64_t currTime = curTimeMillis64();
    uint64_t timeOpShouldBeApplied = opTimestamp + slaveDelayMillis;
    while (currTime < timeOpShouldBeApplied) {
        uint64_t sleepTime = (timeOpShouldBeApplied - currTime);
        // let's sleep for at most one second
        sleepmillis((sleepTime < 1000) ? sleepTime : 1000);
        // check if we should bail out, as we don't want to
        // sleep the whole time possibly long delay time
        // if we see we should be stopping
        {
            boost::unique_lock<boost::mutex> lck(_mutex);
            if (!_opSyncShouldRun) {
                break;
            }
        }
        // reset currTime
        currTime = curTimeMillis64();
    }
}
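// Illustrative sketch (not the original source): the same "sleep in at most one-second slices
// and re-check a stop flag" pattern, written against std::chrono so it is self-contained.
#include <algorithm>
#include <atomic>
#include <chrono>
#include <thread>

std::atomic<bool> shouldRun{true};  // hypothetical stop flag, cleared by another thread

void sleepUntilWithStopCheck(std::chrono::steady_clock::time_point applyAt) {
    using namespace std::chrono;
    auto now = steady_clock::now();
    while (now < applyAt && shouldRun.load()) {
        auto remaining = duration_cast<milliseconds>(applyAt - now);
        // never sleep more than one second at a time, so a stop request is seen promptly
        std::this_thread::sleep_for(std::min(remaining, milliseconds(1000)));
        now = steady_clock::now();
    }
}

int main() {
    // waits roughly 50 ms, checking the stop flag between slices
    sleepUntilWithStopCheck(std::chrono::steady_clock::now() + std::chrono::milliseconds(50));
    return 0;
}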
long long ClientCursor::allocCursorId_inlock() { long long ctm = curTimeMillis64(); dassert( ctm ); long long x; while ( 1 ) { x = (((long long)rand()) << 32); x = x ^ ctm; if ( ctm != ctmLast || ClientCursor::find_inlock(x, false) == 0 ) break; } ctmLast = ctm; return x; }
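// Illustrative sketch (not the original source): cursor ids above are built from a random high
// 32 bits XOR'ed with the current millisecond clock, retrying while the candidate could collide
// with an id handed out during the same millisecond. cursorExists() is a hypothetical stand-in
// for ClientCursor::find_inlock.
#include <cstdint>
#include <cstdlib>

bool cursorExists(long long /*id*/) {
    return false;  // stub; the real code consults the open-cursor map
}

long long allocCursorId(uint64_t nowMillis, uint64_t& lastAllocMillis) {
    long long x;
    while (true) {
        x = ((long long)rand()) << 32;   // random high bits
        x ^= (long long)nowMillis;       // mix in the clock
        if (nowMillis != lastAllocMillis || !cursorExists(x)) {
            break;                       // new millisecond, or no collision with a live cursor
        }
    }
    lastAllocMillis = nowMillis;
    return x;
}

int main() {
    uint64_t lastMillis = 0;
    long long id = allocCursorId(1234567, lastMillis);
    (void)id;
    return 0;
}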
bool SyncSourceFeedback::replHandshake(OperationContext* txn) { ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator(); if (replCoord->getCurrentMemberState().primary()) { // primary has no one to handshake to return true; } // construct a vector of handshake obj for us as well as all chained members std::vector<BSONObj> handshakeObjs; replCoord->prepareReplSetUpdatePositionCommandHandshakes(txn, &handshakeObjs); LOG(1) << "handshaking upstream updater"; for (std::vector<BSONObj>::iterator it = handshakeObjs.begin(); it != handshakeObjs.end(); ++it) { BSONObj res; try { LOG(2) << "Sending to " << _connection.get()->toString() << " the replication " "handshake: " << *it; if (!_connection->runCommand("admin", *it, res)) { std::string errMsg = res["errmsg"].valuestrsafe(); massert(17447, "upstream updater is not supported by the member from which we" " are syncing, please update all nodes to 2.6 or later.", errMsg.find("no such cmd") == std::string::npos); log() << "replSet error while handshaking the upstream updater: " << errMsg; // sleep half a second if we are not in our sync source's config // TODO(dannenberg) after 2.8, remove the string comparison if (res["code"].numberInt() == ErrorCodes::NodeNotFound || errMsg.find("could not be found in replica set config while attempting " "to associate it with") != std::string::npos) { // black list sync target for 10 seconds and find a new one replCoord->blacklistSyncSource(_syncTarget, Date_t(curTimeMillis64() + 10*1000)); BackgroundSync::get()->clearSyncTarget(); } _resetConnection(); return false; } } catch (const DBException& e) { log() << "SyncSourceFeedback error sending handshake: " << e.what() << endl; _resetConnection(); return false; } } return true; }
Status ReplicationCoordinatorImpl::processReplSetFreeze(int secs, BSONObjBuilder* resultObj) { Status result(ErrorCodes::InternalError, "didn't set status in prepareFreezeResponse"); CBHStatus cbh = _replExecutor->scheduleWork( stdx::bind(&TopologyCoordinator::prepareFreezeResponse, _topCoord.get(), stdx::placeholders::_1, Date_t(curTimeMillis64()), secs, resultObj, &result)); if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) { return Status(ErrorCodes::ShutdownInProgress, "replication shutdown in progress"); } fassert(18641, cbh.getStatus()); _replExecutor->wait(cbh.getValue()); return result; }
Status ReplicationCoordinatorImpl::processHeartbeat(const BSONObj& cmdObj, BSONObjBuilder* resultObj) { Status result(ErrorCodes::InternalError, "didn't set status in prepareHeartbeatResponse"); StatusWith<ReplicationExecutor::CallbackHandle> cbh = _replExecutor->scheduleWork( stdx::bind(&TopologyCoordinator::prepareHeartbeatResponse, _topCoord.get(), stdx::placeholders::_1, Date_t(curTimeMillis64()), cmdObj, resultObj, &result)); if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) { return Status(ErrorCodes::ShutdownInProgress, "replication shutdown in progress"); } fassert(18508, cbh.getStatus()); _replExecutor->wait(cbh.getValue()); return result; }
Status ReplicationCoordinatorImpl::processReplSetGetStatus(BSONObjBuilder* response) { Status result(ErrorCodes::InternalError, "didn't set status in prepareStatusResponse"); CBHStatus cbh = _replExecutor->scheduleWork( stdx::bind(&TopologyCoordinator::prepareStatusResponse, _topCoord.get(), stdx::placeholders::_1, Date_t(curTimeMillis64()), time(0) - serverGlobalParams.started, _getLastOpApplied(), response, &result)); if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) { return Status(ErrorCodes::ShutdownInProgress, "replication shutdown in progress"); } fassert(18640, cbh.getStatus()); _replExecutor->wait(cbh.getValue()); return result; }
Status ReplicationCoordinatorImpl::processHeartbeat(const ReplSetHeartbeatArgs& args, ReplSetHeartbeatResponse* response) { Status result(ErrorCodes::InternalError, "didn't set status in prepareHeartbeatResponse"); CBHStatus cbh = _replExecutor->scheduleWork( stdx::bind(&TopologyCoordinator::prepareHeartbeatResponse, _topCoord.get(), stdx::placeholders::_1, Date_t(curTimeMillis64()), args, _settings.ourSetName(), _getLastOpApplied(), response, &result)); if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) { return Status(ErrorCodes::ShutdownInProgress, "replication shutdown in progress"); } fassert(18508, cbh.getStatus()); _replExecutor->wait(cbh.getValue()); return result; }
LogstreamBuilder::~LogstreamBuilder() { if (_os) { if (!_baseMessage.empty()) _baseMessage.push_back(' '); _baseMessage += _os->str(); MessageEventEphemeral message( curTimeMillis64(), _severity, _component, _contextName, _baseMessage); _domain->append(message); if (_tee) { _os->str(""); logger::MessageEventDetailsEncoder teeEncoder; teeEncoder.encode(message, *_os); _tee->write(_os->str()); } _os->str(""); if (isThreadOstreamCacheInitialized && threadOstreamCache.getMake()->vector().empty()) { threadOstreamCache.get()->mutableVector().push_back(_os); } else { delete _os; } } }
virtual BSONObj generateSection( const BSONElement& configElement ) const {
    BSONObjBuilder t;

    t.append( "totalTime" , (long long)(1000 * ( curTimeMillis64() - _started ) ) );

    // SERVER-14978: Need to report the global lock statistics somehow
    //
    // t.append( "lockTime" , qlk.stats.getTimeLocked( 'W' ) );

    // This returns the blocked lock states
    {
        BSONObjBuilder ttt( t.subobjStart( "currentQueue" ) );

        LockerAggregator blocked(true);
        getGlobalEnvironment()->forEachOperationContext(&blocked);

        ttt.append("total", blocked.numReadLocked + blocked.numWriteLocked);
        ttt.append("readers", blocked.numReadLocked);
        ttt.append("writers", blocked.numWriteLocked);
        ttt.done();
    }

    // This returns all the active clients (including those holding locks)
    {
        BSONObjBuilder ttt( t.subobjStart( "activeClients" ) );

        LockerAggregator active(false);
        getGlobalEnvironment()->forEachOperationContext(&active);

        ttt.append("total", active.numReadLocked + active.numWriteLocked);
        ttt.append("readers", active.numReadLocked);
        ttt.append("writers", active.numWriteLocked);
        ttt.done();
    }

    return t.obj();
}
bool RangeDeleter::deleteNow(OperationContext* txn, const std::string& ns, const BSONObj& min, const BSONObj& max, const BSONObj& shardKeyPattern, bool secondaryThrottle, string* errMsg) { if (stopRequested()) { *errMsg = "deleter is already stopped."; return false; } string dummy; if (errMsg == NULL) errMsg = &dummy; NSMinMax deleteRange(ns, min, max); { scoped_lock sl(_queueMutex); if (!canEnqueue_inlock(ns, min, max, errMsg)) { return false; } _deleteSet.insert(&deleteRange); // Note: count for pending deletes is an integral part of the shutdown story. // Therefore, to simplify things, there is no "pending" state for deletes in // deleteNow, the state transition is simply inProgress -> done. _deletesInProgress++; } set<CursorId> cursorsToWait; _env->getCursorIds(txn, ns, &cursorsToWait); long long checkIntervalMillis = 5; if (!cursorsToWait.empty()) { log() << "rangeDeleter waiting for " << cursorsToWait.size() << " cursors in " << ns << " to finish" << endl; } RangeDeleteEntry taskDetails(ns, min, max, shardKeyPattern, secondaryThrottle); taskDetails.stats.queueStartTS = jsTime(); Date_t timeSinceLastLog; for (; !cursorsToWait.empty(); sleepmillis(checkIntervalMillis)) { const unsigned long long timeNow = curTimeMillis64(); const unsigned long long elapsedTimeMillis = timeNow - taskDetails.stats.queueStartTS.millis; const unsigned long long lastLogMillis = timeNow - timeSinceLastLog.millis; if (elapsedTimeMillis > LogCursorsThresholdMillis && lastLogMillis > LogCursorsIntervalMillis) { timeSinceLastLog = jsTime(); logCursorsWaiting(ns, min, max, elapsedTimeMillis, cursorsToWait); } set<CursorId> cursorsNow; _env->getCursorIds(txn, ns, &cursorsNow); set<CursorId> cursorsLeft; std::set_intersection(cursorsToWait.begin(), cursorsToWait.end(), cursorsNow.begin(), cursorsNow.end(), std::inserter(cursorsLeft, cursorsLeft.end())); cursorsToWait.swap(cursorsLeft); if (stopRequested()) { *errMsg = "deleter was stopped."; scoped_lock sl(_queueMutex); _deleteSet.erase(&deleteRange); _deletesInProgress--; if (_deletesInProgress == 0) { _nothingInProgressCV.notify_one(); } return false; } if (checkIntervalMillis < MaxCurorCheckIntervalMillis) { checkIntervalMillis *= 2; } } taskDetails.stats.queueEndTS = jsTime(); ReplTime lastOp; taskDetails.stats.deleteStartTS = jsTime(); bool result = _env->deleteRange(txn, taskDetails, &taskDetails.stats.deletedDocCount, &lastOp, errMsg); taskDetails.stats.deleteEndTS = jsTime(); if (result) { taskDetails.stats.waitForReplStartTS = jsTime(); result = _env->waitForReplication(lastOp, DelWriteConcern, WaitForReplTimeoutSecs, errMsg); taskDetails.stats.waitForReplEndTS = jsTime(); } { scoped_lock sl(_queueMutex); _deleteSet.erase(&deleteRange); _deletesInProgress--; if (_deletesInProgress == 0) { _nothingInProgressCV.notify_one(); } } recordDelStats(new DeleteJobStats(taskDetails.stats)); return result; }
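// Illustrative sketch (not the original source): deleteNow() above polls for outstanding
// cursors with a check interval that doubles on each pass, capped at a maximum, so a long wait
// backs off instead of spinning. The constants and predicate here are hypothetical.
#include <chrono>
#include <thread>

template <typename Pred>
void pollWithBackoff(Pred done) {
    using namespace std::chrono;
    milliseconds interval(5);             // start with a 5 ms check interval
    const milliseconds maxInterval(500);  // never wait longer than this between checks
    while (!done()) {
        std::this_thread::sleep_for(interval);
        if (interval < maxInterval) {
            interval *= 2;                // exponential backoff
        }
    }
}

int main() {
    int checksLeft = 3;
    // waits about 15 ms in total (5 ms + 10 ms) before the predicate reports done
    pollWithBackoff([&] { return --checksLeft <= 0; });
    return 0;
}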
void OplogReader::connectToSyncSource(OperationContext* txn, OpTime lastOpTimeFetched, ReplicationCoordinator* replCoord) { const OpTime sentinel(Milliseconds(curTimeMillis64()).total_seconds(), 0); OpTime oldestOpTimeSeen = sentinel; invariant(conn() == NULL); while (true) { HostAndPort candidate = replCoord->chooseNewSyncSource(lastOpTimeFetched); if (candidate.empty()) { if (oldestOpTimeSeen == sentinel) { // If, in this invocation of connectToSyncSource(), we did not successfully // connect to any node ahead of us, // we apparently have no sync sources to connect to. // This situation is common; e.g. if there are no writes to the primary at // the moment. return; } // Connected to at least one member, but in all cases we were too stale to use them // as a sync source. log() << "replSet error RS102 too stale to catch up"; log() << "replSet our last optime : " << lastOpTimeFetched.toStringLong(); log() << "replSet oldest available is " << oldestOpTimeSeen.toStringLong(); log() << "replSet " "See http://dochub.mongodb.org/core/resyncingaverystalereplicasetmember"; setMinValid(txn, oldestOpTimeSeen); bool worked = replCoord->setFollowerMode(MemberState::RS_RECOVERING); if (!worked) { warning() << "Failed to transition into " << MemberState(MemberState::RS_RECOVERING) << ". Current state: " << replCoord->getMemberState(); } return; } if (!connect(candidate)) { LOG(2) << "replSet can't connect to " << candidate.toString() << " to read operations"; resetConnection(); replCoord->blacklistSyncSource(candidate, Date_t(curTimeMillis64() + 10 * 1000)); continue; } // Read the first (oldest) op and confirm that it's not newer than our last // fetched op. Otherwise, we have fallen off the back of that source's oplog. BSONObj remoteOldestOp(findOne(rsoplog, Query())); BSONElement tsElem(remoteOldestOp["ts"]); if (tsElem.type() != Timestamp) { // This member's got a bad op in its oplog. warning() << "oplog invalid format on node " << candidate.toString(); resetConnection(); replCoord->blacklistSyncSource(candidate, Date_t(curTimeMillis64() + 600 * 1000)); continue; } OpTime remoteOldOpTime = tsElem._opTime(); if (!lastOpTimeFetched.isNull() && lastOpTimeFetched < remoteOldOpTime) { // We're too stale to use this sync source. resetConnection(); replCoord->blacklistSyncSource(candidate, Date_t(curTimeMillis64() + 600 * 1000)); if (oldestOpTimeSeen > remoteOldOpTime) { warning() << "we are too stale to use " << candidate.toString() << " as a sync source"; oldestOpTimeSeen = remoteOldOpTime; } continue; } // Got a valid sync source. return; } // while (true) }
void OplogReader::connectToSyncSource(OperationContext* txn, const OpTime& lastOpTimeFetched, ReplicationCoordinator* replCoord) { const Timestamp sentinelTimestamp(duration_cast<Seconds>(Milliseconds(curTimeMillis64())), 0); const OpTime sentinel(sentinelTimestamp, std::numeric_limits<long long>::max()); OpTime oldestOpTimeSeen = sentinel; invariant(conn() == NULL); while (true) { HostAndPort candidate = replCoord->chooseNewSyncSource(lastOpTimeFetched.getTimestamp()); if (candidate.empty()) { if (oldestOpTimeSeen == sentinel) { // If, in this invocation of connectToSyncSource(), we did not successfully // connect to any node ahead of us, // we apparently have no sync sources to connect to. // This situation is common; e.g. if there are no writes to the primary at // the moment. return; } // Connected to at least one member, but in all cases we were too stale to use them // as a sync source. error() << "too stale to catch up"; log() << "our last optime : " << lastOpTimeFetched; log() << "oldest available is " << oldestOpTimeSeen; log() << "See http://dochub.mongodb.org/core/resyncingaverystalereplicasetmember"; setMinValid(txn, oldestOpTimeSeen); bool worked = replCoord->setFollowerMode(MemberState::RS_RECOVERING); if (!worked) { warning() << "Failed to transition into " << MemberState(MemberState::RS_RECOVERING) << ". Current state: " << replCoord->getMemberState(); } return; } if (!connect(candidate)) { LOG(2) << "can't connect to " << candidate.toString() << " to read operations"; resetConnection(); replCoord->blacklistSyncSource(candidate, Date_t::now() + Seconds(10)); continue; } // Read the first (oldest) op and confirm that it's not newer than our last // fetched op. Otherwise, we have fallen off the back of that source's oplog. BSONObj remoteOldestOp(findOne(rsOplogName.c_str(), Query())); OpTime remoteOldOpTime = fassertStatusOK(28776, OpTime::parseFromBSON(remoteOldestOp)); // remoteOldOpTime may come from a very old config, so we cannot compare their terms. if (!lastOpTimeFetched.isNull() && lastOpTimeFetched.getTimestamp() < remoteOldOpTime.getTimestamp()) { // We're too stale to use this sync source. resetConnection(); replCoord->blacklistSyncSource(candidate, Date_t::now() + Minutes(1)); if (oldestOpTimeSeen.getTimestamp() > remoteOldOpTime.getTimestamp()) { warning() << "we are too stale to use " << candidate.toString() << " as a sync source"; oldestOpTimeSeen = remoteOldOpTime; } continue; } // Got a valid sync source. return; } // while (true) }
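// Illustrative sketch (not the original source): the two connectToSyncSource() variants above
// blacklist a candidate briefly after a connection failure and for much longer when the
// candidate's oplog no longer overlaps ours. The durations mirror the newer variant (10 seconds
// vs. 1 minute) but are otherwise an assumption.
#include <chrono>

enum class SyncSourceProblem { ConnectFailed, TooStale };

std::chrono::seconds blacklistDuration(SyncSourceProblem problem) {
    switch (problem) {
        case SyncSourceProblem::ConnectFailed:
            return std::chrono::seconds(10);  // transient failure: retry the node soon
        case SyncSourceProblem::TooStale:
            return std::chrono::minutes(1);   // structural problem: leave it alone for longer
    }
    return std::chrono::seconds(10);
}

int main() {
    return blacklistDuration(SyncSourceProblem::TooStale).count() == 60 ? 0 : 1;
}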
// Returns false when request includes 'end' void assembleResponse( Message &m, DbResponse &dbresponse, const HostAndPort& remote ) { // before we lock... int op = m.operation(); bool isCommand = false; const char *ns = m.singleData()->_data + 4; if ( op == dbQuery ) { if( strstr(ns, ".$cmd") ) { isCommand = true; opwrite(m); if( strstr(ns, ".$cmd.sys.") ) { if( strstr(ns, "$cmd.sys.inprog") ) { inProgCmd(m, dbresponse); return; } if( strstr(ns, "$cmd.sys.killop") ) { killOp(m, dbresponse); return; } if( strstr(ns, "$cmd.sys.unlock") ) { unlockFsync(ns, m, dbresponse); return; } } } else { opread(m); } } else if( op == dbGetMore ) { opread(m); } else { opwrite(m); } globalOpCounters.gotOp( op , isCommand ); Client& c = cc(); auto_ptr<CurOp> nestedOp; CurOp* currentOpP = c.curop(); if ( currentOpP->active() ) { nestedOp.reset( new CurOp( &c , currentOpP ) ); currentOpP = nestedOp.get(); } CurOp& currentOp = *currentOpP; currentOp.reset(remote,op); OpDebug& debug = currentOp.debug(); debug.op = op; int logThreshold = cmdLine.slowMS; bool log = logLevel >= 1; if ( op == dbQuery ) { if ( handlePossibleShardedMessage( m , &dbresponse ) ) return; receivedQuery(c , dbresponse, m ); } else if ( op == dbGetMore ) { if ( ! receivedGetMore(dbresponse, m, currentOp) ) log = true; } else if ( op == dbMsg ) { // deprecated - replaced by commands char *p = m.singleData()->_data; int len = strlen(p); if ( len > 400 ) out() << curTimeMillis64() % 10000 << " long msg received, len:" << len << endl; Message *resp = new Message(); if ( strcmp( "end" , p ) == 0 ) resp->setData( opReply , "dbMsg end no longer supported" ); else resp->setData( opReply , "i am fine - dbMsg deprecated"); dbresponse.response = resp; dbresponse.responseTo = m.header()->id; } else { const char *ns = m.singleData()->_data + 4; char cl[256]; nsToDatabase(ns, cl); if( ! c.getAuthenticationInfo()->isAuthorized(cl) ) { uassert_nothrow("unauthorized"); } else { try { if ( op == dbInsert ) { receivedInsert(m, currentOp); } else if ( op == dbUpdate ) { receivedUpdate(m, currentOp); } else if ( op == dbDelete ) { receivedDelete(m, currentOp); } else if ( op == dbKillCursors ) { currentOp.ensureStarted(); logThreshold = 10; receivedKillCursors(m); } else { mongo::log() << " operation isn't supported: " << op << endl; currentOp.done(); log = true; } } catch ( UserException& ue ) { tlog(3) << " Caught Assertion in " << opToString(op) << ", continuing " << ue.toString() << endl; debug.exceptionInfo = ue.getInfo(); } catch ( AssertionException& e ) { tlog(3) << " Caught Assertion in " << opToString(op) << ", continuing " << e.toString() << endl; debug.exceptionInfo = e.getInfo(); log = true; } } } currentOp.ensureStarted(); currentOp.done(); int ms = currentOp.totalTimeMillis(); //DEV log = true; if ( log || ms > logThreshold ) { if( logLevel < 3 && op == dbGetMore && strstr(ns, ".oplog.") && ms < 4300 && !log ) { /* it's normal for getMore on the oplog to be slow because of use of awaitdata flag. 
*/ } else { debug.executionTime = ms; mongo::tlog() << debug << endl; } } if ( currentOp.shouldDBProfile( ms ) ) { // performance profiling is on if ( dbMutex.getState() < 0 ) { mongo::log(1) << "note: not profiling because recursive read lock" << endl; } else { writelock lk; if ( dbHolder.isLoaded( nsToDatabase( currentOp.getNS() ) , dbpath ) ) { Client::Context cx( currentOp.getNS() ); profile(c , currentOp ); } else { mongo::log() << "note: not profiling because db went away - probably a close on: " << currentOp.getNS() << endl; } } } debug.reset(); } /* assembleResponse() */
void doTTLForDB( const string& dbName ) {
    //check isMaster before becoming god
    bool isMaster = isMasterNs( dbName.c_str() );

    Client::GodScope god;

    vector<BSONObj> indexes;
    {
        auto_ptr<DBClientCursor> cursor =
                db.query( dbName + ".system.indexes" ,
                          BSON( secondsExpireField << BSON( "$exists" << true ) ) ,
                          0 , /* default nToReturn */
                          0 , /* default nToSkip */
                          0 , /* default fieldsToReturn */
                          QueryOption_SlaveOk ); /* perform on secondaries too */

        if ( cursor.get() ) {
            while ( cursor->more() ) {
                indexes.push_back( cursor->next().getOwned() );
            }
        }
    }

    for ( unsigned i=0; i<indexes.size(); i++ ) {
        BSONObj idx = indexes[i];

        BSONObj key = idx["key"].Obj();
        if ( key.nFields() != 1 ) {
            error() << "key for ttl index can only have 1 field" << endl;
            continue;
        }

        BSONObj query;
        {
            BSONObjBuilder b;
            b.appendDate( "$lt" ,
                          curTimeMillis64() - ( 1000 * idx[secondsExpireField].numberLong() ) );
            query = BSON( key.firstElement().fieldName() << b.obj() );
        }

        LOG(1) << "TTL: " << key << " \t " << query << endl;

        long long n = 0;
        {
            string ns = idx["ns"].String();
            Client::WriteContext ctx( ns );
            NamespaceDetails* nsd = nsdetails( ns );
            if ( ! nsd ) {
                // collection was dropped
                continue;
            }
            if ( nsd->setUserFlag( NamespaceDetails::Flag_UsePowerOf2Sizes ) ) {
                nsd->syncUserFlags( ns );
            }
            // only do deletes if on master
            if ( ! isMaster ) {
                continue;
            }

            n = deleteObjects( ns.c_str() , query , false , true );
            ttlDeletedDocuments.increment( n );
        }

        LOG(1) << "\tTTL deleted: " << n << endl;
    }
}
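// Illustrative sketch (not the original source): the TTL pass above deletes documents whose
// indexed date is older than now minus expireAfterSeconds. The cutoff arithmetic in isolation,
// with a made-up "current time":
#include <cstdint>
#include <iostream>

uint64_t ttlCutoffMillis(uint64_t nowMillis, int64_t expireAfterSeconds) {
    // anything with an indexed date strictly less than this value is eligible for deletion
    return nowMillis - static_cast<uint64_t>(expireAfterSeconds) * 1000;
}

int main() {
    const uint64_t now = 1700000000000ull;  // hypothetical current time in milliseconds
    std::cout << "delete where field < " << ttlCutoffMillis(now, 3600) << "\n";  // 1 hour TTL
    return 0;
}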
// Returns false when request includes 'end' void assembleResponse( Message &m, DbResponse &dbresponse, const HostAndPort& remote ) { // before we lock... int op = m.operation(); bool isCommand = false; const char *ns = m.singleData()->_data + 4; if ( op == dbQuery ) { if( strstr(ns, ".$cmd") ) { isCommand = true; opwrite(m); if( strstr(ns, ".$cmd.sys.") ) { if( strstr(ns, "$cmd.sys.inprog") ) { inProgCmd(m, dbresponse); return; } if( strstr(ns, "$cmd.sys.killop") ) { killOp(m, dbresponse); return; } if( strstr(ns, "$cmd.sys.unlock") ) { unlockFsync(ns, m, dbresponse); return; } } } else { opread(m); } } else if( op == dbGetMore ) { opread(m); } else { opwrite(m); } globalOpCounters.gotOp( op , isCommand ); Client& c = cc(); c.getAuthorizationManager()->startRequest(); auto_ptr<CurOp> nestedOp; CurOp* currentOpP = c.curop(); if ( currentOpP->active() ) { nestedOp.reset( new CurOp( &c , currentOpP ) ); currentOpP = nestedOp.get(); } else { c.newTopLevelRequest(); } CurOp& currentOp = *currentOpP; currentOp.reset(remote,op); OpDebug& debug = currentOp.debug(); debug.op = op; long long logThreshold = cmdLine.slowMS; bool shouldLog = logLevel >= 1; if ( op == dbQuery ) { if ( handlePossibleShardedMessage( m , &dbresponse ) ) return; receivedQuery(c , dbresponse, m ); } else if ( op == dbGetMore ) { if ( ! receivedGetMore(dbresponse, m, currentOp) ) shouldLog = true; } else if ( op == dbMsg ) { // deprecated - replaced by commands char *p = m.singleData()->_data; int len = strlen(p); if ( len > 400 ) out() << curTimeMillis64() % 10000 << " long msg received, len:" << len << endl; Message *resp = new Message(); if ( strcmp( "end" , p ) == 0 ) resp->setData( opReply , "dbMsg end no longer supported" ); else resp->setData( opReply , "i am fine - dbMsg deprecated"); dbresponse.response = resp; dbresponse.responseTo = m.header()->id; } else { try { const NamespaceString nsString( ns ); // The following operations all require authorization. // dbInsert, dbUpdate and dbDelete can be easily pre-authorized, // here, but dbKillCursors cannot. 
if ( op == dbKillCursors ) { currentOp.ensureStarted(); logThreshold = 10; receivedKillCursors(m); } else if ( !nsString.isValid() ) { // Only killCursors doesn't care about namespaces uassert( 16257, str::stream() << "Invalid ns [" << ns << "]", false ); } else if ( op == dbInsert ) { receivedInsert(m, currentOp); } else if ( op == dbUpdate ) { receivedUpdate(m, currentOp); } else if ( op == dbDelete ) { receivedDelete(m, currentOp); } else { mongo::log() << " operation isn't supported: " << op << endl; currentOp.done(); shouldLog = true; } } catch ( UserException& ue ) { tlog(3) << " Caught Assertion in " << opToString(op) << ", continuing " << ue.toString() << endl; debug.exceptionInfo = ue.getInfo(); } catch ( AssertionException& e ) { tlog(3) << " Caught Assertion in " << opToString(op) << ", continuing " << e.toString() << endl; debug.exceptionInfo = e.getInfo(); shouldLog = true; } } currentOp.ensureStarted(); currentOp.done(); debug.executionTime = currentOp.totalTimeMillis(); logThreshold += currentOp.getExpectedLatencyMs(); if ( shouldLog || debug.executionTime > logThreshold ) { mongo::tlog() << debug.report( currentOp ) << endl; } if ( currentOp.shouldDBProfile( debug.executionTime ) ) { // performance profiling is on if ( Lock::isReadLocked() ) { LOG(1) << "note: not profiling because recursive read lock" << endl; } else if ( lockedForWriting() ) { LOG(1) << "note: not profiling because doing fsync+lock" << endl; } else { profile(c, op, currentOp); } } debug.recordStats(); debug.reset(); } /* assembleResponse() */
void TxnContext::commit(int flags) {
    verify(!_retired);
    bool gotGTID = false;
    GTID gtid;
    // do this in case we are writing the first entry
    // we put something in that can be distinguished from
    // an initialized GTID that has never been touched
    gtid.inc_primary();
    // handle work related to logging of transaction for replication
    // this piece must be done before the _txn.commit
    try {
        if (hasParent()) {
            // This does something
            // a bit dangerous in that it may spill parent's stuff
            // with this child transaction that is committing. If something
            // goes wrong and this child transaction aborts, we will miss
            // some ops
            //
            // This ought to be ok, because we are in this try/catch block
            // where if something goes wrong, we will crash the server.
            // NOTHING better go wrong here, unless under bad rare
            // circumstances
            _txnOps.finishChildCommit();
        }
        else if (!_txnOps.empty()) {
            uint64_t timestamp = 0;
            uint64_t hash = 0;
            if (!_initiatingRS) {
                dassert(txnGTIDManager);
                txnGTIDManager->getGTIDForPrimary(&gtid, &timestamp, &hash);
            }
            else {
                dassert(!txnGTIDManager);
                timestamp = curTimeMillis64();
            }
            gotGTID = true;
            // In this case, the transaction we are committing has
            // no parent, so we must write the transaction's
            // logged operations to the opLog, as part of this transaction
            dassert(logTxnOpsForReplication());
            dassert(_logTxnToOplog);
            _txnOps.rootCommit(gtid, timestamp, hash);
        }
        // handle work related to logging of transaction for chunk migrations
        if (!_txnOpsForSharding.empty()) {
            if (hasParent()) {
                transferOpsForShardingToParent();
            }
            else {
                writeTxnOpsToMigrateLog();
            }
        }
        _clientCursorRollback.preComplete();
        _txn.commit(flags);
        // if the commit of this transaction got a GTID, then notify
        // the GTIDManager that the commit is now done.
        if (gotGTID && !_initiatingRS) {
            dassert(txnGTIDManager);
            // save the GTID for the client so that
            // getLastError will know what GTID slaves
            // need to be caught up to.
            cc().setLastOp(gtid);
            txnGTIDManager->noteLiveGTIDDone(gtid);
        }
    }
    catch (std::exception &e) {
        log() << "exception during critical section of txn commit, aborting system: "
              << e.what() << endl;
        printStackTrace();
        logflush();
        ::abort();
    }
    // These rollback items must be processed after the ydb transaction completes.
    if (hasParent()) {
        _cappedRollback.transfer(_parent->_cappedRollback);
        _nsIndexRollback.transfer(_parent->_nsIndexRollback);
    }
    else {
        _cappedRollback.commit();
        _nsIndexRollback.commit();
    }
    _retired = true;
}
bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { _runCalled = true; long long start = Listener::getElapsedTimeMillis(); BSONObjBuilder timeBuilder(256); const ClientBasic* myClientBasic = ClientBasic::getCurrent(); AuthorizationManager* authManager = myClientBasic->getAuthorizationManager(); // --- basic fields that are global result.append("host", prettyHostName() ); result.append("version", versionString); result.append("process",cmdLine.binaryName); result.append("pid", (int)getpid()); result.append("uptime",(double) (time(0)-cmdLine.started)); result.append("uptimeMillis", (long long)(curTimeMillis64()-_started)); result.append("uptimeEstimate",(double) (start/1000)); result.appendDate( "localTime" , jsTime() ); timeBuilder.appendNumber( "after basic" , Listener::getElapsedTimeMillis() - start ); // --- all sections for ( SectionMap::const_iterator i = _sections->begin(); i != _sections->end(); ++i ) { ServerStatusSection* section = i->second; std::vector<Privilege> requiredPrivileges; section->addRequiredPrivileges(&requiredPrivileges); if (!authManager->checkAuthForPrivileges(requiredPrivileges).isOK()) continue; bool include = section->includeByDefault(); BSONElement e = cmdObj[section->getSectionName()]; if ( e.type() ) { include = e.trueValue(); } if ( ! include ) continue; BSONObj data = section->generateSection(e); if ( data.isEmpty() ) continue; result.append( section->getSectionName(), data ); timeBuilder.appendNumber( static_cast<string>(str::stream() << "after " << section->getSectionName()), Listener::getElapsedTimeMillis() - start ); } // --- counters if ( MetricTree::theMetricTree ) { MetricTree::theMetricTree->appendTo( result ); } // --- some hard coded global things hard to pull out { RamLog* rl = RamLog::get( "warnings" ); massert(15880, "no ram log for warnings?" , rl); if (rl->lastWrite() >= time(0)-(10*60)){ // only show warnings from last 10 minutes vector<const char*> lines; rl->get( lines ); BSONArrayBuilder arr( result.subarrayStart( "warnings" ) ); for ( unsigned i=std::max(0,(int)lines.size()-10); i<lines.size(); i++ ) arr.append( lines[i] ); arr.done(); } } timeBuilder.appendNumber( "at end" , Listener::getElapsedTimeMillis() - start ); if ( Listener::getElapsedTimeMillis() - start > 1000 ) { BSONObj t = timeBuilder.obj(); log() << "serverStatus was very slow: " << t << endl; result.append( "timing" , t ); } return true; }
CmdServerStatus() : Command("serverStatus", true), _started( curTimeMillis64() ), _runCalled( false ) { }
Date_t NetworkInterfaceImpl::now() { return curTimeMillis64(); }
GlobalLockServerStatusSection() : ServerStatusSection( "globalLock" ){ _started = curTimeMillis64(); }
void BackgroundSync::connectOplogReader(OperationContext* txn, ReplicationCoordinatorImpl* replCoordImpl, OplogReader* reader) { OpTime lastOpTimeFetched; { boost::unique_lock<boost::mutex> lock(_mutex); lastOpTimeFetched = _lastOpTimeFetched; } Date_t now(curTimeMillis64()); OpTime oldestOpTimeSeen(now,0); while (true) { HostAndPort candidate = replCoordImpl->chooseNewSyncSource(); if (candidate.empty()) { if (oldestOpTimeSeen == OpTime(now,0)) { // If, in this invocation of connectOplogReader(), we did not successfully // connect to any node ahead of us, // we apparently have no sync sources to connect to. // This situation is common; e.g. if there are no writes to the primary at // the moment. return; } // Connected to at least one member, but in all cases we were too stale to use them // as a sync source. log() << "replSet error RS102 too stale to catch up" << rsLog; log() << "replSet our last optime : " << lastOpTimeFetched.toStringLong() << rsLog; log() << "replSet oldest available is " << oldestOpTimeSeen.toStringLong() << rsLog; log() << "replSet " "See http://dochub.mongodb.org/core/resyncingaverystalereplicasetmember" << rsLog; sethbmsg("error RS102 too stale to catch up"); theReplSet->setMinValid(txn, oldestOpTimeSeen); replCoordImpl->setFollowerMode(MemberState::RS_RECOVERING); return; } if (!reader->connect(candidate)) { LOG(2) << "replSet can't connect to " << candidate.toString() << " to read operations" << rsLog; reader->resetConnection(); replCoordImpl->blacklistSyncSource(candidate, Date_t(curTimeMillis64() + 10*1000)); continue; } // Read the first (oldest) op and confirm that it's not newer than our last // fetched op. Otherwise, we have fallen off the back of that source's oplog. BSONObj remoteOldestOp(reader->findOne(rsoplog, Query())); BSONElement tsElem(remoteOldestOp["ts"]); if (tsElem.type() != Timestamp) { // This member's got a bad op in its oplog. warning() << "oplog invalid format on node " << candidate.toString(); reader->resetConnection(); replCoordImpl->blacklistSyncSource(candidate, Date_t(curTimeMillis64() + 600*1000)); continue; } OpTime remoteOldOpTime = tsElem._opTime(); if (lastOpTimeFetched < remoteOldOpTime) { // We're too stale to use this sync source. reader->resetConnection(); replCoordImpl->blacklistSyncSource(candidate, Date_t(curTimeMillis64() + 600*1000)); if (oldestOpTimeSeen > remoteOldOpTime) { warning() << "we are too stale to use " << candidate.toString() << " as a sync source"; oldestOpTimeSeen = remoteOldOpTime; } continue; } // Got a valid sync source. return; } // while (true) }
void ReplicationCoordinatorImpl::doMemberHeartbeat(ReplicationExecutor::CallbackData cbData, const HostAndPort& hap) { if (cbData.status == ErrorCodes::CallbackCanceled) { return; } // Are we blind, or do we have a failpoint setup to ignore this member? bool dontHeartbeatMember = false; // TODO: replSetBlind should be here as the default MONGO_FAIL_POINT_BLOCK(rsHeartbeatRequestNoopByMember, member) { const StringData& stopMember = member.getData()["member"].valueStringData(); HostAndPort ignoreHAP; Status status = ignoreHAP.initialize(stopMember); // Ignore if (status.isOK()) { if (hap == ignoreHAP) { dontHeartbeatMember = true; } } else { log() << "replset: Bad member for rsHeartbeatRequestNoopByMember failpoint " << member.getData() << ". 'member' failed to parse into HostAndPort -- " << status; } } if (dontHeartbeatMember) { // Don't issue real heartbeats, just call start again after the timeout. ReplicationExecutor::CallbackFn restartCB = stdx::bind( &ReplicationCoordinatorImpl::doMemberHeartbeat, this, stdx::placeholders::_1, hap); CBHStatus status = _replExecutor->scheduleWorkAt( Date_t(curTimeMillis64() + heartbeatFrequencyMillis), restartCB); if (!status.isOK()) { log() << "replset: aborting heartbeats for " << hap << " due to scheduling error" << " -- "<< status; return; } _trackHeartbeatHandle(status.getValue()); return; } // Compose heartbeat command message BSONObj hbCommandBSON; { // take lock to build request boost::lock_guard<boost::mutex> lock(_mutex); BSONObjBuilder cmdBuilder; const MemberConfig me = _rsConfig.getMemberAt(_thisMembersConfigIndex); cmdBuilder.append("replSetHeartbeat", _rsConfig.getReplSetName()); cmdBuilder.append("v", _rsConfig.getConfigVersion()); cmdBuilder.append("pv", 1); cmdBuilder.append("checkEmpty", false); cmdBuilder.append("from", me.getHostAndPort().toString()); cmdBuilder.append("fromId", me.getId()); hbCommandBSON = cmdBuilder.done(); } const ReplicationExecutor::RemoteCommandRequest request(hap, "admin", hbCommandBSON); ReplicationExecutor::RemoteCommandCallbackFn callback = stdx::bind( &ReplicationCoordinatorImpl::_handleHeartbeatResponse, this, stdx::placeholders::_1, hap, curTimeMillis64(), heartbeatRetries); CBHStatus status = _replExecutor->scheduleRemoteCommand(request, callback); if (!status.isOK()) { log() << "replset: aborting heartbeats for " << hap << " due to scheduling error" << status; return; } _trackHeartbeatHandle(status.getValue()); }
void RangeDeleter::doWork() {
    _env->initThread();

    while (!inShutdown() && !stopRequested()) {
        string errMsg;

        RangeDeleteEntry* nextTask = NULL;

        {
            scoped_lock sl(_queueMutex);
            while (_taskQueue.empty()) {
                _taskQueueNotEmptyCV.timed_wait(
                    sl.boost(), duration::milliseconds(NotEmptyTimeoutMillis));

                if (stopRequested()) {
                    log() << "stopping range deleter worker" << endl;
                    return;
                }

                if (_taskQueue.empty()) {
                    // Try to check if some deletes are ready and move them to the
                    // ready queue.

                    TaskList::iterator iter = _notReadyQueue.begin();
                    while (iter != _notReadyQueue.end()) {
                        RangeDeleteEntry* entry = *iter;

                        set<CursorId> cursorsNow;
                        {
                            boost::scoped_ptr<OperationContext> txn(
                                    getGlobalEnvironment()->newOpCtx());
                            _env->getCursorIds(txn.get(), entry->ns, &cursorsNow);
                        }

                        set<CursorId> cursorsLeft;
                        std::set_intersection(entry->cursorsToWait.begin(),
                                              entry->cursorsToWait.end(),
                                              cursorsNow.begin(),
                                              cursorsNow.end(),
                                              std::inserter(cursorsLeft, cursorsLeft.end()));

                        entry->cursorsToWait.swap(cursorsLeft);

                        if (entry->cursorsToWait.empty()) {
                            (*iter)->stats.queueEndTS = jsTime();
                            _taskQueue.push_back(*iter);
                            _taskQueueNotEmptyCV.notify_one();
                            iter = _notReadyQueue.erase(iter);
                        }
                        else {
                            // elapsed time since the entry was queued (now minus start)
                            const unsigned long long int elapsedMillis =
                                curTimeMillis64() - entry->stats.queueStartTS.millis;

                            if ( elapsedMillis > LogCursorsThresholdMillis &&
                                 entry->timeSinceLastLog.millis > LogCursorsIntervalMillis) {
                                entry->timeSinceLastLog = jsTime();
                                logCursorsWaiting(entry->ns,
                                                  entry->min,
                                                  entry->max,
                                                  elapsedMillis,
                                                  entry->cursorsToWait);
                            }

                            ++iter;
                        }
                    }
                }
            }

            if (stopRequested()) {
                log() << "stopping range deleter worker" << endl;
                return;
            }

            nextTask = _taskQueue.front();
            _taskQueue.pop_front();

            _deletesInProgress++;
        }

        {
            boost::scoped_ptr<OperationContext> txn(getGlobalEnvironment()->newOpCtx());
            ReplTime lastOp;

            nextTask->stats.deleteStartTS = jsTime();
            bool delResult = _env->deleteRange(txn.get(),
                                               *nextTask,
                                               &nextTask->stats.deletedDocCount,
                                               &lastOp,
                                               &errMsg);
            nextTask->stats.deleteEndTS = jsTime();

            if (delResult) {
                nextTask->stats.waitForReplStartTS = jsTime();

                if (!_env->waitForReplication(lastOp,
                                              DelWriteConcern,
                                              WaitForReplTimeoutSecs,
                                              &errMsg)) {
                    warning() << "Error encountered while waiting for replication: "
                              << errMsg << endl;
                }

                nextTask->stats.waitForReplEndTS = jsTime();
            }
            else {
                warning() << "Error encountered while trying to delete range: "
                          << errMsg << endl;
            }
        }

        {
            scoped_lock sl(_queueMutex);

            NSMinMax setEntry(nextTask->ns, nextTask->min, nextTask->max);
            deletePtrElement(&_deleteSet, &setEntry);
            _deletesInProgress--;

            if (nextTask->notifyDone) {
                nextTask->notifyDone->notifyOne();
            }
        }

        recordDelStats(new DeleteJobStats(nextTask->stats));
        delete nextTask;
        nextTask = NULL;
    }
}