void run() {
    int iterations = 1000 * 1000;
    while (iterations--) {
        jsTime();
    }
}
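// Hedged sketch (not from the source): a timing wrapper for the jsTime() micro-benchmark
// above. mongo::Timer (util/timer.h) is assumed to be available; the function name and the
// reporting format are illustrative only.
void runTimed() {
    Timer timer;
    int iterations = 1000 * 1000;
    while (iterations--) {
        jsTime();
    }
    log() << "1M jsTime() calls took " << timer.millis() << "ms" << endl;
}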
void DataFileSync::_flushed(int ms) {
    _flushes++;
    _total_time += ms;
    _last_time = ms;
    _last = jsTime();
}
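// Hedged sketch (not from the source): how the counters accumulated by _flushed() above could
// be reported. The member names match the snippet; the helper itself, its placement as a
// DataFileSync member, and the division-by-zero guard are illustrative assumptions.
double averageFlushTimeMs() const {
    return _flushes == 0 ? 0.0 : static_cast<double>(_total_time) / _flushes;
}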
void RangeDeleter::doWork() {
    _env->initThread();

    while (!inShutdown() && !stopRequested()) {
        string errMsg;
        boost::scoped_ptr<OperationContext> txn(getGlobalServiceContext()->newOpCtx());

        RangeDeleteEntry* nextTask = NULL;

        {
            boost::unique_lock<boost::mutex> sl(_queueMutex);
            while (_taskQueue.empty()) {
                _taskQueueNotEmptyCV.timed_wait(sl,
                                                duration::milliseconds(kNotEmptyTimeoutMillis));

                if (stopRequested()) {
                    log() << "stopping range deleter worker" << endl;
                    return;
                }

                if (_taskQueue.empty()) {
                    // Try to check if some deletes are ready and move them to the
                    // ready queue.
                    TaskList::iterator iter = _notReadyQueue.begin();
                    while (iter != _notReadyQueue.end()) {
                        RangeDeleteEntry* entry = *iter;

                        set<CursorId> cursorsNow;
                        {
                            if (entry->options.waitForOpenCursors) {
                                _env->getCursorIds(
                                    txn.get(), entry->options.range.ns, &cursorsNow);
                            }
                        }

                        set<CursorId> cursorsLeft;
                        std::set_intersection(entry->cursorsToWait.begin(),
                                              entry->cursorsToWait.end(),
                                              cursorsNow.begin(),
                                              cursorsNow.end(),
                                              std::inserter(cursorsLeft, cursorsLeft.end()));

                        entry->cursorsToWait.swap(cursorsLeft);

                        if (entry->cursorsToWait.empty()) {
                            (*iter)->stats.queueEndTS = jsTime();
                            _taskQueue.push_back(*iter);
                            _taskQueueNotEmptyCV.notify_one();
                            iter = _notReadyQueue.erase(iter);
                        } else {
                            logCursorsWaiting(entry);
                            ++iter;
                        }
                    }
                }
            }

            if (stopRequested()) {
                log() << "stopping range deleter worker" << endl;
                return;
            }

            nextTask = _taskQueue.front();
            _taskQueue.pop_front();

            _deletesInProgress++;
        }

        {
            nextTask->stats.deleteStartTS = jsTime();
            bool delResult = _env->deleteRange(
                txn.get(), *nextTask, &nextTask->stats.deletedDocCount, &errMsg);
            nextTask->stats.deleteEndTS = jsTime();

            if (delResult) {
                nextTask->stats.waitForReplStartTS = jsTime();

                if (!_waitForMajority(txn.get(), &errMsg)) {
                    warning() << "Error encountered while waiting for replication: " << errMsg;
                }

                nextTask->stats.waitForReplEndTS = jsTime();
            } else {
                warning() << "Error encountered while trying to delete range: " << errMsg << endl;
            }
        }

        {
            boost::lock_guard<boost::mutex> sl(_queueMutex);

            NSMinMax setEntry(nextTask->options.range.ns,
                              nextTask->options.range.minKey,
                              nextTask->options.range.maxKey);
            deletePtrElement(&_deleteSet, &setEntry);
            _deletesInProgress--;

            if (nextTask->notifyDone) {
                nextTask->notifyDone->notifyOne();
            }
        }

        recordDelStats(new DeleteJobStats(nextTask->stats));
        delete nextTask;
        nextTask = NULL;
    }
}
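// Hedged sketch (not from the source): the cursor-wait step used by doWork() above, isolated
// for clarity. Only cursors that were open when the delete was queued AND are still open now
// remain in the wait set; anything closed in the meantime drops out. The free-function form
// and the CursorId typedef are illustrative assumptions.
#include <algorithm>
#include <iterator>
#include <set>

typedef long long CursorId;

std::set<CursorId> stillWaitingOn(const std::set<CursorId>& cursorsToWait,
                                  const std::set<CursorId>& cursorsNow) {
    std::set<CursorId> cursorsLeft;
    std::set_intersection(cursorsToWait.begin(), cursorsToWait.end(),
                          cursorsNow.begin(), cursorsNow.end(),
                          std::inserter(cursorsLeft, cursorsLeft.end()));
    return cursorsLeft;
}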
void _flushed(int ms) {
    _flushes++;
    _total_time += ms;
    _last_time = ms;
    _last = jsTime();
}
// Semantics of this method are basically that if the lock cannot be acquired, returns false,
// can be retried. If the lock should not be tried again (some unexpected error),
// a LockException is thrown.
bool DistributedLock::lock_try(const OID& lockID,
                               const string& why,
                               BSONObj* other,
                               double timeout) {
    // This should always be true, if not, we are using the lock incorrectly.
    verify(_name != "");

    auto lockTimeout = _lockTimeout;
    MONGO_FAIL_POINT_BLOCK(setSCCCDistLockTimeout, customTimeout) {
        const BSONObj& data = customTimeout.getData();
        lockTimeout = data["timeoutMs"].numberInt();
    }

    LOG(logLvl) << "trying to acquire new distributed lock for " << _name << " on " << _conn
                << " ( lock timeout : " << lockTimeout << ", ping interval : " << _lockPing
                << ", process : " << _processId << " )" << endl;

    // write to dummy if 'other' is null
    BSONObj dummyOther;
    if (other == NULL)
        other = &dummyOther;

    ScopedDbConnection conn(_conn.toString(), timeout);

    BSONObjBuilder queryBuilder;
    queryBuilder.append(LocksType::name(), _name);
    queryBuilder.append(LocksType::state(), LocksType::UNLOCKED);

    {
        // make sure its there so we can use simple update logic below
        BSONObj o = conn->findOne(LocksType::ConfigNS, BSON(LocksType::name(_name))).getOwned();

        // Case 1: No locks
        if (o.isEmpty()) {
            try {
                LOG(logLvl) << "inserting initial doc in " << LocksType::ConfigNS << " for lock "
                            << _name << endl;
                conn->insert(LocksType::ConfigNS,
                             BSON(LocksType::name(_name)
                                  << LocksType::state(LocksType::UNLOCKED) << LocksType::who("")
                                  << LocksType::lockID(OID())));
            } catch (UserException& e) {
                warning() << "could not insert initial doc for distributed lock " << _name
                          << causedBy(e) << endl;
            }
        }

        // Case 2: A set lock that we might be able to force
        else if (o[LocksType::state()].numberInt() > LocksType::UNLOCKED) {
            string lockName =
                o[LocksType::name()].String() + string("/") + o[LocksType::process()].String();

            BSONObj lastPing = conn->findOne(
                LockpingsType::ConfigNS, o[LocksType::process()].wrap(LockpingsType::process()));

            if (lastPing.isEmpty()) {
                LOG(logLvl) << "empty ping found for process in lock '" << lockName << "'" << endl;
                // TODO: Using 0 as a "no time found" value will fail if dates roll over, but
                // then, so will a lot.
                lastPing = BSON(LockpingsType::process(o[LocksType::process()].String())
                                << LockpingsType::ping(Date_t()));
            }

            unsigned long long elapsed = 0;
            unsigned long long takeover = lockTimeout;
            DistLockPingInfo lastPingEntry = getLastPing();

            LOG(logLvl) << "checking last ping for lock '" << lockName << "' against process "
                        << lastPingEntry.processId << " and ping " << lastPingEntry.lastPing;

            try {
                Date_t remote = remoteTime(_conn);

                auto pingDocProcessId = lastPing[LockpingsType::process()].String();
                auto pingDocPingValue = lastPing[LockpingsType::ping()].Date();

                // Timeout the elapsed time using comparisons of remote clock
                // For non-finalized locks, timeout 15 minutes since last seen (ts)
                // For finalized locks, timeout 15 minutes since last ping
                bool recPingChange = o[LocksType::state()].numberInt() == LocksType::LOCKED &&
                    (lastPingEntry.processId != pingDocProcessId ||
                     lastPingEntry.lastPing != pingDocPingValue);
                bool recTSChange = lastPingEntry.lockSessionId != o[LocksType::lockID()].OID();

                if (recPingChange || recTSChange) {
                    // If the ping has changed since we last checked, mark the current date and
                    // time
                    setLastPing(DistLockPingInfo(pingDocProcessId,
                                                 pingDocPingValue,
                                                 remote,
                                                 o[LocksType::lockID()].OID(),
                                                 OID()));
                } else {
                    // GOTCHA! Due to network issues, it is possible that the current time
                    // is less than the remote time. We *have* to check this here, otherwise
                    // we overflow and our lock breaks.
                    if (lastPingEntry.configLocalTime >= remote)
                        elapsed = 0;
                    else
                        elapsed =
                            durationCount<Milliseconds>(remote - lastPingEntry.configLocalTime);
                }
            } catch (LockException& e) {
                // Remote server cannot be found / is not responsive
                warning() << "Could not get remote time from " << _conn << causedBy(e);

                // If our config server is having issues, forget all the pings until we can see
                // it again
                resetLastPing();
            }

            if (elapsed <= takeover) {
                LOG(1) << "could not force lock '" << lockName << "' because elapsed time "
                       << elapsed << " <= takeover time " << takeover;
                *other = o;
                other->getOwned();
                conn.done();
                return false;
            }

            LOG(0) << "forcing lock '" << lockName << "' because elapsed time " << elapsed
                   << " > takeover time " << takeover;

            if (elapsed > takeover) {
                // Lock may be forced; reset our timer whether forcing succeeds or fails.
                // Ensures that another timeout must happen if something borks up here, and
                // resets our pristine ping state if acquired.
                resetLastPing();

                try {
                    // Check the clock skew again. If we check this before we get a lock
                    // and after the lock times out, we can be pretty sure the time is
                    // increasing at the same rate on all servers and therefore our
                    // timeout is accurate
                    if (isRemoteTimeSkewed()) {
                        string msg(str::stream() << "remote time in cluster " << _conn.toString()
                                                 << " is now skewed, cannot force lock.");
                        throw LockException(msg, ErrorCodes::DistributedClockSkewed);
                    }

                    // Make sure we break the lock with the correct "ts" (OID) value, otherwise
                    // we can overwrite a new lock inserted in the meantime.
                    conn->update(LocksType::ConfigNS,
                                 BSON(LocksType::name(_name)
                                      << LocksType::state() << o[LocksType::state()].numberInt()
                                      << LocksType::lockID(o[LocksType::lockID()].OID())),
                                 BSON("$set" << BSON(LocksType::state(LocksType::UNLOCKED))));

                    BSONObj err = conn->getLastErrorDetailed();
                    string errMsg = DBClientWithCommands::getLastErrorString(err);

                    // TODO: Clean up all the extra code to exit this method, probably with a
                    // refactor
                    if (!errMsg.empty() || !err["n"].type() || err["n"].numberInt() < 1) {
                        logErrMsgOrWarn(
                            "Could not force lock", lockName, errMsg, "(another force won");
                        *other = o;
                        other->getOwned();
                        conn.done();
                        return false;
                    }
                } catch (UpdateNotTheSame&) {
                    // Ok to continue since we know we forced at least one lock document, and
                    // all lock docs are required for a lock to be held.
                    warning() << "lock forcing " << lockName << " inconsistent" << endl;
                } catch (const LockException&) {
                    // Let the exception go up and don't repackage the exception.
                    throw;
                } catch (std::exception& e) {
                    conn.done();
                    string msg(str::stream() << "exception forcing distributed lock " << lockName
                                             << causedBy(e));
                    throw LockException(msg, 13660);
                }
            } else {
                // Not strictly necessary, but helpful for small timeouts where thread
                // scheduling is significant. This ensures that two attempts are still
                // required for a force if not acquired, and resets our state if we
                // are acquired.
                resetLastPing();

                // Test that the lock is held by trying to update the finalized state of the
                // lock to the same state. If it does not update, or does not update on all
                // servers, we can't re-enter.
                try {
                    // Test the lock with the correct "ts" (OID) value
                    conn->update(LocksType::ConfigNS,
                                 BSON(LocksType::name(_name)
                                      << LocksType::state(LocksType::LOCKED)
                                      << LocksType::lockID(o[LocksType::lockID()].OID())),
                                 BSON("$set" << BSON(LocksType::state(LocksType::LOCKED))));

                    BSONObj err = conn->getLastErrorDetailed();
                    string errMsg = DBClientWithCommands::getLastErrorString(err);

                    // TODO: Clean up all the extra code to exit this method, probably with a
                    // refactor
                    if (!errMsg.empty() || !err["n"].type() || err["n"].numberInt() < 1) {
                        logErrMsgOrWarn(
                            "Could not re-enter lock", lockName, errMsg, "(not sure lock is held");
                        *other = o;
                        other->getOwned();
                        conn.done();
                        return false;
                    }
                } catch (UpdateNotTheSame&) {
                    // NOT ok to continue since our lock isn't held by all servers, so isn't
                    // valid.
                    warning() << "inconsistent state re-entering lock, lock " << lockName
                              << " not held" << endl;
                    *other = o;
                    other->getOwned();
                    conn.done();
                    return false;
                } catch (std::exception& e) {
                    conn.done();
                    string msg(str::stream() << "exception re-entering distributed lock "
                                             << lockName << causedBy(e));
                    throw LockException(msg, 13660);
                }

                LOG(logLvl - 1) << "re-entered distributed lock '" << lockName << "'" << endl;
                *other = o.getOwned();
                conn.done();
                return true;
            }

            LOG(logLvl - 1) << "lock '" << lockName << "' successfully forced" << endl;

            // We don't need the ts value in the query, since we will only ever replace locks
            // with state=0.
        }
        // Case 3: We have an expired lock
        else if (o[LocksType::lockID()].type()) {
            queryBuilder.append(o[LocksType::lockID()]);
        }
    }

    // Always reset our ping if we're trying to get a lock, since getting a lock implies the
    // lock state is open and no locks need to be forced. If anything goes wrong, we don't want
    // to remember an old lock.
    resetLastPing();

    bool gotLock = false;
    BSONObj currLock;

    BSONObj lockDetails =
        BSON(LocksType::state(LocksType::LOCK_PREP)
             << LocksType::who(getDistLockId()) << LocksType::process(_processId)
             << LocksType::when(jsTime()) << LocksType::why(why) << LocksType::lockID(lockID));
    BSONObj whatIWant = BSON("$set" << lockDetails);

    BSONObj query = queryBuilder.obj();

    string lockName = _name + string("/") + _processId;

    try {
        // Main codepath to acquire lock

        LOG(logLvl) << "about to acquire distributed lock '" << lockName << "'";

        LOG(logLvl + 1) << "trying to acquire lock " << query.toString(false, true)
                        << " with details " << lockDetails.toString(false, true) << endl;

        conn->update(LocksType::ConfigNS, query, whatIWant);

        BSONObj err = conn->getLastErrorDetailed();
        string errMsg = DBClientWithCommands::getLastErrorString(err);

        currLock = conn->findOne(LocksType::ConfigNS, BSON(LocksType::name(_name)));

        if (!errMsg.empty() || !err["n"].type() || err["n"].numberInt() < 1) {
            logErrMsgOrWarn("could not acquire lock", lockName, errMsg, "(another update won)");
            *other = currLock;
            other->getOwned();
            gotLock = false;
        } else {
            gotLock = true;
        }
    } catch (UpdateNotTheSame& up) {
        // this means our update got through on some, but not others
        warning() << "distributed lock '" << lockName << "' did not propagate properly."
                  << causedBy(up) << endl;

        // Overall protection derives from:
        // All unlocking updates use the ts value when setting state to 0
        //   This ensures that during locking, we can override all smaller ts locks with
        //   our own safe ts value and not be unlocked afterward.
        for (unsigned i = 0; i < up.size(); i++) {
            ScopedDbConnection indDB(up[i].first);
            BSONObj indUpdate;

            try {
                indUpdate = indDB->findOne(LocksType::ConfigNS, BSON(LocksType::name(_name)));
                const auto currentLockID = indUpdate[LocksType::lockID()].OID();

                // If we override this lock in any way, grab and protect it.
                // We assume/ensure that if a process does not have all lock documents, it is
                // no longer holding the lock.
                // Note - finalized locks may compete too, but we know they've won already if
                // competing in this round. Cleanup of crashes during finalizing may take a few
                // tries.
                if (currentLockID < lockID ||
                    indUpdate[LocksType::state()].numberInt() == LocksType::UNLOCKED) {
                    BSONObj grabQuery =
                        BSON(LocksType::name(_name) << LocksType::lockID(currentLockID));

                    // Change ts so we won't be forced, state so we won't be relocked
                    BSONObj grabChanges =
                        BSON(LocksType::lockID(lockID) << LocksType::state(LocksType::LOCK_PREP));

                    // Either our update will succeed, and we'll grab the lock, or it will fail
                    // b/c some other process grabbed the lock (which will change the ts), but
                    // the lock will be set until forcing
                    indDB->update(LocksType::ConfigNS, grabQuery, BSON("$set" << grabChanges));

                    indUpdate = indDB->findOne(LocksType::ConfigNS, BSON(LocksType::name(_name)));

                    // The tournament was interfered with and it is not safe to proceed
                    // further. One case this could happen is when the LockPinger processes old
                    // entries from addUnlockOID. See SERVER-10688 for more detailed
                    // description of race.
                    if (indUpdate[LocksType::state()].numberInt() <= LocksType::UNLOCKED) {
                        LOG(logLvl - 1) << "lock tournament interrupted, "
                                        << "so no lock was taken; "
                                        << "new state of lock: " << indUpdate << endl;

                        // We now break and set our currLock lockID value to zero, so that
                        // we know that we did not acquire the lock below. Later code will
                        // cleanup failed entries.
                        currLock = BSON(LocksType::lockID(OID()));
                        indDB.done();
                        break;
                    }
                }
                // else our lock is the same, in which case we're safe, or it's a bigger lock,
                // in which case we won't need to protect anything since we won't have the lock.
            } catch (std::exception& e) {
                conn.done();
                string msg(str::stream()
                           << "distributed lock " << lockName
                           << " had errors communicating with individual server " << up[i].first
                           << causedBy(e));
                throw LockException(msg, 13661, lockID);
            }

            verify(!indUpdate.isEmpty());

            // Find max TS value
            if (currLock.isEmpty() ||
                currLock[LocksType::lockID()] < indUpdate[LocksType::lockID()]) {
                currLock = indUpdate.getOwned();
            }

            indDB.done();
        }

        // Locks on all servers are now set and safe until forcing

        if (currLock[LocksType::lockID()].OID() == lockID) {
            LOG(logLvl - 1) << "lock update won, completing lock propagation for '" << lockName
                            << "'" << endl;
            gotLock = true;
        } else {
            LOG(logLvl - 1) << "lock update lost, lock '" << lockName << "' not propagated."
                            << endl;
            gotLock = false;
        }
    } catch (std::exception& e) {
        conn.done();
        string msg(str::stream() << "exception creating distributed lock " << lockName
                                 << causedBy(e));
        throw LockException(msg, 13663, lockID);
    }

    // Complete lock propagation
    if (gotLock) {
        // This is now safe, since we know that no new locks will be placed on top of the ones
        // we've checked for at least 15 minutes. Sets the state = 2, so that future clients
        // can determine that the lock is truly set. The invariant for rollbacks is that we
        // will never force locks with state = 2 and active pings, since that indicates the
        // lock is active, but this means the process creating/destroying them must explicitly
        // poll when something goes wrong.
        try {
            BSONObjBuilder finalLockDetails;
            BSONObjIterator bi(lockDetails);
            while (bi.more()) {
                BSONElement el = bi.next();
                if ((string)(el.fieldName()) == LocksType::state())
                    finalLockDetails.append(LocksType::state(), LocksType::LOCKED);
                else
                    finalLockDetails.append(el);
            }

            conn->update(LocksType::ConfigNS,
                         BSON(LocksType::name(_name)),
                         BSON("$set" << finalLockDetails.obj()));

            BSONObj err = conn->getLastErrorDetailed();
            string errMsg = DBClientWithCommands::getLastErrorString(err);

            currLock = conn->findOne(LocksType::ConfigNS, BSON(LocksType::name(_name)));

            if (!errMsg.empty() || !err["n"].type() || err["n"].numberInt() < 1) {
                warning() << "could not finalize winning lock " << lockName
                          << (!errMsg.empty() ? causedBy(errMsg) : " (did not update lock) ")
                          << endl;
                gotLock = false;
            } else {
                // SUCCESS!
                gotLock = true;
            }
        } catch (std::exception& e) {
            conn.done();
            string msg(str::stream() << "exception finalizing winning lock" << causedBy(e));
            // Inform caller about the potential orphan lock.
            throw LockException(msg, 13662, lockID);
        }
    }

    *other = currLock;
    other->getOwned();

    // Log our lock results
    if (gotLock)
        LOG(logLvl - 1) << "distributed lock '" << lockName << "' acquired for '" << why
                        << "', ts : " << currLock[LocksType::lockID()].OID();
    else
        LOG(logLvl - 1) << "distributed lock '" << lockName << "' was not acquired.";

    conn.done();

    return gotLock;
}
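// Hedged usage sketch (not from the source): illustrates the lock_try() contract documented
// above -- a false return means the lock is held elsewhere and the call may be retried, while
// a LockException means the attempt should not be retried and is left to propagate here. The
// retry count, backoff, timeout, and "why" message are illustrative assumptions.
bool acquireWithRetry(DistributedLock& lock, const OID& lockID, BSONObj* other) {
    for (int attempt = 0; attempt < 10; ++attempt) {
        // Throws LockException on unexpected errors; deliberately not caught here.
        if (lock.lock_try(lockID, "example: balancing round", other, 30.0)) {
            return true;  // acquired and finalized (state == LOCKED)
        }
        sleepmillis(500);  // lock held by another process; back off before retrying
    }
    return false;
}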
bool RangeDeleter::deleteNow(OperationContext* txn,
                             const RangeDeleterOptions& options,
                             string* errMsg) {
    if (stopRequested()) {
        *errMsg = "deleter is already stopped.";
        return false;
    }

    string dummy;
    if (errMsg == NULL)
        errMsg = &dummy;

    const string& ns(options.range.ns);
    const BSONObj& min(options.range.minKey);
    const BSONObj& max(options.range.maxKey);

    NSMinMax deleteRange(ns, min, max);
    {
        boost::lock_guard<boost::mutex> sl(_queueMutex);
        if (!canEnqueue_inlock(ns, min, max, errMsg)) {
            return false;
        }

        _deleteSet.insert(&deleteRange);

        // Note: count for pending deletes is an integral part of the shutdown story.
        // Therefore, to simplify things, there is no "pending" state for deletes in
        // deleteNow, the state transition is simply inProgress -> done.
        _deletesInProgress++;
    }

    set<CursorId> cursorsToWait;
    if (options.waitForOpenCursors) {
        _env->getCursorIds(txn, ns, &cursorsToWait);
    }

    long long checkIntervalMillis = 5;

    RangeDeleteEntry taskDetails(options);
    taskDetails.stats.queueStartTS = jsTime();

    for (; !cursorsToWait.empty(); sleepmillis(checkIntervalMillis)) {
        logCursorsWaiting(&taskDetails);

        set<CursorId> cursorsNow;
        _env->getCursorIds(txn, ns, &cursorsNow);

        set<CursorId> cursorsLeft;
        std::set_intersection(cursorsToWait.begin(),
                              cursorsToWait.end(),
                              cursorsNow.begin(),
                              cursorsNow.end(),
                              std::inserter(cursorsLeft, cursorsLeft.end()));

        cursorsToWait.swap(cursorsLeft);

        if (stopRequested()) {
            *errMsg = "deleter was stopped.";

            boost::lock_guard<boost::mutex> sl(_queueMutex);
            _deleteSet.erase(&deleteRange);

            _deletesInProgress--;

            if (_deletesInProgress == 0) {
                _nothingInProgressCV.notify_one();
            }

            return false;
        }

        if (checkIntervalMillis < kMaxCursorCheckIntervalMillis) {
            checkIntervalMillis *= 2;
        }
    }
    taskDetails.stats.queueEndTS = jsTime();

    taskDetails.stats.deleteStartTS = jsTime();
    bool result = _env->deleteRange(txn, taskDetails, &taskDetails.stats.deletedDocCount, errMsg);
    taskDetails.stats.deleteEndTS = jsTime();

    if (result) {
        taskDetails.stats.waitForReplStartTS = jsTime();
        result = _waitForMajority(txn, errMsg);
        taskDetails.stats.waitForReplEndTS = jsTime();
    }

    {
        boost::lock_guard<boost::mutex> sl(_queueMutex);
        _deleteSet.erase(&deleteRange);

        _deletesInProgress--;

        if (_deletesInProgress == 0) {
            _nothingInProgressCV.notify_one();
        }
    }

    recordDelStats(new DeleteJobStats(taskDetails.stats));
    return result;
}
bool run(const string& dbname,
         BSONObj& cmdObj,
         int,
         string& errmsg,
         BSONObjBuilder& result,
         bool fromRepl) {
    _runCalled = true;

    long long start = Listener::getElapsedTimeMillis();
    BSONObjBuilder timeBuilder(256);

    const ClientBasic* myClientBasic = ClientBasic::getCurrent();
    AuthorizationManager* authManager = myClientBasic->getAuthorizationManager();

    // --- basic fields that are global

    result.append("host", prettyHostName());
    result.append("version", versionString);
    result.append("process", cmdLine.binaryName);
    result.append("pid", (int)getpid());
    result.append("uptime", (double)(time(0) - cmdLine.started));
    result.append("uptimeMillis", (long long)(curTimeMillis64() - _started));
    result.append("uptimeEstimate", (double)(start / 1000));
    result.appendDate("localTime", jsTime());

    timeBuilder.appendNumber("after basic", Listener::getElapsedTimeMillis() - start);

    // --- all sections

    for (SectionMap::const_iterator i = _sections->begin(); i != _sections->end(); ++i) {
        ServerStatusSection* section = i->second;

        std::vector<Privilege> requiredPrivileges;
        section->addRequiredPrivileges(&requiredPrivileges);
        if (!authManager->checkAuthForPrivileges(requiredPrivileges).isOK())
            continue;

        bool include = section->includeByDefault();

        BSONElement e = cmdObj[section->getSectionName()];
        if (e.type()) {
            include = e.trueValue();
        }

        if (!include)
            continue;

        BSONObj data = section->generateSection(e);
        if (data.isEmpty())
            continue;

        result.append(section->getSectionName(), data);
        timeBuilder.appendNumber(
            static_cast<string>(str::stream() << "after " << section->getSectionName()),
            Listener::getElapsedTimeMillis() - start);
    }

    // --- counters

    if (MetricTree::theMetricTree) {
        MetricTree::theMetricTree->appendTo(result);
    }

    // --- some hard coded global things hard to pull out

    {
        RamLog* rl = RamLog::get("warnings");
        massert(15880, "no ram log for warnings?", rl);

        if (rl->lastWrite() >= time(0) - (10 * 60)) {  // only show warnings from last 10 minutes
            vector<const char*> lines;
            rl->get(lines);

            BSONArrayBuilder arr(result.subarrayStart("warnings"));
            for (unsigned i = std::max(0, (int)lines.size() - 10); i < lines.size(); i++)
                arr.append(lines[i]);
            arr.done();
        }
    }

    timeBuilder.appendNumber("at end", Listener::getElapsedTimeMillis() - start);
    if (Listener::getElapsedTimeMillis() - start > 1000) {
        BSONObj t = timeBuilder.obj();
        log() << "serverStatus was very slow: " << t << endl;
        result.append("timing", t);
    }

    return true;
}
bool DistributedLock::checkSkew(const ConnectionString& cluster,
                                unsigned skewChecks,
                                unsigned long long maxClockSkew,
                                unsigned long long maxNetSkew) {
    vector<HostAndPort> servers = cluster.getServers();

    if (servers.size() < 1)
        return true;

    vector<long long> avgSkews;

    for (unsigned i = 0; i < skewChecks; i++) {
        // Find the average skew for each server
        unsigned s = 0;
        for (vector<HostAndPort>::iterator si = servers.begin(); si != servers.end(); ++si, s++) {
            if (i == 0)
                avgSkews.push_back(0);

            // Could check if this is self, but shouldn't matter since local network connection
            // should be fast.
            ConnectionString server(*si);

            vector<long long> skew;

            BSONObj result;

            Date_t remote = remoteTime(server, maxNetSkew);
            Date_t local = jsTime();

            // Remote time can be delayed by at most MAX_NET_SKEW

            // Skew is how much time we'd have to add to local to get to remote
            avgSkews[s] += durationCount<Milliseconds>(remote - local);

            LOG(logLvl + 1) << "skew from remote server " << server
                            << " found: " << (remote - local);
        }
    }

    // Analyze skews

    long long serverMaxSkew = 0;
    long long serverMinSkew = 0;

    for (unsigned s = 0; s < avgSkews.size(); s++) {
        long long avgSkew = (avgSkews[s] /= skewChecks);

        // Keep track of max and min skews
        if (s == 0) {
            serverMaxSkew = avgSkew;
            serverMinSkew = avgSkew;
        } else {
            if (avgSkew > serverMaxSkew)
                serverMaxSkew = avgSkew;
            if (avgSkew < serverMinSkew)
                serverMinSkew = avgSkew;
        }
    }

    long long totalSkew = serverMaxSkew - serverMinSkew;

    // Make sure our max skew is not more than our pre-set limit
    if (totalSkew > (long long)maxClockSkew) {
        LOG(logLvl + 1) << "total clock skew of " << totalSkew << "ms for servers " << cluster
                        << " is out of " << maxClockSkew << "ms bounds." << endl;
        return false;
    }

    LOG(logLvl + 1) << "total clock skew of " << totalSkew << "ms for servers " << cluster
                    << " is in " << maxClockSkew << "ms bounds." << endl;
    return true;
}
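// Hedged sketch (not from the source): the spread check performed at the end of checkSkew()
// above, isolated for clarity. The bound applies to the spread between the fastest and slowest
// server clocks, not to any single server's absolute skew. For example, with per-server average
// skews of {+120, -40, +10} ms and a 100 ms limit, the spread is 160 ms and the check fails.
// The free-function form is an illustrative assumption.
#include <algorithm>
#include <vector>

bool skewWithinBounds(const std::vector<long long>& avgSkewsMillis, long long maxClockSkewMillis) {
    if (avgSkewsMillis.empty())
        return true;
    const long long maxSkew = *std::max_element(avgSkewsMillis.begin(), avgSkewsMillis.end());
    const long long minSkew = *std::min_element(avgSkewsMillis.begin(), avgSkewsMillis.end());
    return (maxSkew - minSkew) <= maxClockSkewMillis;  // spread across servers
}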
void runThread(ConnectionString& hostConn,
               unsigned threadId,
               unsigned seed,
               BSONObj& cmdObj,
               BSONObjBuilder& result) {
    stringstream ss;
    ss << "thread-" << threadId;
    setThreadName(ss.str().c_str());

    // Lock name
    string lockName = string_field(cmdObj, "lockName", this->name + "_lock");

    // Range of clock skew in diff threads
    int skewRange = (int)number_field(cmdObj, "skewRange", 1);

    // How long to wait with the lock
    int threadWait = (int)number_field(cmdObj, "threadWait", 30);
    if (threadWait <= 0)
        threadWait = 1;

    // Max amount of time (ms) a thread waits before checking the lock again
    int threadSleep = (int)number_field(cmdObj, "threadSleep", 30);
    if (threadSleep <= 0)
        threadSleep = 1;

    // How long until the lock is forced in ms, only compared locally
    unsigned long long takeoverMS = (unsigned long long)number_field(cmdObj, "takeoverMS", 0);

    // Whether or not we should hang some threads
    int hangThreads = (int)number_field(cmdObj, "hangThreads", 0);

    boost::mt19937 gen((boost::mt19937::result_type)seed);

    boost::variate_generator<boost::mt19937&, boost::uniform_int<> > randomSkew(
        gen, boost::uniform_int<>(0, skewRange));
    boost::variate_generator<boost::mt19937&, boost::uniform_int<> > randomWait(
        gen, boost::uniform_int<>(1, threadWait));
    boost::variate_generator<boost::mt19937&, boost::uniform_int<> > randomSleep(
        gen, boost::uniform_int<>(1, threadSleep));

    int skew = 0;
    if (!lock.get()) {
        // Pick a skew, but the first two threads skew the whole range
        if (threadId == 0)
            skew = -skewRange / 2;
        else if (threadId == 1)
            skew = skewRange / 2;
        else
            skew = randomSkew() - (skewRange / 2);

        // Skew this thread
        jsTimeVirtualThreadSkew(skew);

        log() << "Initializing lock with skew of " << skew << " for thread " << threadId << endl;

        lock.reset(new DistributedLock(hostConn, lockName, takeoverMS, true));

        log() << "Skewed time " << jsTime() << " for thread " << threadId << endl
              << " max wait (with lock: " << threadWait << ", after lock: " << threadSleep << ")"
              << endl
              << " takeover in " << takeoverMS << "(ms remote)" << endl;
    }

    DistributedLock* myLock = lock.get();

    bool errors = false;
    BSONObj lockObj;
    while (keepGoing) {
        try {
            if (myLock->lock_try("Testing distributed lock with skew.", false, &lockObj)) {
                log() << "**** Locked for thread " << threadId << " with ts " << lockObj["ts"]
                      << endl;

                if (count % 2 == 1 && !myLock->lock_try("Testing lock re-entry.", true)) {
                    errors = true;
                    log() << "**** !Could not re-enter lock already held" << endl;
                    break;
                }

                if (count % 3 == 1 && myLock->lock_try("Testing lock non-re-entry.", false)) {
                    errors = true;
                    log() << "**** !Invalid lock re-entry" << endl;
                    break;
                }

                count++;
                int before = count;
                int sleep = randomWait();
                sleepmillis(sleep);
                int after = count;

                if (after != before) {
                    errors = true;
                    log() << "**** !Bad increment while sleeping with lock for: " << sleep << "ms"
                          << endl;
                    break;
                }

                // Unlock only half the time...
                if (hangThreads == 0 || threadId % hangThreads != 0) {
                    log() << "**** Unlocking for thread " << threadId << " with ts "
                          << lockObj["ts"] << endl;
                    myLock->unlock(&lockObj);
                } else {
                    log() << "**** Not unlocking for thread " << threadId << endl;
                    DistributedLock::killPinger(*myLock);
                    // We're simulating a crashed process...
                    break;
                }
            }
        } catch (LockException& e) {
            log() << "*** !Could not try distributed lock." << causedBy(e) << endl;
            break;
        }

        sleepmillis(randomSleep());
    }

    result << "errors" << errors << "skew" << skew << "takeover" << (long long)takeoverMS
           << "localTimeout" << (takeoverMS > 0);
}
void run() {
    if (_token.size() == 0 && _name.size() == 0) {
        LOG(1) << "mms not configured" << endl;
        return;
    }

    if (_token.size() == 0) {
        log() << "no token for mms - not running" << endl;
        return;
    }

    if (_name.size() == 0) {
        log() << "no name for mms - not running" << endl;
        return;
    }

    log() << "mms monitor starting... token:" << _token << " name:" << _name
          << " interval: " << _secsToSleep << endl;

    Client::initThread("mms");
    Client& c = cc();

    // TODO: using direct client is bad, but easy for now

    while (!inShutdown()) {
        sleepsecs(_secsToSleep);

        try {
            stringstream url;
            url << _baseurl << "?"
                << "token=" << _token << "&"
                << "name=" << _name << "&"
                << "ts=" << time(0);

            BSONObjBuilder bb;
            // duplicated so the post has everything
            bb.append("token", _token);
            bb.append("name", _name);
            bb.appendDate("ts", jsTime());

            // any commands
            _add(bb, "buildinfo");
            _add(bb, "serverStatus");

            BSONObj postData = bb.obj();

            LOG(1) << "mms url: " << url.str() << "\n\t post: " << postData << endl;

            // renamed from 'c' to avoid shadowing the Client reference above
            HttpClient httpClient;
            HttpClient::Result r;
            int rc = httpClient.post(url.str(), postData.jsonString(), &r);
            LOG(1) << "\t response code: " << rc << endl;
            if (rc != 200) {
                log() << "mms error response code:" << rc << endl;
                LOG(1) << "mms error body:" << r.getEntireResponse() << endl;
            }
        } catch (std::exception& e) {
            log() << "mms exception: " << e.what() << endl;
        }
    }

    c.shutdown();
}
virtual bool run(OperationContext* opCtx,
                 const string&,
                 const BSONObj& cmdObj,
                 BSONObjBuilder& result) {
    /* currently request to arbiter is (somewhat arbitrarily) an ismaster request that is not
       authenticated.
    */
    if (cmdObj["forShell"].trueValue()) {
        LastError::get(opCtx->getClient()).disable();
    }

    transport::Session::TagMask sessionTagsToSet = 0;
    transport::Session::TagMask sessionTagsToUnset = 0;

    // Tag connections to avoid closing them on stepdown.
    auto hangUpElement = cmdObj["hangUpOnStepDown"];
    if (!hangUpElement.eoo() && !hangUpElement.trueValue()) {
        sessionTagsToSet |= transport::Session::kKeepOpen;
    }

    auto& clientMetadataIsMasterState = ClientMetadataIsMasterState::get(opCtx->getClient());
    bool seenIsMaster = clientMetadataIsMasterState.hasSeenIsMaster();
    if (!seenIsMaster) {
        clientMetadataIsMasterState.setSeenIsMaster();
    }

    BSONElement element = cmdObj[kMetadataDocumentName];
    if (!element.eoo()) {
        if (seenIsMaster) {
            uasserted(ErrorCodes::ClientMetadataCannotBeMutated,
                      "The client metadata document may only be sent in the first isMaster");
        }

        auto swParseClientMetadata = ClientMetadata::parse(element);

        uassertStatusOK(swParseClientMetadata.getStatus());

        invariant(swParseClientMetadata.getValue());

        swParseClientMetadata.getValue().get().logClientMetadata(opCtx->getClient());

        clientMetadataIsMasterState.setClientMetadata(opCtx->getClient(),
                                                      std::move(swParseClientMetadata.getValue()));
    }

    // Parse the optional 'internalClient' field. This is provided by incoming connections from
    // mongod and mongos.
    auto internalClientElement = cmdObj["internalClient"];
    if (internalClientElement) {
        sessionTagsToSet |= transport::Session::kInternalClient;

        uassert(ErrorCodes::TypeMismatch,
                str::stream() << "'internalClient' must be of type Object, but was of type "
                              << typeName(internalClientElement.type()),
                internalClientElement.type() == BSONType::Object);

        bool foundMaxWireVersion = false;
        for (auto&& elem : internalClientElement.Obj()) {
            auto fieldName = elem.fieldNameStringData();
            if (fieldName == "minWireVersion") {
                // We do not currently use 'internalClient.minWireVersion'.
                continue;
            } else if (fieldName == "maxWireVersion") {
                foundMaxWireVersion = true;

                uassert(ErrorCodes::TypeMismatch,
                        str::stream() << "'maxWireVersion' field of 'internalClient' must be "
                                         "of type int, but was of type "
                                      << typeName(elem.type()),
                        elem.type() == BSONType::NumberInt);

                // All incoming connections from mongod/mongos of earlier versions should be
                // closed if the featureCompatibilityVersion is bumped to 3.6.
                if (elem.numberInt() >=
                    WireSpec::instance().incomingInternalClient.maxWireVersion) {
                    sessionTagsToSet |=
                        transport::Session::kLatestVersionInternalClientKeepOpen;
                } else {
                    sessionTagsToUnset |=
                        transport::Session::kLatestVersionInternalClientKeepOpen;
                }
            } else {
                uasserted(ErrorCodes::BadValue,
                          str::stream() << "Unrecognized field of 'internalClient': '" << fieldName
                                        << "'");
            }
        }

        uassert(ErrorCodes::BadValue,
                "Missing required field 'maxWireVersion' of 'internalClient'",
                foundMaxWireVersion);
    } else {
        sessionTagsToUnset |= (transport::Session::kInternalClient |
                               transport::Session::kLatestVersionInternalClientKeepOpen);
        sessionTagsToSet |= transport::Session::kExternalClientKeepOpen;
    }

    auto session = opCtx->getClient()->session();
    if (session) {
        session->mutateTags(
            [sessionTagsToSet, sessionTagsToUnset](transport::Session::TagMask originalTags) {
                // After a mongos sends the initial "isMaster" command with its mongos client
                // information, it sometimes sends another "isMaster" command that is forwarded
                // from its client. Once kInternalClient has been set, we assume that any future
                // "isMaster" commands are forwarded in this manner, and we do not update the
                // session tags.
                if ((originalTags & transport::Session::kInternalClient) == 0) {
                    return (originalTags | sessionTagsToSet) & ~sessionTagsToUnset;
                } else {
                    return originalTags;
                }
            });
    }

    appendReplicationInfo(opCtx, result, 0);

    if (serverGlobalParams.clusterRole == ClusterRole::ConfigServer) {
        const int configServerModeNumber = 2;
        result.append("configsvr", configServerModeNumber);
    }

    result.appendNumber("maxBsonObjectSize", BSONObjMaxUserSize);
    result.appendNumber("maxMessageSizeBytes", MaxMessageSizeBytes);
    result.appendNumber("maxWriteBatchSize", write_ops::kMaxWriteBatchSize);
    result.appendDate("localTime", jsTime());
    result.append("logicalSessionTimeoutMinutes", localLogicalSessionTimeoutMinutes);

    result.appendNumber("connectionId", opCtx->getClient()->getConnectionId());

    if (internalClientElement) {
        result.append("minWireVersion",
                      WireSpec::instance().incomingInternalClient.minWireVersion);
        result.append("maxWireVersion",
                      WireSpec::instance().incomingInternalClient.maxWireVersion);
    } else {
        result.append("minWireVersion",
                      WireSpec::instance().incomingExternalClient.minWireVersion);
        result.append("maxWireVersion",
                      WireSpec::instance().incomingExternalClient.maxWireVersion);
    }

    result.append("readOnly", storageGlobalParams.readOnly);

    const auto parameter = mapFindWithDefault(ServerParameterSet::getGlobal()->getMap(),
                                              "automationServiceDescriptor",
                                              static_cast<ServerParameter*>(nullptr));
    if (parameter)
        parameter->append(opCtx, result, "automationServiceDescriptor");

    if (opCtx->getClient()->session()) {
        MessageCompressorManager::forSession(opCtx->getClient()->session())
            .serverNegotiate(cmdObj, &result);
    }

    auto& saslMechanismRegistry = SASLServerMechanismRegistry::get(opCtx->getServiceContext());
    saslMechanismRegistry.advertiseMechanismNamesForUser(opCtx, cmdObj, &result);

    return true;
}
void RangeDeleter::doWork() {
    _env->initThread();

    while (!inShutdown() && !stopRequested()) {
        string errMsg;

        RangeDeleteEntry* nextTask = NULL;

        {
            scoped_lock sl(_queueMutex);
            while (_taskQueue.empty()) {
                _taskQueueNotEmptyCV.timed_wait(sl.boost(),
                                                duration::milliseconds(NotEmptyTimeoutMillis));

                if (stopRequested()) {
                    log() << "stopping range deleter worker" << endl;
                    return;
                }

                if (_taskQueue.empty()) {
                    // Try to check if some deletes are ready and move them to the
                    // ready queue.
                    TaskList::iterator iter = _notReadyQueue.begin();
                    while (iter != _notReadyQueue.end()) {
                        RangeDeleteEntry* entry = *iter;

                        set<CursorId> cursorsNow;
                        {
                            boost::scoped_ptr<OperationContext> txn(
                                getGlobalEnvironment()->newOpCtx());
                            _env->getCursorIds(txn.get(), entry->ns, &cursorsNow);
                        }

                        set<CursorId> cursorsLeft;
                        std::set_intersection(entry->cursorsToWait.begin(),
                                              entry->cursorsToWait.end(),
                                              cursorsNow.begin(),
                                              cursorsNow.end(),
                                              std::inserter(cursorsLeft, cursorsLeft.end()));

                        entry->cursorsToWait.swap(cursorsLeft);

                        if (entry->cursorsToWait.empty()) {
                            (*iter)->stats.queueEndTS = jsTime();
                            _taskQueue.push_back(*iter);
                            _taskQueueNotEmptyCV.notify_one();
                            iter = _notReadyQueue.erase(iter);
                        } else {
                            // Elapsed time since this delete was queued (now - queueStartTS);
                            // the original operand order was reversed and would underflow.
                            const unsigned long long int elapsedMillis =
                                curTimeMillis64() - entry->stats.queueStartTS.millis;
                            if (elapsedMillis > LogCursorsThresholdMillis &&
                                entry->timeSinceLastLog.millis > LogCursorsIntervalMillis) {
                                entry->timeSinceLastLog = jsTime();
                                logCursorsWaiting(entry->ns,
                                                  entry->min,
                                                  entry->max,
                                                  elapsedMillis,
                                                  entry->cursorsToWait);
                            }

                            ++iter;
                        }
                    }
                }
            }

            if (stopRequested()) {
                log() << "stopping range deleter worker" << endl;
                return;
            }

            nextTask = _taskQueue.front();
            _taskQueue.pop_front();

            _deletesInProgress++;
        }

        {
            boost::scoped_ptr<OperationContext> txn(getGlobalEnvironment()->newOpCtx());
            ReplTime lastOp;

            nextTask->stats.deleteStartTS = jsTime();
            bool delResult = _env->deleteRange(
                txn.get(), *nextTask, &nextTask->stats.deletedDocCount, &lastOp, &errMsg);
            nextTask->stats.deleteEndTS = jsTime();

            if (delResult) {
                nextTask->stats.waitForReplStartTS = jsTime();

                if (!_env->waitForReplication(
                        lastOp, DelWriteConcern, WaitForReplTimeoutSecs, &errMsg)) {
                    warning() << "Error encountered while waiting for replication: " << errMsg
                              << endl;
                }

                nextTask->stats.waitForReplEndTS = jsTime();
            } else {
                warning() << "Error encountered while trying to delete range: " << errMsg << endl;
            }
        }

        {
            scoped_lock sl(_queueMutex);

            NSMinMax setEntry(nextTask->ns, nextTask->min, nextTask->max);
            deletePtrElement(&_deleteSet, &setEntry);
            _deletesInProgress--;

            if (nextTask->notifyDone) {
                nextTask->notifyDone->notifyOne();
            }
        }

        recordDelStats(new DeleteJobStats(nextTask->stats));
        delete nextTask;
        nextTask = NULL;
    }
}
bool RangeDeleter::deleteNow(OperationContext* txn,
                             const std::string& ns,
                             const BSONObj& min,
                             const BSONObj& max,
                             const BSONObj& shardKeyPattern,
                             bool secondaryThrottle,
                             string* errMsg) {
    if (stopRequested()) {
        *errMsg = "deleter is already stopped.";
        return false;
    }

    string dummy;
    if (errMsg == NULL)
        errMsg = &dummy;

    NSMinMax deleteRange(ns, min, max);
    {
        scoped_lock sl(_queueMutex);
        if (!canEnqueue_inlock(ns, min, max, errMsg)) {
            return false;
        }

        _deleteSet.insert(&deleteRange);

        // Note: count for pending deletes is an integral part of the shutdown story.
        // Therefore, to simplify things, there is no "pending" state for deletes in
        // deleteNow, the state transition is simply inProgress -> done.
        _deletesInProgress++;
    }

    set<CursorId> cursorsToWait;
    _env->getCursorIds(txn, ns, &cursorsToWait);

    long long checkIntervalMillis = 5;

    if (!cursorsToWait.empty()) {
        log() << "rangeDeleter waiting for " << cursorsToWait.size() << " cursors in " << ns
              << " to finish" << endl;
    }

    RangeDeleteEntry taskDetails(ns, min, max, shardKeyPattern, secondaryThrottle);
    taskDetails.stats.queueStartTS = jsTime();

    Date_t timeSinceLastLog;
    for (; !cursorsToWait.empty(); sleepmillis(checkIntervalMillis)) {
        const unsigned long long timeNow = curTimeMillis64();
        const unsigned long long elapsedTimeMillis =
            timeNow - taskDetails.stats.queueStartTS.millis;
        const unsigned long long lastLogMillis = timeNow - timeSinceLastLog.millis;

        if (elapsedTimeMillis > LogCursorsThresholdMillis &&
            lastLogMillis > LogCursorsIntervalMillis) {
            timeSinceLastLog = jsTime();
            logCursorsWaiting(ns, min, max, elapsedTimeMillis, cursorsToWait);
        }

        set<CursorId> cursorsNow;
        _env->getCursorIds(txn, ns, &cursorsNow);

        set<CursorId> cursorsLeft;
        std::set_intersection(cursorsToWait.begin(),
                              cursorsToWait.end(),
                              cursorsNow.begin(),
                              cursorsNow.end(),
                              std::inserter(cursorsLeft, cursorsLeft.end()));

        cursorsToWait.swap(cursorsLeft);

        if (stopRequested()) {
            *errMsg = "deleter was stopped.";

            scoped_lock sl(_queueMutex);
            _deleteSet.erase(&deleteRange);

            _deletesInProgress--;

            if (_deletesInProgress == 0) {
                _nothingInProgressCV.notify_one();
            }

            return false;
        }

        if (checkIntervalMillis < MaxCurorCheckIntervalMillis) {
            checkIntervalMillis *= 2;
        }
    }
    taskDetails.stats.queueEndTS = jsTime();

    ReplTime lastOp;

    taskDetails.stats.deleteStartTS = jsTime();
    bool result =
        _env->deleteRange(txn, taskDetails, &taskDetails.stats.deletedDocCount, &lastOp, errMsg);
    taskDetails.stats.deleteEndTS = jsTime();

    if (result) {
        taskDetails.stats.waitForReplStartTS = jsTime();
        result = _env->waitForReplication(lastOp, DelWriteConcern, WaitForReplTimeoutSecs, errMsg);
        taskDetails.stats.waitForReplEndTS = jsTime();
    }

    {
        scoped_lock sl(_queueMutex);
        _deleteSet.erase(&deleteRange);

        _deletesInProgress--;

        if (_deletesInProgress == 0) {
            _nothingInProgressCV.notify_one();
        }
    }

    recordDelStats(new DeleteJobStats(taskDetails.stats));
    return result;
}