bool Mutex::trylock() {
    switch (pthread_mutex_trylock(&_mutex)) {
        case 0:
            return true;
        case EBUSY:
            return false;
        default:
            throw LockException("Unable to try/lock the mutex");
    }
}
void Mutex::lock() {
    if (pthread_mutex_lock(&_mutex))
        throw LockException("Unable to lock the mutex");
}
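// Usage sketch (hypothetical caller; assumes the wrapper also exposes a matching
// unlock(), which is not shown in this excerpt):
//
//     Mutex m;
//     if (m.trylock()) {        // returns false only when the mutex is busy (EBUSY)
//         // ... critical section ...
//         m.unlock();
//     } else {
//         m.lock();             // blocks; throws LockException on unexpected errors
//         // ... critical section ...
//         m.unlock();
//     }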
// Semantics of this method are basically that if the lock cannot be acquired, it returns
// false and can be retried. If the lock should not be tried again (some unexpected error),
// a LockException is thrown. (A usage sketch for callers follows the function body.)
bool DistributedLock::lock_try(const OID& lockID,
                               const string& why,
                               BSONObj* other,
                               double timeout) {
    // This should always be true, if not, we are using the lock incorrectly.
    verify(_name != "");

    auto lockTimeout = _lockTimeout;
    MONGO_FAIL_POINT_BLOCK(setSCCCDistLockTimeout, customTimeout) {
        const BSONObj& data = customTimeout.getData();
        lockTimeout = data["timeoutMs"].numberInt();
    }

    LOG(logLvl) << "trying to acquire new distributed lock for " << _name << " on " << _conn
                << " ( lock timeout : " << lockTimeout << ", ping interval : " << _lockPing
                << ", process : " << _processId << " )" << endl;

    // write to dummy if 'other' is null
    BSONObj dummyOther;
    if (other == NULL)
        other = &dummyOther;

    ScopedDbConnection conn(_conn.toString(), timeout);

    BSONObjBuilder queryBuilder;
    queryBuilder.append(LocksType::name(), _name);
    queryBuilder.append(LocksType::state(), LocksType::UNLOCKED);

    {
        // make sure it's there so we can use simple update logic below
        BSONObj o = conn->findOne(LocksType::ConfigNS, BSON(LocksType::name(_name))).getOwned();

        // Case 1: No locks
        if (o.isEmpty()) {
            try {
                LOG(logLvl) << "inserting initial doc in " << LocksType::ConfigNS << " for lock "
                            << _name << endl;
                conn->insert(LocksType::ConfigNS,
                             BSON(LocksType::name(_name)
                                  << LocksType::state(LocksType::UNLOCKED) << LocksType::who("")
                                  << LocksType::lockID(OID())));
            } catch (UserException& e) {
                warning() << "could not insert initial doc for distributed lock " << _name
                          << causedBy(e) << endl;
            }
        }

        // Case 2: A set lock that we might be able to force
        else if (o[LocksType::state()].numberInt() > LocksType::UNLOCKED) {
            string lockName =
                o[LocksType::name()].String() + string("/") + o[LocksType::process()].String();

            BSONObj lastPing = conn->findOne(
                LockpingsType::ConfigNS, o[LocksType::process()].wrap(LockpingsType::process()));

            if (lastPing.isEmpty()) {
                LOG(logLvl) << "empty ping found for process in lock '" << lockName << "'" << endl;

                // TODO: Using 0 as a "no time found" value will fail if dates roll over, but
                // then, so will a lot.
                lastPing = BSON(LockpingsType::process(o[LocksType::process()].String())
                                << LockpingsType::ping(Date_t()));
            }

            unsigned long long elapsed = 0;
            unsigned long long takeover = lockTimeout;
            DistLockPingInfo lastPingEntry = getLastPing();

            LOG(logLvl) << "checking last ping for lock '" << lockName << "' against process "
                        << lastPingEntry.processId << " and ping " << lastPingEntry.lastPing;

            try {
                Date_t remote = remoteTime(_conn);

                auto pingDocProcessId = lastPing[LockpingsType::process()].String();
                auto pingDocPingValue = lastPing[LockpingsType::ping()].Date();

                // Timeout the elapsed time using comparisons of remote clock
                // For non-finalized locks, timeout 15 minutes since last seen (ts)
                // For finalized locks, timeout 15 minutes since last ping
                bool recPingChange = o[LocksType::state()].numberInt() == LocksType::LOCKED &&
                    (lastPingEntry.processId != pingDocProcessId ||
                     lastPingEntry.lastPing != pingDocPingValue);
                bool recTSChange = lastPingEntry.lockSessionId != o[LocksType::lockID()].OID();

                if (recPingChange || recTSChange) {
                    // If the ping has changed since we last checked, mark the current date and
                    // time
                    setLastPing(DistLockPingInfo(pingDocProcessId,
                                                 pingDocPingValue,
                                                 remote,
                                                 o[LocksType::lockID()].OID(),
                                                 OID()));
                } else {
                    // GOTCHA!
                    // Due to network issues, it is possible that the current time
                    // is less than the remote time. We *have* to check this here, otherwise
                    // we overflow and our lock breaks.
                    if (lastPingEntry.configLocalTime >= remote)
                        elapsed = 0;
                    else
                        elapsed =
                            durationCount<Milliseconds>(remote - lastPingEntry.configLocalTime);
                }
            } catch (LockException& e) {
                // Remote server cannot be found / is not responsive
                warning() << "Could not get remote time from " << _conn << causedBy(e);

                // If our config server is having issues, forget all the pings until we can see it
                // again
                resetLastPing();
            }

            if (elapsed <= takeover) {
                LOG(1) << "could not force lock '" << lockName << "' because elapsed time "
                       << elapsed << " <= takeover time " << takeover;
                *other = o;
                other->getOwned();
                conn.done();
                return false;
            }

            LOG(0) << "forcing lock '" << lockName << "' because elapsed time " << elapsed
                   << " > takeover time " << takeover;

            if (elapsed > takeover) {
                // Lock may be forced; reset our timer whether that succeeds or fails.
                // Ensures that another timeout must happen if something borks up here, and resets
                // our pristine ping state if acquired.
                resetLastPing();

                try {
                    // Check the clock skew again. If we check this before we get a lock
                    // and after the lock times out, we can be pretty sure the time is
                    // increasing at the same rate on all servers and therefore our
                    // timeout is accurate
                    if (isRemoteTimeSkewed()) {
                        string msg(str::stream() << "remote time in cluster " << _conn.toString()
                                                 << " is now skewed, cannot force lock.");
                        throw LockException(msg, ErrorCodes::DistributedClockSkewed);
                    }

                    // Make sure we break the lock with the correct "ts" (OID) value, otherwise
                    // we can overwrite a new lock inserted in the meantime.
                    conn->update(LocksType::ConfigNS,
                                 BSON(LocksType::name(_name)
                                      << LocksType::state() << o[LocksType::state()].numberInt()
                                      << LocksType::lockID(o[LocksType::lockID()].OID())),
                                 BSON("$set" << BSON(LocksType::state(LocksType::UNLOCKED))));

                    BSONObj err = conn->getLastErrorDetailed();
                    string errMsg = DBClientWithCommands::getLastErrorString(err);

                    // TODO: Clean up all the extra code to exit this method, probably with a
                    // refactor
                    if (!errMsg.empty() || !err["n"].type() || err["n"].numberInt() < 1) {
                        logErrMsgOrWarn(
                            "Could not force lock", lockName, errMsg, "(another force won");
                        *other = o;
                        other->getOwned();
                        conn.done();
                        return false;
                    }
                } catch (UpdateNotTheSame&) {
                    // Ok to continue since we know we forced at least one lock document, and all
                    // lock docs are required for a lock to be held.
                    warning() << "lock forcing " << lockName << " inconsistent" << endl;
                } catch (const LockException&) {
                    // Let the exception go up and don't repackage the exception.
                    throw;
                } catch (std::exception& e) {
                    conn.done();
                    string msg(str::stream() << "exception forcing distributed lock " << lockName
                                             << causedBy(e));
                    throw LockException(msg, 13660);
                }
            } else {
                // Not strictly necessary, but helpful for small timeouts where thread
                // scheduling is significant. This ensures that two attempts are still
                // required for a force if not acquired, and resets our state if we
                // are acquired.
                resetLastPing();

                // Test that the lock is held by trying to update the finalized state of the lock
                // to the same state. If it does not update, or does not update on all servers,
                // we can't re-enter.
                try {
                    // Test the lock with the correct "ts" (OID) value
                    conn->update(LocksType::ConfigNS,
                                 BSON(LocksType::name(_name)
                                      << LocksType::state(LocksType::LOCKED)
                                      << LocksType::lockID(o[LocksType::lockID()].OID())),
                                 BSON("$set" << BSON(LocksType::state(LocksType::LOCKED))));

                    BSONObj err = conn->getLastErrorDetailed();
                    string errMsg = DBClientWithCommands::getLastErrorString(err);

                    // TODO: Clean up all the extra code to exit this method, probably with a
                    // refactor
                    if (!errMsg.empty() || !err["n"].type() || err["n"].numberInt() < 1) {
                        logErrMsgOrWarn(
                            "Could not re-enter lock", lockName, errMsg, "(not sure lock is held");
                        *other = o;
                        other->getOwned();
                        conn.done();
                        return false;
                    }
                } catch (UpdateNotTheSame&) {
                    // NOT ok to continue since our lock isn't held by all servers, so isn't valid.
                    warning() << "inconsistent state re-entering lock, lock " << lockName
                              << " not held" << endl;
                    *other = o;
                    other->getOwned();
                    conn.done();
                    return false;
                } catch (std::exception& e) {
                    conn.done();
                    string msg(str::stream() << "exception re-entering distributed lock "
                                             << lockName << causedBy(e));
                    throw LockException(msg, 13660);
                }

                LOG(logLvl - 1) << "re-entered distributed lock '" << lockName << "'" << endl;
                *other = o.getOwned();
                conn.done();
                return true;
            }

            LOG(logLvl - 1) << "lock '" << lockName << "' successfully forced" << endl;

            // We don't need the ts value in the query, since we will only ever replace locks
            // with state=0.
        }

        // Case 3: We have an expired lock
        else if (o[LocksType::lockID()].type()) {
            queryBuilder.append(o[LocksType::lockID()]);
        }
    }

    // Always reset our ping if we're trying to get a lock, since getting a lock implies the lock
    // state is open and no locks need to be forced. If anything goes wrong, we don't want to
    // remember an old lock.
    resetLastPing();

    bool gotLock = false;
    BSONObj currLock;

    BSONObj lockDetails =
        BSON(LocksType::state(LocksType::LOCK_PREP)
             << LocksType::who(getDistLockId()) << LocksType::process(_processId)
             << LocksType::when(jsTime()) << LocksType::why(why) << LocksType::lockID(lockID));
    BSONObj whatIWant = BSON("$set" << lockDetails);
    BSONObj query = queryBuilder.obj();

    string lockName = _name + string("/") + _processId;

    try {
        // Main codepath to acquire lock
        LOG(logLvl) << "about to acquire distributed lock '" << lockName << "'";
        LOG(logLvl + 1) << "trying to acquire lock " << query.toString(false, true)
                        << " with details " << lockDetails.toString(false, true) << endl;

        conn->update(LocksType::ConfigNS, query, whatIWant);

        BSONObj err = conn->getLastErrorDetailed();
        string errMsg = DBClientWithCommands::getLastErrorString(err);

        currLock = conn->findOne(LocksType::ConfigNS, BSON(LocksType::name(_name)));

        if (!errMsg.empty() || !err["n"].type() || err["n"].numberInt() < 1) {
            logErrMsgOrWarn("could not acquire lock", lockName, errMsg, "(another update won)");
            *other = currLock;
            other->getOwned();
            gotLock = false;
        } else {
            gotLock = true;
        }
    } catch (UpdateNotTheSame& up) {
        // this means our update got through on some, but not others
        warning() << "distributed lock '" << lockName << "' did not propagate properly."
                  << causedBy(up) << endl;

        // Overall protection derives from:
        // All unlocking updates use the ts value when setting state to 0
        // This ensures that during locking, we can override all smaller ts locks with
        // our own safe ts value and not be unlocked afterward.
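        // Resolution below visits each config server that saw the partial update and tries to
        // converge its lock document on our lockID:
        //   - a document whose lockID is smaller than ours, or whose state is UNLOCKED, is
        //     overwritten with our lockID in LOCK_PREP state;
        //   - a document already carrying our lockID, or a larger one, is left alone.
        // Afterward currLock holds the document with the largest lockID seen, and we only treat
        // the lock as won if that lockID is our own.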
        for (unsigned i = 0; i < up.size(); i++) {
            ScopedDbConnection indDB(up[i].first);
            BSONObj indUpdate;

            try {
                indUpdate = indDB->findOne(LocksType::ConfigNS, BSON(LocksType::name(_name)));
                const auto currentLockID = indUpdate[LocksType::lockID()].OID();

                // If we override this lock in any way, grab and protect it.
                // We assume/ensure that if a process does not have all lock documents, it is no
                // longer holding the lock.
                // Note - finalized locks may compete too, but we know they've won already if
                // competing in this round. Cleanup of crashes during finalizing may take a few
                // tries.
                if (currentLockID < lockID ||
                    indUpdate[LocksType::state()].numberInt() == LocksType::UNLOCKED) {
                    BSONObj grabQuery =
                        BSON(LocksType::name(_name) << LocksType::lockID(currentLockID));

                    // Change ts so we won't be forced, state so we won't be relocked
                    BSONObj grabChanges =
                        BSON(LocksType::lockID(lockID) << LocksType::state(LocksType::LOCK_PREP));

                    // Either our update will succeed, and we'll grab the lock, or it will fail b/c
                    // some other process grabbed the lock (which will change the ts), but the lock
                    // will be set until forcing
                    indDB->update(LocksType::ConfigNS, grabQuery, BSON("$set" << grabChanges));

                    indUpdate = indDB->findOne(LocksType::ConfigNS, BSON(LocksType::name(_name)));

                    // The tournament was interfered with, and it is not safe to proceed further.
                    // One case where this can happen is when the LockPinger processes old
                    // entries from addUnlockOID. See SERVER-10688 for a more detailed
                    // description of the race.
                    if (indUpdate[LocksType::state()].numberInt() <= LocksType::UNLOCKED) {
                        LOG(logLvl - 1) << "lock tournament interrupted, "
                                        << "so no lock was taken; "
                                        << "new state of lock: " << indUpdate << endl;

                        // We now break and set our currLock lockID value to zero, so that
                        // we know that we did not acquire the lock below. Later code will
                        // clean up failed entries.
                        currLock = BSON(LocksType::lockID(OID()));
                        indDB.done();
                        break;
                    }
                }
                // else our lock is the same, in which case we're safe, or it's a bigger lock,
                // in which case we won't need to protect anything since we won't have the lock.
            } catch (std::exception& e) {
                conn.done();
                string msg(str::stream()
                           << "distributed lock " << lockName
                           << " had errors communicating with individual server " << up[i].first
                           << causedBy(e));
                throw LockException(msg, 13661, lockID);
            }

            verify(!indUpdate.isEmpty());

            // Find max TS value
            if (currLock.isEmpty() ||
                currLock[LocksType::lockID()] < indUpdate[LocksType::lockID()]) {
                currLock = indUpdate.getOwned();
            }

            indDB.done();
        }

        // Locks on all servers are now set and safe until forcing
        if (currLock[LocksType::lockID()].OID() == lockID) {
            LOG(logLvl - 1) << "lock update won, completing lock propagation for '" << lockName
                            << "'" << endl;
            gotLock = true;
        } else {
            LOG(logLvl - 1) << "lock update lost, lock '" << lockName << "' not propagated."
                            << endl;
            gotLock = false;
        }
    } catch (std::exception& e) {
        conn.done();
        string msg(str::stream() << "exception creating distributed lock " << lockName
                                 << causedBy(e));
        throw LockException(msg, 13663, lockID);
    }

    // Complete lock propagation
    if (gotLock) {
        // This is now safe, since we know that no new locks will be placed on top of the ones
        // we've checked for at least 15 minutes. Sets the state = 2, so that future clients can
        // determine that the lock is truly set.
        // The invariant for rollbacks is that we will never force locks with state = 2 and
        // active pings, since that indicates the lock is active, but this means the process
        // creating/destroying them must explicitly poll when something goes wrong.
        try {
            BSONObjBuilder finalLockDetails;
            BSONObjIterator bi(lockDetails);
            while (bi.more()) {
                BSONElement el = bi.next();
                if ((string)(el.fieldName()) == LocksType::state())
                    finalLockDetails.append(LocksType::state(), LocksType::LOCKED);
                else
                    finalLockDetails.append(el);
            }

            conn->update(LocksType::ConfigNS,
                         BSON(LocksType::name(_name)),
                         BSON("$set" << finalLockDetails.obj()));

            BSONObj err = conn->getLastErrorDetailed();
            string errMsg = DBClientWithCommands::getLastErrorString(err);

            currLock = conn->findOne(LocksType::ConfigNS, BSON(LocksType::name(_name)));

            if (!errMsg.empty() || !err["n"].type() || err["n"].numberInt() < 1) {
                warning() << "could not finalize winning lock " << lockName
                          << (!errMsg.empty() ? causedBy(errMsg) : " (did not update lock) ")
                          << endl;
                gotLock = false;
            } else {
                // SUCCESS!
                gotLock = true;
            }
        } catch (std::exception& e) {
            conn.done();
            string msg(str::stream() << "exception finalizing winning lock" << causedBy(e));
            // Inform caller about the potential orphan lock.
            throw LockException(msg, 13662, lockID);
        }
    }

    *other = currLock;
    other->getOwned();

    // Log our lock results
    if (gotLock)
        LOG(logLvl - 1) << "distributed lock '" << lockName << "' acquired for '" << why
                        << "', ts : " << currLock[LocksType::lockID()].OID();
    else
        LOG(logLvl - 1) << "distributed lock '" << lockName << "' was not acquired.";

    conn.done();

    return gotLock;
}
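// Usage sketch for callers (hypothetical names apart from lock_try/LockException; the exact
// unlock call and any default arguments live in the class header, not shown in this excerpt):
//
//     BSONObj otherLock;
//     OID lockSessionID = OID::gen();
//     try {
//         if (distLock.lock_try(lockSessionID, "migrating chunk", &otherLock, 30.0)) {
//             // lock acquired and finalized; do the protected work, then unlock using
//             // the same lockSessionID
//         } else {
//             // lock is currently held (otherLock describes the holder); safe to retry later
//         }
//     } catch (const LockException& e) {
//         // unexpected error (clock skew, config server trouble, ...); do not blindly retry
//     }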