/**
 * Begins acquisition of the global lock in 'lockMode', using 'deadline' for the lock requests.
 *
 * Steps, in order:
 *  1. For MODE_IX requests, take a ticket from the FlowControlTicketholder if one is available.
 *  2. Lock '_pbwm' in MODE_IS when the operation's locker reports that it should conflict with
 *     secondary batch application.
 *  3. Lock resourceIdReplicationStateTransitionLock in MODE_IX.
 *  4. Call lockGlobalBegin() and store its outcome in '_result'.
 *
 * If a later step throws, the locks taken in steps 2 and 3 are released again by scope guards.
 * Interruption exceptions are rethrown only when '_interruptBehavior' is kThrow.
 */
void Lock::GlobalLock::_enqueue(LockMode lockMode, Date_t deadline) {
    if (lockMode == LockMode::MODE_IX) {
        if (auto flowControl = FlowControlTicketholder::get(_opCtx)) {
            flowControl->getTicket(_opCtx);
        }
    }

    try {
        // Re-evaluated every time it is called, exactly like the inline checks it replaces.
        const auto conflictsWithBatchApplication = [this] {
            return _opCtx->lockState()->shouldConflictWithSecondaryBatchApplication();
        };

        if (conflictsWithBatchApplication()) {
            _pbwm.lock(MODE_IS);
        }
        auto pbwmRollback = makeGuard([this, &conflictsWithBatchApplication] {
            if (conflictsWithBatchApplication()) {
                _pbwm.unlock();
            }
        });

        _opCtx->lockState()->lock(
            _opCtx, resourceIdReplicationStateTransitionLock, MODE_IX, deadline);
        auto rstlRollback = makeGuard(
            [this] { _opCtx->lockState()->unlock(resourceIdReplicationStateTransitionLock); });

        // '_result' stays LOCK_INVALID if lockGlobalBegin() throws.
        _result = LOCK_INVALID;
        _result = _opCtx->lockState()->lockGlobalBegin(_opCtx, lockMode, deadline);

        // Everything was requested successfully: keep the locks.
        rstlRollback.dismiss();
        pbwmRollback.dismiss();
    } catch (const ExceptionForCat<ErrorCategory::Interruption>&) {
        // Any interrupt behavior other than kThrow suppresses this exception.
        if (_interruptBehavior == InterruptBehavior::kThrow) {
            throw;
        }
    }
}
/**
 * Returns a human-readable description of the error number 'err'.
 *
 * The global errno is restored to the value it had on entry before this function returns, so
 * calling it never disturbs the caller's errno.
 *
 * With the XSI-compatible strerror_r, a failed lookup yields
 * "Unknown error <err> (strerror_r failed with error <code>)", where <code> is the failure
 * reported by strerror_r itself.
 */
fbstring errnoStr(int err) {
  int savedErrno = errno;

  // Ensure that we reset errno upon exit.
  auto guard(makeGuard([&] { errno = savedErrno; }));

  char buf[1024];
  buf[0] = '\0';

  fbstring result;

  // https://developer.apple.com/library/mac/documentation/Darwin/Reference/ManPages/man3/strerror_r.3.html
  // http://www.kernel.org/doc/man-pages/online/pages/man3/strerror.3.html
#if defined(__APPLE__) || defined(__FreeBSD__) || \
    ((_POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600) && !_GNU_SOURCE)
  // Using XSI-compatible strerror_r
  int r = strerror_r(err, buf, sizeof(buf));

  // OSX/FreeBSD use EINVAL and Linux uses -1 so just check for non-zero
  if (r != 0) {
    // Implementations that return -1 report the reason through errno; the
    // others return the error number directly and need not touch errno.
    int failure = (r == -1) ? errno : r;
    result = to<fbstring>(
        "Unknown error ", err,
        " (strerror_r failed with error ", failure, ")");
  } else {
    result.assign(buf);
  }
#else
  // Using GNU strerror_r: it returns the message pointer, which may or may
  // not be 'buf'.
  result.assign(strerror_r(err, buf, sizeof(buf)));
#endif

  return result;
}
/**
 * Launches one worker thread through launchServiceWorkerThread() and returns the Status of that
 * launch.
 *
 * The worker body holds '_mutex' except while it starts a replacement thread and runs tasks. On
 * each pass it waits on '_threadWakeup' for a ready task (or shutdown), takes the front task from
 * '_readyTasks', optionally starts a replacement worker so that ready + starting threads stay at
 * '_reservedThreads', and then runs the task plus anything queued on '_localWorkQueue'. A worker
 * exits when '_stillRunning' becomes false or when rejoining the ready set would exceed
 * '_reservedThreads'.
 */
Status ServiceExecutorReserved::_startWorker() {
    log() << "Starting new worker thread for " << _name << " service executor";

    return launchServiceWorkerThread([this] {
        stdx::unique_lock<stdx::mutex> workerLock(_mutex);

        _numRunningWorkerThreads.addAndFetch(1);
        // Declared after the lock so that it runs first on exit: the running count is dropped and
        // '_shutdownCondition' is signalled before 'workerLock' is released.
        auto runningCountGuard = makeGuard([&] {
            _numRunningWorkerThreads.subtractAndFetch(1);
            _shutdownCondition.notify_one();
        });

        // This thread is no longer "starting"; it is now ready for work.
        _numStartingThreads--;
        _numReadyThreads++;

        while (_stillRunning.load()) {
            _threadWakeup.wait(
                workerLock, [&] { return !(_stillRunning.load() && _readyTasks.empty()); });

            if (!_stillRunning.loadRelaxed()) {
                break;
            }
            if (_readyTasks.empty()) {
                continue;
            }

            auto nextTask = std::move(_readyTasks.front());
            _readyTasks.pop_front();
            _numReadyThreads -= 1;

            // Decide under the lock whether a replacement is needed to keep the reserve full.
            const bool needReplacement =
                (_numReadyThreads + _numStartingThreads < _reservedThreads);
            if (needReplacement) {
                _numStartingThreads++;
            }

            workerLock.unlock();

            if (needReplacement) {
                // NOTE(review): if this launch fails, '_numStartingThreads' stays incremented in
                // this function - confirm whether that is compensated for elsewhere.
                auto spawnStatus = _startWorker();
                if (!spawnStatus.isOK()) {
                    warning() << "Could not start new reserve worker thread: " << spawnStatus;
                }
            }

            // Run the task, then drain the local queue until it is empty or shutdown begins.
            _localWorkQueue.emplace_back(std::move(nextTask));
            while (!_localWorkQueue.empty() && _stillRunning.loadRelaxed()) {
                _localRecursionDepth = 1;
                _localWorkQueue.front()();
                _localWorkQueue.pop_front();
            }

            workerLock.lock();
            if (_numReadyThreads + 1 > _reservedThreads) {
                // Rejoining would exceed the reserve, so this thread retires.
                break;
            }
            _numReadyThreads += 1;
        }

        LOG(3) << "Exiting worker thread in " << _name << " service executor";
    });
}
bool CryptoBuffer::doCrypt(const char *in, int inlen, bool isEncrypt, QByteArray &out) { const int OUTBUF_SIZE = 8*1024; unsigned char outbuf[OUTBUF_SIZE + EVP_MAX_BLOCK_LENGTH]; EVP_CIPHER_CTX ctx; EVP_CIPHER_CTX_init(&ctx); auto ctxGuard = makeGuard([&ctx] { EVP_CIPHER_CTX_cleanup(&ctx); }); if (!EVP_CipherInit_ex(&ctx, d->cipher, NULL, d->key, d->iv, isEncrypt)) return DEBUGRET(false, "EVP_CipherInit_Ex failed"); const unsigned char *ptr = reinterpret_cast<const unsigned char*>(in); int restlen = inlen; int outlen; while (restlen > 0) { int readlen = std::min(restlen, OUTBUF_SIZE); if (!EVP_CipherUpdate(&ctx, outbuf, &outlen, ptr, readlen)) return DEBUGRET(false, "EVP_CipherUpdate failed"); out.append(reinterpret_cast<const char*>(outbuf), outlen); ptr += readlen; restlen -= readlen; } if (!EVP_CipherFinal_ex(&ctx, outbuf, &outlen)) return DEBUGRET(false, "EVP_CipherFinal_ex failed"); out.append(reinterpret_cast<const char*>(outbuf), outlen); return true; }
/**
 * Returns a human-readable description of the error number 'err'.
 *
 * The global errno is restored to the value it had on entry before this function returns.
 *
 * With the XSI-compatible strerror_r, a failed lookup yields
 * "Unknown error <err> (strerror_r failed with error <code>)", where <code> is the failure
 * reported by strerror_r itself.
 */
fbstring errnoStr(int err) {
  int savedErrno = errno;

  // Ensure that we reset errno upon exit.
  auto guard(makeGuard([&] { errno = savedErrno; }));

  char buf[1024];
  buf[0] = '\0';

  fbstring result;

  // http://www.kernel.org/doc/man-pages/online/pages/man3/strerror.3.html
#if (_POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600 || \
     !FOLLY_HAVE_FEATURES_H) && !_GNU_SOURCE
  // Using XSI-compatible strerror_r
  int r = strerror_r(err, buf, sizeof(buf));

  // Failure is signalled by any non-zero return: some implementations return
  // -1 and set errno, others return the (positive) error number directly.
  // Testing only for -1 would silently treat the latter as success.
  if (r != 0) {
    int failure = (r == -1) ? errno : r;
    result = to<fbstring>(
        "Unknown error ", err,
        " (strerror_r failed with error ", failure, ")");
  } else {
    result.assign(buf);
  }
#else
  // Using GNU strerror_r: it returns the message pointer, which may or may
  // not be 'buf'.
  result.assign(strerror_r(err, buf, sizeof(buf)));
#endif

  return result;
}
/**
 * Returns the Database for the database part of 'ns', creating and registering it in '_dbs' when
 * it is not there yet.
 *
 * The caller must hold the database lock in MODE_X (checked with an invariant). When 'justCreated'
 * is non-null it is set to true only if the storage engine's catalog entry reports that the
 * database did not exist before this call, and to false otherwise.
 *
 * Throws DatabaseDifferCase (via uassert) when a database whose name differs only by case is
 * already registered.
 */
Database* DatabaseHolderImpl::openDb(OperationContext* opCtx, StringData ns, bool* justCreated) {
    const StringData dbname = _todb(ns);
    invariant(opCtx->lockState()->isDbLockedForMode(dbname, MODE_X));

    // Assume the database already existed until shown otherwise.
    if (justCreated) {
        *justCreated = false;
    }

    stdx::unique_lock<SimpleMutex> holderLock(_m);

    // operator[] inserts a nullptr placeholder when 'dbname' is absent. The placeholder is treated
    // like a non-existent database by the get method but still counts in
    // getNamesWithConflictingCasing.
    if (auto existingDb = _dbs[dbname]) {
        return existingDb;
    }

    // From here on a placeholder exists for 'dbname'; drop it again on any unsuccessful exit.
    auto erasePlaceholder = makeGuard([this, &holderLock, dbname] {
        if (!holderLock.owns_lock()) {
            holderLock.lock();
        }
        _dbs.erase(dbname);
    });

    // The casing check happens under the lock to avoid transient duplicates.
    auto caseConflicts = _getNamesWithConflictingCasing_inlock(dbname);
    uassert(ErrorCodes::DatabaseDifferCase,
            str::stream() << "db already exists with different case already have: ["
                          << *caseConflicts.cbegin()
                          << "] trying to create ["
                          << dbname.toString()
                          << "]",
            caseConflicts.empty());

    // The catalog lookup and database creation may block, so they run without the mutex. Because
    // the caller holds the X-lock on this database, only one thread can be in here for a given
    // name, so two different databases can never be inserted for the same name.
    holderLock.unlock();

    StorageEngine* storageEngine = getGlobalServiceContext()->getStorageEngine();
    DatabaseCatalogEntry* entry = storageEngine->getDatabaseCatalogEntry(opCtx, dbname);

    if (!entry->exists()) {
        audit::logCreateDatabase(opCtx->getClient(), dbname);
        if (justCreated) {
            *justCreated = true;
        }
    }

    auto newDb = stdx::make_unique<DatabaseImpl>(dbname, entry, ++_epoch);
    newDb->init(opCtx);

    // Success: swap the placeholder for the real Database pointer.
    erasePlaceholder.dismiss();
    holderLock.lock();

    auto it = _dbs.find(dbname);
    invariant(it != _dbs.end() && it->second == nullptr);
    it->second = newDb.release();
    invariant(_getNamesWithConflictingCasing_inlock(dbname.toString()).empty());

    return it->second;
}
/**
 * Runs 'task' inside an AlternativeClientRegion for '_client' and returns its status.
 *
 * _releaseResources() is called unless the task reports an OK status. Any exception escaping
 * 'task' terminates the process.
 */
Status CollectionBulkLoaderImpl::_runTaskReleaseResourcesOnFailure(const F& task) noexcept {
    AlternativeClientRegion acr(_client);

    // Armed by default; only a successful task disarms it.
    auto releaseOnFailure = makeGuard([this] { _releaseResources(); });

    try {
        const auto taskStatus = task();
        if (!taskStatus.isOK()) {
            // The guard stays armed and releases the resources on the way out.
            return taskStatus;
        }
        releaseOnFailure.dismiss();
        return taskStatus;
    } catch (...) {
        std::terminate();
    }
}
/**
 * Closes the UUID catalog, all databases and the storage engine's catalog.
 *
 * Requires the global lock to be held in W mode (checked with an invariant). Before closing,
 * every collection that has both a UUID and a minimum visible snapshot has that timestamp recorded
 * in the returned map, keyed by UUID.
 *
 * If closing the databases or the storage engine catalog throws, the UUID catalog is marked open
 * again before the exception propagates.
 */
MinVisibleTimestampMap closeCatalog(OperationContext* opCtx) {
    invariant(opCtx->lockState()->isW());

    MinVisibleTimestampMap preservedTimestamps;

    std::vector<std::string> dbNames;
    opCtx->getServiceContext()->getStorageEngine()->listDatabases(&dbNames);

    auto dbHolder = DatabaseHolder::get(opCtx);
    for (const auto& name : dbNames) {
        const auto db = dbHolder->getDb(opCtx, name);
        for (auto it = db->begin(opCtx); it != db->end(opCtx); ++it) {
            auto collection = *it;
            if (!collection) {
                // A null collection ends the scan of this database.
                break;
            }

            OptionalCollectionUUID uuid = collection->uuid();
            boost::optional<Timestamp> minVisible = collection->getMinimumVisibleSnapshot();

            // A minimum visible timestamp without a UUID is not allowed.
            invariant(!minVisible || uuid);

            if (!(uuid && minVisible)) {
                continue;
            }

            LOG(1) << "closeCatalog: preserving min visible timestamp. Collection: "
                   << collection->ns() << " UUID: " << uuid << " TS: " << minVisible;
            preservedTimestamps[*uuid] = *minVisible;
        }
    }

    // If anything below throws, mark the UUIDCatalog as open again; dismissed on success.
    auto reopenUuidCatalog = makeGuard([opCtx] { UUIDCatalog::get(opCtx).onOpenCatalog(opCtx); });

    // Closing UUID Catalog: only lookupNSSByUUID will fall back to using pre-closing state to
    // allow authorization for currently unknown UUIDs. This is needed because authorization needs
    // to work before acquiring locks, and might otherwise spuriously regard a UUID as unknown
    // while reloading the catalog.
    UUIDCatalog::get(opCtx).onCloseCatalog(opCtx);
    LOG(1) << "closeCatalog: closing UUID catalog";

    log() << "closeCatalog: closing all databases";
    dbHolder->closeAll(opCtx);

    log() << "closeCatalog: closing storage engine catalog";
    opCtx->getServiceContext()->getStorageEngine()->closeCatalog(opCtx);

    reopenUuidCatalog.dismiss();
    return preservedTimestamps;
}
void Subprocess::spawn( std::unique_ptr<const char*[]> argv, const char* executable, const Options& optionsIn, const std::vector<std::string>* env) { if (optionsIn.usePath_ && env) { throw std::invalid_argument( "usePath() not allowed when overriding environment"); } // Make a copy, we'll mutate options Options options(optionsIn); // On error, close all of the pipes_ auto pipesGuard = makeGuard([&] { for (auto& p : this->pipes_) { CHECK_ERR(::close(p.parentFd)); } }); // Create a pipe to use to receive error information from the child, // in case it fails before calling exec() int errFds[2]; int r = ::pipe(errFds); checkUnixError(r, "pipe"); SCOPE_EXIT { CHECK_ERR(::close(errFds[0])); if (errFds[1] >= 0) { CHECK_ERR(::close(errFds[1])); } }; // Ask the child to close the read end of the error pipe. options.fdActions_[errFds[0]] = CLOSE; // Set the close-on-exec flag on the write side of the pipe. // This way the pipe will be closed automatically in the child if execve() // succeeds. If the exec fails the child can write error information to the // pipe. r = fcntl(errFds[1], F_SETFD, FD_CLOEXEC); checkUnixError(r, "set FD_CLOEXEC"); // Perform the actual work of setting up pipes then forking and // executing the child. spawnInternal(std::move(argv), executable, options, env, errFds[1]); // After spawnInternal() returns the child is alive. We have to be very // careful about throwing after this point. We are inside the constructor, // so if we throw the Subprocess object will have never existed, and the // destructor will never be called. // // We should only throw if we got an error via the errFd, and we know the // child has exited and can be immediately waited for. In all other cases, // we have no way of cleaning up the child. 
// Close writable side of the errFd pipe in the parent process CHECK_ERR(::close(errFds[1])); errFds[1] = -1; // Read from the errFd pipe, to tell if the child ran into any errors before // calling exec() readChildErrorPipe(errFds[0], executable); // We have fully succeeded now, so release the guard on pipes_ pipesGuard.dismiss(); }
/**
 * Applies the records buffered in the side writes table to the index, in batches, deleting each
 * record from the table once it has been applied.
 *
 * Must not be called inside a WriteUnitOfWork (checked with an invariant).
 *
 * 'options' is forwarded to _applyWrite() for every record.
 * 'readSource', when not kUnset, is installed on the recovery unit for the duration of the call;
 * the previous read source is restored on exit.
 *
 * Each batch holds at most kBatchMaxSize records and at most kBatchMaxBytes of documents and is
 * applied in one WriteUnitOfWork under writeConflictRetry(). Returns the first non-OK status
 * produced while applying a batch, otherwise Status::OK().
 */
Status IndexBuildInterceptor::drainWritesIntoIndex(OperationContext* opCtx,
                                                   const InsertDeleteOptions& options,
                                                   RecoveryUnit::ReadSource readSource) {
    invariant(!opCtx->lockState()->inAWriteUnitOfWork());

    // Callers may request to read at a specific timestamp so that no drained writes are timestamped
    // earlier than their original write timestamp. Also ensure that leaving this function resets
    // the ReadSource to its original value.
    auto resetReadSourceGuard =
        makeGuard([ opCtx, prevReadSource = opCtx->recoveryUnit()->getTimestampReadSource() ] {
            opCtx->recoveryUnit()->abandonSnapshot();
            opCtx->recoveryUnit()->setTimestampReadSource(prevReadSource);
        });

    if (readSource != RecoveryUnit::ReadSource::kUnset) {
        opCtx->recoveryUnit()->abandonSnapshot();
        opCtx->recoveryUnit()->setTimestampReadSource(readSource);
    } else {
        // Nothing was changed, so there is nothing to restore.
        resetReadSourceGuard.dismiss();
    }

    // These are used for logging only.
    // NOTE(review): both are passed to _applyWrite() on every invocation of the batch lambda; if
    // writeConflictRetry() re-runs the lambda, a retried batch is presumably counted more than
    // once - confirm.
    int64_t totalDeleted = 0;
    int64_t totalInserted = 0;
    Timer timer;

    const int64_t appliedAtStart = _numApplied;

    // Set up the progress meter. This will never be completely accurate, because more writes can be
    // read from the side writes table than are observed before draining.
    static const char* curopMessage = "Index Build: draining writes received during build";
    ProgressMeterHolder progress;
    {
        stdx::unique_lock<Client> lk(*opCtx->getClient());
        progress.set(CurOp::get(opCtx)->setProgress_inlock(curopMessage));
    }

    // Force the progress meter to log at the end of every batch. By default, the progress meter
    // only logs after a large number of calls to hit(), but since we batch inserts by up to
    // 1000 records, progress would rarely be displayed.
    progress->reset(_sideWritesCounter.load() - appliedAtStart /* total */,
                    3 /* secondsBetween */,
                    1 /* checkInterval */);

    // Buffer operations into batches to insert per WriteUnitOfWork. Impose an upper limit on the
    // number of documents and the total size of the batch.
    const int32_t kBatchMaxSize = 1000;
    const int64_t kBatchMaxBytes = BSONObjMaxInternalSize;

    int64_t batchSizeBytes = 0;

    std::vector<SideWriteRecord> batch;
    batch.reserve(kBatchMaxSize);

    // Hold on to documents that would exceed the per-batch memory limit. Always insert this first
    // into the next batch.
    boost::optional<SideWriteRecord> stashed;

    auto cursor = _sideWritesTable->rs()->getCursor(opCtx);

    bool atEof = false;
    while (!atEof) {
        opCtx->checkForInterrupt();

        // Stashed records should be inserted into a batch first.
        if (stashed) {
            invariant(batch.empty());
            // Count the stashed document towards this batch's byte budget; otherwise the batch
            // could exceed kBatchMaxBytes by the size of one document. Read the size before the
            // record is moved from.
            batchSizeBytes += stashed->second.objsize();
            batch.push_back(std::move(stashed.get()));
            stashed.reset();
        }

        auto record = cursor->next();

        if (record) {
            RecordId currentRecordId = record->id;
            BSONObj docOut = record->data.toBson().getOwned();

            // If the total batch size in bytes would be too large, stash this document and let the
            // current batch insert.
            int objSize = docOut.objsize();
            if (batchSizeBytes + objSize > kBatchMaxBytes) {
                invariant(!stashed);

                // Stash this document to be inserted in the next batch.
                stashed.emplace(currentRecordId, std::move(docOut));
            } else {
                batchSizeBytes += objSize;
                batch.emplace_back(currentRecordId, std::move(docOut));

                // Continue if there is more room in the batch.
                if (batch.size() < kBatchMaxSize) {
                    continue;
                }
            }
        } else {
            atEof = true;
            if (batch.empty())
                break;
        }

        invariant(!batch.empty());

        cursor->save();

        // If we are here, either we have reached the end of the table or the batch is full, so
        // insert everything in one WriteUnitOfWork, and delete each inserted document from the side
        // writes table.
        auto status = writeConflictRetry(opCtx, "index build drain", _indexCatalogEntry->ns(), [&] {
            WriteUnitOfWork wuow(opCtx);
            for (auto& operation : batch) {
                auto status =
                    _applyWrite(opCtx, operation.second, options, &totalInserted, &totalDeleted);
                if (!status.isOK()) {
                    return status;
                }

                // Delete the document from the table as soon as it has been inserted into the
                // index. This ensures that no key is ever inserted twice and no keys are skipped.
                _sideWritesTable->rs()->deleteRecord(opCtx, operation.first);
            }

            // For rollback to work correctly, these writes need to be timestamped. The actual time
            // is not important, as long as it not older than the most recent visible side write.
            IndexTimestampHelper::setGhostCommitTimestampForWrite(
                opCtx, NamespaceString(_indexCatalogEntry->ns()));

            wuow.commit();
            return Status::OK();
        });
        if (!status.isOK()) {
            return status;
        }

        progress->hit(batch.size());

        // Lock yielding will only happen if we are holding intent locks.
        _tryYield(opCtx);
        cursor->restore();

        // Account for more writes coming in during a batch.
        progress->setTotalWhileRunning(_sideWritesCounter.loadRelaxed() - appliedAtStart);

        _numApplied += batch.size();
        batch.clear();
        batchSizeBytes = 0;
    }

    progress->finished();

    // Log at the default level only when something was actually applied.
    int logLevel = (_numApplied - appliedAtStart > 0) ? 0 : 1;
    LOG(logLevel) << "index build: drain applied " << (_numApplied - appliedAtStart)
                  << " side writes (inserted: " << totalInserted << ", deleted: " << totalDeleted
                  << ") for '" << _indexCatalogEntry->descriptor()->indexName() << "' in "
                  << timer.millis() << " ms";

    return Status::OK();
}
void BSONElement::jsonStringStream(JsonStringFormat format, bool includeFieldNames, int pretty, std::stringstream& s) const { if (includeFieldNames) s << '"' << escape(fieldName()) << "\" : "; switch (type()) { case mongo::String: case Symbol: s << '"' << escape(string(valuestr(), valuestrsize() - 1)) << '"'; break; case NumberLong: if (format == TenGen) { s << "NumberLong(" << _numberLong() << ")"; } else { s << "{ \"$numberLong\" : \"" << _numberLong() << "\" }"; } break; case NumberInt: if (format == TenGen) { s << "NumberInt(" << _numberInt() << ")"; break; } case NumberDouble: if (number() >= -std::numeric_limits<double>::max() && number() <= std::numeric_limits<double>::max()) { auto origPrecision = s.precision(); auto guard = makeGuard([&s, origPrecision]() { s.precision(origPrecision); }); s.precision(16); s << number(); } // This is not valid JSON, but according to RFC-4627, "Numeric values that cannot be // represented as sequences of digits (such as Infinity and NaN) are not permitted." so // we are accepting the fact that if we have such values we cannot output valid JSON. else if (std::isnan(number())) { s << "NaN"; } else if (std::isinf(number())) { s << (number() > 0 ? "Infinity" : "-Infinity"); } else { StringBuilder ss; ss << "Number " << number() << " cannot be represented in JSON"; string message = ss.str(); massert(10311, message.c_str(), false); } break; case NumberDecimal: if (format == TenGen) s << "NumberDecimal(\""; else s << "{ \"$numberDecimal\" : \""; // Recognize again that this is not valid JSON according to RFC-4627. // Also, treat -NaN and +NaN as the same thing for MongoDB. if (numberDecimal().isNaN()) { s << "NaN"; } else if (numberDecimal().isInfinite()) { s << (numberDecimal().isNegative() ? "-Infinity" : "Infinity"); } else { s << numberDecimal().toString(); } if (format == TenGen) s << "\")"; else s << "\" }"; break; case mongo::Bool: s << (boolean() ? 
"true" : "false"); break; case jstNULL: s << "null"; break; case Undefined: if (format == Strict) { s << "{ \"$undefined\" : true }"; } else { s << "undefined"; } break; case Object: embeddedObject().jsonStringStream(format, pretty, false, s); break; case mongo::Array: { if (embeddedObject().isEmpty()) { s << "[]"; break; } s << "[ "; BSONObjIterator i(embeddedObject()); BSONElement e = i.next(); if (!e.eoo()) { int count = 0; while (1) { if (pretty) { s << '\n'; for (int x = 0; x < pretty; x++) s << " "; } if (strtol(e.fieldName(), 0, 10) > count) { s << "undefined"; } else { e.jsonStringStream(format, false, pretty ? pretty + 1 : 0, s); e = i.next(); } count++; if (e.eoo()) break; s << ", "; } } s << " ]"; break; } case DBRef: { if (format == TenGen) s << "Dbref( "; else s << "{ \"$ref\" : "; s << '"' << valuestr() << "\", "; if (format != TenGen) s << "\"$id\" : "; s << '"' << mongo::OID::from(valuestr() + valuestrsize()) << "\" "; if (format == TenGen) s << ')'; else s << '}'; break; } case jstOID: if (format == TenGen) { s << "ObjectId( "; } else { s << "{ \"$oid\" : "; } s << '"' << __oid() << '"'; if (format == TenGen) { s << " )"; } else { s << " }"; } break; case BinData: { ConstDataCursor reader(value()); const int len = reader.readAndAdvance<LittleEndian<int>>(); BinDataType type = static_cast<BinDataType>(reader.readAndAdvance<uint8_t>()); s << "{ \"$binary\" : \""; base64::encode(s, reader.view(), len); auto origFill = s.fill(); auto origFmtF = s.flags(); auto origWidth = s.width(); auto guard = makeGuard([&s, origFill, origFmtF, origWidth] { s.fill(origFill); s.setf(origFmtF); s.width(origWidth); }); s.setf(std::ios_base::hex, std::ios_base::basefield); s << "\", \"$type\" : \""; s.width(2); s.fill('0'); s << type; s << "\" }"; break; } case mongo::Date: if (format == Strict) { Date_t d = date(); s << "{ \"$date\" : "; // The two cases in which we cannot convert Date_t::millis to an ISO Date string are // when the date is too large to format 
(SERVER-13760), and when the date is before // the epoch (SERVER-11273). Since Date_t internally stores millis as an unsigned // long long, despite the fact that it is logically signed (SERVER-8573), this check // handles both the case where Date_t::millis is too large, and the case where // Date_t::millis is negative (before the epoch). if (d.isFormattable()) { s << "\"" << dateToISOStringLocal(date()) << "\""; } else { s << "{ \"$numberLong\" : \"" << d.toMillisSinceEpoch() << "\" }"; } s << " }"; } else { s << "Date( "; if (pretty) { Date_t d = date(); // The two cases in which we cannot convert Date_t::millis to an ISO Date string // are when the date is too large to format (SERVER-13760), and when the date is // before the epoch (SERVER-11273). Since Date_t internally stores millis as an // unsigned long long, despite the fact that it is logically signed // (SERVER-8573), this check handles both the case where Date_t::millis is too // large, and the case where Date_t::millis is negative (before the epoch). if (d.isFormattable()) { s << "\"" << dateToISOStringLocal(date()) << "\""; } else { // FIXME: This is not parseable by the shell, since it may not fit in a // float s << d.toMillisSinceEpoch(); } } else { s << date().asInt64(); } s << " )"; } break; case RegEx: if (format == Strict) { s << "{ \"$regex\" : \"" << escape(regex()); s << "\", \"$options\" : \"" << regexFlags() << "\" }"; } else { s << "/" << escape(regex(), true) << "/"; // FIXME Worry about alpha order? 
for (const char* f = regexFlags(); *f; ++f) { switch (*f) { case 'g': case 'i': case 'm': s << *f; default: break; } } } break; case CodeWScope: { BSONObj scope = codeWScopeObject(); if (!scope.isEmpty()) { s << "{ \"$code\" : \"" << escape(_asCode()) << "\" , " << "\"$scope\" : " << scope.jsonString() << " }"; break; } } case Code: s << "\"" << escape(_asCode()) << "\""; break; case bsonTimestamp: if (format == TenGen) { s << "Timestamp( " << durationCount<Seconds>(timestampTime().toDurationSinceEpoch()) << ", " << timestampInc() << " )"; } else { s << "{ \"$timestamp\" : { \"t\" : " << durationCount<Seconds>(timestampTime().toDurationSinceEpoch()) << ", \"i\" : " << timestampInc() << " } }"; } break; case MinKey: s << "{ \"$minKey\" : 1 }"; break; case MaxKey: s << "{ \"$maxKey\" : 1 }"; break; default: StringBuilder ss; ss << "Cannot create a properly formatted JSON string with " << "element: " << toString() << " of type: " << type(); string message = ss.str(); massert(10312, message.c_str(), false); } }