void OnConnection(const TcpConnectionPtr& conn) {
    LOG_INFO << conn->localAddress().toIpPort() << " -> "
             << conn->peerAddress().toIpPort() << " is "
             << (conn->connected() ? "UP" : "DOWN");

    if (conn->connected()) {
        m_connection = conn;
        if (g_aliveConnections.incrementAndGet() == g_connections) {
            LOG_INFO << "all connected";
        }
    } else {
        m_connection.reset();
        // Count down as peers disconnect; quit the loop once every connection is gone.
        if (g_aliveConnections.decrementAndGet() == 0) {
            LOG_INFO << "all disconnected";
            g_loop->quit();
        }
    }
}
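// A minimal sketch (not part of the excerpt above) of how a callback like OnConnection is
// typically wired up on a muduo TcpClient. The LoadTestClient wrapper class and its members
// are illustrative assumptions; here OnConnection is assumed to be a member of that wrapper.
// TcpClient, EventLoop, InetAddress, and setConnectionCallback are the standard muduo API.
#include <functional>
#include <muduo/net/EventLoop.h>
#include <muduo/net/TcpClient.h>

class LoadTestClient {
public:
    LoadTestClient(muduo::net::EventLoop* loop, const muduo::net::InetAddress& serverAddr)
        : m_client(loop, serverAddr, "LoadTestClient") {
        // The callback fires once when the connection comes up and once when it goes down.
        m_client.setConnectionCallback(
            std::bind(&LoadTestClient::OnConnection, this, std::placeholders::_1));
    }

    void connect() { m_client.connect(); }

private:
    void OnConnection(const muduo::net::TcpConnectionPtr& conn);  // body as shown above

    muduo::net::TcpClient m_client;
    muduo::net::TcpConnectionPtr m_connection;
};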
void App::onGraphics3D(RenderDevice* rd, Array<shared_ptr<Surface> >& allSurfaces) {
    rd->clear();

    // Bind the main framebuffer
    rd->pushState(m_framebuffer); {
        rd->setProjectionAndCameraMatrix(activeCamera()->projection(), activeCamera()->frame());

        m_gbuffer->resize(rd->width(), rd->height());
        m_gbuffer->prepare(rd, activeCamera(), 0, -(float)previousSimTimeStep(),
                           m_settings.depthGuardBandThickness, m_settings.colorGuardBandThickness);
        rd->clear();

        // Cull and sort
        Array<shared_ptr<Surface> > sortedVisibleSurfaces;
        Surface::cull(activeCamera()->frame(), activeCamera()->projection(), rd->viewport(),
                      allSurfaces, sortedVisibleSurfaces);
        Surface::sortBackToFront(sortedVisibleSurfaces, activeCamera()->frame().lookVector());

        // Depth pre-pass
        static const bool renderTransmissiveSurfaces = false;
        Surface::renderDepthOnly(rd, sortedVisibleSurfaces, CullFace::BACK, renderTransmissiveSurfaces);

        // Intentionally copy the lighting environment for mutation
        LightingEnvironment environment = scene()->lightingEnvironment();

        if (! m_settings.colorGuardBandThickness.isZero()) {
            rd->setGuardBandClip2D(m_settings.colorGuardBandThickness);
        }

        // Render G-buffer if needed. In this default implementation, it is needed if motion blur
        // is enabled (for velocity) or if face normals have been allocated and ambient occlusion
        // is enabled.
        if (activeCamera()->motionBlurSettings().enabled() ||
            (environment.ambientOcclusionSettings.enabled &&
             notNull(m_gbuffer) &&
             notNull(m_gbuffer->texture(GBuffer::Field::CS_FACE_NORMAL)))) {

            rd->setDepthWrite(false); {
                // We've already rendered the depth
                Surface::renderIntoGBuffer(rd, sortedVisibleSurfaces, m_gbuffer,
                                           activeCamera()->previousFrame(),
                                           activeCamera()->expressivePreviousFrame());
            } rd->setDepthWrite(true);
        }

        // Compute AO
        environment.ambientOcclusionSettings.useDepthPeelBuffer = false;
        m_ambientOcclusion->update(rd, environment.ambientOcclusionSettings, activeCamera(),
                                   m_framebuffer->texture(Framebuffer::DEPTH),
                                   shared_ptr<Texture>(),
                                   m_gbuffer->texture(GBuffer::Field::CS_FACE_NORMAL),
                                   m_settings.depthGuardBandThickness - m_settings.colorGuardBandThickness);

        // Compute shadow maps and forward-render visible surfaces
        environment.ambientOcclusion = m_ambientOcclusion;
        Surface::render(rd, activeCamera()->frame(), activeCamera()->projection(),
                        sortedVisibleSurfaces, allSurfaces, environment);

        // Call to make the App show the output of debugDraw(...)
        drawDebugShapes();
        scene()->visualize(rd, shared_ptr<Entity>(), sceneVisualizationSettings());

        // Post-process special effects
        m_depthOfField->apply(rd, m_framebuffer->texture(0),
                              m_framebuffer->texture(Framebuffer::DEPTH), activeCamera(),
                              m_settings.depthGuardBandThickness - m_settings.colorGuardBandThickness);

        m_motionBlur->apply(rd, m_framebuffer->texture(0),
                            m_gbuffer->texture(GBuffer::Field::SS_POSITION_CHANGE),
                            m_framebuffer->texture(Framebuffer::DEPTH), activeCamera(),
                            m_settings.depthGuardBandThickness - m_settings.colorGuardBandThickness);
    } rd->popState();

    // Perform gamma correction, bloom, and SSAA, and write to the native window frame buffer
    rd->push2D(m_finalFramebuffer); {
        rd->clear();
        m_film->exposeAndRender(rd, activeCamera()->filmSettings(), m_framebuffer->texture(0));
    } rd->pop2D();

    // Copy the final buffer to the server screen
    rd->push2D(); {
        Draw::rect2D(m_finalFramebuffer->texture(0)->rect2DBounds(), rd, Color3::white(),
                     m_finalFramebuffer->texture(0));
    } rd->pop2D();

    clientSetMutex.lock();
    screenPrintf("Number of clients: %d\n", clientSet.size());
    //screenPrintf("clientWantsImage: %d\n", clientWantsImage.value());
    if ((clientWantsImage.value() != 0) && (clientSet.size() > 0)) {
        // Send the image to the first client
        mg_connection* conn = *clientSet.begin();

        // JPEG encoding/decoding takes more time but substantially less bandwidth than PNG
        mg_websocket_write_image(conn, m_finalBuffer->toImage(ImageFormat::RGB8()), Image::JPEG);
        clientWantsImage = 0;
    }
    clientSetMutex.unlock();
}
namespace mongo {

namespace {

MONGO_INITIALIZER(InitializeMultiIndexBlockFactory)(InitializerContext* const) {
    MultiIndexBlock::registerFactory(
        [](OperationContext* const opCtx, Collection* const collection) {
            return stdx::make_unique<MultiIndexBlockImpl>(opCtx, collection);
        });
    return Status::OK();
}

}  // namespace

using std::unique_ptr;
using std::string;
using std::endl;

MONGO_FP_DECLARE(crashAfterStartingIndexBuild);
MONGO_FP_DECLARE(hangAfterStartingIndexBuild);
MONGO_FP_DECLARE(hangAfterStartingIndexBuildUnlocked);

AtomicInt32 maxIndexBuildMemoryUsageMegabytes(500);

class ExportedMaxIndexBuildMemoryUsageParameter
    : public ExportedServerParameter<std::int32_t, ServerParameterType::kStartupAndRuntime> {
public:
    ExportedMaxIndexBuildMemoryUsageParameter()
        : ExportedServerParameter<std::int32_t, ServerParameterType::kStartupAndRuntime>(
              ServerParameterSet::getGlobal(),
              "maxIndexBuildMemoryUsageMegabytes",
              &maxIndexBuildMemoryUsageMegabytes) {}

    virtual Status validate(const std::int32_t& potentialNewValue) {
        if (potentialNewValue < 100) {
            return Status(
                ErrorCodes::BadValue,
                "maxIndexBuildMemoryUsageMegabytes must be greater than or equal to 100 MB");
        }

        return Status::OK();
    }

} exportedMaxIndexBuildMemoryUsageParameter;

/**
 * On rollback sets MultiIndexBlockImpl::_needToCleanup to true.
 */
class MultiIndexBlockImpl::SetNeedToCleanupOnRollback : public RecoveryUnit::Change {
public:
    explicit SetNeedToCleanupOnRollback(MultiIndexBlockImpl* indexer) : _indexer(indexer) {}

    virtual void commit() {}
    virtual void rollback() {
        _indexer->_needToCleanup = true;
    }

private:
    MultiIndexBlockImpl* const _indexer;
};

/**
 * On rollback in init(), cleans up _indexes so that ~MultiIndexBlock doesn't try to clean
 * up _indexes manually (since the changes were already rolled back).
 * Due to this, it is thus legal to call init() again after it fails.
 */
class MultiIndexBlockImpl::CleanupIndexesVectorOnRollback : public RecoveryUnit::Change {
public:
    explicit CleanupIndexesVectorOnRollback(MultiIndexBlockImpl* indexer) : _indexer(indexer) {}

    virtual void commit() {}
    virtual void rollback() {
        _indexer->_indexes.clear();
    }

private:
    MultiIndexBlockImpl* const _indexer;
};

MultiIndexBlockImpl::MultiIndexBlockImpl(OperationContext* opCtx, Collection* collection)
    : _collection(collection),
      _opCtx(opCtx),
      _buildInBackground(false),
      _allowInterruption(false),
      _ignoreUnique(false),
      _needToCleanup(true) {}

MultiIndexBlockImpl::~MultiIndexBlockImpl() {
    if (!_needToCleanup || _indexes.empty())
        return;

    while (true) {
        try {
            WriteUnitOfWork wunit(_opCtx);
            // This cleans up all index builds. Because that may need to write, it is done inside
            // of a WUOW. Nothing inside this block can fail, and it is made fatal if it does.
            for (size_t i = 0; i < _indexes.size(); i++) {
                _indexes[i].block->fail();
            }
            wunit.commit();
            return;
        } catch (const WriteConflictException&) {
            continue;
        } catch (const DBException& e) {
            if (e.toStatus() == ErrorCodes::ExceededMemoryLimit)
                continue;
            error() << "Caught exception while cleaning up partially built indexes: " << redact(e);
        } catch (const std::exception& e) {
            error() << "Caught exception while cleaning up partially built indexes: " << e.what();
        } catch (...) {
            error() << "Caught unknown exception while cleaning up partially built indexes.";
        }
        fassertFailed(18644);
    }
}

void MultiIndexBlockImpl::removeExistingIndexes(std::vector<BSONObj>* specs) const {
    for (size_t i = 0; i < specs->size(); i++) {
        Status status =
            _collection->getIndexCatalog()->prepareSpecForCreate(_opCtx, (*specs)[i]).getStatus();
        if (status.code() == ErrorCodes::IndexAlreadyExists) {
            specs->erase(specs->begin() + i);
            i--;
        }
        // intentionally ignoring other error codes
    }
}

StatusWith<std::vector<BSONObj>> MultiIndexBlockImpl::init(const BSONObj& spec) {
    const auto indexes = std::vector<BSONObj>(1, spec);
    return init(indexes);
}

StatusWith<std::vector<BSONObj>> MultiIndexBlockImpl::init(const std::vector<BSONObj>& indexSpecs) {
    WriteUnitOfWork wunit(_opCtx);

    invariant(_indexes.empty());
    _opCtx->recoveryUnit()->registerChange(new CleanupIndexesVectorOnRollback(this));

    const string& ns = _collection->ns().ns();

    const auto idxCat = _collection->getIndexCatalog();
    invariant(idxCat);
    invariant(idxCat->ok());
    Status status = idxCat->checkUnfinished();
    if (!status.isOK())
        return status;

    for (size_t i = 0; i < indexSpecs.size(); i++) {
        BSONObj info = indexSpecs[i];

        string pluginName = IndexNames::findPluginName(info["key"].Obj());
        if (pluginName.size()) {
            Status s = _collection->getIndexCatalog()->_upgradeDatabaseMinorVersionIfNeeded(
                _opCtx, pluginName);
            if (!s.isOK())
                return s;
        }

        // Any foreground indexes make all indexes be built in the foreground.
        _buildInBackground = (_buildInBackground && info["background"].trueValue());
    }

    std::vector<BSONObj> indexInfoObjs;
    indexInfoObjs.reserve(indexSpecs.size());

    std::size_t eachIndexBuildMaxMemoryUsageBytes = 0;
    if (!indexSpecs.empty()) {
        eachIndexBuildMaxMemoryUsageBytes =
            static_cast<std::size_t>(maxIndexBuildMemoryUsageMegabytes.load()) * 1024 * 1024 /
            indexSpecs.size();
    }

    for (size_t i = 0; i < indexSpecs.size(); i++) {
        BSONObj info = indexSpecs[i];
        StatusWith<BSONObj> statusWithInfo =
            _collection->getIndexCatalog()->prepareSpecForCreate(_opCtx, info);
        Status status = statusWithInfo.getStatus();
        if (!status.isOK())
            return status;
        info = statusWithInfo.getValue();
        indexInfoObjs.push_back(info);

        IndexToBuild index;
        index.block.reset(new IndexCatalogImpl::IndexBuildBlock(_opCtx, _collection, info));
        status = index.block->init();
        if (!status.isOK())
            return status;

        index.real = index.block->getEntry()->accessMethod();
        status = index.real->initializeAsEmpty(_opCtx);
        if (!status.isOK())
            return status;

        if (!_buildInBackground) {
            // Bulk build process requires foreground building as it assumes nothing is changing
            // under it.
            index.bulk = index.real->initiateBulk(eachIndexBuildMaxMemoryUsageBytes);
        }

        const IndexDescriptor* descriptor = index.block->getEntry()->descriptor();

        IndexCatalog::prepareInsertDeleteOptions(_opCtx, descriptor, &index.options);
        index.options.dupsAllowed = index.options.dupsAllowed || _ignoreUnique;
        if (_ignoreUnique) {
            index.options.getKeysMode = IndexAccessMethod::GetKeysMode::kRelaxConstraints;
        }

        log() << "build index on: " << ns << " properties: " << descriptor->toString();
        if (index.bulk)
            log() << "\t building index using bulk method; build may temporarily use up to "
                  << eachIndexBuildMaxMemoryUsageBytes / 1024 / 1024 << " megabytes of RAM";

        index.filterExpression = index.block->getEntry()->getFilterExpression();

        // TODO SERVER-14888 Suppress this in cases we don't want to audit.
        audit::logCreateIndex(_opCtx->getClient(), &info, descriptor->indexName(), ns);

        _indexes.push_back(std::move(index));
    }

    if (_buildInBackground)
        _backgroundOperation.reset(new BackgroundOperation(ns));

    wunit.commit();

    if (MONGO_FAIL_POINT(crashAfterStartingIndexBuild)) {
        log() << "Index build interrupted due to 'crashAfterStartingIndexBuild' failpoint. Exiting "
                 "after waiting for changes to become durable.";
        Locker::LockSnapshot lockInfo;
        _opCtx->lockState()->saveLockStateAndUnlock(&lockInfo);
        if (_opCtx->recoveryUnit()->waitUntilDurable()) {
            quickExit(EXIT_TEST);
        }
    }

    return indexInfoObjs;
}

Status MultiIndexBlockImpl::insertAllDocumentsInCollection(std::set<RecordId>* dupsOut) {
    const char* curopMessage = _buildInBackground ? "Index Build (background)" : "Index Build";
    const auto numRecords = _collection->numRecords(_opCtx);
    stdx::unique_lock<Client> lk(*_opCtx->getClient());
    ProgressMeterHolder progress(
        CurOp::get(_opCtx)->setMessage_inlock(curopMessage, curopMessage, numRecords));
    lk.unlock();

    Timer t;

    unsigned long long n = 0;

    PlanExecutor::YieldPolicy yieldPolicy;
    if (_buildInBackground) {
        invariant(_allowInterruption);
        yieldPolicy = PlanExecutor::YIELD_AUTO;
    } else {
        yieldPolicy = PlanExecutor::WRITE_CONFLICT_RETRY_ONLY;
    }
    auto exec =
        InternalPlanner::collectionScan(_opCtx, _collection->ns().ns(), _collection, yieldPolicy);

    Snapshotted<BSONObj> objToIndex;
    RecordId loc;
    PlanExecutor::ExecState state;
    int retries = 0;  // non-zero when retrying our last document.
    while (retries ||
           (PlanExecutor::ADVANCED == (state = exec->getNextSnapshotted(&objToIndex, &loc))) ||
           MONGO_FAIL_POINT(hangAfterStartingIndexBuild)) {
        try {
            if (_allowInterruption)
                _opCtx->checkForInterrupt();

            if (!(retries || (PlanExecutor::ADVANCED == state))) {
                // The only reason we are still in the loop is hangAfterStartingIndexBuild.
                log() << "Hanging index build due to 'hangAfterStartingIndexBuild' failpoint";
                invariant(_allowInterruption);
                sleepmillis(1000);
                continue;
            }

            // Make sure we are working with the latest version of the document.
            if (objToIndex.snapshotId() != _opCtx->recoveryUnit()->getSnapshotId() &&
                !_collection->findDoc(_opCtx, loc, &objToIndex)) {
                // doc was deleted so don't index it.
                retries = 0;
                continue;
            }

            // Done before insert so we can retry document if it WCEs.
            progress->setTotalWhileRunning(_collection->numRecords(_opCtx));

            WriteUnitOfWork wunit(_opCtx);
            Status ret = insert(objToIndex.value(), loc);
            if (_buildInBackground)
                exec->saveState();
            if (ret.isOK()) {
                wunit.commit();
            } else if (dupsOut && ret.code() == ErrorCodes::DuplicateKey) {
                // If dupsOut is non-null, we should only fail the specific insert that
                // led to a DuplicateKey rather than the whole index build.
                dupsOut->insert(loc);
            } else {
                // Fail the index build hard.
                return ret;
            }
            if (_buildInBackground) {
                auto restoreStatus = exec->restoreState();  // Handles any WCEs internally.
                if (!restoreStatus.isOK()) {
                    return restoreStatus;
                }
            }

            // Go to the next document
            progress->hit();
            n++;
            retries = 0;
        } catch (const WriteConflictException&) {
            CurOp::get(_opCtx)->debug().writeConflicts++;
            retries++;  // logAndBackoff expects this to be 1 on first call.
            WriteConflictException::logAndBackoff(retries, "index creation", _collection->ns().ns());

            // Can't use writeConflictRetry since we need to save/restore exec around call to
            // abandonSnapshot.
            exec->saveState();
            _opCtx->recoveryUnit()->abandonSnapshot();
            auto restoreStatus = exec->restoreState();  // Handles any WCEs internally.
            if (!restoreStatus.isOK()) {
                return restoreStatus;
            }
        }
    }

    uassert(28550,
            "Unable to complete index build due to collection scan failure: " +
                WorkingSetCommon::toStatusString(objToIndex.value()),
            state == PlanExecutor::IS_EOF);

    if (MONGO_FAIL_POINT(hangAfterStartingIndexBuildUnlocked)) {
        // Unlock before hanging so replication recognizes we've completed.
        Locker::LockSnapshot lockInfo;
        _opCtx->lockState()->saveLockStateAndUnlock(&lockInfo);
        while (MONGO_FAIL_POINT(hangAfterStartingIndexBuildUnlocked)) {
            log() << "Hanging index build with no locks due to "
                     "'hangAfterStartingIndexBuildUnlocked' failpoint";
            sleepmillis(1000);
        }
        // If we want to support this, we'd need to regrab the lock and be sure that all callers
        // are ok with us yielding. They should be for BG indexes, but not for foreground.
        invariant(!"the hangAfterStartingIndexBuildUnlocked failpoint can't be turned off");
    }

    progress->finished();

    Status ret = doneInserting(dupsOut);
    if (!ret.isOK())
        return ret;

    log() << "build index done. scanned " << n << " total records. " << t.seconds() << " secs";

    return Status::OK();
}

Status MultiIndexBlockImpl::insert(const BSONObj& doc, const RecordId& loc) {
    for (size_t i = 0; i < _indexes.size(); i++) {
        if (_indexes[i].filterExpression && !_indexes[i].filterExpression->matchesBSON(doc)) {
            continue;
        }

        int64_t unused;
        Status idxStatus(ErrorCodes::InternalError, "");
        if (_indexes[i].bulk) {
            idxStatus = _indexes[i].bulk->insert(_opCtx, doc, loc, _indexes[i].options, &unused);
        } else {
            idxStatus = _indexes[i].real->insert(_opCtx, doc, loc, _indexes[i].options, &unused);
        }

        if (!idxStatus.isOK())
            return idxStatus;
    }
    return Status::OK();
}

Status MultiIndexBlockImpl::doneInserting(std::set<RecordId>* dupsOut) {
    for (size_t i = 0; i < _indexes.size(); i++) {
        if (_indexes[i].bulk == NULL)
            continue;
        LOG(1) << "\t bulk commit starting for index: "
               << _indexes[i].block->getEntry()->descriptor()->indexName();
        Status status = _indexes[i].real->commitBulk(_opCtx,
                                                     std::move(_indexes[i].bulk),
                                                     _allowInterruption,
                                                     _indexes[i].options.dupsAllowed,
                                                     dupsOut);
        if (!status.isOK()) {
            return status;
        }
    }

    return Status::OK();
}

void MultiIndexBlockImpl::abortWithoutCleanup() {
    _indexes.clear();
    _needToCleanup = false;
}

void MultiIndexBlockImpl::commit() {
    for (size_t i = 0; i < _indexes.size(); i++) {
        _indexes[i].block->success();
    }

    _opCtx->recoveryUnit()->registerChange(new SetNeedToCleanupOnRollback(this));
    _needToCleanup = false;
}

}  // namespace mongo
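// A minimal sketch (an assumption, not part of the MongoDB source above) of the call sequence
// the implementation is designed for: init() registers and opens the index builds,
// insertAllDocumentsInCollection() scans the collection and inserts keys, and commit() marks
// the builds successful inside the caller's WriteUnitOfWork. The buildIndexes helper name is
// hypothetical, and locking/collection acquisition and duplicate-key handling are omitted.
namespace mongo {

Status buildIndexes(OperationContext* opCtx,
                    Collection* collection,
                    const std::vector<BSONObj>& specs) {
    MultiIndexBlockImpl indexer(opCtx, collection);

    // Stage 1: validate the specs and open an IndexBuildBlock per index (inside its own WUOW).
    auto swSpecs = indexer.init(specs);
    if (!swSpecs.isOK())
        return swSpecs.getStatus();

    // Stage 2: scan the collection and feed every document to each index builder.
    Status status = indexer.insertAllDocumentsInCollection(nullptr);
    if (!status.isOK())
        return status;  // ~MultiIndexBlockImpl cleans up the partially built indexes.

    // Stage 3: flip the indexes to "ready" atomically with the caller's unit of work.
    WriteUnitOfWork wunit(opCtx);
    indexer.commit();
    wunit.commit();
    return Status::OK();
}

}  // namespace mongo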
namespace mongo { // The "wiredTigerCursorCacheSize" parameter has the following meaning. // // wiredTigerCursorCacheSize == 0 // For this setting, cursors are only cached in the WiredTiger storage engine // itself. Operations that need exclusive access such as drop or verify will // not be blocked by inactive cached cursors with this setting. However, this // setting may reduce the performance of certain workloads that normally // benefit from cursor caching above the storage engine. // // wiredTigerCursorCacheSize > 0 // WiredTiger-level caching of cursors is disabled but cursor caching does // occur above the storage engine. The value of this setting represents the // maximum number of cursors that are cached. Setting the value to 10000 will // give the old (<= 3.6) behavior. Note that cursors remain cached, even when a // session is released back to the cache. Thus, exclusive operations may be // blocked temporarily, and in some cases, a long time. Drops that fail because // of exclusivity silently succeed and are queued for retries. // // wiredTigerCursorCacheSize < 0 // This is a hybrid approach of the above two, and is the default. The the // absolute value of the setting is used as the number of cursors cached above // the storage engine. When a session is released, all cursors are closed, and // will be cached in WiredTiger. Exclusive operations should only be blocked // for a short time, except if a cursor is held by a long running session. This // is a good compromise for most workloads. AtomicInt32 kWiredTigerCursorCacheSize(-100); ExportedServerParameter<std::int32_t, ServerParameterType::kStartupAndRuntime> WiredTigerCursorCacheSizeSetting(ServerParameterSet::getGlobal(), "wiredTigerCursorCacheSize", &kWiredTigerCursorCacheSize); WiredTigerSession::WiredTigerSession(WT_CONNECTION* conn, uint64_t epoch, uint64_t cursorEpoch) : _epoch(epoch), _cursorEpoch(cursorEpoch), _session(NULL), _cursorGen(0), _cursorsOut(0) { invariantWTOK(conn->open_session(conn, NULL, "isolation=snapshot", &_session)); } WiredTigerSession::WiredTigerSession(WT_CONNECTION* conn, WiredTigerSessionCache* cache, uint64_t epoch, uint64_t cursorEpoch) : _epoch(epoch), _cursorEpoch(cursorEpoch), _cache(cache), _session(NULL), _cursorGen(0), _cursorsOut(0) { invariantWTOK(conn->open_session(conn, NULL, "isolation=snapshot", &_session)); } WiredTigerSession::~WiredTigerSession() { if (_session) { invariantWTOK(_session->close(_session, NULL)); } } WT_CURSOR* WiredTigerSession::getCursor(const std::string& uri, uint64_t id, bool forRecordStore) { // Find the most recently used cursor for (CursorCache::iterator i = _cursors.begin(); i != _cursors.end(); ++i) { if (i->_id == id) { WT_CURSOR* c = i->_cursor; _cursors.erase(i); _cursorsOut++; return c; } } WT_CURSOR* c = NULL; int ret = _session->open_cursor( _session, uri.c_str(), NULL, forRecordStore ? "" : "overwrite=false", &c); if (ret != ENOENT) invariantWTOK(ret); if (c) _cursorsOut++; return c; } void WiredTigerSession::releaseCursor(uint64_t id, WT_CURSOR* cursor) { invariant(_session); invariant(cursor); _cursorsOut--; invariantWTOK(cursor->reset(cursor)); // Cursors are pushed to the front of the list and removed from the back _cursors.push_front(WiredTigerCachedCursor(id, _cursorGen++, cursor)); // A negative value for wiredTigercursorCacheSize means to use hybrid caching. 
std::uint32_t cacheSize = abs(kWiredTigerCursorCacheSize.load()); while (!_cursors.empty() && _cursorGen - _cursors.back()._gen > cacheSize) { cursor = _cursors.back()._cursor; _cursors.pop_back(); invariantWTOK(cursor->close(cursor)); } } void WiredTigerSession::closeAllCursors(const std::string& uri) { invariant(_session); bool all = (uri == ""); for (auto i = _cursors.begin(); i != _cursors.end();) { WT_CURSOR* cursor = i->_cursor; if (cursor && (all || uri == cursor->uri)) { invariantWTOK(cursor->close(cursor)); i = _cursors.erase(i); } else ++i; } } void WiredTigerSession::closeCursorsForQueuedDrops(WiredTigerKVEngine* engine) { invariant(_session); _cursorEpoch = _cache->getCursorEpoch(); auto toDrop = engine->filterCursorsWithQueuedDrops(&_cursors); for (auto i = toDrop.begin(); i != toDrop.end(); i++) { WT_CURSOR* cursor = i->_cursor; if (cursor) { invariantWTOK(cursor->close(cursor)); } } } namespace { AtomicUInt64 nextTableId(1); } // static uint64_t WiredTigerSession::genTableId() { return nextTableId.fetchAndAdd(1); } // ----------------------- WiredTigerSessionCache::WiredTigerSessionCache(WiredTigerKVEngine* engine) : _engine(engine), _conn(engine->getConnection()), _shuttingDown(0) {} WiredTigerSessionCache::WiredTigerSessionCache(WT_CONNECTION* conn) : _engine(NULL), _conn(conn), _shuttingDown(0) {} WiredTigerSessionCache::~WiredTigerSessionCache() { shuttingDown(); } void WiredTigerSessionCache::shuttingDown() { uint32_t actual = _shuttingDown.load(); uint32_t expected; // Try to atomically set _shuttingDown flag, but just return if another thread was first. do { expected = actual; actual = _shuttingDown.compareAndSwap(expected, expected | kShuttingDownMask); if (actual & kShuttingDownMask) return; } while (actual != expected); // Spin as long as there are threads in releaseSession while (_shuttingDown.load() != kShuttingDownMask) { sleepmillis(1); } closeAll(); } void WiredTigerSessionCache::waitUntilDurable(bool forceCheckpoint, bool stableCheckpoint) { // For inMemory storage engines, the data is "as durable as it's going to get". // That is, a restart is equivalent to a complete node failure. if (isEphemeral()) { return; } const int shuttingDown = _shuttingDown.fetchAndAdd(1); ON_BLOCK_EXIT([this] { _shuttingDown.fetchAndSubtract(1); }); uassert(ErrorCodes::ShutdownInProgress, "Cannot wait for durability because a shutdown is in progress", !(shuttingDown & kShuttingDownMask)); // Stable checkpoints are only meaningful in a replica set. Replication sets the "stable // timestamp". If the stable timestamp is unset, WiredTiger takes a full checkpoint, which is // incidentally what we want. A "true" stable checkpoint (a stable timestamp was set on the // WT_CONNECTION, i.e: replication is on) requires `forceCheckpoint` to be true and journaling // to be enabled. if (stableCheckpoint && getGlobalReplSettings().usingReplSets()) { invariant(forceCheckpoint && _engine->isDurable()); } // When forcing a checkpoint with journaling enabled, don't synchronize with other // waiters, as a log flush is much cheaper than a full checkpoint. if (forceCheckpoint && _engine->isDurable()) { UniqueWiredTigerSession session = getSession(); WT_SESSION* s = session->getSession(); { stdx::unique_lock<stdx::mutex> lk(_journalListenerMutex); JournalListener::Token token = _journalListener->getToken(); auto config = stableCheckpoint ? 
"use_timestamp=true" : "use_timestamp=false"; invariantWTOK(s->checkpoint(s, config)); _journalListener->onDurable(token); } LOG(4) << "created checkpoint (forced)"; return; } uint32_t start = _lastSyncTime.load(); // Do the remainder in a critical section that ensures only a single thread at a time // will attempt to synchronize. stdx::unique_lock<stdx::mutex> lk(_lastSyncMutex); uint32_t current = _lastSyncTime.loadRelaxed(); // synchronized with writes through mutex if (current != start) { // Someone else synced already since we read lastSyncTime, so we're done! return; } _lastSyncTime.store(current + 1); // Nobody has synched yet, so we have to sync ourselves. // This gets the token (OpTime) from the last write, before flushing (either the journal, or a // checkpoint), and then reports that token (OpTime) as a durable write. stdx::unique_lock<stdx::mutex> jlk(_journalListenerMutex); JournalListener::Token token = _journalListener->getToken(); // Initialize on first use. if (!_waitUntilDurableSession) { invariantWTOK( _conn->open_session(_conn, NULL, "isolation=snapshot", &_waitUntilDurableSession)); } // Use the journal when available, or a checkpoint otherwise. if (_engine && _engine->isDurable()) { invariantWTOK(_waitUntilDurableSession->log_flush(_waitUntilDurableSession, "sync=on")); LOG(4) << "flushed journal"; } else { invariantWTOK(_waitUntilDurableSession->checkpoint(_waitUntilDurableSession, NULL)); LOG(4) << "created checkpoint"; } _journalListener->onDurable(token); } void WiredTigerSessionCache::waitUntilPreparedUnitOfWorkCommitsOrAborts(OperationContext* opCtx) { invariant(opCtx); stdx::unique_lock<stdx::mutex> lk(_prepareCommittedOrAbortedMutex); auto lastCounter = _lastCommitOrAbortCounter; opCtx->waitForConditionOrInterrupt(_prepareCommittedOrAbortedCond, lk, [&] { return lastCounter != _lastCommitOrAbortCounter; }); } void WiredTigerSessionCache::notifyPreparedUnitOfWorkHasCommittedOrAborted() { { stdx::unique_lock<stdx::mutex> lk(_prepareCommittedOrAbortedMutex); _lastCommitOrAbortCounter++; } _prepareCommittedOrAbortedCond.notify_all(); } void WiredTigerSessionCache::closeAllCursors(const std::string& uri) { stdx::lock_guard<stdx::mutex> lock(_cacheLock); for (SessionCache::iterator i = _sessions.begin(); i != _sessions.end(); i++) { (*i)->closeAllCursors(uri); } } void WiredTigerSessionCache::closeCursorsForQueuedDrops() { // Increment the cursor epoch so that all cursors from this epoch are closed. _cursorEpoch.fetchAndAdd(1); stdx::lock_guard<stdx::mutex> lock(_cacheLock); for (SessionCache::iterator i = _sessions.begin(); i != _sessions.end(); i++) { (*i)->closeCursorsForQueuedDrops(_engine); } } void WiredTigerSessionCache::closeAll() { // Increment the epoch as we are now closing all sessions with this epoch. SessionCache swap; { stdx::lock_guard<stdx::mutex> lock(_cacheLock); _epoch.fetchAndAdd(1); _sessions.swap(swap); } for (SessionCache::iterator i = swap.begin(); i != swap.end(); i++) { delete (*i); } } bool WiredTigerSessionCache::isEphemeral() { return _engine && _engine->isEphemeral(); } UniqueWiredTigerSession WiredTigerSessionCache::getSession() { // We should never be able to get here after _shuttingDown is set, because no new // operations should be allowed to start. 
invariant(!(_shuttingDown.loadRelaxed() & kShuttingDownMask)); { stdx::lock_guard<stdx::mutex> lock(_cacheLock); if (!_sessions.empty()) { // Get the most recently used session so that if we discard sessions, we're // discarding older ones WiredTigerSession* cachedSession = _sessions.back(); _sessions.pop_back(); return UniqueWiredTigerSession(cachedSession); } } // Outside of the cache partition lock, but on release will be put back on the cache return UniqueWiredTigerSession( new WiredTigerSession(_conn, this, _epoch.load(), _cursorEpoch.load())); } void WiredTigerSessionCache::releaseSession(WiredTigerSession* session) { invariant(session); invariant(session->cursorsOut() == 0); const int shuttingDown = _shuttingDown.fetchAndAdd(1); ON_BLOCK_EXIT([this] { _shuttingDown.fetchAndSubtract(1); }); if (shuttingDown & kShuttingDownMask) { // There is a race condition with clean shutdown, where the storage engine is ripped from // underneath OperationContexts, which are not "active" (i.e., do not have any locks), but // are just about to delete the recovery unit. See SERVER-16031 for more information. Since // shutting down the WT_CONNECTION will close all WT_SESSIONS, we shouldn't also try to // directly close this session. session->_session = nullptr; // Prevents calling _session->close() in destructor. delete session; return; } { WT_SESSION* ss = session->getSession(); uint64_t range; // This checks that we are only caching idle sessions and not something which might hold // locks or otherwise prevent truncation. invariantWTOK(ss->transaction_pinned_range(ss, &range)); invariant(range == 0); // Release resources in the session we're about to cache. // If we are using hybrid caching, then close cursors now and let them // be cached at the WiredTiger level. if (kWiredTigerCursorCacheSize.load() < 0) { session->closeAllCursors(""); } invariantWTOK(ss->reset(ss)); } // If the cursor epoch has moved on, close all cursors in the session. uint64_t cursorEpoch = _cursorEpoch.load(); if (session->_getCursorEpoch() != cursorEpoch) session->closeCursorsForQueuedDrops(_engine); bool returnedToCache = false; uint64_t currentEpoch = _epoch.load(); bool dropQueuedIdentsAtSessionEnd = session->isDropQueuedIdentsAtSessionEndAllowed(); // Reset this session's flag for dropping queued idents to default, before returning it to // session cache. session->dropQueuedIdentsAtSessionEndAllowed(true); if (session->_getEpoch() == currentEpoch) { // check outside of lock to reduce contention stdx::lock_guard<stdx::mutex> lock(_cacheLock); if (session->_getEpoch() == _epoch.load()) { // recheck inside the lock for correctness returnedToCache = true; _sessions.push_back(session); } } else invariant(session->_getEpoch() < currentEpoch); if (!returnedToCache) delete session; if (dropQueuedIdentsAtSessionEnd && _engine && _engine->haveDropsQueued()) _engine->dropSomeQueuedIdents(); } void WiredTigerSessionCache::setJournalListener(JournalListener* jl) { stdx::unique_lock<stdx::mutex> lk(_journalListenerMutex); _journalListener = jl; } bool WiredTigerSessionCache::isEngineCachingCursors() { return kWiredTigerCursorCacheSize.load() <= 0; } void WiredTigerSessionCache::WiredTigerSessionDeleter::operator()( WiredTigerSession* session) const { session->_cache->releaseSession(session); } } // namespace mongo
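// A minimal usage sketch (an assumption, not part of the MongoDB source above) of the session
// and cursor caching path implemented in this file: obtain a pooled session from the
// WiredTigerSessionCache, check a cursor out of the session-level cache with getCursor(),
// return it with releaseCursor(), and let the UniqueWiredTigerSession deleter hand the session
// back via releaseSession(). The scanTableOnce helper and the example uri are illustrative.
namespace mongo {

void scanTableOnce(WiredTigerSessionCache* sessionCache,
                   const std::string& uri,  // e.g. "table:collection-0--1234" (illustrative)
                   uint64_t tableId) {      // generated once per table via genTableId() and reused
    // RAII handle; its deleter calls WiredTigerSessionCache::releaseSession().
    UniqueWiredTigerSession session = sessionCache->getSession();

    // Reuses a cached cursor with this id if the session holds one, otherwise opens a new cursor.
    WT_CURSOR* cursor = session->getCursor(uri, tableId, /*forRecordStore=*/true);
    if (!cursor) {
        return;  // open_cursor returned ENOENT: the table does not exist.
    }

    // ... iterate with cursor->next(cursor) and read keys/values here ...

    // Hand the cursor back to the session-level cache. With the default negative
    // wiredTigerCursorCacheSize (hybrid mode), old cursors are evicted once they fall more than
    // |cacheSize| generations behind, and the rest are closed when the session is released.
    session->releaseCursor(tableId, cursor);
}

}  // namespace mongo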