Example #1
bool isMMAPV1() {
    StorageEngine* globalStorageEngine = getGlobalServiceContext()->getGlobalStorageEngine();

    invariant(globalStorageEngine);
    return globalStorageEngine->isMmapV1();
}
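A hypothetical call site, just to show how such a helper is typically used (the caller below is illustrative, not from the MongoDB sources):

// Hypothetical caller: branch on the storage engine type, for example to skip
// an MMAPv1-only code path. isMMAPV1() is the helper defined above.
void maybeDoMmapV1OnlyWork() {
    if (!isMMAPV1()) {
        return;  // nothing to do on WiredTiger or other engines
    }
    // ... MMAPv1-specific maintenance ...
}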
Example #2
    static void handleCursorCommand(OperationContext* txn,
                                    const string& ns,
                                    ClientCursorPin* pin,
                                    PlanExecutor* exec,
                                    const BSONObj& cmdObj,
                                    BSONObjBuilder& result) {

        ClientCursor* cursor = pin ? pin->c() : NULL;
        if (pin) {
            invariant(cursor);
            invariant(cursor->getExecutor() == exec);
            invariant(cursor->isAggCursor());
        }

        BSONElement batchSizeElem = cmdObj.getFieldDotted("cursor.batchSize");
        const long long batchSize = batchSizeElem.isNumber()
                                    ? batchSizeElem.numberLong()
                                    : 101; // same as query

        // can't use result BSONObjBuilder directly since it won't handle exceptions correctly.
        BSONArrayBuilder resultsArray;
        const int byteLimit = MaxBytesToReturnToClientAtOnce;
        BSONObj next;
        for (int objCount = 0; objCount < batchSize; objCount++) {
            // The initial getNext() on a PipelineProxyStage may be very expensive so we don't
            // do it when batchSize is 0 since that indicates a desire for a fast return.
            if (exec->getNext(&next, NULL) != PlanExecutor::ADVANCED) {
                if (pin) pin->deleteUnderlying();
                // make it an obvious error to use cursor or executor after this point
                cursor = NULL;
                exec = NULL;
                break;
            }

            if (resultsArray.len() + next.objsize() > byteLimit) {
                // Get the pipeline proxy stage wrapped by this PlanExecutor.
                PipelineProxyStage* proxy = static_cast<PipelineProxyStage*>(exec->getRootStage());
                // too big. next will be the first doc in the second batch
                proxy->pushBack(next);
                break;
            }

            resultsArray.append(next);
        }

        // NOTE: exec->isEOF() can have side effects such as writing by $out. However, it should
        // be relatively quick since if there was no pin then the input is empty. Also, this
        // violates the contract for batchSize==0. Sharding requires a cursor to be returned in that
        // case. This is ok for now however, since you can't have a sharded collection that doesn't
        // exist.
        const bool canReturnMoreBatches = pin;
        if (!canReturnMoreBatches && exec && !exec->isEOF()) {
            // msgasserting since this shouldn't be possible to trigger from today's aggregation
            // language. The wording assumes that the only reason pin would be null is if the
            // collection doesn't exist.
            msgasserted(17391, str::stream()
                << "Aggregation has more results than fit in initial batch, but can't "
                << "create cursor since collection " << ns << " doesn't exist");
        }

        if (cursor) {
            // If a time limit was set on the pipeline, remaining time is "rolled over" to the
            // cursor (for use by future getmore ops).
            cursor->setLeftoverMaxTimeMicros( txn->getCurOp()->getRemainingMaxTimeMicros() );

            // We stash away the RecoveryUnit in the ClientCursor.  It's used for subsequent
            // getMore requests.  The calling OpCtx gets a fresh RecoveryUnit.
            cursor->setOwnedRecoveryUnit(txn->releaseRecoveryUnit());
            StorageEngine* storageEngine = getGlobalEnvironment()->getGlobalStorageEngine();
            txn->setRecoveryUnit(storageEngine->newRecoveryUnit());

            // Cursor needs to be in a saved state while we yield locks for getmore. State
            // will be restored in getMore().
            exec->saveState();
        }

        const long long cursorId = cursor ? cursor->cursorid() : 0LL;
        Command::appendCursorResponseObject(cursorId, ns, resultsArray.arr(), &result);
    }
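The batching pattern in handleCursorCommand() — accumulate results until a document count or byte limit is hit, then stash the first overflowing document for the next batch — is independent of the aggregation machinery. A minimal self-contained sketch of the same idea, with std::string standing in for BSONObj and a std::deque standing in for the executor stream:

#include <deque>
#include <string>
#include <vector>

// Illustrative sketch only. Fills a batch until either 'batchSize' documents
// or 'byteLimit' bytes are reached; the first document that would overflow the
// byte limit is pushed back onto the stream, like proxy->pushBack(next) above.
// A batchSize of 0 returns immediately with an empty batch, matching the
// "fast return" behavior described in the comments.
std::vector<std::string> buildBatch(std::deque<std::string>& docs,
                                    long long batchSize,
                                    std::size_t byteLimit) {
    std::vector<std::string> batch;
    std::size_t bytes = 0;
    for (long long objCount = 0; objCount < batchSize && !docs.empty(); objCount++) {
        std::string next = std::move(docs.front());
        docs.pop_front();
        if (bytes + next.size() > byteLimit) {
            docs.push_front(std::move(next));  // first doc of the next batch
            break;
        }
        bytes += next.size();
        batch.push_back(std::move(next));
    }
    return batch;
}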
Example #3
static void repairDatabasesAndCheckVersion(OperationContext* txn) {
    LOG(1) << "enter repairDatabases (to check pdfile version #)" << endl;

    ScopedTransaction transaction(txn, MODE_X);
    Lock::GlobalWrite lk(txn->lockState());

    vector<string> dbNames;

    StorageEngine* storageEngine = getGlobalServiceContext()->getGlobalStorageEngine();
    storageEngine->listDatabases(&dbNames);

    // Repair all databases first, so that we do not try to open them if they are in bad shape
    if (storageGlobalParams.repair) {
        for (vector<string>::const_iterator i = dbNames.begin(); i != dbNames.end(); ++i) {
            const string dbName = *i;
            LOG(1) << "    Repairing database: " << dbName << endl;

            fassert(18506, repairDatabase(txn, storageEngine, dbName));
        }
    }

    const repl::ReplSettings& replSettings = repl::getGlobalReplicationCoordinator()->getSettings();

    // On replica set members we only clear temp collections on DBs other than "local" during
    // promotion to primary. On pure slaves, they are only cleared when the oplog tells them
    // to. The local DB is special because it is not replicated.  See SERVER-10927 for more
    // details.
    const bool shouldClearNonLocalTmpCollections =
        !(checkIfReplMissingFromCommandLine(txn) || replSettings.usingReplSets() ||
          replSettings.slave == repl::SimpleSlave);

    for (vector<string>::const_iterator i = dbNames.begin(); i != dbNames.end(); ++i) {
        const string dbName = *i;
        LOG(1) << "    Recovering database: " << dbName << endl;

        Database* db = dbHolder().openDb(txn, dbName);
        invariant(db);

        // First thing after opening the database is to check for file compatibility,
        // otherwise we might crash if this is a deprecated format.
        if (!db->getDatabaseCatalogEntry()->currentFilesCompatible(txn)) {
            log() << "****";
            log() << "cannot do this upgrade without an upgrade in the middle";
            log() << "please do a --repair with 2.6 and then start this version";
            dbexit(EXIT_NEED_UPGRADE);
            return;
        }

        // Major versions match, check indexes
        const string systemIndexes = db->name() + ".system.indexes";

        Collection* coll = db->getCollection(systemIndexes);
        unique_ptr<PlanExecutor> exec(
            InternalPlanner::collectionScan(txn, systemIndexes, coll, PlanExecutor::YIELD_MANUAL));

        BSONObj index;
        PlanExecutor::ExecState state;
        while (PlanExecutor::ADVANCED == (state = exec->getNext(&index, NULL))) {
            const BSONObj key = index.getObjectField("key");
            const string plugin = IndexNames::findPluginName(key);

            if (db->getDatabaseCatalogEntry()->isOlderThan24(txn)) {
                if (IndexNames::existedBefore24(plugin)) {
                    continue;
                }

                log() << "Index " << index << " claims to be of type '" << plugin << "', "
                      << "which is either invalid or did not exist before v2.4. "
                      << "See the upgrade section: "
                      << "http://dochub.mongodb.org/core/upgrade-2.4" << startupWarningsLog;
            }

            const Status keyStatus = validateKeyPattern(key);
            if (!keyStatus.isOK()) {
                log() << "Problem with index " << index << ": " << keyStatus.reason()
                      << " This index can still be used however it cannot be rebuilt."
                      << " For more info see"
                      << " http://dochub.mongodb.org/core/index-validation" << startupWarningsLog;
            }

            if (index["v"].isNumber() && index["v"].numberInt() == 0) {
                log() << "WARNING: The index: " << index << " was created with the deprecated"
                      << " v:0 format.  This format will not be supported in a future release."
                      << startupWarningsLog;
                log() << "\t To fix this, you need to rebuild this index."
                      << " For instructions, see http://dochub.mongodb.org/core/rebuild-v0-indexes"
                      << startupWarningsLog;
            }
        }

        if (PlanExecutor::IS_EOF != state) {
            warning() << "Internal error while reading collection " << systemIndexes;
        }

        if (replSettings.usingReplSets()) {
            // We only care about the _id index if we are in a replset
            checkForIdIndexes(txn, db);
        }

        if (shouldClearNonLocalTmpCollections || dbName == "local") {
            db->clearTmpCollections(txn);
        }
    }

    LOG(1) << "done repairDatabases" << endl;
}
Example #4
    Database* DatabaseHolder::getOrCreate(OperationContext* txn,
                                          const StringData& ns,
                                          bool& justCreated) {

        const StringData dbname = _todb( ns );
        invariant(txn->lockState()->isAtLeastReadLocked(dbname));

        if (txn->lockState()->isWriteLocked() && FileAllocator::get()->hasFailed()) {
            uassert(17507, "Can't take a write lock while out of disk space", false);
        }

        {
            SimpleMutex::scoped_lock lk(_m);
            {
                DBs::const_iterator i = _dbs.find(dbname);
                if( i != _dbs.end() ) {
                    justCreated = false;
                    return i->second;
                }
            }

            // TODO: protect against being sprayed with requests for different db names that don't
            //       exist - that would make the DBs map very large. It is not clear how to handle
            //       this; for now we just log it, which is what the "> 40" check below does:
            bool cant = !txn->lockState()->isWriteLocked(ns);
            if( logger::globalLogDomain()->shouldLog(logger::LogSeverity::Debug(1)) ||
                _dbs.size() > 40 || cant || DEBUG_BUILD ) {
                log() << "opening db: " << dbname;
            }
            massert(15927,
                    "can't open database in a read lock. if db was just closed, consider retrying "
                    "the query. might otherwise indicate an internal error",
                    !cant);
        }

        // we know we have a db exclusive lock here
        { // check casing
            string duplicate = Database::duplicateUncasedName(dbname.toString());
            if ( !duplicate.empty() ) {
                stringstream ss;
                ss << "db already exists with different case already have: [" << duplicate
                   << "] trying to create [" << dbname.toString() << "]";
                uasserted( DatabaseDifferCaseCode , ss.str() );
            }
        }

        // we mark our thread as having done writes now as we do not want any exceptions
        // once we start creating a new database
        cc().writeHappened();

        // This locks _m for defensive checks, so we do not want to be holding _m right here.
        StorageEngine* storageEngine = getGlobalEnvironment()->getGlobalStorageEngine();
        invariant(storageEngine);
        Database *db;
        {
            WriteUnitOfWork wunit(txn);
            DatabaseCatalogEntry* entry = storageEngine->getDatabaseCatalogEntry(txn, dbname);
            invariant(entry);
            justCreated = !entry->exists();
            db = new Database(txn, dbname, entry);
            wunit.commit();
        }

        {
            SimpleMutex::scoped_lock lk(_m);
            _dbs[dbname] = db;
        }

        return db;
    }
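The structure of getOrCreate() — a fast-path lookup under the map mutex, expensive construction outside the lock, then publication under the mutex again — is a general open-or-create pattern. A stripped-down C++17 sketch with standard types (the names here are illustrative, not the MongoDB API):

#include <map>
#include <memory>
#include <mutex>
#include <string>

struct Database { /* payload omitted */ };

class DatabaseHolderSketch {
public:
    Database* getOrCreate(const std::string& dbname, bool& justCreated) {
        {
            std::lock_guard<std::mutex> lk(_m);  // fast path: already open
            auto it = _dbs.find(dbname);
            if (it != _dbs.end()) {
                justCreated = false;
                return it->second.get();
            }
        }
        // Expensive construction happens with no lock held, as in the original,
        // which relies on a higher-level DB-exclusive lock. try_emplace below
        // additionally tolerates a racing creator by keeping the first entry.
        auto db = std::make_unique<Database>();
        std::lock_guard<std::mutex> lk(_m);
        auto [it, inserted] = _dbs.try_emplace(dbname, std::move(db));
        justCreated = inserted;
        return it->second.get();
    }

private:
    std::mutex _m;
    std::map<std::string, std::unique_ptr<Database>> _dbs;
};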
Example #5
static void repairDatabasesAndCheckVersion(OperationContext* txn) {
    LOG(1) << "enter repairDatabases (to check pdfile version #)" << endl;

    ScopedTransaction transaction(txn, MODE_X);
    Lock::GlobalWrite lk(txn->lockState());

    vector<string> dbNames;

    StorageEngine* storageEngine = txn->getServiceContext()->getGlobalStorageEngine();
    storageEngine->listDatabases(&dbNames);

    // Repair all databases first, so that we do not try to open them if they are in bad shape
    if (storageGlobalParams.repair) {
        invariant(!storageGlobalParams.readOnly);
        for (vector<string>::const_iterator i = dbNames.begin(); i != dbNames.end(); ++i) {
            const string dbName = *i;
            LOG(1) << "    Repairing database: " << dbName << endl;

            fassert(18506, repairDatabase(txn, storageEngine, dbName));
        }
    }

    const repl::ReplSettings& replSettings = repl::getGlobalReplicationCoordinator()->getSettings();

    // On replica set members we only clear temp collections on DBs other than "local" during
    // promotion to primary. On pure slaves, they are only cleared when the oplog tells them
    // to. The local DB is special because it is not replicated.  See SERVER-10927 for more
    // details.
    const bool shouldClearNonLocalTmpCollections =
        !(checkIfReplMissingFromCommandLine(txn) || replSettings.usingReplSets() ||
          replSettings.isSlave());

    const bool shouldDoCleanupForSERVER23299 = isSubjectToSERVER23299(txn);

    for (vector<string>::const_iterator i = dbNames.begin(); i != dbNames.end(); ++i) {
        const string dbName = *i;
        LOG(1) << "    Recovering database: " << dbName << endl;

        Database* db = dbHolder().openDb(txn, dbName);
        invariant(db);

        // First thing after opening the database is to check for file compatibility,
        // otherwise we might crash if this is a deprecated format.
        auto status = db->getDatabaseCatalogEntry()->currentFilesCompatible(txn);
        if (!status.isOK()) {
            if (status.code() == ErrorCodes::CanRepairToDowngrade) {
                // Convert CanRepairToDowngrade statuses to MustUpgrade statuses to avoid logging a
                // potentially confusing and inaccurate message.
                //
                // TODO SERVER-24097: Log a message informing the user that they can start the
                // current version of mongod with --repair and then proceed with normal startup.
                status = {ErrorCodes::MustUpgrade, status.reason()};
            }
            severe() << "Unable to start mongod due to an incompatibility with the data files and"
                        " this version of mongod: "
                     << status;
            severe() << "Please consult our documentation when trying to downgrade to a previous"
                        " major release";
            quickExit(EXIT_NEED_UPGRADE);
            return;
        }

        // Major versions match, check indexes
        const string systemIndexes = db->name() + ".system.indexes";

        Collection* coll = db->getCollection(systemIndexes);
        unique_ptr<PlanExecutor> exec(
            InternalPlanner::collectionScan(txn, systemIndexes, coll, PlanExecutor::YIELD_MANUAL));

        BSONObj index;
        PlanExecutor::ExecState state;
        while (PlanExecutor::ADVANCED == (state = exec->getNext(&index, NULL))) {
            const BSONObj key = index.getObjectField("key");
            const string plugin = IndexNames::findPluginName(key);

            if (db->getDatabaseCatalogEntry()->isOlderThan24(txn)) {
                if (IndexNames::existedBefore24(plugin)) {
                    continue;
                }

                log() << "Index " << index << " claims to be of type '" << plugin << "', "
                      << "which is either invalid or did not exist before v2.4. "
                      << "See the upgrade section: "
                      << "http://dochub.mongodb.org/core/upgrade-2.4" << startupWarningsLog;
            }

            const Status keyStatus = validateKeyPattern(key);
            if (!keyStatus.isOK()) {
                log() << "Problem with index " << index << ": " << keyStatus.reason()
                      << " This index can still be used however it cannot be rebuilt."
                      << " For more info see"
                      << " http://dochub.mongodb.org/core/index-validation" << startupWarningsLog;
            }

            if (index["v"].isNumber() && index["v"].numberInt() == 0) {
                log() << "WARNING: The index: " << index << " was created with the deprecated"
                      << " v:0 format.  This format will not be supported in a future release."
                      << startupWarningsLog;
                log() << "\t To fix this, you need to rebuild this index."
                      << " For instructions, see http://dochub.mongodb.org/core/rebuild-v0-indexes"
                      << startupWarningsLog;
            }
        }

        // Non-yielding collection scans from InternalPlanner will never error.
        invariant(PlanExecutor::IS_EOF == state);

        if (replSettings.usingReplSets()) {
            // We only care about the _id index if we are in a replset
            checkForIdIndexes(txn, db);
            // Ensure oplog is capped (mmap does not guarantee order of inserts on noncapped
            // collections)
            repl::checkForCappedOplog(txn);
        }

        if (shouldDoCleanupForSERVER23299) {
            handleSERVER23299ForDb(txn, db);
        }

        if (!storageGlobalParams.readOnly &&
            (shouldClearNonLocalTmpCollections || dbName == "local")) {
            db->clearTmpCollections(txn);
        }
    }

    LOG(1) << "done repairDatabases" << endl;
}
Example #6
void IndexCatalogEntry::setMultikey(OperationContext* txn, const MultikeyPaths& multikeyPaths) {
    if (!_indexTracksPathLevelMultikeyInfo && isMultikey()) {
        // If the index is already set as multikey and we don't have any path-level information to
        // update, then there's nothing more for us to do.
        return;
    }

    if (_indexTracksPathLevelMultikeyInfo) {
        stdx::lock_guard<stdx::mutex> lk(_indexMultikeyPathsMutex);
        invariant(multikeyPaths.size() == _indexMultikeyPaths.size());

        bool newPathIsMultikey = false;
        for (size_t i = 0; i < multikeyPaths.size(); ++i) {
            if (!std::includes(_indexMultikeyPaths[i].begin(),
                               _indexMultikeyPaths[i].end(),
                               multikeyPaths[i].begin(),
                               multikeyPaths[i].end())) {
                // If 'multikeyPaths' contains a new path component that causes this index to be
                // multikey, then we must update the index metadata in the CollectionCatalogEntry.
                newPathIsMultikey = true;
                break;
            }
        }

        if (!newPathIsMultikey) {
            // Otherwise, if all the path components in 'multikeyPaths' are already tracked in
            // '_indexMultikeyPaths', then there's nothing more for us to do.
            return;
        }
    }

    {
        // Only one thread should set the multi-key value per collection, because the metadata for a
        // collection is one large document.
        Lock::ResourceLock collMDLock(txn->lockState(), ResourceId(RESOURCE_METADATA, _ns), MODE_X);

        if (!_indexTracksPathLevelMultikeyInfo && isMultikey()) {
            // It's possible that we raced with another thread when acquiring the MD lock. If the
            // index is already set as multikey and we don't have any path-level information to
            // update, then there's nothing more for us to do.
            return;
        }

        // This effectively emulates a side-transaction off the main transaction that invoked
        // setMultikey. The reason we need it is to avoid artificial WriteConflicts, which happen
        // under snapshot isolation.
        {
            StorageEngine* storageEngine = getGlobalServiceContext()->getGlobalStorageEngine();
            RecoveryUnitSwap ruSwap(txn, storageEngine->newRecoveryUnit());

            WriteUnitOfWork wuow(txn);

            // It's possible that the index type (e.g. ascending/descending index) supports tracking
            // path-level multikey information, but this particular index doesn't.
            // CollectionCatalogEntry::setIndexIsMultikey() requires that we discard the path-level
            // multikey information in order to avoid unintentionally setting path-level multikey
            // information on an index created before 3.4.
            if (_collection->setIndexIsMultikey(
                    txn,
                    _descriptor->indexName(),
                    _indexTracksPathLevelMultikeyInfo ? multikeyPaths : MultikeyPaths{})) {
                if (_infoCache) {
                    LOG(1) << _ns << ": clearing plan cache - index " << _descriptor->keyPattern()
                           << " set to multi key.";
                    _infoCache->clearQueryCache();
                }
            }

            wuow.commit();
        }
    }

    _isMultikey.store(true);

    if (_indexTracksPathLevelMultikeyInfo) {
        stdx::lock_guard<stdx::mutex> lk(_indexMultikeyPathsMutex);
        for (size_t i = 0; i < multikeyPaths.size(); ++i) {
            _indexMultikeyPaths[i].insert(multikeyPaths[i].begin(), multikeyPaths[i].end());
        }
    }
}
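setMultikey() relies on a check-lock-recheck sequence: test the flag without the metadata lock, take the lock, and test again in case another thread won the race. The shape of that idiom, reduced to standard primitives (an illustrative sketch, not MongoDB code):

#include <atomic>
#include <mutex>

std::atomic<bool> flagIsSet{false};
std::mutex metadataLock;

// The cheap unlocked check lets the common already-set case return without
// contending on the lock; the second check under the lock handles the race
// where another thread set the flag between our first check and our lock
// acquisition, exactly as the second isMultikey() test above does.
void setFlagOnce() {
    if (flagIsSet.load()) {
        return;  // fast path, no lock taken
    }
    std::lock_guard<std::mutex> lk(metadataLock);
    if (flagIsSet.load()) {
        return;  // another thread raced us here
    }
    // ... durable metadata update would happen here ...
    flagIsSet.store(true);
}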
Example #7
    void createOplog(OperationContext* txn) {
        Lock::GlobalWrite lk(txn->lockState());

        const char * ns = "local.oplog.$main";

        const ReplSettings& replSettings = getGlobalReplicationCoordinator()->getSettings();
        bool rs = !replSettings.replSet.empty();
        if( rs )
            ns = rsoplog;

        Client::Context ctx(txn, ns);
        Collection* collection = ctx.db()->getCollection(txn, ns );

        if ( collection ) {

            if (replSettings.oplogSize != 0) {
                int o = (int)(collection->getRecordStore()->storageSize(txn) / ( 1024 * 1024 ) );
                int n = (int)(replSettings.oplogSize / (1024 * 1024));
                if ( n != o ) {
                    stringstream ss;
                    ss << "cmdline oplogsize (" << n << ") different than existing (" << o << ") see: http://dochub.mongodb.org/core/increase-oplog";
                    log() << ss.str() << endl;
                    throw UserException( 13257 , ss.str() );
                }
            }

            if( rs ) return;

            initOpTimeFromOplog(txn, ns);
            return;
        }

        /* create an oplog collection, if it doesn't yet exist. */
        long long sz = 0;
        if ( replSettings.oplogSize != 0 ) {
            sz = replSettings.oplogSize;
        }
        else {
            /* not specified. pick a default size */
            sz = 50LL * 1024LL * 1024LL;
            if ( sizeof(int *) >= 8 ) {
#if defined(__APPLE__)
                // typically these are desktops (dev machines), so keep it smallish
                sz = (256-64) * 1024 * 1024;
#else
                sz = 990LL * 1024 * 1024;
                double free =
                    File::freeSpace(storageGlobalParams.dbpath); //-1 if call not supported.
                long long fivePct = static_cast<long long>( free * 0.05 );
                if ( fivePct > sz )
                    sz = fivePct;
                // we use 5% of free space up to 50GB (1TB free)
                static long long upperBound = 50LL * 1024 * 1024 * 1024;
                if (fivePct > upperBound)
                    sz = upperBound;
#endif
            }
        }

        log() << "******" << endl;
        log() << "creating replication oplog of size: " << (int)( sz / ( 1024 * 1024 ) ) << "MB..." << endl;

        CollectionOptions options;
        options.capped = true;
        options.cappedSize = sz;
        options.autoIndexId = CollectionOptions::NO;

        WriteUnitOfWork wunit(txn);
        invariant(ctx.db()->createCollection(txn, ns, options));
        if( !rs )
            logOp(txn, "n", "", BSONObj() );
        wunit.commit();

        /* sync here so we don't get any surprising lag later when we try to sync */
        StorageEngine* storageEngine = getGlobalEnvironment()->getGlobalStorageEngine();
        storageEngine->flushAllFiles(true);
        log() << "******" << endl;
    }
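The default-size computation in the non-Apple 64-bit branch reduces to: start at 990 MB, grow to 5% of free disk space, and cap at 50 GB (reached once 1 TB is free). The same arithmetic as a standalone function (a hypothetical helper, faithful to the branch above):

#include <cstdint>

// max(990MB, 5% of free space), capped at 50GB. A negative freeBytes means
// the free-space query was unsupported (File::freeSpace returns -1), in which
// case the 990MB floor is kept.
int64_t defaultOplogSizeBytes(double freeBytes) {
    int64_t sz = 990LL * 1024 * 1024;                      // 990 MB floor
    const int64_t upperBound = 50LL * 1024 * 1024 * 1024;  // 50 GB cap
    const int64_t fivePct = static_cast<int64_t>(freeBytes * 0.05);
    if (fivePct > sz)
        sz = fivePct;
    if (fivePct > upperBound)
        sz = upperBound;
    return sz;
}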
Example #8
Status waitForWriteConcern(OperationContext* txn,
                           const OpTime& replOpTime,
                           const WriteConcernOptions& writeConcern,
                           WriteConcernResult* result) {
    LOG(2) << "Waiting for write concern. OpTime: " << replOpTime
           << ", write concern: " << writeConcern.toBSON();
    auto replCoord = repl::ReplicationCoordinator::get(txn);

    MONGO_FAIL_POINT_PAUSE_WHILE_SET(hangBeforeWaitingForWriteConcern);

    // Next handle blocking on disk
    Timer syncTimer;
    WriteConcernOptions writeConcernWithPopulatedSyncMode =
        replCoord->populateUnsetWriteConcernOptionsSyncMode(writeConcern);

    switch (writeConcernWithPopulatedSyncMode.syncMode) {
        case WriteConcernOptions::SyncMode::UNSET:
            severe() << "Attempting to wait on a WriteConcern with an unset sync option";
            fassertFailed(34410);
        case WriteConcernOptions::SyncMode::NONE:
            break;
        case WriteConcernOptions::SyncMode::FSYNC: {
            StorageEngine* storageEngine = getGlobalServiceContext()->getGlobalStorageEngine();
            if (!storageEngine->isDurable()) {
                result->fsyncFiles = storageEngine->flushAllFiles(true);
            } else {
                // We only need to commit the journal if we're durable
                txn->recoveryUnit()->waitUntilDurable();
            }
            break;
        }
        case WriteConcernOptions::SyncMode::JOURNAL:
            if (replCoord->getReplicationMode() != repl::ReplicationCoordinator::Mode::modeNone) {
                // Wait for ops to become durable then update replication system's
                // knowledge of this.
                OpTime appliedOpTime = replCoord->getMyLastAppliedOpTime();
                txn->recoveryUnit()->waitUntilDurable();
                replCoord->setMyLastDurableOpTimeForward(appliedOpTime);
            } else {
                txn->recoveryUnit()->waitUntilDurable();
            }
            break;
    }

    result->syncMillis = syncTimer.millis();

    // Now wait for replication

    if (replOpTime.isNull()) {
        // no write happened for this client yet
        return Status::OK();
    }

    // needed to avoid incrementing gleWtimeStats SERVER-9005
    if (writeConcernWithPopulatedSyncMode.wNumNodes <= 1 &&
        writeConcernWithPopulatedSyncMode.wMode.empty()) {
        // no desired replication check
        return Status::OK();
    }

    // Replica set stepdowns and gle mode changes are thrown as errors
    repl::ReplicationCoordinator::StatusAndDuration replStatus =
        replCoord->awaitReplication(txn, replOpTime, writeConcernWithPopulatedSyncMode);
    if (replStatus.status == ErrorCodes::WriteConcernFailed) {
        gleWtimeouts.increment();
        result->err = "timeout";
        result->wTimedOut = true;
    }

    // Add stats
    result->writtenTo = repl::getGlobalReplicationCoordinator()->getHostsWrittenTo(
        replOpTime,
        writeConcernWithPopulatedSyncMode.syncMode == WriteConcernOptions::SyncMode::JOURNAL);
    gleWtimeStats.recordMillis(durationCount<Milliseconds>(replStatus.duration));
    result->wTime = durationCount<Milliseconds>(replStatus.duration);

    return replStatus.status;
}
Example #9
Status AuthzManagerExternalStateMongod::getAllDatabaseNames(
            OperationContext* txn, std::vector<std::string>* dbnames) {
    StorageEngine* storageEngine = getGlobalEnvironment()->getGlobalStorageEngine();
    storageEngine->listDatabases(dbnames);
    return Status::OK();
}
Example #10
Status waitForWriteConcern(OperationContext* txn,
                           const OpTime& replOpTime,
                           const WriteConcernOptions& writeConcern,
                           WriteConcernResult* result) {
    // We assume all options have been validated earlier; if not, it is a programming error.
    // Passing the localDB name is a hack to avoid the more rigorous check performed for
    // non-local DBs.
    dassert(validateWriteConcern(txn, writeConcern, kLocalDB).isOK());

    // We should never be waiting for write concern while holding any sort of lock, because this may
    // lead to situations where the replication heartbeats are stalled.
    //
    // This check does not hold for writes done through dbeval because it runs with a global X lock.
    dassert(!txn->lockState()->isLocked() || txn->getClient()->isInDirectClient());

    // Next handle blocking on disk

    Timer syncTimer;
    auto replCoord = repl::getGlobalReplicationCoordinator();
    WriteConcernOptions writeConcernWithPopulatedSyncMode =
        replCoord->populateUnsetWriteConcernOptionsSyncMode(writeConcern);


    switch (writeConcernWithPopulatedSyncMode.syncMode) {
        case WriteConcernOptions::SyncMode::UNSET:
            severe() << "Attempting to wait on a WriteConcern with an unset sync option";
            fassertFailed(34410);
        case WriteConcernOptions::SyncMode::NONE:
            break;
        case WriteConcernOptions::SyncMode::FSYNC: {
            StorageEngine* storageEngine = getGlobalServiceContext()->getGlobalStorageEngine();
            if (!storageEngine->isDurable()) {
                result->fsyncFiles = storageEngine->flushAllFiles(true);
            } else {
                // We only need to commit the journal if we're durable
                txn->recoveryUnit()->waitUntilDurable();
            }
            break;
        }
        case WriteConcernOptions::SyncMode::JOURNAL:
            if (replCoord->getReplicationMode() != repl::ReplicationCoordinator::Mode::modeNone) {
                // Wait for ops to become durable then update replication system's
                // knowledge of this.
                OpTime appliedOpTime = replCoord->getMyLastAppliedOpTime();
                txn->recoveryUnit()->waitUntilDurable();
                replCoord->setMyLastDurableOpTimeForward(appliedOpTime);
            } else {
                txn->recoveryUnit()->waitUntilDurable();
            }
            break;
    }

    result->syncMillis = syncTimer.millis();

    // Now wait for replication

    if (replOpTime.isNull()) {
        // no write happened for this client yet
        return Status::OK();
    }

    // needed to avoid incrementing gleWtimeStats SERVER-9005
    if (writeConcernWithPopulatedSyncMode.wNumNodes <= 1 &&
        writeConcernWithPopulatedSyncMode.wMode.empty()) {
        // no desired replication check
        return Status::OK();
    }

    // Note that replica set stepdowns and gle mode changes are thrown as errors
    repl::ReplicationCoordinator::StatusAndDuration replStatus =
        repl::getGlobalReplicationCoordinator()->awaitReplication(
            txn, replOpTime, writeConcernWithPopulatedSyncMode);
    if (replStatus.status == ErrorCodes::WriteConcernFailed) {
        gleWtimeouts.increment();
        result->err = "timeout";
        result->wTimedOut = true;
    }
    // Add stats
    result->writtenTo = repl::getGlobalReplicationCoordinator()->getHostsWrittenTo(
        replOpTime,
        writeConcernWithPopulatedSyncMode.syncMode == WriteConcernOptions::SyncMode::JOURNAL);
    gleWtimeStats.recordMillis(durationCount<Milliseconds>(replStatus.duration));
    result->wTime = durationCount<Milliseconds>(replStatus.duration);

    return replStatus.status;
}
Example #11
    bool run(OperationContext* opCtx,
             const string& dbname,
             const BSONObj& cmdObj,
             BSONObjBuilder& result) final {
        CommandHelpers::handleMarkKillOnClientDisconnect(opCtx);
        IDLParserErrorContext ctx("listDatabases");
        auto cmd = ListDatabasesCommand::parse(ctx, cmdObj);
        auto* as = AuthorizationSession::get(opCtx->getClient());

        // {nameOnly: bool} - default false.
        const bool nameOnly = cmd.getNameOnly();

        // {authorizedDatabases: bool} - Dynamic default based on permissions.
        const bool authorizedDatabases = ([as](const boost::optional<bool>& authDB) {
            const bool mayListAllDatabases = as->isAuthorizedForActionsOnResource(
                ResourcePattern::forClusterResource(), ActionType::listDatabases);

            if (authDB) {
                uassert(ErrorCodes::Unauthorized,
                        "Insufficient permissions to list all databases",
                        authDB.get() || mayListAllDatabases);
                return authDB.get();
            }

            // By default, list all databases if we can, otherwise
            // only those we're allowed to find on.
            return !mayListAllDatabases;
        })(cmd.getAuthorizedDatabases());

        // {filter: matchExpression}.
        std::unique_ptr<MatchExpression> filter;
        if (auto filterObj = cmd.getFilter()) {
            // The collator is null because database metadata objects are compared using simple
            // binary comparison.
            const CollatorInterface* collator = nullptr;
            boost::intrusive_ptr<ExpressionContext> expCtx(new ExpressionContext(opCtx, collator));
            auto matcher =
                uassertStatusOK(MatchExpressionParser::parse(filterObj.get(), std::move(expCtx)));
            filter = std::move(matcher);
        }

        vector<string> dbNames;
        StorageEngine* storageEngine = getGlobalServiceContext()->getStorageEngine();
        {
            Lock::GlobalLock lk(opCtx, MODE_IS);
            CurOpFailpointHelpers::waitWhileFailPointEnabled(
                &hangBeforeListDatabases, opCtx, "hangBeforeListDatabases", []() {});
            dbNames = storageEngine->listDatabases();
        }

        vector<BSONObj> dbInfos;

        const bool filterNameOnly = filter &&
            filter->getCategory() == MatchExpression::MatchCategory::kLeaf &&
            filter->path() == kNameField;
        intmax_t totalSize = 0;
        for (const auto& dbname : dbNames) {
            if (authorizedDatabases && !as->isAuthorizedForAnyActionOnAnyResourceInDB(dbname)) {
                // We don't have listDatabases on the cluster or find on this database.
                continue;
            }

            BSONObjBuilder b;
            b.append("name", dbname);

            int64_t size = 0;
            if (!nameOnly) {
                // Filtering on name only should not require taking locks on filtered-out names.
                if (filterNameOnly && !filter->matchesBSON(b.asTempObj()))
                    continue;

                AutoGetDb autoDb(opCtx, dbname, MODE_IS);
                Database* const db = autoDb.getDb();
                if (!db)
                    continue;

                writeConflictRetry(opCtx, "sizeOnDisk", dbname, [&] {
                    size = storageEngine->sizeOnDiskForDb(opCtx, dbname);
                });
                b.append("sizeOnDisk", static_cast<double>(size));

                b.appendBool(
                    "empty",
                    CollectionCatalog::get(opCtx).getAllCollectionUUIDsFromDb(dbname).empty());
            }
            BSONObj curDbObj = b.obj();

            if (!filter || filter->matchesBSON(curDbObj)) {
                totalSize += size;
                dbInfos.push_back(curDbObj);
            }
        }

        result.append("databases", dbInfos);
        if (!nameOnly) {
            result.append("totalSize", double(totalSize));
        }
        return true;
    }
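The authorizedDatabases default above is computed with an immediately invoked lambda: an explicitly passed flag is validated against the caller's privileges, while an absent flag falls back to a privilege-dependent default. The same idiom in isolation (illustrative names, C++17):

#include <optional>
#include <stdexcept>

// Sketch of the immediately-invoked-lambda idiom used for 'authorizedDatabases'.
// Requesting the full list (false) without the cluster-wide privilege fails;
// an absent flag defaults to restricting the list exactly when the caller may
// not list all databases.
bool resolveAuthorizedDatabases(std::optional<bool> requested, bool mayListAll) {
    return [&] {
        if (requested) {
            if (!*requested && !mayListAll)
                throw std::runtime_error("Insufficient permissions to list all databases");
            return *requested;
        }
        return !mayListAll;
    }();
}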
Example #12
    // Run at startup.
    static void repairDatabasesAndCheckVersion(bool shouldClearNonLocalTmpCollections) {
        LOG(1) << "enter repairDatabases (to check pdfile version #)" << endl;

        OperationContextImpl txn;
        Lock::GlobalWrite lk(txn.lockState());

        vector< string > dbNames;

        StorageEngine* storageEngine = getGlobalEnvironment()->getGlobalStorageEngine();
        storageEngine->listDatabases( &dbNames );

        for ( vector< string >::iterator i = dbNames.begin(); i != dbNames.end(); ++i ) {
            string dbName = *i;
            LOG(1) << "\t" << dbName << endl;

            Client::Context ctx(&txn,  dbName );

            if (repl::getGlobalReplicationCoordinator()->getSettings().usingReplSets()) {
                // we only care about the _id index if we are in a replset
                checkForIdIndexes(&txn, ctx.db());
            }

            if (shouldClearNonLocalTmpCollections || dbName == "local")
                ctx.db()->clearTmpCollections(&txn);

            if ( storageGlobalParams.repair ) {
                fassert(18506, storageEngine->repairDatabase(&txn, dbName));
            }
            else if (!ctx.db()->getDatabaseCatalogEntry()->currentFilesCompatible(&txn)) {
                log() << "****";
                log() << "cannot do this upgrade without an upgrade in the middle";
                log() << "please do a --repair with 2.6 and then start this version";
                dbexit( EXIT_NEED_UPGRADE );
                invariant( false );
                return;
            }
            else {
                // major versions match, check indexes

                const string systemIndexes = ctx.db()->name() + ".system.indexes";
                Collection* coll = ctx.db()->getCollection( &txn, systemIndexes );
                auto_ptr<PlanExecutor> exec(
                    InternalPlanner::collectionScan(&txn, systemIndexes,coll));
                BSONObj index;
                PlanExecutor::ExecState state;
                while (PlanExecutor::ADVANCED == (state = exec->getNext(&index, NULL))) {
                    const BSONObj key = index.getObjectField("key");
                    const string plugin = IndexNames::findPluginName(key);

                    if (ctx.db()->getDatabaseCatalogEntry()->isOlderThan24(&txn)) {
                        if (IndexNames::existedBefore24(plugin))
                            continue;

                        log() << "Index " << index << " claims to be of type '" << plugin << "', "
                              << "which is either invalid or did not exist before v2.4. "
                              << "See the upgrade section: "
                              << "http://dochub.mongodb.org/core/upgrade-2.4"
                              << startupWarningsLog;
                    }

                    const Status keyStatus = validateKeyPattern(key);
                    if (!keyStatus.isOK()) {
                        log() << "Problem with index " << index << ": " << keyStatus.reason()
                              << " This index can still be used however it cannot be rebuilt."
                              << " For more info see"
                              << " http://dochub.mongodb.org/core/index-validation"
                              << startupWarningsLog;
                    }
                }

                if (PlanExecutor::IS_EOF != state) {
                    warning() << "Internal error while reading collection " << systemIndexes;
                }

                dbHolder().close( &txn, dbName );
            }
        }

        LOG(1) << "done repairDatabases" << endl;
    }
Example #13
void FSyncLockThread::run() {
    ThreadClient tc("fsyncLockWorker", getGlobalServiceContext());
    stdx::lock_guard<SimpleMutex> lkf(filesLockedFsync);
    stdx::unique_lock<stdx::mutex> lk(fsyncCmd.lockStateMutex);

    invariant(fsyncCmd.getLockCount_inLock() == 1);

    try {
        const ServiceContext::UniqueOperationContext opCtxPtr = cc().makeOperationContext();
        OperationContext& opCtx = *opCtxPtr;
        Lock::GlobalRead global(&opCtx);  // Block any writes in order to flush the files.

        StorageEngine* storageEngine = getGlobalServiceContext()->getStorageEngine();

        try {
            storageEngine->flushAllFiles(&opCtx, true);
        } catch (const std::exception& e) {
            error() << "error doing flushAll: " << e.what();
            fsyncCmd.threadStatus = Status(ErrorCodes::CommandFailed, e.what());
            fsyncCmd.acquireFsyncLockSyncCV.notify_one();
            return;
        }

        bool successfulFsyncLock = false;
        auto backupCursorHooks = BackupCursorHooks::get(opCtx.getServiceContext());
        try {
            writeConflictRetry(&opCtx,
                               "beginBackup",
                               "global",
                               [&opCtx, backupCursorHooks, &successfulFsyncLock, storageEngine] {
                                   if (backupCursorHooks->enabled()) {
                                       backupCursorHooks->fsyncLock(&opCtx);
                                       successfulFsyncLock = true;
                                   } else {
                                       // Have the uassert be caught by the DBException
                                       // block. Maintain "allowFsyncFailure" compatibility in
                                       // community.
                                       uassertStatusOK(storageEngine->beginBackup(&opCtx));
                                       successfulFsyncLock = true;
                                   }
                               });
        } catch (const DBException& e) {
            if (_allowFsyncFailure) {
                warning() << "Locking despite storage engine being unable to begin backup : "
                          << e.toString();
                opCtx.recoveryUnit()->waitUntilDurable();
            } else {
                error() << "storage engine unable to begin backup : " << e.toString();
                fsyncCmd.threadStatus = e.toStatus();
                fsyncCmd.acquireFsyncLockSyncCV.notify_one();
                return;
            }
        }

        fsyncCmd.threadStarted = true;
        fsyncCmd.acquireFsyncLockSyncCV.notify_one();

        while (fsyncCmd.getLockCount_inLock() > 0) {
            fsyncCmd.releaseFsyncLockSyncCV.wait(lk);
        }

        if (successfulFsyncLock) {
            if (backupCursorHooks->enabled()) {
                backupCursorHooks->fsyncUnlock(&opCtx);
            } else {
                storageEngine->endBackup(&opCtx);
            }
        }

    } catch (const std::exception& e) {
        severe() << "FSyncLockThread exception: " << e.what();
        fassertFailed(40350);
    }
}
Example #14
    virtual bool errmsgRun(OperationContext* opCtx,
                           const string& dbname,
                           const BSONObj& cmdObj,
                           string& errmsg,
                           BSONObjBuilder& result) {
        if (opCtx->lockState()->isLocked()) {
            errmsg = "fsync: Cannot execute fsync command from contexts that hold a data lock";
            return false;
        }

        const bool sync =
            !cmdObj["async"].trueValue();  // async means do an fsync, but return immediately
        const bool lock = cmdObj["lock"].trueValue();
        log() << "CMD fsync: sync:" << sync << " lock:" << lock;

        // fsync + lock is sometimes used to block writes out of the system and does not care if
        // the `BackupCursorService::fsyncLock` call succeeds.
        const bool allowFsyncFailure =
            getTestCommandsEnabled() && cmdObj["allowFsyncFailure"].trueValue();

        if (!lock) {
            // Take a global IS lock to ensure the storage engine is not shutdown
            Lock::GlobalLock global(opCtx, MODE_IS);
            StorageEngine* storageEngine = getGlobalServiceContext()->getStorageEngine();
            result.append("numFiles", storageEngine->flushAllFiles(opCtx, sync));
            return true;
        }

        Lock::ExclusiveLock lk(opCtx->lockState(), commandMutex);
        if (!sync) {
            errmsg = "fsync: sync option must be true when using lock";
            return false;
        }

        const auto lockCountAtStart = getLockCount();
        invariant(lockCountAtStart > 0 || !_lockThread);

        acquireLock();

        if (lockCountAtStart == 0) {

            Status status = Status::OK();
            {
                stdx::unique_lock<stdx::mutex> lk(lockStateMutex);
                threadStatus = Status::OK();
                threadStarted = false;
                _lockThread = stdx::make_unique<FSyncLockThread>(allowFsyncFailure);
                _lockThread->go();

                while (!threadStarted && threadStatus.isOK()) {
                    acquireFsyncLockSyncCV.wait(lk);
                }

                // 'threadStatus' must be copied while 'lockStateMutex' is held.
                status = threadStatus;
            }

            if (!status.isOK()) {
                releaseLock();
                warning() << "fsyncLock failed. Lock count reset to 0. Status: " << status;
                uassertStatusOK(status);
            }
        }

        log() << "mongod is locked and no writes are allowed. db.fsyncUnlock() to unlock";
        log() << "Lock count is " << getLockCount();
        log() << "    For more info see " << FSyncCommand::url();
        result.append("info", "now locked against writes, use db.fsyncUnlock() to unlock");
        result.append("lockCount", getLockCount());
        result.append("seeAlso", FSyncCommand::url());

        return true;
    }
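Examples #13 and #14 are the two halves of a condition-variable handshake: the command thread spawns FSyncLockThread and blocks on acquireFsyncLockSyncCV until the worker either reports threadStarted or records a failure status. Reduced to standard primitives (a sketch of the handshake only; the real worker stays alive holding the global lock):

#include <condition_variable>
#include <mutex>
#include <stdexcept>
#include <thread>

std::mutex lockStateMutex;
std::condition_variable acquireCV;
bool workerStarted = false;
bool workerFailed = false;  // stands in for the Status in the examples above

// Worker half (cf. Example #13): do the setup, then signal success or failure.
void lockWorker() {
    try {
        // ... flush files / begin backup ...
        std::lock_guard<std::mutex> lk(lockStateMutex);
        workerStarted = true;
    } catch (const std::exception&) {
        std::lock_guard<std::mutex> lk(lockStateMutex);
        workerFailed = true;
    }
    acquireCV.notify_one();
}

// Initiator half (cf. Example #14): start the worker and wait for either
// outcome; the predicate form of wait() guards against spurious wakeups.
void runFsyncLock() {
    std::thread worker(lockWorker);
    bool failed;
    {
        std::unique_lock<std::mutex> lk(lockStateMutex);
        acquireCV.wait(lk, [] { return workerStarted || workerFailed; });
        failed = workerFailed;
    }
    worker.join();
    if (failed)
        throw std::runtime_error("fsyncLock failed");
}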