Example 1
// static
void QueryYield::yieldAllLocks(OperationContext* opCtx,
                               stdx::function<void()> whileYieldingFn,
                               const NamespaceString& planExecNS) {
    // Things have to happen here in a specific order:
    //   * Release lock mgr locks
    //   * Go to sleep
    //   * Call the whileYieldingFn
    //   * Reacquire lock mgr locks

    Locker* locker = opCtx->lockState();

    Locker::LockSnapshot snapshot;

    // If nothing was unlocked, just return; yielding would be pointless.
    if (!locker->saveLockStateAndUnlock(&snapshot)) {
        return;
    }

    // Top-level locks are freed, so release any potential low-level (storage engine-specific)
    // locks. If we are yielding, we are at a safe place to do so.
    opCtx->recoveryUnit()->abandonSnapshot();

    // Track the number of yields in CurOp.
    CurOp::get(opCtx)->yielded();

    MONGO_FAIL_POINT_BLOCK(setYieldAllLocksHang, config) {
        StringData ns{config.getData().getStringField("namespace")};
        if (ns.empty() || ns == planExecNS.ns()) {
            MONGO_FAIL_POINT_PAUSE_WHILE_SET(setYieldAllLocksHang);
        }
    }

    if (whileYieldingFn) {
        whileYieldingFn();
    }

    // Reacquire lock mgr locks: the final step of the ordering comment above.
    locker->restoreLockState(opCtx, snapshot);
}
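The ordering contract spelled out in the comment at the top (release, sleep/callback, reacquire) is the essence of the yield. Here is a minimal standalone sketch of that shape using a plain std::mutex in place of the lock manager; YieldableLock and everything else in it are illustrative names, not MongoDB API:

#include <functional>
#include <mutex>

// Illustrative stand-in for the lock manager state: the constructor takes
// the lock, and yield() releases it, runs the callback while unlocked, then
// reacquires it before returning.
class YieldableLock {
public:
    explicit YieldableLock(std::mutex& m) : _m(m) {
        _m.lock();
    }
    ~YieldableLock() {
        _m.unlock();
    }

    void yield(const std::function<void()>& whileYieldingFn) {
        _m.unlock();                // release lock mgr locks
        if (whileYieldingFn) {
            whileYieldingFn();      // run the callback while unlocked
        }
        _m.lock();                  // reacquire before returning
    }

private:
    std::mutex& _m;
};

The design point mirrored here is that the yield is invisible to the caller: whatever was held on entry is held again on exit, and only the callback observes the unlocked window.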
Example 2
void IndexBuildInterceptor::_tryYield(OperationContext* opCtx) {
    // Never yield while holding locks that prevent writes to the collection: only yield while
    // holding intent locks. This check considers all locks in the hierarchy that would cover this
    // mode.
    const NamespaceString nss(_indexCatalogEntry->ns());
    if (opCtx->lockState()->isCollectionLockedForMode(nss, MODE_S)) {
        return;
    }
    DEV {
        invariant(!opCtx->lockState()->isCollectionLockedForMode(nss, MODE_X));
        invariant(!opCtx->lockState()->isDbLockedForMode(nss.db(), MODE_X));
    }

    // Releasing locks means a new snapshot should be acquired when restored.
    opCtx->recoveryUnit()->abandonSnapshot();

    auto locker = opCtx->lockState();
    Locker::LockSnapshot snapshot;
    invariant(locker->saveLockStateAndUnlock(&snapshot));

    // Track the number of yields in CurOp.
    CurOp::get(opCtx)->yielded();

    MONGO_FAIL_POINT_BLOCK(hangDuringIndexBuildDrainYield, config) {
        StringData ns{config.getData().getStringField("namespace")};
        if (ns == _indexCatalogEntry->ns()) {
            log() << "Hanging index build during drain yield";
            MONGO_FAIL_POINT_PAUSE_WHILE_SET(hangDuringIndexBuildDrainYield);
        }
    }

    // Reacquire the locks released above; as noted earlier, a fresh storage
    // snapshot will be acquired when the operation resumes.
    locker->restoreLockState(opCtx, snapshot);
}
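The guard at the top of _tryYield() encodes a general rule: only yield while holding intent locks, never while holding strong (S/X) locks that exclude other operations. A self-contained sketch of that predicate follows; LockMode and isSafeToYield are made-up names, not MongoDB's Locker API:

#include <cassert>

// Intent modes admit concurrent readers and writers, so giving up our place
// is harmless; strong modes exclude other operations that may be waiting on
// us, so sleeping while holding them would stall them indefinitely.
enum class LockMode { kIntentShared, kIntentExclusive, kShared, kExclusive };

bool isSafeToYield(LockMode held) {
    return held == LockMode::kIntentShared || held == LockMode::kIntentExclusive;
}

int main() {
    assert(isSafeToYield(LockMode::kIntentExclusive));  // drain holds MODE_IX
    assert(!isSafeToYield(LockMode::kShared));          // MODE_S: skip the yield
    return 0;
}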
Example 3
Status PlanYieldPolicy::yield(stdx::function<void()> beforeYieldingFn,
                              stdx::function<void()> whileYieldingFn) {
    invariant(_planYielding);
    invariant(canAutoYield());

    // After we finish yielding (or in any early return), call resetTimer() to prevent yielding
    // again right away. We delay the resetTimer() call so that the clock doesn't start ticking
    // until after we return from the yield.
    ON_BLOCK_EXIT([this]() { resetTimer(); });

    _forceYield = false;

    OperationContext* opCtx = _planYielding->getOpCtx();
    invariant(opCtx);
    invariant(!opCtx->lockState()->inAWriteUnitOfWork());

    // Can't use writeConflictRetry since we need to call saveState before resetting the transaction.
    for (int attempt = 1; true; attempt++) {
        try {
            // All YIELD_AUTO plans will get here eventually when the elapsed tracker triggers
            // that it's time to yield. Whether or not we will actually yield, we need to check
            // if this operation has been interrupted.
            if (_policy == PlanExecutor::YIELD_AUTO) {
                MONGO_FAIL_POINT_PAUSE_WHILE_SET(setCheckForInterruptHang);

                auto interruptStatus = opCtx->checkForInterruptNoAssert();
                if (!interruptStatus.isOK()) {
                    return interruptStatus;
                }
            }

            try {
                _planYielding->saveState();
            } catch (const WriteConflictException&) {
                invariant(!"WriteConflictException not allowed in saveState");
            }

            if (_policy == PlanExecutor::WRITE_CONFLICT_RETRY_ONLY) {
                // Just reset the snapshot. Leave all LockManager locks alone.
                opCtx->recoveryUnit()->abandonSnapshot();
            } else {
                // Release and reacquire locks.
                if (beforeYieldingFn)
                    beforeYieldingFn();
                QueryYield::yieldAllLocks(opCtx, whileYieldingFn, _planYielding->nss());
            }

            return _planYielding->restoreStateWithoutRetrying();
        } catch (const WriteConflictException&) {
            CurOp::get(opCtx)->debug().writeConflicts++;
            WriteConflictException::logAndBackoff(
                attempt, "plan execution restoreState", _planYielding->nss().ns());
            // retry
        }
    }
}
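Two idioms carry this function: a scope guard (ON_BLOCK_EXIT) that resets the timer on every exit path, and an unbounded retry loop that backs off after each WriteConflictException. The following standalone sketch shows the retry shape in standard C++; ConflictException and the linear sleep schedule are stand-ins, not the real logAndBackoff() behavior:

#include <chrono>
#include <thread>

struct ConflictException {};  // stand-in for WriteConflictException

// Retry fn() until it succeeds, sleeping a little longer after each
// conflict so the conflicting writer has a chance to finish.
template <typename Fn>
auto retryOnConflict(Fn fn) -> decltype(fn()) {
    for (int attempt = 1; true; ++attempt) {
        try {
            return fn();
        } catch (const ConflictException&) {
            std::this_thread::sleep_for(std::chrono::milliseconds(attempt));
        }
    }
}

Note that, as in the original, the loop has no attempt cap: a write conflict is treated as transient, and the only ways out are success or an exception of some other type.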
Example 4
Status PlanYieldPolicy::yieldOrInterrupt(stdx::function<void()> beforeYieldingFn,
                                         stdx::function<void()> whileYieldingFn) {
    if (_policy == PlanExecutor::INTERRUPT_ONLY) {
        ON_BLOCK_EXIT([this]() { resetTimer(); });
        OperationContext* opCtx = _planYielding->getOpCtx();
        invariant(opCtx);
        MONGO_FAIL_POINT_PAUSE_WHILE_SET(setCheckForInterruptHang);
        return opCtx->checkForInterruptNoAssert();
    }

    return yield(beforeYieldingFn, whileYieldingFn);
}
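The split here is the whole point of INTERRUPT_ONLY: check whether the operation was killed, but keep every lock and storage snapshot. A minimal sketch of the dispatch, where Policy, Status, and the function names are all illustrative stand-ins:

#include <atomic>

enum class Policy { kInterruptOnly, kYieldAuto };
struct Status { bool ok; };

Status checkForInterrupt(const std::atomic<bool>& killed) {
    return Status{!killed.load()};  // fail the operation if it was killed
}

Status fullYield() {
    // Placeholder: the real path saves execution state, releases locks,
    // and restores both afterwards.
    return Status{true};
}

Status yieldOrInterrupt(Policy policy, const std::atomic<bool>& killed) {
    if (policy == Policy::kInterruptOnly) {
        return checkForInterrupt(killed);  // no locks or snapshots released
    }
    return fullYield();
}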
Example 5
Status dropCollection(OperationContext* opCtx,
                      const NamespaceString& collectionName,
                      BSONObjBuilder& result,
                      const repl::OpTime& dropOpTime,
                      DropCollectionSystemCollectionMode systemCollectionMode) {
    if (!serverGlobalParams.quiet.load()) {
        log() << "CMD: drop " << collectionName;
    }

    return writeConflictRetry(opCtx, "drop", collectionName.ns(), [&] {
        AutoGetDb autoDb(opCtx, collectionName.db(), MODE_X);
        Database* const db = autoDb.getDb();
        Collection* coll = db ? db->getCollection(opCtx, collectionName) : nullptr;
        auto view =
            db && !coll ? db->getViewCatalog()->lookup(opCtx, collectionName.ns()) : nullptr;

        if (MONGO_FAIL_POINT(hangDuringDropCollection)) {
            log() << "hangDuringDropCollection fail point enabled. Blocking until fail point is "
                     "disabled.";
            MONGO_FAIL_POINT_PAUSE_WHILE_SET(hangDuringDropCollection);
        }

        if (!db || (!coll && !view)) {
            return Status(ErrorCodes::NamespaceNotFound, "ns not found");
        }

        const bool shardVersionCheck = true;
        OldClientContext context(opCtx, collectionName.ns(), shardVersionCheck);

        bool userInitiatedWritesAndNotPrimary = opCtx->writesAreReplicated() &&
            !repl::ReplicationCoordinator::get(opCtx)->canAcceptWritesFor(opCtx, collectionName);

        if (userInitiatedWritesAndNotPrimary) {
            return Status(ErrorCodes::NotMaster,
                          str::stream() << "Not primary while dropping collection "
                                        << collectionName);
        }

        WriteUnitOfWork wunit(opCtx);
        if (!result.hasField("ns")) {
            result.append("ns", collectionName.ns());
        }

        if (coll) {
            invariant(!view);
            int numIndexes = coll->getIndexCatalog()->numIndexesTotal(opCtx);

            BackgroundOperation::assertNoBgOpInProgForNs(collectionName.ns());

            Status s = systemCollectionMode ==
                    DropCollectionSystemCollectionMode::kDisallowSystemCollectionDrops
                ? db->dropCollection(opCtx, collectionName.ns(), dropOpTime)
                : db->dropCollectionEvenIfSystem(opCtx, collectionName, dropOpTime);

            if (!s.isOK()) {
                return s;
            }

            result.append("nIndexesWas", numIndexes);
        } else {
            invariant(view);
            Status status = db->dropView(opCtx, collectionName.ns());
            if (!status.isOK()) {
                return status;
            }
        }
        wunit.commit();

        return Status::OK();
    });
}
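One easy-to-miss detail in this example: the lambda passed to writeConflictRetry may run more than once, which is why the "ns" field is appended only behind the hasField() check. A toy builder makes the hazard concrete; ToyBuilder is illustrative, not the real BSONObjBuilder API:

#include <cassert>
#include <set>
#include <string>

class ToyBuilder {
public:
    bool hasField(const std::string& name) const {
        return _fields.count(name) != 0;
    }
    void append(const std::string& name) {
        _fields.insert(name);
    }
    std::size_t size() const {
        return _fields.size();
    }

private:
    std::set<std::string> _fields;
};

int main() {
    ToyBuilder result;
    for (int attempt = 0; attempt < 3; ++attempt) {  // simulate a retried lambda
        if (!result.hasField("ns")) {
            result.append("ns");  // runs exactly once across all retries
        }
    }
    assert(result.size() == 1);
    return 0;
}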
Example 6
Status MigrationSourceManager::commitDonateChunk(OperationContext* txn) {
    invariant(!txn->lockState()->isLocked());
    invariant(_state == kCriticalSection);
    auto scopedGuard = MakeGuard([&] { cleanupOnError(txn); });

    // Tell the recipient shard to fetch the latest changes
    Status commitCloneStatus = _cloneDriver->commitClone(txn);

    if (MONGO_FAIL_POINT(failMigrationCommit) && commitCloneStatus.isOK()) {
        commitCloneStatus = {ErrorCodes::InternalError,
                             "Failing _recvChunkCommit due to failpoint."};
    }

    if (!commitCloneStatus.isOK()) {
        return {commitCloneStatus.code(),
                str::stream() << "commit clone failed due to " << commitCloneStatus.toString()};
    }

    // Generate the next collection version.
    ChunkVersion uncommittedCollVersion = _committedMetadata->getCollVersion();
    uncommittedCollVersion.incMajor();

    // applyOps preparation for reflecting the uncommitted metadata on the config server

    // Preconditions
    BSONArrayBuilder preCond;
    {
        BSONObjBuilder b;
        b.append("ns", ChunkType::ConfigNS);
        b.append("q",
                 BSON("query" << BSON(ChunkType::ns(_args.getNss().ns())) << "orderby"
                              << BSON(ChunkType::DEPRECATED_lastmod() << -1)));
        {
            BSONObjBuilder bb(b.subobjStart("res"));

            // TODO: For backwards compatibility, we can't yet require an epoch here
            bb.appendTimestamp(ChunkType::DEPRECATED_lastmod(),
                               _committedMetadata->getCollVersion().toLong());
            bb.done();
        }

        preCond.append(b.obj());
    }

    // Update for the chunk which is being donated
    BSONArrayBuilder updates;
    {
        BSONObjBuilder op;
        op.append("op", "u");
        op.appendBool("b", false);  // No upserting
        op.append("ns", ChunkType::ConfigNS);

        BSONObjBuilder n(op.subobjStart("o"));
        n.append(ChunkType::name(), ChunkType::genID(_args.getNss().ns(), _args.getMinKey()));
        uncommittedCollVersion.addToBSON(n, ChunkType::DEPRECATED_lastmod());
        n.append(ChunkType::ns(), _args.getNss().ns());
        n.append(ChunkType::min(), _args.getMinKey());
        n.append(ChunkType::max(), _args.getMaxKey());
        n.append(ChunkType::shard(), _args.getToShardId());
        n.done();

        BSONObjBuilder q(op.subobjStart("o2"));
        q.append(ChunkType::name(), ChunkType::genID(_args.getNss().ns(), _args.getMinKey()));
        q.done();

        updates.append(op.obj());
    }

    // Update for the control chunk that remains on the donor shard (if any)

    // Version at which the next highest lastmod will be set. If the chunk being moved is the
    // last one on the shard, nextVersion is that chunk's lastmod; otherwise, the highest
    // version comes from the chunk being bumped on the FROM-shard.
    ChunkVersion nextVersion = uncommittedCollVersion;

    // If we have chunks left on the FROM shard, update the version of one of them as well. We can
    // figure that out by grabbing the metadata as it has been changed.
    if (_committedMetadata->getNumChunks() > 1) {
        ChunkType bumpChunk;
        invariant(_committedMetadata->getDifferentChunk(_args.getMinKey(), &bumpChunk));

        BSONObj bumpMin = bumpChunk.getMin();
        BSONObj bumpMax = bumpChunk.getMax();
        nextVersion.incMinor();

        dassert(bumpMin.woCompare(_args.getMinKey()) != 0);

        BSONObjBuilder op;
        op.append("op", "u");
        op.appendBool("b", false);
        op.append("ns", ChunkType::ConfigNS);

        BSONObjBuilder n(op.subobjStart("o"));
        n.append(ChunkType::name(), ChunkType::genID(_args.getNss().ns(), bumpMin));
        nextVersion.addToBSON(n, ChunkType::DEPRECATED_lastmod());
        n.append(ChunkType::ns(), _args.getNss().ns());
        n.append(ChunkType::min(), bumpMin);
        n.append(ChunkType::max(), bumpMax);
        n.append(ChunkType::shard(), _args.getFromShardId());
        n.done();

        BSONObjBuilder q(op.subobjStart("o2"));
        q.append(ChunkType::name(), ChunkType::genID(_args.getNss().ns(), bumpMin));
        q.done();

        updates.append(op.obj());

        log() << "moveChunk updating self version to: " << nextVersion << " through " << bumpMin
              << " -> " << bumpMax << " for collection '" << _args.getNss().ns() << "'";
    } else {
        log() << "moveChunk moved last chunk out for collection '" << _args.getNss().ns() << "'";
    }

    MONGO_FAIL_POINT_PAUSE_WHILE_SET(hangBeforeCommitMigration);

    Status applyOpsStatus = grid.catalogClient(txn)->applyChunkOpsDeprecated(
        txn, updates.arr(), preCond.arr(), _args.getNss().ns(), nextVersion);

    if (MONGO_FAIL_POINT(failCommitMigrationCommand)) {
        applyOpsStatus = Status(ErrorCodes::InternalError,
                                "Failpoint 'failCommitMigrationCommand' generated error");
    }

    if (applyOpsStatus.isOK()) {
        // Now that applyOps succeeded and the new collection version is committed, update the
        // collection metadata to the new collection version and forget the migrated chunk.
        ScopedTransaction scopedXact(txn, MODE_IX);
        AutoGetCollection autoColl(txn, _args.getNss(), MODE_IX, MODE_X);

        ChunkType migratingChunkToForget;
        migratingChunkToForget.setMin(_args.getMinKey());
        migratingChunkToForget.setMax(_args.getMaxKey());
        _committedMetadata =
            _committedMetadata->cloneMigrate(migratingChunkToForget, uncommittedCollVersion);
        auto css = CollectionShardingState::get(txn, _args.getNss().ns());
        css->setMetadata(_committedMetadata);
    } else {
        // This could be an unrelated error (e.g. network error). Check whether the metadata update
        // succeeded by refreshing the collection metadata from the config server and checking that
        // the original chunks no longer exist.

        warning() << "Migration metadata commit may have failed: refreshing metadata to check"
                  << causedBy(applyOpsStatus);

        // Need to get the latest optime in case the refresh request goes to a secondary --
        // otherwise the read won't wait for the write that applyChunkOpsDeprecated may have done.
        Status status = grid.catalogClient(txn)->logChange(
            txn,
            "moveChunk.validating",
            _args.getNss().ns(),
            BSON("min" << _args.getMinKey() << "max" << _args.getMaxKey() << "from"
                       << _args.getFromShardId()
                       << "to"
                       << _args.getToShardId()));
        if (!status.isOK()) {
            fassertStatusOK(
                40137,
                {status.code(),
                 str::stream() << "applyOps failed to commit chunk [" << _args.getMinKey() << ","
                               << _args.getMaxKey()
                               << ") due to "
                               << causedBy(applyOpsStatus)
                               << ", and updating the optime with a write before refreshing the "
                               << "metadata also failed: "
                               << causedBy(status)});
        }

        ShardingState* const shardingState = ShardingState::get(txn);
        ChunkVersion shardVersion;
        Status refreshStatus =
            shardingState->refreshMetadataNow(txn, _args.getNss().ns(), &shardVersion);
        fassertStatusOK(34431,
                        {refreshStatus.code(),
                         str::stream() << "applyOps failed to commit chunk [" << _args.getMinKey()
                                       << ","
                                       << _args.getMaxKey()
                                       << ") due to "
                                       << causedBy(applyOpsStatus)
                                       << ", and refreshing collection metadata failed: "
                                       << causedBy(refreshStatus)});

        {
            ScopedTransaction scopedXact(txn, MODE_IS);
            AutoGetCollection autoColl(txn, _args.getNss(), MODE_IS);

            auto css = CollectionShardingState::get(txn, _args.getNss());
            std::shared_ptr<CollectionMetadata> refreshedMetadata = css->getMetadata();

            if (refreshedMetadata->keyBelongsToMe(_args.getMinKey())) {
                invariant(refreshedMetadata->getCollVersion() ==
                          _committedMetadata->getCollVersion());

                // After refresh, the collection metadata indicates that the donor shard still owns
                // the chunk, so no migration changes were written to the config server metadata.
                return {applyOpsStatus.code(),
                        str::stream() << "Migration was not committed, applyOps failed: "
                                      << causedBy(applyOpsStatus)};
            }

            ChunkVersion refreshedCollectionVersion = refreshedMetadata->getCollVersion();
            if (!refreshedCollectionVersion.equals(nextVersion)) {
                // The refreshed collection metadata's collection version does not match the control
                // chunk's updated collection version, which should now be the highest. The control
                // chunk was not committed, but the migrated chunk was. This state is not
                // recoverable.
                fassertStatusOK(40138,
                                {applyOpsStatus.code(),
                                 str::stream() << "Migration was partially committed, state is "
                                               << "unrecoverable. applyOps error: "
                                               << causedBy(applyOpsStatus)});
            }
        }
    }

    MONGO_FAIL_POINT_PAUSE_WHILE_SET(hangBeforeLeavingCriticalSection);

    scopedGuard.Dismiss();
    _cleanup(txn);

    grid.catalogClient(txn)->logChange(txn,
                                       "moveChunk.commit",
                                       _args.getNss().ns(),
                                       BSON("min" << _args.getMinKey() << "max" << _args.getMaxKey()
                                                  << "from"
                                                  << _args.getFromShardId()
                                                  << "to"
                                                  << _args.getToShardId()));

    return Status::OK();
}
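The error branch above is a commit-then-verify protocol: after a failed applyOps, the donor refreshes its metadata from the config server and classifies the outcome as not committed, fully committed despite the error reply, or partially committed and unrecoverable. A compact sketch of that decision tree, with all names as stand-ins for the real sharding types:

enum class Outcome { kCommitted, kNotCommitted, kPartiallyCommitted };

struct RefreshedMetadata {
    bool donorStillOwnsChunk;    // chunk still belongs to this shard after refresh
    bool versionMatchesControl;  // collection version reached the bumped nextVersion
};

Outcome classifyFailedCommit(const RefreshedMetadata& r) {
    if (r.donorStillOwnsChunk) {
        // No migration changes reached the config server: plain failure,
        // safe to report to the caller.
        return Outcome::kNotCommitted;
    }
    if (!r.versionMatchesControl) {
        // The migrated chunk committed but the control chunk did not; the
        // real code treats this as unrecoverable (fassertStatusOK 40138).
        return Outcome::kPartiallyCommitted;
    }
    // Both writes landed, so the migration actually succeeded.
    return Outcome::kCommitted;
}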