// Begins the clone phase of a chunk migration. In order, this:
//   1. resolves the donor shard and remembers its connection string,
//   2. resolves the recipient shard and remembers the host its targeter selects for a
//      primary-only read preference,
//   3. records the currently available documents through _storeCurrentLocs, and
//   4. sends the start-chunk-clone command to the recipient.
//
// Must be invoked with no locks held on 'txn'. The first non-OK status encountered is returned
// as-is, and on every such early exit the scope guard runs cancelClone(txn). The guard is only
// dismissed after _callRecipient reports success.
Status MigrationChunkClonerSourceLegacy::startClone(OperationContext* txn) {
    invariant(!txn->lockState()->isLocked());
    auto cancelOnFailure = MakeGuard([&] { cancelClone(txn); });

    // Donor side: only the connection string is kept
    {
        auto swDonorShard = grid.shardRegistry()->getShard(txn, _args.getFromShardId());
        if (!swDonorShard.isOK()) {
            return swDonorShard.getStatus();
        }

        _donorCS = swDonorShard.getValue()->getConnString();
    }

    // Recipient side: find the host which will receive the migration commands
    {
        auto swRecipientShard = grid.shardRegistry()->getShard(txn, _args.getToShardId());
        if (!swRecipientShard.isOK()) {
            return swRecipientShard.getStatus();
        }

        const ReadPreferenceSetting primaryOnly{ReadPreference::PrimaryOnly};
        auto swPrimaryHost = swRecipientShard.getValue()->getTargeter()->findHost(primaryOnly);
        if (!swPrimaryHost.isOK()) {
            return swPrimaryHost.getStatus();
        }

        _recipientHost = std::move(swPrimaryHost.getValue());
    }

    // Snapshot the documents which are available for cloning right now
    Status storeLocsStatus = _storeCurrentLocs(txn);
    if (!storeLocsStatus.isOK()) {
        return storeLocsStatus;
    }

    // Build the start-clone command and hand it to the recipient shard
    BSONObjBuilder startCloneCmd;
    StartChunkCloneRequest::appendAsCommand(&startCloneCmd,
                                            _args.getNss(),
                                            _sessionId,
                                            _args.getConfigServerCS(),
                                            _donorCS,
                                            _args.getFromShardId(),
                                            _args.getToShardId(),
                                            _args.getMinKey(),
                                            _args.getMaxKey(),
                                            _shardKeyPattern.toBSON(),
                                            _args.getSecondaryThrottle());

    auto swStartCloneResponse = _callRecipient(startCloneCmd.obj());
    if (!swStartCloneResponse.isOK()) {
        return swStartCloneResponse.getStatus();
    }

    // Everything succeeded, so the clone must not be cancelled on the way out
    cancelOnFailure.Dismiss();
    return Status::OK();
}
// Aborts an in-progress clone. Must be invoked with no locks held on 'txn'.
//
// If _cloneCompleted is already set this is a no-op. Otherwise the recipient is sent the
// receive-chunk abort command for the namespace (the outcome of that call is deliberately not
// inspected) and _cleanup(txn) is run.
void MigrationChunkClonerSourceLegacy::cancelClone(OperationContext* txn) {
    invariant(!txn->lockState()->isLocked());

    // Read the completion flag under the mutex, but do not hold the mutex across the call to the
    // recipient or the cleanup below.
    const bool alreadyCompleted = [this] {
        stdx::lock_guard<stdx::mutex> lk(_mutex);
        return _cloneCompleted;
    }();

    if (alreadyCompleted) {
        return;
    }

    _callRecipient(BSON(kRecvChunkAbort << _args.getNss().ns()));
    _cleanup(txn);
}
// Asks the recipient shard to commit the clone. Must be invoked with no locks held on 'txn' and
// it is a programming error (invariant) to call it once _cloneCompleted is set.
//
// Returns Status::OK() after running _cleanup(txn) if the recipient accepted the commit request.
// Otherwise the clone is cancelled through cancelClone(txn) and the recipient's failure status is
// returned.
Status MigrationChunkClonerSourceLegacy::commitClone(OperationContext* txn) {
    invariant(!txn->lockState()->isLocked());

    {
        stdx::lock_guard<stdx::mutex> lk(_mutex);
        invariant(!_cloneCompleted);
    }

    auto swCommitResponse =
        _callRecipient(createRecvChunkCommitRequest(_args.getNss(), _sessionId));

    if (!swCommitResponse.isOK()) {
        // The commit did not go through - abort the migration before reporting why
        cancelClone(txn);
        return swCommitResponse.getStatus();
    }

    _cleanup(txn);
    return Status::OK();
}
// Polls the recipient shard with the receive-chunk status command until it reports the "steady"
// state, an error is detected, or 'maxTimeToWait' has elapsed. Must be invoked with no locks held
// on 'txn'.
//
// Returns:
//   - Status::OK() when the recipient is "steady" and _cloneLocs is empty.
//   - OperationIncomplete when the recipient is "steady" but _cloneLocs is not empty, or when the
//     status response describes a different migration (ns/from/min/max mismatch).
//   - OperationFailed when the recipient reports the "fail" state.
//   - ExceededMemoryLimit when _memoryUsed is above 500MB.
//   - The recipient call's error code (with added context) if the recipient cannot be contacted.
//   - The interruption status if 'txn' has been interrupted.
//   - ExceededTimeLimit if the loop ran out of time.
//
// A scope guard runs cancelClone(txn) on every exit path except the successful one and the
// timeout, both of which dismiss the guard explicitly.
Status MigrationChunkClonerSourceLegacy::awaitUntilCriticalSectionIsAppropriate(
    OperationContext* txn, Milliseconds maxTimeToWait) {
    invariant(!txn->lockState()->isLocked());
    auto scopedGuard = MakeGuard([&] { cancelClone(txn); });

    const auto startTime = Date_t::now();

    int iteration = 0;
    while ((Date_t::now() - startTime) < maxTimeToWait) {
        // Exponential sleep backoff, up to 1024ms. Don't sleep much on the first few iterations,
        // since we want empty chunk migrations to be fast.
        sleepmillis(1 << std::min(iteration, 10));
        iteration++;

        auto responseStatus = _callRecipient(BSON(kRecvChunkStatus << _args.getNss().ns()));
        if (!responseStatus.isOK()) {
            return {responseStatus.getStatus().code(),
                    str::stream()
                        << "Failed to contact recipient shard to monitor data transfer due to "
                        << responseStatus.getStatus().toString()};
        }

        BSONObj res = std::move(responseStatus.getValue());

        // The remainder of the iteration inspects _cloneLocs and _memoryUsed, so take the mutex
        // instead of reading them unsynchronized. The lock is acquired only after the (blocking)
        // sleep and recipient call above, and because it lives in the loop body's scope it is
        // always released before scopedGuard runs cancelClone, which acquires _mutex itself.
        //
        // NOTE(review): this assumes _mutex is the lock which guards _cloneLocs and _memoryUsed
        // and that they are modified by other threads - confirm against the class declaration.
        stdx::lock_guard<stdx::mutex> sl(_mutex);

        log() << "moveChunk data transfer progress: " << res << " my mem used: " << _memoryUsed;

        // Look the state up once. String() is expected to throw if the field is missing or is not
        // a string, in which case scopedGuard cancels the clone during unwinding.
        const auto recipientState = res["state"].String();

        if (recipientState == "steady") {
            // Ensure all cloned docs have actually been transferred
            const std::size_t locsRemaining = _cloneLocs.size();
            if (locsRemaining != 0) {
                return {
                    ErrorCodes::OperationIncomplete,
                    str::stream()
                        << "cannot enter critical section before all data is cloned, "
                        << locsRemaining
                        << " locs were not transferred but to-shard thinks they are all cloned"};
            }

            scopedGuard.Dismiss();
            return Status::OK();
        }

        if (recipientState == "fail") {
            return {ErrorCodes::OperationFailed, "Data transfer error"};
        }

        if (res["ns"].str() != _args.getNss().ns() || res["from"].str() != _donorCS.toString() ||
            !res["min"].isABSONObj() || res["min"].Obj().woCompare(_args.getMinKey()) != 0 ||
            !res["max"].isABSONObj() || res["max"].Obj().woCompare(_args.getMaxKey()) != 0) {
            // This can happen when the destination aborted the migration and received another
            // recvChunk before this thread sees the transition to the abort state. This is
            // currently possible only if multiple migrations are happening at once. This is an
            // unfortunate consequence of the shards not being able to keep track of multiple
            // incoming and outgoing migrations.
            return {ErrorCodes::OperationIncomplete,
                    "Destination shard aborted migration because a new one is running"};
        }

        // 500MB cap on the memory tracked in _memoryUsed
        if (_memoryUsed > 500 * 1024 * 1024) {
            // This is too much memory for us to use so we're going to abort the migration
            return {ErrorCodes::ExceededMemoryLimit,
                    "Aborting migration because of high memory usage"};
        }

        Status interruptStatus = txn->checkForInterruptNoAssert();
        if (!interruptStatus.isOK()) {
            return interruptStatus;
        }
    }

    // Running out of time does not cancel the clone here; the guard is dismissed and the caller
    // is left to decide what to do with the ExceededTimeLimit status.
    scopedGuard.Dismiss();
    return {ErrorCodes::ExceededTimeLimit, "Timed out waiting for the cloner to catch up"};
}