Example No. 1
0
Status AuthorizationSession::_checkAuthForPrivilegeHelper(const Privilege& privilege) {
    AuthorizationManager& authMan = getAuthorizationManager();
    Privilege modifiedPrivilege = _modifyPrivilegeForSpecialCases(privilege);

    // Check not only the privilege's exact resource, but also its database component
    // alone and the "*" (wildcard) resource.
    std::string resourceSearchList[3];
    resourceSearchList[0] = AuthorizationManager::WILDCARD_RESOURCE_NAME;
    resourceSearchList[1] = nsToDatabase(modifiedPrivilege.getResource());
    resourceSearchList[2] = modifiedPrivilege.getResource();


    ActionSet unmetRequirements = modifiedPrivilege.getActions();
    UserSet::iterator it = _authenticatedUsers.begin();
    while (it != _authenticatedUsers.end()) {
        User* user = *it;

        if (!user->isValid()) {
            // Make a good faith effort to acquire an up-to-date user object, since the one
            // we've cached is marked "out-of-date."
            UserName name = user->getName();
            User* updatedUser;

            Status status = authMan.acquireUser(name, &updatedUser);
            switch (status.code()) {
            case ErrorCodes::OK: {
                // Success! Replace the old User object with the updated one.
                fassert(17067, _authenticatedUsers.replaceAt(it, updatedUser) == user);
                authMan.releaseUser(user);
                user = updatedUser;
                LOG(1) << "Updated session cache of user information for " << name;
                break;
            }
            case ErrorCodes::UserNotFound: {
                // User does not exist anymore; remove it from _authenticatedUsers.
                fassert(17068, _authenticatedUsers.removeAt(it) == user);
                authMan.releaseUser(user);
                LOG(1) << "Removed deleted user " << name <<
                       " from session cache of user information.";
                continue;  // No need to advance "it" in this case.
            }
            default:
                // Unrecognized error; assume that it's transient, and continue working with the
                // out-of-date privilege data.
                warning() << "Could not fetch updated user privilege information for " <<
                          name << "; continuing to use old information.  Reason is " << status;
                break;
            }
        }

        for (int i = 0; i < static_cast<int>(boost::size(resourceSearchList)); ++i) {
            ActionSet userActions = user->getActionsForResource(resourceSearchList[i]);
            unmetRequirements.removeAllActionsFromSet(userActions);

            if (unmetRequirements.empty())
                return Status::OK();
        }
        ++it;
    }

    return Status(ErrorCodes::Unauthorized, "unauthorized");
}
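The helper above subtracts, per authenticated user, the actions granted on the wildcard resource, the database, and the exact collection until no required action remains. A minimal self-contained sketch of that subtraction loop, with std::set standing in for ActionSet and all names illustrative rather than MongoDB's:

#include <iostream>
#include <set>
#include <string>
#include <utility>
#include <vector>

using ActionSet = std::set<std::string>;

// True when the actions granted on any resource in 'searchList' jointly cover
// every action in 'required' -- the same subtraction the session performs.
bool isAuthorized(const std::vector<std::pair<std::string, ActionSet>>& grants,
                  const std::vector<std::string>& searchList,
                  ActionSet required) {
    for (const auto& resource : searchList) {
        for (const auto& grant : grants) {
            if (grant.first != resource)
                continue;
            for (const auto& action : grant.second)
                required.erase(action);  // this requirement is now met
        }
        if (required.empty())
            return true;
    }
    return false;
}

int main() {
    std::vector<std::pair<std::string, ActionSet>> grants = {
        {"*", {"read"}}, {"test.foo", {"insert"}}};
    // Mirror the search order: wildcard, then database, then exact namespace.
    std::vector<std::string> searchList = {"*", "test", "test.foo"};
    std::cout << isAuthorized(grants, searchList, {"read", "insert"}) << "\n";  // 1
    std::cout << isAuthorized(grants, searchList, {"dropDatabase"}) << "\n";    // 0
}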
Example No. 2
0
        bool run( OperationContext* txn,
                  string const &db,
                  BSONObj &cmdObj,
                  int,
                  string &errmsg,
                  BSONObjBuilder &result,
                  bool ) {

            string ns;
            if ( !FieldParser::extract( cmdObj, nsField, &ns, &errmsg ) ) {
                return false;
            }

            if ( ns == "" ) {
                errmsg = "no collection name specified";
                return false;
            }

            BSONObj startingFromKey;
            if ( !FieldParser::extract( cmdObj,
                                        startingFromKeyField,
                                        &startingFromKey,
                                        &errmsg ) ) {
                return false;
            }

            WriteConcernOptions writeConcern;
            Status status = writeConcern.parseSecondaryThrottle(cmdObj, NULL);

            if (!status.isOK()) {
                if (status.code() != ErrorCodes::WriteConcernNotDefined) {
                    return appendCommandStatus(result, status);
                }

                writeConcern = DefaultWriteConcern;
            }
            else {
                repl::ReplicationCoordinator* replCoordinator =
                        repl::getGlobalReplicationCoordinator();
                Status wcStatus = replCoordinator->checkIfWriteConcernCanBeSatisfied(writeConcern);
                if (!wcStatus.isOK()) {
                    return appendCommandStatus(result, wcStatus);
                }
            }

            if (writeConcern.shouldWaitForOtherNodes() &&
                    writeConcern.wTimeout == WriteConcernOptions::kNoTimeout) {
                // Never wait for other nodes without a timeout.
                writeConcern.wTimeout = kDefaultWTimeoutMs;
            }

            if (!shardingState.enabled()) {
                errmsg = str::stream() << "server is not part of a sharded cluster or "
                                       << "the sharding metadata is not yet initialized.";
                return false;
            }

            ChunkVersion shardVersion;
            status = shardingState.refreshMetadataNow( ns, &shardVersion );
            if ( !status.isOK() ) {
                if ( status.code() == ErrorCodes::RemoteChangeDetected ) {
                    warning() << "Shard version in transition detected while refreshing "
                              << "metadata for " << ns << " at version " << shardVersion << endl;
                }
                else {
                    errmsg = str::stream() << "failed to refresh shard metadata: "
                                           << status.reason();
                    return false;
                }
            }

            BSONObj stoppedAtKey;
            CleanupResult cleanupResult = cleanupOrphanedData( txn,
                                                               NamespaceString( ns ),
                                                               startingFromKey,
                                                               writeConcern,
                                                               &stoppedAtKey,
                                                               &errmsg );

            if ( cleanupResult == CleanupResult_Error ) {
                return false;
            }

            if ( cleanupResult == CleanupResult_Continue ) {
                result.append( stoppedAtKeyField(), stoppedAtKey );
            }
            else {
                dassert( cleanupResult == CleanupResult_Done );
            }

            return true;
        }
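The write-concern handling in this command falls back to a default when the option is simply absent (WriteConcernNotDefined) but fails the command on any other parse error, and it also refuses an unbounded wait. A compact sketch of that pattern under assumed, simplified Status and WriteConcern types (not the real APIs):

#include <iostream>
#include <string>

enum class ErrorCode { OK, WriteConcernNotDefined, BadValue };

struct Status {
    ErrorCode code;
    std::string reason;
    bool isOK() const { return code == ErrorCode::OK; }
};

struct WriteConcern {
    int wTimeoutMs = 0;  // 0 stands for "no timeout" here
};

// Hypothetical parser: WriteConcernNotDefined means the option was absent,
// anything else is a genuine error.
Status parseWriteConcern(bool present, bool valid, WriteConcern* out) {
    if (!present)
        return {ErrorCode::WriteConcernNotDefined, "write concern not specified"};
    if (!valid)
        return {ErrorCode::BadValue, "malformed write concern"};
    out->wTimeoutMs = 5000;
    return {ErrorCode::OK, ""};
}

bool runCommand(bool present, bool valid, std::string* errmsg) {
    WriteConcern wc;
    Status s = parseWriteConcern(present, valid, &wc);
    if (!s.isOK()) {
        if (s.code != ErrorCode::WriteConcernNotDefined) {
            *errmsg = s.reason;  // real parse errors fail the command
            return false;
        }
        wc = WriteConcern{};  // option absent: fall back to the default
    }
    if (wc.wTimeoutMs == 0)
        wc.wTimeoutMs = 60000;  // never wait for other nodes forever
    return true;
}

int main() {
    std::string err;
    std::cout << runCommand(false, true, &err) << " "    // 1: default applied
              << runCommand(true, false, &err) << "\n";  // 0: parse error
}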
Example No. 3
0
void Strategy::queryOp(OperationContext* txn, Request& r) {
    verify(!NamespaceString(r.getns()).isCommand());

    Timer queryTimer;

    globalOpCounters.gotQuery();

    QueryMessage q(r.d());

    NamespaceString ns(q.ns);
    ClientBasic* client = txn->getClient();
    AuthorizationSession* authSession = AuthorizationSession::get(client);
    Status status = authSession->checkAuthForQuery(ns, q.query);
    audit::logQueryAuthzCheck(client, ns, q.query, status.code());
    uassertStatusOK(status);

    LOG(3) << "query: " << q.ns << " " << q.query << " ntoreturn: " << q.ntoreturn
           << " options: " << q.queryOptions;

    if (q.ntoreturn == 1 && strstr(q.ns, ".$cmd"))
        throw UserException(8010, "something is wrong, shouldn't see a command here");

    if (q.queryOptions & QueryOption_Exhaust) {
        uasserted(18526,
                  string("the 'exhaust' query option is invalid for mongos queries: ") + q.ns +
                      " " + q.query.toString());
    }

    // Spigot which controls whether OP_QUERY style find on mongos uses the new ClusterClientCursor
    // code path.
    // TODO: Delete the spigot and always use the new code.
    if (useClusterClientCursor) {

        ReadPreferenceSetting readPreference(ReadPreference::PrimaryOnly, TagSet::primaryOnly());

        BSONElement rpElem;
        auto readPrefExtractStatus = bsonExtractTypedField(
            q.query, LiteParsedQuery::kFindCommandReadPrefField, mongo::Object, &rpElem);

        if (readPrefExtractStatus.isOK()) {
            auto parsedRps = ReadPreferenceSetting::fromBSON(rpElem.Obj());
            uassertStatusOK(parsedRps.getStatus());
            readPreference = parsedRps.getValue();
        } else if (readPrefExtractStatus != ErrorCodes::NoSuchKey) {
            uassertStatusOK(readPrefExtractStatus);
        }

        auto canonicalQuery = CanonicalQuery::canonicalize(q, WhereCallbackNoop());
        uassertStatusOK(canonicalQuery.getStatus());

        // Do the work to generate the first batch of results. This blocks waiting to get responses
        // from the shard(s).
        std::vector<BSONObj> batch;

        // A cursor id of 0 means the cursor was exhausted; otherwise, a cursor with the
        // returned id can be retrieved later via the ClusterCursorManager.
        auto cursorId =
            ClusterFind::runQuery(txn, *canonicalQuery.getValue(), readPreference, &batch);
        uassertStatusOK(cursorId.getStatus());

        // Build the response document.
        // TODO: this constant should be shared between mongos and mongod, and should
        // not be inside ShardedClientCursor.
        BufBuilder buffer(ShardedClientCursor::INIT_REPLY_BUFFER_SIZE);

        int numResults = 0;
        for (const auto& obj : batch) {
            buffer.appendBuf((void*)obj.objdata(), obj.objsize());
            numResults++;
        }

        replyToQuery(0,  // query result flags
                     r.p(),
                     r.m(),
                     buffer.buf(),
                     buffer.len(),
                     numResults,
                     0,  // startingFrom
                     cursorId.getValue());
        return;
    }

    QuerySpec qSpec((string)q.ns, q.query, q.fields, q.ntoskip, q.ntoreturn, q.queryOptions);

    // Parse "$maxTimeMS".
    StatusWith<int> maxTimeMS = LiteParsedQuery::parseMaxTimeMSQuery(q.query);
    uassert(17233, maxTimeMS.getStatus().reason(), maxTimeMS.isOK());

    if (_isSystemIndexes(q.ns) && doShardedIndexQuery(txn, r, qSpec)) {
        return;
    }

    ParallelSortClusteredCursor* cursor = new ParallelSortClusteredCursor(qSpec, CommandInfo());
    verify(cursor);

    // TODO:  Move out to Request itself, not strategy based
    try {
        cursor->init(txn);

        if (qSpec.isExplain()) {
            BSONObjBuilder explain_builder;
            cursor->explain(explain_builder);
            explain_builder.appendNumber("executionTimeMillis",
                                         static_cast<long long>(queryTimer.millis()));
            BSONObj b = explain_builder.obj();

            replyToQuery(0, r.p(), r.m(), b);
            delete (cursor);
            return;
        }
    } catch (...) {
        delete cursor;
        throw;
    }

    // TODO: Revisit all of this when we revisit the sharded cursor cache

    if (cursor->getNumQueryShards() != 1) {
        // More than one shard (or zero), manage with a ShardedClientCursor
        // NOTE: We may also have *zero* shards here when the returnPartial flag is set.
        // Currently the code in ShardedClientCursor handles this.

        ShardedClientCursorPtr cc(new ShardedClientCursor(q, cursor));

        BufBuilder buffer(ShardedClientCursor::INIT_REPLY_BUFFER_SIZE);
        int docCount = 0;
        const int startFrom = cc->getTotalSent();
        bool hasMore = cc->sendNextBatch(q.ntoreturn, buffer, docCount);

        if (hasMore) {
            LOG(5) << "storing cursor : " << cc->getId();

            int cursorLeftoverMillis = maxTimeMS.getValue() - queryTimer.millis();
            if (maxTimeMS.getValue() == 0) {  // 0 represents "no limit".
                cursorLeftoverMillis = kMaxTimeCursorNoTimeLimit;
            } else if (cursorLeftoverMillis <= 0) {
                cursorLeftoverMillis = kMaxTimeCursorTimeLimitExpired;
            }

            cursorCache.store(cc, cursorLeftoverMillis);
        }

        replyToQuery(0,
                     r.p(),
                     r.m(),
                     buffer.buf(),
                     buffer.len(),
                     docCount,
                     startFrom,
                     hasMore ? cc->getId() : 0);
    } else {
        // Only one shard is used

        // Remote cursors are stored remotely, we shouldn't need this around.
        unique_ptr<ParallelSortClusteredCursor> cursorDeleter(cursor);

        ShardPtr shard = cursor->getQueryShard();
        verify(shard.get());
        DBClientCursorPtr shardCursor = cursor->getShardCursor(shard->getId());

        // Implicitly stores the cursor in the cache
        r.reply(*(shardCursor->getMessage()), shardCursor->originalHost());

        // We don't want to kill the cursor remotely if there's still data left
        shardCursor->decouple();
    }
}
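When the cursor is stored, the remaining $maxTimeMS budget is converted into a per-cursor leftover with sentinels for "no limit" and "already expired". A minimal version of that bookkeeping; the sentinel values here are illustrative, not the constants mongos uses:

#include <iostream>

const int kNoTimeLimit = -1;      // sentinel: never time the cursor out
const int kTimeLimitExpired = 0;  // sentinel: time out on the next getMore

// Budget left for a stored cursor after the initial query took
// 'elapsedMillis'; maxTimeMS == 0 means the user set no limit.
int cursorLeftoverMillis(int maxTimeMS, int elapsedMillis) {
    if (maxTimeMS == 0)
        return kNoTimeLimit;
    const int leftover = maxTimeMS - elapsedMillis;
    return leftover > 0 ? leftover : kTimeLimitExpired;
}

int main() {
    std::cout << cursorLeftoverMillis(0, 50) << "\n";     // -1: no limit
    std::cout << cursorLeftoverMillis(100, 30) << "\n";   // 70
    std::cout << cursorLeftoverMillis(100, 150) << "\n";  // 0: already expired
}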
Example No. 4
0
    Status ModifierRename::prepare(mutablebson::Element root,
                                   const StringData& matchedField,
                                   ExecInfo* execInfo) {
        // Rename doesn't work with positional fields ($)
        dassert(matchedField.empty());

        _preparedState.reset(new PreparedState(root));

        // Locate the 'from' field name in 'root', which must exist.
        size_t fromIdxFound;
        Status status = pathsupport::findLongestPrefix(_fromFieldRef,
                                                       root,
                                                       &fromIdxFound,
                                                       &_preparedState->fromElemFound);

        // If we can't find the full element in the from field then we can't do anything.
        if (!status.isOK()) {
            execInfo->noOp = true;
            _preparedState->fromElemFound = root.getDocument().end();

            // TODO: remove this special case from existing behavior
            if (status.code() == ErrorCodes::PathNotViable) {
                return status;
            }

            return Status::OK();
        }

        // Ensure no array in ancestry if what we found is not at the root
        mutablebson::Element curr = _preparedState->fromElemFound.parent();
        if (curr != curr.getDocument().root()) {
            while (curr.ok() && (curr != curr.getDocument().root())) {
                if (curr.getType() == Array)
                    return Status(ErrorCodes::BadValue,
                                  str::stream() << "The source field cannot be an array element, '"
                                  << _fromFieldRef.dottedField() << "' in doc with "
                                  << findElementNamed(root.leftChild(), "_id").toString()
                                  << " has an array field called '" << curr.getFieldName() << "'");
                curr = curr.parent();
            }
        }

        // "To" side validation below

        status = pathsupport::findLongestPrefix(_toFieldRef,
                                                root,
                                                &_preparedState->toIdxFound,
                                                &_preparedState->toElemFound);

        // findLongestPrefix may return PathNotViable or any other error, in which case
        // we cannot proceed.
        if (status.code() == ErrorCodes::NonExistentPath) {
            // Not an error condition as we will create the "to" path as needed.
        } else if (!status.isOK()) {
            return status;
        }

        const bool destExists = _preparedState->toElemFound.ok() &&
                                (_preparedState->toIdxFound == (_toFieldRef.numParts()-1));

        // Ensure no array in ancestry of "to" Element
        // Set to either parent, or node depending on if the full path element was found
        curr = (destExists ? _preparedState->toElemFound.parent() : _preparedState->toElemFound);
        if (curr != curr.getDocument().root()) {
            while (curr.ok()) {
                if (curr.getType() == Array)
                    return Status(ErrorCodes::BadValue,
                                  str::stream()
                                  << "The destination field cannot be an array element, '"
                                  << _fromFieldRef.dottedField() << "' in doc with "
                                  << findElementNamed(root.leftChild(), "_id").toString()
                                  << " has an array field called '" << curr.getFieldName() << "'");
                curr = curr.parent();
            }
        }

        // We register interest in the field name. The driver needs this info to sort out if
        // there is any conflict among mods.
        execInfo->fieldRef[0] = &_fromFieldRef;
        execInfo->fieldRef[1] = &_toFieldRef;

        execInfo->noOp = false;

        return Status::OK();
    }
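Both validation passes above walk from a located element up toward the document root and reject the mod if any ancestor is an array. A freestanding sketch of that ancestor walk over a toy tree (the Node type is hypothetical):

#include <iostream>
#include <string>

enum class Type { Object, Array, Scalar };

struct Node {
    std::string name;
    Type type;
    Node* parent;  // nullptr at the document root
};

// True if any ancestor of 'elem' below the root is an array, mirroring the
// parent walk in ModifierRename::prepare().
bool hasArrayAncestor(const Node* elem) {
    for (const Node* cur = elem->parent; cur && cur->parent; cur = cur->parent) {
        if (cur->type == Type::Array)
            return true;
    }
    return false;
}

int main() {
    Node root{"", Type::Object, nullptr};
    Node tags{"tags", Type::Array, &root};
    Node item{"0", Type::Scalar, &tags};
    Node plain{"a", Type::Scalar, &root};
    std::cout << hasArrayAncestor(&item) << "\n";   // 1: parent is an array
    std::cout << hasArrayAncestor(&plain) << "\n";  // 0: directly under root
}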
Example No. 5
0
void QuorumChecker::_tabulateHeartbeatResponse(const RemoteCommandRequest& request,
                                               const executor::RemoteCommandResponse& response) {
    ++_numResponses;
    if (!response.isOK()) {
        warning() << "Failed to complete heartbeat request to " << request.target << "; "
                  << response.status;
        _badResponses.push_back(std::make_pair(request.target, response.status));
        return;
    }

    BSONObj resBSON = response.data;
    ReplSetHeartbeatResponse hbResp;
    Status hbStatus = hbResp.initialize(resBSON, 0);

    if (hbStatus.code() == ErrorCodes::InconsistentReplicaSetNames) {
        std::string message = str::stream() << "Our set name did not match that of "
                                            << request.target.toString();
        _vetoStatus = Status(ErrorCodes::NewReplicaSetConfigurationIncompatible, message);
        warning() << message;
        return;
    }

    if (!hbStatus.isOK() && hbStatus != ErrorCodes::InvalidReplicaSetConfig) {
        warning() << "Got error (" << hbStatus << ") response on heartbeat request to "
                  << request.target << "; " << hbResp;
        _badResponses.push_back(std::make_pair(request.target, hbStatus));
        return;
    }

    if (!hbResp.getReplicaSetName().empty()) {
        if (hbResp.getConfigVersion() >= _rsConfig->getConfigVersion()) {
            std::string message = str::stream()
                << "Our config version of " << _rsConfig->getConfigVersion()
                << " is no larger than the version on " << request.target.toString()
                << ", which is " << hbResp.getConfigVersion();
            _vetoStatus = Status(ErrorCodes::NewReplicaSetConfigurationIncompatible, message);
            warning() << message;
            return;
        }
    }

    if (_rsConfig->hasReplicaSetId()) {
        StatusWith<rpc::ReplSetMetadata> replMetadata =
            rpc::ReplSetMetadata::readFromMetadata(response.metadata);
        if (replMetadata.isOK() && replMetadata.getValue().getReplicaSetId().isSet() &&
            _rsConfig->getReplicaSetId() != replMetadata.getValue().getReplicaSetId()) {
            std::string message = str::stream()
                << "Our replica set ID of " << _rsConfig->getReplicaSetId()
                << " did not match that of " << request.target.toString() << ", which is "
                << replMetadata.getValue().getReplicaSetId();
            _vetoStatus = Status(ErrorCodes::NewReplicaSetConfigurationIncompatible, message);
            warning() << message;
        }
    }

    const bool isInitialConfig = _rsConfig->getConfigVersion() == 1;
    if (isInitialConfig && hbResp.hasData()) {
        std::string message = str::stream() << "'" << request.target.toString()
                                            << "' has data already, cannot initiate set.";
        _vetoStatus = Status(ErrorCodes::CannotInitializeNodeWithData, message);
        warning() << message;
        return;
    }

    for (int i = 0; i < _rsConfig->getNumMembers(); ++i) {
        const MemberConfig& memberConfig = _rsConfig->getMemberAt(i);
        if (memberConfig.getHostAndPort() != request.target) {
            continue;
        }
        if (memberConfig.isElectable()) {
            ++_numElectable;
        }
        if (memberConfig.isVoter()) {
            _voters.push_back(request.target);
        }
        return;
    }
    invariant(false);
}
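Each heartbeat reply can veto the proposed configuration; the version check above rejects a reconfig when any responder already holds a config at least as new as ours. A reduced sketch of just that veto rule:

#include <iostream>
#include <string>
#include <utility>
#include <vector>

struct Veto {
    bool vetoed;
    std::string reason;
};

// Veto the proposed config if any responder reports a version that is not
// strictly older than ours, as the tabulation above does.
Veto checkConfigVersions(long long ourVersion,
                         const std::vector<std::pair<std::string, long long>>& replies) {
    for (const auto& [host, version] : replies) {
        if (version >= ourVersion) {
            return {true,
                    "Our config version of " + std::to_string(ourVersion) +
                        " is no larger than the version on " + host};
        }
    }
    return {false, ""};
}

int main() {
    Veto v = checkConfigVersions(3, {{"a:27017", 2}, {"b:27017", 3}});
    std::cout << v.vetoed << " " << v.reason << "\n";  // 1 Our config version ...
}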
Example No. 6
0
static void buildTargetError(const Status& errStatus, WriteErrorDetail* details) {
    details->setErrCode(errStatus.code());
    details->setErrMessage(errStatus.reason());
}
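For reference, the same translation in isolation: the only contract is that the code and reason are carried over unchanged into the per-write error detail. The struct layouts below are stand-ins, not the real wire types:

#include <iostream>
#include <string>

struct Status {
    int code;
    std::string reason;
};

struct WriteErrorDetail {
    int errCode;
    std::string errMessage;
};

// Same shape as buildTargetError(): copy code and reason verbatim so a
// targeting failure is reported per-write instead of failing the batch.
void buildTargetError(const Status& errStatus, WriteErrorDetail* details) {
    details->errCode = errStatus.code;
    details->errMessage = errStatus.reason;
}

int main() {
    WriteErrorDetail detail;
    buildTargetError({121, "document failed validation"}, &detail);
    std::cout << detail.errCode << ": " << detail.errMessage << "\n";
}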
Example No. 7
0
    void ReplicationCoordinatorImpl::_handleHeartbeatResponse(
            const ReplicationExecutor::RemoteCommandCallbackData& cbData, int targetIndex) {

        // remove handle from queued heartbeats
        _untrackHeartbeatHandle(cbData.myHandle);

        // Parse and validate the response.  At the end of this step, if responseStatus is OK then
        // hbResponse is valid.
        Status responseStatus = cbData.response.getStatus();
        if (responseStatus == ErrorCodes::CallbackCanceled) {
            return;
        }

        const HostAndPort& target = cbData.request.target;
        ReplSetHeartbeatResponse hbResponse;
        BSONObj resp;
        if (responseStatus.isOK()) {
            resp = cbData.response.getValue().data;
            responseStatus = hbResponse.initialize(resp);
        }
        const bool isUnauthorized = (responseStatus.code() == ErrorCodes::Unauthorized) ||
                                    (responseStatus.code() == ErrorCodes::AuthenticationFailed);
        const Date_t now = _replExecutor.now();
        const OpTime lastApplied = getMyLastOptime();  // Locks and unlocks _mutex.
        Milliseconds networkTime(0);
        StatusWith<ReplSetHeartbeatResponse> hbStatusResponse(hbResponse);

        if (responseStatus.isOK()) {
            networkTime = cbData.response.getValue().elapsedMillis;
        }
        else {
            log() << "Error in heartbeat request to " << target << "; " << responseStatus;
            if (!resp.isEmpty()) {
                LOG(3) << "heartbeat response: " << resp;
            }

            if (isUnauthorized) {
                networkTime = cbData.response.getValue().elapsedMillis;
            }
            hbStatusResponse = StatusWith<ReplSetHeartbeatResponse>(responseStatus);
        }

        HeartbeatResponseAction action =
            _topCoord->processHeartbeatResponse(
                    now,
                    networkTime,
                    target,
                    hbStatusResponse,
                    lastApplied);

        if (action.getAction() == HeartbeatResponseAction::NoAction &&
                hbStatusResponse.isOK() &&
                hbStatusResponse.getValue().hasOpTime() &&
                targetIndex >= 0) {
            boost::lock_guard<boost::mutex> lk(_mutex);
            if (hbStatusResponse.getValue().getVersion() == _rsConfig.getConfigVersion()) {
                _updateOpTimeFromHeartbeat_inlock(targetIndex,
                                                  hbStatusResponse.getValue().getOpTime());
            }
        }

        _signalStepDownWaiters();

        _scheduleHeartbeatToTarget(
                target,
                targetIndex,
                std::max(now, action.getNextHeartbeatStartDate()));

        _handleHeartbeatResponseAction(action, hbStatusResponse);
    }
Example No. 8
0
    void Strategy::queryOp( Request& r ) {

        verify( !NamespaceString( r.getns() ).isCommand() );

        Timer queryTimer;

        QueryMessage q( r.d() );

        NamespaceString ns(q.ns);
        ClientBasic* client = ClientBasic::getCurrent();
        AuthorizationSession* authSession = AuthorizationSession::get(client);
        Status status = authSession->checkAuthForQuery(ns, q.query);
        audit::logQueryAuthzCheck(client, ns, q.query, status.code());
        uassertStatusOK(status);

        LOG(3) << "query: " << q.ns << " " << q.query << " ntoreturn: " << q.ntoreturn
               << " options: " << q.queryOptions << endl;

        if ( q.ntoreturn == 1 && strstr(q.ns, ".$cmd") )
            throw UserException( 8010 , "something is wrong, shouldn't see a command here" );

        if (q.queryOptions & QueryOption_Exhaust) {
            uasserted(18526,
                      string("the 'exhaust' query option is invalid for mongos queries: ") + q.ns
                      + " " + q.query.toString());
        }

        QuerySpec qSpec( (string)q.ns, q.query, q.fields, q.ntoskip, q.ntoreturn, q.queryOptions );

        // Parse "$maxTimeMS".
        StatusWith<int> maxTimeMS = LiteParsedQuery::parseMaxTimeMSQuery( q.query );
        uassert( 17233,
                 maxTimeMS.getStatus().reason(),
                 maxTimeMS.isOK() );

        if ( _isSystemIndexes( q.ns ) && doShardedIndexQuery( r, qSpec )) {
            return;
        }

        ParallelSortClusteredCursor * cursor = new ParallelSortClusteredCursor( qSpec, CommandInfo() );
        verify( cursor );

        // TODO:  Move out to Request itself, not strategy based
        try {
            cursor->init();

            if ( qSpec.isExplain() ) {
                BSONObjBuilder explain_builder;
                cursor->explain( explain_builder );
                explain_builder.appendNumber( "executionTimeMillis",
                                              static_cast<long long>(queryTimer.millis()) );
                BSONObj b = explain_builder.obj();

                replyToQuery( 0 , r.p() , r.m() , b );
                delete( cursor );
                return;
            }
        }
        catch(...) {
            delete cursor;
            throw;
        }

        // TODO: Revisit all of this when we revisit the sharded cursor cache

        if (cursor->getNumQueryShards() != 1) {

            // More than one shard (or zero), manage with a ShardedClientCursor
            // NOTE: We may also have *zero* shards here when the returnPartial flag is set.
            // Currently the code in ShardedClientCursor handles this.

            ShardedClientCursorPtr cc (new ShardedClientCursor( q , cursor ));

            BufBuilder buffer( ShardedClientCursor::INIT_REPLY_BUFFER_SIZE );
            int docCount = 0;
            const int startFrom = cc->getTotalSent();
            bool hasMore = cc->sendNextBatch(q.ntoreturn, buffer, docCount);

            if ( hasMore ) {
                LOG(5) << "storing cursor : " << cc->getId() << endl;

                int cursorLeftoverMillis = maxTimeMS.getValue() - queryTimer.millis();
                if ( maxTimeMS.getValue() == 0 ) { // 0 represents "no limit".
                    cursorLeftoverMillis = kMaxTimeCursorNoTimeLimit;
                }
                else if ( cursorLeftoverMillis <= 0 ) {
                    cursorLeftoverMillis = kMaxTimeCursorTimeLimitExpired;
                }

                cursorCache.store( cc, cursorLeftoverMillis );
            }

            replyToQuery( 0, r.p(), r.m(), buffer.buf(), buffer.len(), docCount,
                    startFrom, hasMore ? cc->getId() : 0 );
        }
        else {

            // Only one shard is used

            // Remote cursors are stored remotely, we shouldn't need this around.
            scoped_ptr<ParallelSortClusteredCursor> cursorDeleter( cursor );

            ShardPtr shard = cursor->getQueryShard();
            verify( shard.get() );
            DBClientCursorPtr shardCursor = cursor->getShardCursor(*shard);

            // Implicitly stores the cursor in the cache
            r.reply( *(shardCursor->getMessage()) , shardCursor->originalHost() );

            // We don't want to kill the cursor remotely if there's still data left
            shardCursor->decouple();
        }
    }
Example No. 9
0
Status IndexAccessMethod::commitBulk(OperationContext* txn,
                                     std::unique_ptr<BulkBuilder> bulk,
                                     bool mayInterrupt,
                                     bool dupsAllowed,
                                     set<RecordId>* dupsToDrop) {
    Timer timer;

    std::unique_ptr<BulkBuilder::Sorter::Iterator> i(bulk->_sorter->done());

    stdx::unique_lock<Client> lk(*txn->getClient());
    ProgressMeterHolder pm(*txn->setMessage_inlock("Index Bulk Build: (2/3) btree bottom up",
                                                   "Index: (2/3) BTree Bottom Up Progress",
                                                   bulk->_keysInserted,
                                                   10));
    lk.unlock();

    std::unique_ptr<SortedDataBuilderInterface> builder;

    MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
        WriteUnitOfWork wunit(txn);

        if (bulk->_isMultiKey) {
            _btreeState->setMultikey(txn);
        }

        builder.reset(_newInterface->getBulkBuilder(txn, dupsAllowed));
        wunit.commit();
    }
    MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "setting index multikey flag", "");

    while (i->more()) {
        if (mayInterrupt) {
            txn->checkForInterrupt();
        }

        WriteUnitOfWork wunit(txn);
        // Improve performance in the btree-building phase by disabling rollback tracking.
        // This avoids copying all the written bytes to a buffer that is only used to roll back.
        // Note that this is safe to do, as this entire index-build-in-progress will be cleaned
        // up by the index system.
        txn->recoveryUnit()->setRollbackWritesDisabled();

        // Get the next datum and add it to the builder.
        BulkBuilder::Sorter::Data d = i->next();
        Status status = builder->addKey(d.first, d.second);

        if (!status.isOK()) {
            // Overlong key that's OK to skip?
            if (status.code() == ErrorCodes::KeyTooLong && ignoreKeyTooLong(txn)) {
                continue;
            }

            // Check if this is a duplicate that's OK to skip
            if (status.code() == ErrorCodes::DuplicateKey) {
                invariant(!dupsAllowed);  // shouldn't be getting DupKey errors if dupsAllowed.

                if (dupsToDrop) {
                    dupsToDrop->insert(d.second);
                    continue;
                }
            }

            return status;
        }

        // If we're here either it's a dup and we're cool with it or the addKey went just
        // fine.
        pm.hit();
        wunit.commit();
    }

    pm.finished();

    {
        stdx::lock_guard<Client> lk(*txn->getClient());
        CurOp::get(txn)->setMessage_inlock("Index Bulk Build: (3/3) btree-middle",
                                           "Index: (3/3) BTree Middle Progress");
    }

    LOG(timer.seconds() > 10 ? 0 : 1) << "\t done building bottom layer, going to commit";

    builder->commit(mayInterrupt);
    return Status::OK();
}
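The MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN/END pair wraps a unit of work that is re-executed whenever the storage engine throws a write conflict. A minimal stand-in for that control flow, with a hypothetical exception type and no backoff:

#include <iostream>
#include <stdexcept>

struct WriteConflictException : std::runtime_error {
    WriteConflictException() : std::runtime_error("write conflict") {}
};

// Re-run 'work' until it finishes without a write conflict, the way the
// retry-loop macros wrap the WriteUnitOfWork in commitBulk().
template <typename Work>
void writeConflictRetry(const char* opName, Work work) {
    for (int attempt = 0;; ++attempt) {
        try {
            work();
            return;
        } catch (const WriteConflictException&) {
            std::cerr << opName << ": write conflict, retrying (attempt "
                      << attempt << ")\n";
        }
    }
}

int main() {
    int failuresLeft = 2;
    writeConflictRetry("setting index multikey flag", [&] {
        if (failuresLeft-- > 0)
            throw WriteConflictException();
        std::cout << "committed\n";
    });
}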
Example No. 10
0
StatusWith<ForwardingCatalogManager::ScopedDistLock*>
ChunkMoveOperationState::acquireMoveMetadata() {
    // Get the distributed lock
    const string whyMessage(stream() << "migrating chunk [" << _minKey << ", " << _maxKey << ") in "
                                     << _nss.ns());
    _distLockStatus = grid.forwardingCatalogManager()->distLock(_txn, _nss.ns(), whyMessage);

    if (!_distLockStatus->isOK()) {
        const string msg = stream() << "could not acquire collection lock for " << _nss.ns()
                                    << " to migrate chunk [" << _minKey << "," << _maxKey << ")"
                                    << causedBy(_distLockStatus->getStatus());
        warning() << msg;
        return Status(_distLockStatus->getStatus().code(), msg);
    }

    ShardingState* const shardingState = ShardingState::get(_txn);

    // Snapshot the metadata
    Status refreshStatus = shardingState->refreshMetadataNow(_txn, _nss.ns(), &_shardVersion);
    if (!refreshStatus.isOK()) {
        const string msg = stream() << "moveChunk cannot start migrate of chunk "
                                    << "[" << _minKey << "," << _maxKey << ")"
                                    << causedBy(refreshStatus.reason());
        warning() << msg;
        return Status(refreshStatus.code(), msg);
    }

    if (_shardVersion.majorVersion() == 0) {
        // It makes no sense to migrate if our version is zero and we have no chunks
        const string msg = stream() << "moveChunk cannot start migrate of chunk "
                                    << "[" << _minKey << "," << _maxKey << ")"
                                    << " with zero shard version";
        warning() << msg;
        return Status(ErrorCodes::IncompatibleShardingMetadata, msg);
    }

    {
        // Mongos >= v3.2 sends the full version, v3.0 only sends the epoch.
        // TODO(SERVER-20742): Stop parsing epoch separately after 3.2.
        auto& operationVersion = OperationShardVersion::get(_txn);
        if (operationVersion.hasShardVersion()) {
            _collectionVersion = operationVersion.getShardVersion(_nss);
            _collectionEpoch = _collectionVersion.epoch();
        }  // else the epoch will already be set from the parsing of the ChunkMoveOperationState

        if (_collectionEpoch != _shardVersion.epoch()) {
            const string msg = stream() << "moveChunk cannot move chunk "
                                        << "[" << _minKey << "," << _maxKey << "), "
                                        << "collection may have been dropped. "
                                        << "current epoch: " << _shardVersion.epoch()
                                        << ", cmd epoch: " << _collectionEpoch;
            warning() << msg;
            throw SendStaleConfigException(_nss.toString(), msg, _collectionVersion, _shardVersion);
        }
    }

    _collMetadata = shardingState->getCollectionMetadata(_nss.ns());

    // With nonzero shard version, we must have a coll version >= our shard version
    invariant(_collMetadata->getCollVersion() >= _shardVersion);

    // With nonzero shard version, we must have a shard key
    invariant(!_collMetadata->getKeyPattern().isEmpty());

    ChunkType origChunk;
    if (!_collMetadata->getNextChunk(_minKey, &origChunk) ||
        origChunk.getMin().woCompare(_minKey) || origChunk.getMax().woCompare(_maxKey)) {
        // Our boundaries are different from those passed in
        const string msg = stream() << "moveChunk cannot find chunk "
                                    << "[" << _minKey << "," << _maxKey << ")"
                                    << " to migrate, the chunk boundaries may be stale";
        warning() << msg;
        throw SendStaleConfigException(_nss.toString(), msg, _collectionVersion, _shardVersion);
    }

    return &_distLockStatus->getValue();
}
Example No. 11
0
Status ShardingCatalogClientImpl::insertConfigDocument(OperationContext* opCtx,
                                                       const NamespaceString& nss,
                                                       const BSONObj& doc,
                                                       const WriteConcernOptions& writeConcern) {
    invariant(nss.db() == NamespaceString::kAdminDb || nss.db() == NamespaceString::kConfigDb);

    const BSONElement idField = doc.getField("_id");
    invariant(!idField.eoo());

    BatchedCommandRequest request([&] {
        write_ops::Insert insertOp(nss);
        insertOp.setDocuments({doc});
        return insertOp;
    }());
    request.setWriteConcern(writeConcern.toBSON());

    auto configShard = Grid::get(opCtx)->shardRegistry()->getConfigShard();
    for (int retry = 1; retry <= kMaxWriteRetry; retry++) {
        auto response = configShard->runBatchWriteCommand(
            opCtx, Shard::kDefaultConfigCommandTimeout, request, Shard::RetryPolicy::kNoRetry);

        Status status = response.toStatus();

        if (retry < kMaxWriteRetry &&
            configShard->isRetriableError(status.code(), Shard::RetryPolicy::kIdempotent)) {
            // Pretend like the operation is idempotent because we're handling DuplicateKey errors
            // specially
            continue;
        }

        // If we get DuplicateKey error on the first attempt to insert, this definitively means that
        // we are trying to insert the same entry a second time, so error out. If it happens on a
        // retry attempt though, it is not clear whether we are actually inserting a duplicate key
        // or it is because we failed to wait for write concern on the first attempt. In order to
        // differentiate, fetch the entry and check.
        if (retry > 1 && status == ErrorCodes::DuplicateKey) {
            LOG(1) << "Insert retry failed because of duplicate key error, rechecking.";

            auto fetchDuplicate =
                _exhaustiveFindOnConfig(opCtx,
                                        ReadPreferenceSetting{ReadPreference::PrimaryOnly},
                                        repl::ReadConcernLevel::kMajorityReadConcern,
                                        nss,
                                        idField.wrap(),
                                        BSONObj(),
                                        boost::none);
            if (!fetchDuplicate.isOK()) {
                return fetchDuplicate.getStatus();
            }

            auto existingDocs = fetchDuplicate.getValue().value;
            if (existingDocs.empty()) {
                return {status.withContext(
                    stream() << "DuplicateKey error was returned after a retry attempt, but no "
                                "documents were found. This means a concurrent change occurred "
                                "together with the retries.")};
            }

            invariant(existingDocs.size() == 1);

            BSONObj existing = std::move(existingDocs.front());
            if (existing.woCompare(doc) == 0) {
                // Documents match, so treat the operation as success
                return Status::OK();
            }
        }

        return status;
    }

    MONGO_UNREACHABLE;
}
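The subtle part above is disambiguating a DuplicateKey on a retry: it may be a real conflict, or just evidence that the first, unacknowledged attempt actually landed. The recheck reduces to fetching the stored document and comparing, as in this simplified stand-in (a std::map plays the config collection):

#include <iostream>
#include <map>
#include <string>

enum class Code { OK, DuplicateKey };

// After a DuplicateKey on a *retry*, decide whether the duplicate is our own
// first attempt (stored document matches -> success) or a real conflict.
Code resolveDuplicateOnRetry(const std::map<int, std::string>& store,
                             int id, const std::string& attempted) {
    auto it = store.find(id);
    if (it == store.end())
        return Code::DuplicateKey;  // concurrent change raced with the retries
    return it->second == attempted ? Code::OK : Code::DuplicateKey;
}

int main() {
    std::map<int, std::string> store{{1, "doc"}};
    std::cout << (resolveDuplicateOnRetry(store, 1, "doc") == Code::OK) << "\n";    // 1
    std::cout << (resolveDuplicateOnRetry(store, 1, "other") == Code::OK) << "\n";  // 0
}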
Example No. 12
0
    DiskLoc DataFileMgr::insert(const char* ns,
                                const void* obuf,
                                int32_t len,
                                bool mayInterrupt,
                                bool god,
                                bool mayAddIndex,
                                bool* addedID) {

        Database* database = cc().database();

        bool wouldAddIndex = false;
        massert( 10093 , "cannot insert into reserved $ collection", god || NamespaceString::normal( ns ) );
        uassert( 10094 , str::stream() << "invalid ns: " << ns , isValidNS( ns ) );
        {
            const char *sys = strstr(ns, "system.");
            if ( sys ) {

                if ( !insert_checkSys(sys, ns, wouldAddIndex, obuf, god) )
                    return DiskLoc();

                if ( mayAddIndex && wouldAddIndex ) {
                    // TODO: this should be handled above this function
                    BSONObj spec( static_cast<const char*>( obuf ) );
                    string collectionToIndex = spec.getStringField( "ns" );
                    uassert(10096, "invalid ns to index", collectionToIndex.find( '.' ) != string::npos);
                    massert(10097,
                            str::stream() << "trying to create index on wrong db "
                            << " db: " << database->name() << " collection: " << collectionToIndex,
                            database->ownsNS( collectionToIndex ) );

                    Collection* collection = database->getCollection( collectionToIndex );
                    if ( !collection ) {
                        collection = database->createCollection( collectionToIndex, false, NULL, true );
                        verify( collection );
                        if ( !god )
                            ensureIdIndexForNewNs( collection );
                    }

                    Status status = collection->getIndexCatalog()->createIndex( spec, mayInterrupt );
                    if ( status.code() == ErrorCodes::IndexAlreadyExists )
                        return DiskLoc();
                    uassertStatusOK( status );
                    return DiskLoc();
                }
            }
        }

        Collection* collection = database->getCollection( ns );
        if ( collection == NULL ) {
            collection = database->createCollection( ns, false, NULL, false );

            int ies = Extent::initialSize(len);
            if( str::contains(ns, '$') &&
                len + Record::HeaderSize >= BtreeData_V1::BucketSize - 256 &&
                len + Record::HeaderSize <= BtreeData_V1::BucketSize + 256 ) {
                // probably an index.  so we pick a value here for the first extent instead of using
                // initialExtentSize() which is more for user collections.
                // TODO: we could look at the # of records in the parent collection to be smarter here.
                ies = (32+4) * 1024;
            }
            collection->increaseStorageSize( ies, false);
            if ( !god )
                ensureIdIndexForNewNs( collection );
        }

        NamespaceDetails* d = collection->details();

        IDToInsert idToInsert; // only initialized if needed

        if( !god ) {
            /* Check if we have an _id field. If we don't, we'll add it.
               Note that btree buckets which we insert aren't BSONObj's, but in that case god==true.
            */
            BSONObj io((const char *) obuf);
            BSONElement idField = io.getField( "_id" );
            uassert( 10099 ,  "_id cannot be an array", idField.type() != Array );
            // we don't add _id for capped collections in local as they don't have an _id index
            if( idField.eoo() &&
                !wouldAddIndex &&
                nsToDatabase( ns ) != "local" &&
                d->haveIdIndex() ) {

                if( addedID )
                    *addedID = true;

                idToInsert.init();
                len += idToInsert.size();
            }

            BSONElementManipulator::lookForTimestamps( io );
        }

        int lenWHdr = d->getRecordAllocationSize( len + Record::HeaderSize );
        fassert( 16440, lenWHdr >= ( len + Record::HeaderSize ) );

        // If the collection is capped, check if the new object will violate a unique index
        // constraint before allocating space.
        if ( d->isCapped() && !god) {
            BSONObj temp = BSONObj( reinterpret_cast<const char *>( obuf ) );
            Status ret = collection->getIndexCatalog()->checkNoIndexConflicts( temp );
            uassert(12582, "duplicate key insert for unique index of capped collection", ret.isOK() );
        }

        DiskLoc loc = allocateSpaceForANewRecord(ns, d, lenWHdr, god);

        if ( loc.isNull() ) {
            string errmsg = str::stream() << "insert: couldn't alloc space for object ns:" << ns
                                          << " capped:" << d->isCapped();
            log() << errmsg;
            uasserted( 17248, errmsg );
        }

        Record *r = loc.rec();
        {
            verify( r->lengthWithHeaders() >= lenWHdr );
            r = (Record*) getDur().writingPtr(r, lenWHdr);
            if( idToInsert.needed() ) {
                /* a little effort was made here to avoid a double copy when we add an ID */
                int originalSize = *((int*) obuf);
                ((int&)*r->data()) = originalSize + idToInsert.size();
                memcpy(r->data()+4, idToInsert.rawdata(), idToInsert.size());
                memcpy(r->data()+4+idToInsert.size(), ((char*)obuf)+4, originalSize-4);
            }
            else {
                if( obuf ) // obuf can be null from internal callers
                    memcpy(r->data(), obuf, len);
            }
        }

        addRecordToRecListInExtent(r, loc);

        d->incrementStats( r->netLength(), 1 );

        // we don't bother resetting query optimizer stats for the god tables - also god is true when adding a btree bucket
        if ( !god )
            collection->infoCache()->notifyOfWriteOp();

        /* add this record to our indexes */
        if ( d->getTotalIndexCount() > 0 ) {
            try {
                BSONObj obj(r->data());
                collection->getIndexCatalog()->indexRecord(obj, loc);
            }
            catch( AssertionException& e ) {
                // should be a dup key error on _id index
                if( d->isCapped() ) {
                    massert( 12583, "unexpected index insertion failure on capped collection", !d->isCapped() );
                    string s = e.toString();
                    s += " : on addIndex/capped - collection and its index will not match";
                    setLastError(0, s.c_str());
                    error() << s << endl;
                }
                else {
                    // normal case -- we can roll back
                    _deleteRecord(d, ns, r, loc);
                    throw;
                }
            }
        }

        d->paddingFits();

        return loc;
    }
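When a generated _id must be prepended, the code splices it into the record buffer instead of rebuilding the BSON object: write the new total length, copy the _id bytes, then copy the original object past its own 4-byte length prefix. A self-contained illustration of that splice on a plain length-prefixed buffer (layout simplified, not real BSON):

#include <cstdint>
#include <cstring>
#include <iostream>
#include <vector>

// Prepend 'field' to a length-prefixed buffer without rebuilding the object:
// one write of the new length plus two memcpy calls, like the insert path.
std::vector<char> spliceField(const std::vector<char>& obj,
                              const std::vector<char>& field) {
    int32_t originalSize;
    std::memcpy(&originalSize, obj.data(), 4);  // native-endian length prefix

    std::vector<char> out(originalSize + field.size());
    const int32_t newSize = static_cast<int32_t>(out.size());
    std::memcpy(out.data(), &newSize, 4);                       // new total length
    std::memcpy(out.data() + 4, field.data(), field.size());    // spliced field
    std::memcpy(out.data() + 4 + field.size(), obj.data() + 4,  // original body
                originalSize - 4);
    return out;
}

int main() {
    std::vector<char> obj{0, 0, 0, 0, 'a', 'b', 'c'};
    const int32_t len = 7;  // prefix counts itself, as in BSON
    std::memcpy(obj.data(), &len, 4);
    std::cout << spliceField(obj, {'_', 'i', 'd'}).size() << "\n";  // 10
}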
Example No. 13
0
void BackgroundSync::_produce(
    OperationContext* txn,
    ReplicationCoordinatorExternalState* replicationCoordinatorExternalState) {
    // this oplog reader does not do a handshake because we don't want the server it's syncing
    // from to track how far it has synced
    {
        stdx::unique_lock<stdx::mutex> lock(_mutex);
        if (_lastOpTimeFetched.isNull()) {
            // We are still initial syncing and waiting for this to be set, so there is
            // no one to sync from yet.
            lock.unlock();
            sleepsecs(1);
            return;
        }

        if (_replCoord->isWaitingForApplierToDrain() || _replCoord->getMemberState().primary() ||
            inShutdownStrict()) {
            return;
        }
    }

    while (MONGO_FAIL_POINT(rsBgSyncProduce)) {
        sleepmillis(0);
    }


    // find a target to sync from the last optime fetched
    OpTime lastOpTimeFetched;
    HostAndPort source;
    {
        stdx::unique_lock<stdx::mutex> lock(_mutex);
        lastOpTimeFetched = _lastOpTimeFetched;
        _syncSourceHost = HostAndPort();
    }
    SyncSourceResolverResponse syncSourceResp =
        _syncSourceResolver.findSyncSource(txn, lastOpTimeFetched);

    if (syncSourceResp.syncSourceStatus == ErrorCodes::OplogStartMissing) {
        // All (accessible) sync sources were too stale.
        error() << "too stale to catch up -- entering maintenance mode";
        log() << "Our newest OpTime : " << lastOpTimeFetched;
        log() << "Earliest OpTime available is " << syncSourceResp.earliestOpTimeSeen;
        log() << "See http://dochub.mongodb.org/core/resyncingaverystalereplicasetmember";
        StorageInterface::get(txn)
            ->setMinValid(txn, {lastOpTimeFetched, syncSourceResp.earliestOpTimeSeen});
        auto status = _replCoord->setMaintenanceMode(true);
        if (!status.isOK()) {
            warning() << "Failed to transition into maintenance mode.";
        }
        bool worked = _replCoord->setFollowerMode(MemberState::RS_RECOVERING);
        if (!worked) {
            warning() << "Failed to transition into " << MemberState(MemberState::RS_RECOVERING)
                      << ". Current state: " << _replCoord->getMemberState();
        }
        return;
    } else if (syncSourceResp.isOK() && !syncSourceResp.getSyncSource().empty()) {
        stdx::lock_guard<stdx::mutex> lock(_mutex);
        _syncSourceHost = syncSourceResp.getSyncSource();
        source = _syncSourceHost;
    } else {
        if (!syncSourceResp.isOK()) {
            log() << "failed to find sync source, received error "
                  << syncSourceResp.syncSourceStatus.getStatus();
        }
        // No sync source found.
        sleepsecs(1);
        return;
    }

    long long lastHashFetched;
    {
        stdx::lock_guard<stdx::mutex> lock(_mutex);
        if (_stopped) {
            return;
        }
        lastOpTimeFetched = _lastOpTimeFetched;
        lastHashFetched = _lastFetchedHash;
        _replCoord->signalUpstreamUpdater();
    }

    // "lastFetched" not used. Already set in _enqueueDocuments.
    Status fetcherReturnStatus = Status::OK();
    DataReplicatorExternalStateBackgroundSync dataReplicatorExternalState(
        _replCoord, replicationCoordinatorExternalState, this);
    OplogFetcher* oplogFetcher;
    try {
        auto config = _replCoord->getConfig();
        auto onOplogFetcherShutdownCallbackFn =
            [&fetcherReturnStatus](const Status& status, const OpTimeWithHash& lastFetched) {
                fetcherReturnStatus = status;
            };

        stdx::lock_guard<stdx::mutex> lock(_mutex);
        _oplogFetcher =
            stdx::make_unique<OplogFetcher>(&_threadPoolTaskExecutor,
                                            OpTimeWithHash(lastHashFetched, lastOpTimeFetched),
                                            source,
                                            NamespaceString(rsOplogName),
                                            config,
                                            &dataReplicatorExternalState,
                                            stdx::bind(&BackgroundSync::_enqueueDocuments,
                                                       this,
                                                       stdx::placeholders::_1,
                                                       stdx::placeholders::_2,
                                                       stdx::placeholders::_3,
                                                       stdx::placeholders::_4),
                                            onOplogFetcherShutdownCallbackFn);
        oplogFetcher = _oplogFetcher.get();
    } catch (const mongo::DBException& ex) {
        fassertFailedWithStatus(34440, exceptionToStatus());
    }

    LOG(1) << "scheduling fetcher to read remote oplog on " << _syncSourceHost << " starting at "
           << oplogFetcher->getCommandObject_forTest()["filter"];
    auto scheduleStatus = oplogFetcher->startup();
    if (!scheduleStatus.isOK()) {
        warning() << "unable to schedule fetcher to read remote oplog on " << source << ": "
                  << scheduleStatus;
        return;
    }

    oplogFetcher->join();
    LOG(1) << "fetcher stopped reading remote oplog on " << source;

    // If the background sync is stopped after the fetcher is started, we need to
    // re-evaluate our sync source and oplog common point.
    if (isStopped()) {
        return;
    }

    if (fetcherReturnStatus.code() == ErrorCodes::OplogOutOfOrder) {
        // This is bad because it means that our source
        // has not returned oplog entries in ascending ts order, and they need to be.

        warning() << fetcherReturnStatus.toString();
        // Do not blacklist the server here, it will be blacklisted when we try to reuse it,
        // if it can't return a matching oplog start from the last fetch oplog ts field.
        return;
    } else if (fetcherReturnStatus.code() == ErrorCodes::OplogStartMissing ||
               fetcherReturnStatus.code() == ErrorCodes::RemoteOplogStale) {
        // Rollback is a synchronous operation that uses the task executor and may not be
        // executed inside the fetcher callback.
        const int messagingPortTags = 0;
        ConnectionPool connectionPool(messagingPortTags);
        std::unique_ptr<ConnectionPool::ConnectionPtr> connection;
        auto getConnection = [&connection, &connectionPool, source]() -> DBClientBase* {
            if (!connection.get()) {
                connection.reset(new ConnectionPool::ConnectionPtr(
                    &connectionPool, source, Date_t::now(), kOplogSocketTimeout));
            }
            return connection->get();
        };

        {
            stdx::lock_guard<stdx::mutex> lock(_mutex);
            lastOpTimeFetched = _lastOpTimeFetched;
        }

        log() << "Starting rollback due to " << fetcherReturnStatus;

        // Wait till all buffered oplog entries have drained and been applied.
        auto lastApplied = _replCoord->getMyLastAppliedOpTime();
        if (lastApplied != lastOpTimeFetched) {
            log() << "Waiting for all operations from " << lastApplied << " until "
                  << lastOpTimeFetched << " to be applied before starting rollback.";
            while (lastOpTimeFetched > (lastApplied = _replCoord->getMyLastAppliedOpTime())) {
                sleepmillis(10);
                if (isStopped() || inShutdown()) {
                    return;
                }
            }
        }
        // check that we are at minvalid, otherwise we cannot roll back as we may be in an
        // inconsistent state
        BatchBoundaries boundaries = StorageInterface::get(txn)->getMinValid(txn);
        if (!boundaries.start.isNull() || boundaries.end > lastApplied) {
            fassertNoTrace(18750,
                           Status(ErrorCodes::UnrecoverableRollbackError,
                                  str::stream()
                                      << "need to rollback, but in inconsistent state. "
                                      << "minvalid: " << boundaries.end.toString()
                                      << " > our last optime: " << lastApplied.toString()));
        }

        _rollback(txn, source, getConnection);
        stop();
    } else if (fetcherReturnStatus == ErrorCodes::InvalidBSON) {
        Seconds blacklistDuration(60);
        warning() << "Fetcher got invalid BSON while querying oplog. Blacklisting sync source "
                  << source << " for " << blacklistDuration << ".";
        _replCoord->blacklistSyncSource(source, Date_t::now() + blacklistDuration);
    } else if (!fetcherReturnStatus.isOK()) {
        warning() << "Fetcher error querying oplog: " << fetcherReturnStatus.toString();
    }
}
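Before rolling back, _produce() blocks until every buffered operation has been applied, polling the applied optime against the last fetched one and bailing out on shutdown. A stripped-down version of that wait, with atomics standing in for the replication coordinator:

#include <atomic>
#include <chrono>
#include <iostream>
#include <thread>

std::atomic<long long> lastApplied{0};

// Block until the applier reaches 'lastFetched', as _produce() does before
// rollback; gives up and returns false when shutdown is requested.
bool waitUntilApplied(long long lastFetched, const std::atomic<bool>& shutdown) {
    while (lastApplied.load() < lastFetched) {
        if (shutdown.load())
            return false;
        std::this_thread::sleep_for(std::chrono::milliseconds(10));
    }
    return true;
}

int main() {
    std::atomic<bool> shutdown{false};
    std::thread applier([] {
        for (long long op = 1; op <= 5; ++op) {
            std::this_thread::sleep_for(std::chrono::milliseconds(20));
            lastApplied.store(op);
        }
    });
    std::cout << waitUntilApplied(5, shutdown) << "\n";  // 1
    applier.join();
}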
Example No. 14
0
        bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
            int s = 0;
            bool found = false;

            // TODO: convert to ServerParameters -- SERVER-10515

            if( cmdObj.hasElement("journalCommitInterval") ) {
                if (isMongos()) {
                    errmsg = "cannot set journalCommitInterval on a mongos";
                    return false;
                }
                if(!isJournalingEnabled()) {
                    errmsg = "journaling is off";
                    return false;
                }
                int x = (int) cmdObj["journalCommitInterval"].Number();
                verify( x > 1 && x < 500 );
                setJournalCommitInterval(x);
                log() << "setParameter journalCommitInterval=" << x << endl;
                s++;
            }
            if( cmdObj.hasElement( "traceExceptions" ) ) {
                if( s == 0 ) result.append( "was", DBException::traceExceptions );
                DBException::traceExceptions = cmdObj["traceExceptions"].Bool();
                s++;
            }
            if( cmdObj.hasElement( "replMonitorMaxFailedChecks" ) ) {
                if( s == 0 ) result.append( "was", ReplicaSetMonitor::maxConsecutiveFailedChecks );
                ReplicaSetMonitor::maxConsecutiveFailedChecks =
                    cmdObj["replMonitorMaxFailedChecks"].numberInt();
                s++;
            }

            const ServerParameter::Map& m = ServerParameterSet::getGlobal()->getMap();
            BSONObjIterator i( cmdObj );
            i.next(); // skip past command name
            while ( i.more() ) {
                BSONElement e = i.next();
                ServerParameter::Map::const_iterator j = m.find( e.fieldName() );
                if ( j == m.end() )
                    continue;

                if ( ! j->second->allowedToChangeAtRuntime() ) {
                    errmsg = str::stream()
                        << "not allowed to change ["
                        << e.fieldName()
                        << "] at runtime";
                    return false;
                }

                if ( s == 0 )
                    j->second->append( result, "was" );

                Status status = j->second->set( e );
                if ( status.isOK() ) {
                    s++;
                    continue;
                }
                errmsg = status.reason();
                result.append( "code", status.code() );
                return false;
            }

            if( s == 0 && !found ) {
                errmsg = "no option found to set, use help:true to see options ";
                return false;
            }

            return true;
        }
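The loop over cmdObj is a registry lookup: each field is matched against the ServerParameter map, the previous value is echoed once as "was", and the first failure aborts the command. The same pattern in a self-contained sketch (registry and value types are illustrative):

#include <iostream>
#include <map>
#include <string>

struct Parameter {
    int value;
    bool runtimeChangeable;
};

// Apply 'requested' settings against a parameter registry the way the loop
// above does: skip unknown fields, reject non-runtime parameters, echo the
// old value once as "was", and count successful sets.
bool setParameters(std::map<std::string, Parameter>& registry,
                   const std::map<std::string, int>& requested,
                   std::string* errmsg) {
    int numSet = 0;
    for (const auto& [name, value] : requested) {
        auto it = registry.find(name);
        if (it == registry.end())
            continue;  // unknown field: ignored, just like the command loop
        if (!it->second.runtimeChangeable) {
            *errmsg = "not allowed to change [" + name + "] at runtime";
            return false;
        }
        if (numSet == 0)
            std::cout << "was: " << it->second.value << "\n";
        it->second.value = value;
        ++numSet;
    }
    if (numSet == 0) {
        *errmsg = "no option found to set";
        return false;
    }
    return true;
}

int main() {
    std::map<std::string, Parameter> registry{{"traceExceptions", {0, true}}};
    std::string err;
    std::cout << setParameters(registry, {{"traceExceptions", 1}}, &err) << "\n";  // 1
}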
Example No. 15
0
Status ModifierPullAll::prepare(mutablebson::Element root,
                                StringData matchedField,
                                ExecInfo* execInfo) {
    _preparedState.reset(new PreparedState(&root.getDocument()));

    // If we have a $-positional field, it is time to bind it to an actual field part.
    if (_positionalPathIndex) {
        if (matchedField.empty()) {
            return Status(ErrorCodes::BadValue,
                          str::stream() << "The positional operator did not find the match "
                                           "needed from the query. Unexpanded update: "
                                        << _fieldRef.dottedField());
        }
        _fieldRef.setPart(_positionalPathIndex, matchedField);
    }

    // Locate the field name in 'root'. Note that if we don't have the full path in the
    // doc, there isn't anything to unset, really.
    Status status = pathsupport::findLongestPrefix(
        _fieldRef, root, &_preparedState->pathFoundIndex, &_preparedState->pathFoundElement);
    // Check if we didn't find the full path
    if (status.isOK()) {
        const bool destExists = (_preparedState->pathFoundIndex == (_fieldRef.numParts() - 1));

        if (!destExists) {
            execInfo->noOp = true;
        } else {
            // If the path exists, we require the target field to be already an
            // array.
            if (_preparedState->pathFoundElement.getType() != Array) {
                mb::Element idElem = mb::findElementNamed(root.leftChild(), "_id");
                return Status(
                    ErrorCodes::BadValue,
                    str::stream() << "Can only apply $pullAll to an array. " << idElem.toString()
                                  << " has the field "
                                  << _preparedState->pathFoundElement.getFieldName()
                                  << " of non-array type "
                                  << typeName(_preparedState->pathFoundElement.getType()));
            }

            // No children, nothing to do -- not an error state
            if (!_preparedState->pathFoundElement.hasChildren()) {
                execInfo->noOp = true;
            } else {
                mutablebson::Element elem = _preparedState->pathFoundElement.leftChild();
                while (elem.ok()) {
                    if (std::find_if(_elementsToFind.begin(),
                                     _elementsToFind.end(),
                                     mutableElementEqualsBSONElement(elem, _collator)) !=
                        _elementsToFind.end()) {
                        _preparedState->elementsToRemove.push_back(elem);
                    }
                    elem = elem.rightSibling();
                }

                // Nothing to remove so it is a noOp.
                if (_preparedState->elementsToRemove.empty())
                    execInfo->noOp = true;
            }
        }
    } else {
        // Let the caller know we can't do anything given the mod, _fieldRef, and doc.
        execInfo->noOp = true;

        // okay if path not found
        if (status.code() == ErrorCodes::NonExistentPath)
            status = Status::OK();
    }

    // Let the caller know what field we care about
    execInfo->fieldRef[0] = &_fieldRef;

    return status;
}
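
The matching phase above walks the array's children and collects every element equal to one of the $pullAll literals; emptiness of that collection is what makes the mod a no-op. The same find-and-collect shape, reduced to plain ints and ignoring the collation detail:

#include <algorithm>
#include <iostream>
#include <vector>

int main() {
    // Hypothetical stand-ins for the target array and the $pullAll argument list.
    std::vector<int> array = {1, 2, 3, 2, 4};
    std::vector<int> toFind = {2, 4};

    std::vector<int> toRemove;  // analogous to _preparedState->elementsToRemove
    for (int elem : array) {
        if (std::find(toFind.begin(), toFind.end(), elem) != toFind.end())
            toRemove.push_back(elem);
    }

    bool noOp = toRemove.empty();  // nothing matched, so the mod would be a no-op
    std::cout << "matched " << toRemove.size() << (noOp ? " (noOp)" : "") << "\n";
}
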
Ejemplo n.º 16
0
    Status ModifierBit::prepare(mutablebson::Element root,
                                const StringData& matchedField,
                                ExecInfo* execInfo) {

        _preparedState.reset(new PreparedState(root.getDocument()));

        // If we have a $-positional field, it is time to bind it to an actual field part.
        if (_posDollar) {
            if (matchedField.empty()) {
                return Status(ErrorCodes::BadValue, "matched field not provided");
            }
            _preparedState->boundDollar = matchedField.toString();
            _fieldRef.setPart(_posDollar, _preparedState->boundDollar);
        }

        // Locate the field name in 'root'.
        Status status = pathsupport::findLongestPrefix(_fieldRef,
                                                       root,
                                                       &_preparedState->idxFound,
                                                       &_preparedState->elemFound);

        // FindLongestPrefix may say the path does not exist at all, which is fine here, or
        // that the path was not viable or otherwise wrong, in which case, the mod cannot
        // proceed.
        if (status.code() == ErrorCodes::NonExistentPath) {
            _preparedState->elemFound = root.getDocument().end();
        }
        else if (!status.isOK()) {
            return status;
        }

        // We register interest in the field name. The driver needs this info to sort out if
        // there is any conflict among mods.
        execInfo->fieldRef[0] = &_fieldRef;

        //
        // in-place and no-op logic
        //

        // If the field path is not fully present, then this mod cannot be in place, nor is a
        // noOp.
        if (!_preparedState->elemFound.ok() ||
            _preparedState->idxFound < (_fieldRef.numParts() - 1)) {
            // If no target element exists, the value we will write is the result of applying
            // the operation to a zero-initialized integer element.
            _preparedState->newValue = apply(SafeNum(static_cast<int>(0)));
            return Status::OK();
        }

        if (!_preparedState->elemFound.isIntegral())
            return Status(
                ErrorCodes::BadValue,
                "Cannot apply $bit to a value of non-integral type");

        const SafeNum currentValue = _preparedState->elemFound.getValueSafeNum();

        // Apply the op over the existing value and the mod value, and capture the result.
        _preparedState->newValue = apply(currentValue);

        if (!_preparedState->newValue.isValid())
            return Status(ErrorCodes::BadValue,
                          "Failed to apply $bit to current value");

        // If the values are identical (same type, same value), then this is a no-op.
        if (_preparedState->newValue.isIdentical(currentValue)) {
            _preparedState->noOp = execInfo->noOp = true;
            return Status::OK();
        }

        return Status::OK();
    }
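
A compressed sketch of the $bit flow above: treat a missing field as a zero-initialized integer, apply the bitwise op, and report a no-op when the result is identical to the current value (SafeNum's type-aware identity check is reduced here to plain int equality):

#include <iostream>
#include <optional>

int main() {
    // Hypothetical stand-in: the field may be absent, as in the missing-path case above.
    std::optional<int> current;  // e.g. path not found in the document

    auto apply = [](int v) { return v | 0b0100; };  // a {$bit: {field: {or: 4}}} style op

    int base = current.value_or(0);  // zero-initialized when the field is absent
    int newValue = apply(base);

    bool noOp = current.has_value() && newValue == *current;
    std::cout << "new=" << newValue << (noOp ? " (noOp)" : "") << "\n";
}
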
Ejemplo n.º 17
0
    // throws DBException
    void buildAnIndex( OperationContext* txn,
                       Collection* collection,
                       IndexCatalogEntry* btreeState,
                       bool mayInterrupt ) {

        const string ns = collection->ns().ns(); // our copy
        verify(txn->lockState()->isWriteLocked(ns));

        const IndexDescriptor* idx = btreeState->descriptor();
        const BSONObj& idxInfo = idx->infoObj();

        LOG(0) << "build index on: " << ns
               << " properties: " << idx->toString() << endl;
        audit::logCreateIndex( currentClient.get(), &idxInfo, idx->indexName(), ns );

        Timer t;

        // This is so that people know there are more keys to look at when doing
        // things like in-place updates, etc.
        collection->infoCache()->addedIndex();

        if ( collection->numRecords() == 0 ) {
            Status status = btreeState->accessMethod()->initializeAsEmpty(txn);
            massert( 17343,
                     str::stream() << "IndexAccessMethod::initializeAsEmpty failed" << status.toString(),
                     status.isOK() );
            LOG(0) << "\t added index to empty collection";
            return;
        }

        scoped_ptr<BackgroundOperation> backgroundOperation;
        bool doInBackground = false;

        if ( idxInfo["background"].trueValue() && !inDBRepair ) {
            doInBackground = true;
            backgroundOperation.reset( new BackgroundOperation(ns) );
            uassert( 13130,
                     "can't start bg index b/c in recursive lock (db.eval?)",
                     !txn->lockState()->isRecursive() );
            log() << "\t building index in background";
        }

        Status status = btreeState->accessMethod()->initializeAsEmpty(txn);
        massert( 17342,
                 str::stream()
                 << "IndexAccessMethod::initializeAsEmpty failed"
                 << status.toString(),
                 status.isOK() );

        IndexAccessMethod* bulk = doInBackground ?
            NULL : btreeState->accessMethod()->initiateBulk(txn);
        scoped_ptr<IndexAccessMethod> bulkHolder(bulk);
        IndexAccessMethod* iam = bulk ? bulk : btreeState->accessMethod();

        if ( bulk )
            log() << "\t building index using bulk method";

        unsigned long long n = addExistingToIndex( txn,
                                                   collection,
                                                   btreeState->descriptor(),
                                                   iam,
                                                   doInBackground );

        if ( bulk ) {
            LOG(1) << "\t bulk commit starting";
            std::set<DiskLoc> dupsToDrop;

            Status status = btreeState->accessMethod()->commitBulk( bulk,
                                                                    mayInterrupt,
                                                                    &dupsToDrop );

            // Code above us expects a uassert in case of dupkey errors.
            if (ErrorCodes::DuplicateKey == status.code()) {
                uassertStatusOK(status);
            }

            // Any other errors are probably bad and deserve a massert.
            massert( 17398,
                     str::stream() << "commitBulk failed: " << status.toString(),
                     status.isOK() );

            if ( dupsToDrop.size() )
                log() << "\t bulk dropping " << dupsToDrop.size() << " dups";

            for( set<DiskLoc>::const_iterator i = dupsToDrop.begin(); i != dupsToDrop.end(); ++i ) {
                BSONObj toDelete;
                collection->deleteDocument( txn,
                                            *i,
                                            false /* cappedOk */,
                                            true /* noWarn */,
                                            &toDelete );
                if (repl::getGlobalReplicationCoordinator()->canAcceptWritesForDatabase(
                        collection->ns().db())) {
                    repl::logOp(txn, "d", ns.c_str(), toDelete);
                }
                
                txn->recoveryUnit()->commitIfNeeded();

                RARELY if ( mayInterrupt ) {
                    txn->checkForInterrupt();
                }
            }
        }

        verify( !btreeState->head().isNull() );
        LOG(0) << "build index done.  scanned " << n << " total records. "
               << t.millis() / 1000.0 << " secs" << endl;

        // this one is so people know that the index is finished
        collection->infoCache()->addedIndex();
    }
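
One detail worth isolating from the build above is the ownership pattern around the bulk builder: the optional bulk object is owned by a smart pointer, while a raw pointer selects whichever access method is actually active. A standalone sketch with invented types:

#include <iostream>
#include <memory>

// Hypothetical sketch of the ownership pattern above: an optional bulk strategy is
// owned by a smart pointer while a raw pointer selects whichever inserter is active.
struct Inserter {
    virtual ~Inserter() = default;
    virtual void insert(int key) = 0;
};

struct DirectInserter : Inserter {
    void insert(int key) override { std::cout << "direct insert " << key << "\n"; }
};

struct BulkInserter : Inserter {
    void insert(int key) override { std::cout << "bulk insert " << key << "\n"; }
};

int main() {
    bool doInBackground = false;
    DirectInserter direct;

    // Bulk is only used for foreground builds, mirroring the logic above.
    std::unique_ptr<BulkInserter> bulkHolder(
        doInBackground ? nullptr : new BulkInserter());
    Inserter* active = bulkHolder ? bulkHolder.get() : &direct;

    active->insert(42);
}
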
Ejemplo n.º 18
0
    Status logConfigChange(const ConnectionString& configLoc,
                           const string& clientHost,
                           const string& ns,
                           const string& description,
                           const BSONObj& details)
    {
        //
        // The code for writing to the changelog collection exists elsewhere - we duplicate here to
        // avoid dependency issues.
        // TODO: Merge again once config.cpp is cleaned up.
        //

        string changeID = stream() << getHostNameCached() << "-" << terseCurrentTime() << "-"
                                   << OID::gen();

        ChangelogType changelog;
        changelog.setChangeID(changeID);
        changelog.setServer(getHostNameCached());
        changelog.setClientAddr(clientHost == "" ? "N/A" : clientHost);
        changelog.setTime(jsTime());
        changelog.setWhat(description);
        changelog.setNS(ns);
        changelog.setDetails(details);

        log() << "about to log new metadata event: " << changelog.toBSON() << endl;

        scoped_ptr<ScopedDbConnection> connPtr;

        try {
            connPtr.reset(new ScopedDbConnection(configLoc, 30));
            ScopedDbConnection& conn = *connPtr;

            // TODO: better way here
            static bool createdCapped = false;
            if (!createdCapped) {

                try {
                    conn->createCollection(ChangelogType::ConfigNS, 1024 * 1024 * 10, true);
                }
                catch (const DBException& e) {
                    // Ignore the failure: another process may have created the
                    // collection already. If there is still a problem, it will be
                    // caught by the outer try block.
                    LOG(1) << "couldn't create the changelog, continuing " << e << endl;
                }

                createdCapped = true;
            }
            connPtr->done();
        }
        catch (const DBException& e) {
            // if we got here, it means the config change is only in the log,
            // it didn't make it to config.changelog
            log() << "not logging config change: " << changeID << causedBy(e) << endl;
            return e.toStatus();
        }

        Status result = grid.catalogManager()->insert(ChangelogType::ConfigNS,
                                                      changelog.toBSON(),
                                                      NULL);
        if (!result.isOK()) {
            return Status(result.code(),
                          str::stream() << "failed to write to changelog: "
                                        << result.reason());
        }

        return result;
    }
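
The changeID above is the hostname, a terse timestamp, and a fresh OID joined with dashes, which makes entries both sortable and globally unique. A rough standalone composition of the same shape (std::ostringstream and a counter stand in for stream() and OID::gen()):

#include <chrono>
#include <iostream>
#include <sstream>
#include <string>

// Hypothetical sketch: build a changelog id as "<host>-<epoch-seconds>-<counter>",
// standing in for getHostNameCached(), terseCurrentTime(), and OID::gen().
std::string makeChangeId(const std::string& host) {
    static int counter = 0;
    auto secs = std::chrono::duration_cast<std::chrono::seconds>(
                    std::chrono::system_clock::now().time_since_epoch())
                    .count();
    std::ostringstream id;
    id << host << "-" << secs << "-" << ++counter;
    return id.str();
}

int main() {
    std::cout << makeChangeId("cfg01") << "\n";
}
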
Ejemplo n.º 19
0
    // static
    void Explain::explainStages(PlanExecutor* exec,
                                ExplainCommon::Verbosity verbosity,
                                BSONObjBuilder* out) {
        //
        // Step 1: run the stages as required by the verbosity level.
        //

        // Inspect the tree to see if there is a MultiPlanStage.
        MultiPlanStage* mps = getMultiPlanStage(exec->getRootStage());

        // Get stats of the winning plan from the trial period, if the verbosity level
        // is high enough and there was a runoff between multiple plans.
        auto_ptr<PlanStageStats> winningStatsTrial;
        if (verbosity >= ExplainCommon::EXEC_ALL_PLANS && NULL != mps) {
            winningStatsTrial.reset(exec->getStats());
            invariant(winningStatsTrial.get());
        }

        // If we need execution stats, then run the plan in order to gather the stats.
        Status executePlanStatus = Status::OK();
        if (verbosity >= ExplainCommon::EXEC_STATS) {
            executePlanStatus = exec->executePlan();
        }

        //
        // Step 2: collect plan stats (which also give the structure of the plan tree).
        //

        // Get stats for the winning plan.
        scoped_ptr<PlanStageStats> winningStats(exec->getStats());

        // Get stats for the rejected plans, if more than one plan was considered.
        OwnedPointerVector<PlanStageStats> allPlansStats;
        if (NULL != mps) {
            allPlansStats = mps->generateCandidateStats();
        }

        //
        // Step 3: use the stats trees to produce explain BSON.
        //

        CanonicalQuery* query = exec->getCanonicalQuery();
        if (verbosity >= ExplainCommon::QUERY_PLANNER) {
            generatePlannerInfo(query, winningStats.get(), allPlansStats.vector(), out);
        }

        if (verbosity >= ExplainCommon::EXEC_STATS) {
            BSONObjBuilder execBob(out->subobjStart("executionStats"));

            // If there is an execution error while running the query, the error is reported under
            // the "executionStats" section and the explain as a whole succeeds.
            execBob.append("executionSuccess", executePlanStatus.isOK());
            if (!executePlanStatus.isOK()) {
                execBob.append("errorMessage", executePlanStatus.reason());
                execBob.append("errorCode", executePlanStatus.code());
            }

            // Generate exec stats BSON for the winning plan.
            OperationContext* opCtx = exec->getOpCtx();
            long long totalTimeMillis = opCtx->getCurOp()->elapsedMillis();
            generateExecStats(winningStats.get(), verbosity, &execBob, totalTimeMillis);

            // Also generate exec stats for all plans, if the verbosity level is high enough.
            // These stats reflect what happened during the trial period that ranked the plans.
            if (verbosity >= ExplainCommon::EXEC_ALL_PLANS) {
                // If we ranked multiple plans against each other, then add stats collected
                // from the trial period of the winning plan. The "allPlansExecution" section
                // will contain an apples-to-apples comparison of the winning plan's stats against
                // all rejected plans' stats collected during the trial period.
                if (NULL != mps) {
                    invariant(winningStatsTrial.get());
                    allPlansStats.push_back(winningStatsTrial.release());
                }

                BSONArrayBuilder allPlansBob(execBob.subarrayStart("allPlansExecution"));
                for (size_t i = 0; i < allPlansStats.size(); ++i) {
                    BSONObjBuilder planBob(allPlansBob.subobjStart());
                    generateExecStats(allPlansStats[i], verbosity, &planBob);
                    planBob.doneFast();
                }
                allPlansBob.doneFast();
            }

            execBob.doneFast();
        }

        generateServerInfo(out);
    }
Ejemplo n.º 20
0
    static void buildFsyncErrorFrom( const Status& status, FsyncResponse* response ) {
        response->setOk( false );
        response->setErrCode( static_cast<int>( status.code() ) );
        response->setErrMessage( status.reason() );
    }
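
This helper, like buildErrorFrom in a later example, copies a Status's code and reason into a response object's error fields. Since the pattern repeats across response types, a standalone sketch could generalize it as a function template (the Status struct here is a simplified stand-in, not MongoDB's):

#include <iostream>
#include <string>

// Hypothetical minimal Status.
struct Status {
    int code;
    std::string reason;
};

// Works for any response type exposing setOk/setErrCode/setErrMessage.
template <typename Response>
void buildErrorFrom(const Status& status, Response* response) {
    response->setOk(false);
    response->setErrCode(status.code);
    response->setErrMessage(status.reason);
}

struct FsyncResponse {
    void setOk(bool v) { ok = v; }
    void setErrCode(int c) { code = c; }
    void setErrMessage(const std::string& m) { msg = m; }
    bool ok = true;
    int code = 0;
    std::string msg;
};

int main() {
    FsyncResponse r;
    buildErrorFrom(Status{11000, "duplicate key"}, &r);
    std::cout << r.code << " " << r.msg << "\n";
}
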
Ejemplo n.º 21
0
    KVStorageEngine::KVStorageEngine( KVEngine* engine )
        : _engine( engine )
        , _supportsDocLocking(_engine->supportsDocLocking()) {

        OperationContextNoop opCtx( _engine->newRecoveryUnit() );
        {
            WriteUnitOfWork uow( &opCtx );

            Status status = _engine->createRecordStore( &opCtx,
                                                        catalogInfo,
                                                        catalogInfo,
                                                        CollectionOptions() );
            // BadValue is usually caused by invalid configuration string.
            // We still fassert() but without a stack trace.
            if (status.code() == ErrorCodes::BadValue) {
                fassertFailedNoTrace(28562);
            }
            fassert( 28520, status );

            _catalogRecordStore.reset( _engine->getRecordStore( &opCtx,
                                                                catalogInfo,
                                                                catalogInfo,
                                                                CollectionOptions() ) );
            _catalog.reset( new KVCatalog( _catalogRecordStore.get(), _supportsDocLocking ) );
            _catalog->init( &opCtx );

            std::vector<std::string> collections;
            _catalog->getAllCollections( &collections );

            for ( size_t i = 0; i < collections.size(); i++ ) {
                std::string coll = collections[i];
                NamespaceString nss( coll );
                string dbName = nss.db().toString();

                // No rollback since this is only for committed dbs.
                KVDatabaseCatalogEntry*& db = _dbs[dbName];
                if ( !db ) {
                    db = new KVDatabaseCatalogEntry( dbName, this );
                }
                db->initCollection( &opCtx, coll );
            }

            uow.commit();
        }

        opCtx.recoveryUnit()->commitAndRestart();

        // now clean up orphaned idents

        {
            // get all idents
            std::set<std::string> allIdents;
            {
                std::vector<std::string> v = _engine->getAllIdents( &opCtx );
                allIdents.insert( v.begin(), v.end() );
                allIdents.erase( catalogInfo );
            }

            // remove ones still in use
            {
                vector<string> idents = _catalog->getAllIdents( &opCtx );
                for ( size_t i = 0; i < idents.size(); i++ ) {
                    allIdents.erase( idents[i] );
                }
            }

            for ( std::set<std::string>::const_iterator it = allIdents.begin();
                  it != allIdents.end();
                  ++it ) {
                const std::string& toRemove = *it;
                if ( !_catalog->isUserDataIdent( toRemove ) )
                    continue;
                log() << "dropping unused ident: " << toRemove;
                WriteUnitOfWork wuow( &opCtx );
                _engine->dropIdent( &opCtx, toRemove );
                wuow.commit();
            }
        }

    }
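
The orphan cleanup above is plain set arithmetic: every ident the engine knows about, minus the catalog's own ident, minus everything the catalog still references. The same computation in isolation, with made-up ident names:

#include <iostream>
#include <set>
#include <string>
#include <vector>

int main() {
    // Hypothetical stand-ins for _engine->getAllIdents() and _catalog->getAllIdents().
    std::vector<std::string> engineIdents = {"_mdb_catalog", "coll-1", "coll-2", "index-9"};
    std::vector<std::string> catalogIdents = {"coll-1", "index-9"};

    std::set<std::string> orphaned(engineIdents.begin(), engineIdents.end());
    orphaned.erase("_mdb_catalog");           // the catalog itself is never an orphan
    for (const auto& ident : catalogIdents)   // remove idents still in use
        orphaned.erase(ident);

    for (const auto& ident : orphaned)
        std::cout << "dropping unused ident: " << ident << "\n";  // prints coll-2
}
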
Ejemplo n.º 22
0
    /**
     * Upgrades v5 to v6.
     */
    bool doUpgradeV5ToV6(const ConnectionString& configLoc,
                         const VersionType& lastVersionInfo,
                         string* errMsg)
    {
        string dummy;
        if (!errMsg) errMsg = &dummy;

        verify(lastVersionInfo.getCurrentVersion() == UpgradeHistory_DummyBumpPre2_6);
        Status result = preUpgradeCheck(configLoc, lastVersionInfo, minMongoProcessVersion);

        if (!result.isOK()) {
            if (result.code() == ErrorCodes::ManualInterventionRequired) {
                *errMsg = cannotCleanupMessage;
            }
            else {
                *errMsg = result.toString();
            }

            return false;
        }

        // This is not needed because we are not actually going to make any modifications
        // on the other collections in the config server for this particular upgrade.
        // startConfigUpgrade(configLoc.toString(),
        //                    lastVersionInfo.getCurrentVersion(),
        //                    OID::gen());

        // If we actually need to modify something in the config servers these need to follow
        // after calling startConfigUpgrade(...):
        //
        // 1. Acquire necessary locks.
        // 2. Make a backup of the collections we are about to modify.
        // 3. Perform the upgrade process on the backup collection.
        // 4. Verify that no changes were made to the collections since the backup was performed.
        // 5. Call enterConfigUpgradeCriticalSection(configLoc.toString(),
        //    lastVersionInfo.getCurrentVersion()).
        // 6. Rename the backup collection to the name of the original collection with
        //    dropTarget set to true.

        // Make sure the { ts: 1 } index is not unique by dropping the existing one
        // and rebuilding the index with the right specification.

        const BSONObj lockIdxKey = BSON(LocksType::lockID() << 1);
        const NamespaceString indexNS(LocksType::ConfigNS);

        bool dropOk = false;
        try {
            ScopedDbConnection conn(configLoc);
            BSONObj dropResponse;
            dropOk = conn->runCommand(indexNS.db().toString(),
                                      BSON("dropIndexes" << indexNS.coll()
                                           << "index" << lockIdxKey),
                                      dropResponse);
            conn.done();
        }
        catch (const DBException& ex) {
            if (ex.getCode() == 13105) {
                // 13105 is the exception code from SyncClusterConnection::findOne that gets
                // thrown when one of the command responses has an "ok" field that is not true.
                dropOk = false;
            }
            else {
                *errMsg = str::stream() << "Failed to drop { ts: 1 } index" << causedBy(ex);
                return false;
            }
        }

        if (!dropOk && hasBadIndex(configLoc, errMsg)) {
            // Fail only if the index still exists.
            return false;
        }

        result = clusterCreateIndex(LocksType::ConfigNS,
                                    BSON(LocksType::lockID() << 1),
                                    false, // unique
                                    WriteConcernOptions::AllConfigs,
                                    NULL);

        if (!result.isOK()) {
            *errMsg = str::stream() << "error while creating { ts: 1 } index on config db"
                                    << causedBy(result);
            return false;
        }

        LOG(1) << "Checking to make sure that the right { ts: 1 } index is created...";

        if (hasBadIndex(configLoc, errMsg)) {
            return false;
        }

        // We're only after the version bump in commitConfigUpgrade here since we never
        // get into the critical section.
        Status commitStatus = commitConfigUpgrade(configLoc.toString(),
                                                  lastVersionInfo.getCurrentVersion(),
                                                  MIN_COMPATIBLE_CONFIG_VERSION,
                                                  CURRENT_CONFIG_VERSION);

        if (!commitStatus.isOK()) {
            *errMsg = commitStatus.toString();
            return false;
        }

        return true;
    }
Ejemplo n.º 23
0
StatusWith<CursorResponse> CursorResponse::parseFromBSON(const BSONObj& cmdResponse) {
    Status cmdStatus = getStatusFromCommandResult(cmdResponse);
    if (!cmdStatus.isOK()) {
        if (ErrorCodes::isStaleShardVersionError(cmdStatus.code())) {
            auto vWanted = ChunkVersion::fromBSON(cmdResponse, "vWanted");
            auto vReceived = ChunkVersion::fromBSON(cmdResponse, "vReceived");
            if (!vWanted.hasEqualEpoch(vReceived)) {
                return Status(ErrorCodes::StaleEpoch, cmdStatus.reason());
            }
        }
        return cmdStatus;
    }

    std::string fullns;
    BSONObj batchObj;
    CursorId cursorId;

    BSONElement cursorElt = cmdResponse[kCursorField];
    if (cursorElt.type() != BSONType::Object) {
        return {ErrorCodes::TypeMismatch,
                str::stream() << "Field '" << kCursorField << "' must be a nested object in: "
                              << cmdResponse};
    }
    BSONObj cursorObj = cursorElt.Obj();

    BSONElement idElt = cursorObj[kIdField];
    if (idElt.type() != BSONType::NumberLong) {
        return {
            ErrorCodes::TypeMismatch,
            str::stream() << "Field '" << kIdField << "' must be of type long in: " << cmdResponse};
    }
    cursorId = idElt.Long();

    BSONElement nsElt = cursorObj[kNsField];
    if (nsElt.type() != BSONType::String) {
        return {ErrorCodes::TypeMismatch,
                str::stream() << "Field '" << kNsField << "' must be of type string in: "
                              << cmdResponse};
    }
    fullns = nsElt.String();

    BSONElement batchElt = cursorObj[kBatchField];
    if (batchElt.eoo()) {
        batchElt = cursorObj[kBatchFieldInitial];
    }

    if (batchElt.type() != BSONType::Array) {
        return {ErrorCodes::TypeMismatch,
                str::stream() << "Must have array field '" << kBatchFieldInitial << "' or '"
                              << kBatchField
                              << "' in: "
                              << cmdResponse};
    }
    batchObj = batchElt.Obj();

    std::vector<BSONObj> batch;
    for (BSONElement elt : batchObj) {
        if (elt.type() != BSONType::Object) {
            return {ErrorCodes::BadValue,
                    str::stream() << "getMore response batch contains a non-object element: "
                                  << elt};
        }

        batch.push_back(elt.Obj());
    }

    for (auto& doc : batch) {
        doc.shareOwnershipWith(cmdResponse);
    }

    auto latestOplogTimestampElem = cmdResponse[kInternalLatestOplogTimestampField];
    if (latestOplogTimestampElem && latestOplogTimestampElem.type() != BSONType::bsonTimestamp) {
        return {
            ErrorCodes::BadValue,
            str::stream()
                << "invalid _internalLatestOplogTimestamp format; expected timestamp but found: "
                << latestOplogTimestampElem.type()};
    }

    auto writeConcernError = cmdResponse["writeConcernError"];

    if (writeConcernError && writeConcernError.type() != BSONType::Object) {
        return {ErrorCodes::BadValue,
                str::stream() << "invalid writeConcernError format; expected object but found: "
                              << writeConcernError.type()};
    }

    return {{NamespaceString(fullns),
             cursorId,
             std::move(batch),
             boost::none,
             latestOplogTimestampElem ? latestOplogTimestampElem.timestamp()
                                      : boost::optional<Timestamp>{},
             writeConcernError ? writeConcernError.Obj().getOwned() : boost::optional<BSONObj>{}}};
}
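
Each field above is type-checked before use, and a TypeMismatch-style error is returned otherwise. A compact standalone analogue of that typed extraction, with a toy variant standing in for BSON:

#include <iostream>
#include <map>
#include <string>
#include <variant>

using Value = std::variant<long long, std::string>;

// Hypothetical analogue of the typed extraction above: fetch a field and
// require a specific type, producing an error message on mismatch.
template <typename T>
bool extract(const std::map<std::string, Value>& doc,
             const std::string& field, T* out, std::string* err) {
    auto it = doc.find(field);
    if (it == doc.end() || !std::holds_alternative<T>(it->second)) {
        *err = "Field '" + field + "' missing or of wrong type";
        return false;
    }
    *out = std::get<T>(it->second);
    return true;
}

int main() {
    std::map<std::string, Value> cursor{{"id", 123LL}, {"ns", std::string("test.coll")}};
    long long id;
    std::string ns, err;
    if (extract(cursor, "id", &id, &err) && extract(cursor, "ns", &ns, &err))
        std::cout << ns << " cursor " << id << "\n";
    else
        std::cout << err << "\n";
}
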
Ejemplo n.º 24
0
static void buildErrorFrom( const Status& status, BatchedErrorDetail* error ) {
    error->setErrCode( status.code() );
    error->setErrMessage( status.reason() );
}
Ejemplo n.º 25
0
Status MultiIndexBlockImpl::insertAllDocumentsInCollection(std::set<RecordId>* dupsOut) {
    const char* curopMessage = _buildInBackground ? "Index Build (background)" : "Index Build";
    const auto numRecords = _collection->numRecords(_opCtx);
    stdx::unique_lock<Client> lk(*_opCtx->getClient());
    ProgressMeterHolder progress(
        CurOp::get(_opCtx)->setMessage_inlock(curopMessage, curopMessage, numRecords));
    lk.unlock();

    Timer t;

    unsigned long long n = 0;

    PlanExecutor::YieldPolicy yieldPolicy;
    if (_buildInBackground) {
        invariant(_allowInterruption);
        yieldPolicy = PlanExecutor::YIELD_AUTO;
    } else {
        yieldPolicy = PlanExecutor::WRITE_CONFLICT_RETRY_ONLY;
    }
    auto exec =
        InternalPlanner::collectionScan(_opCtx, _collection->ns().ns(), _collection, yieldPolicy);

    Snapshotted<BSONObj> objToIndex;
    RecordId loc;
    PlanExecutor::ExecState state;
    int retries = 0;  // non-zero when retrying our last document.
    while (retries ||
           (PlanExecutor::ADVANCED == (state = exec->getNextSnapshotted(&objToIndex, &loc))) ||
           MONGO_FAIL_POINT(hangAfterStartingIndexBuild)) {
        try {
            if (_allowInterruption)
                _opCtx->checkForInterrupt();

            if (!(retries || (PlanExecutor::ADVANCED == state))) {
                // The only reason we are still in the loop is hangAfterStartingIndexBuild.
                log() << "Hanging index build due to 'hangAfterStartingIndexBuild' failpoint";
                invariant(_allowInterruption);
                sleepmillis(1000);
                continue;
            }

            // Make sure we are working with the latest version of the document.
            if (objToIndex.snapshotId() != _opCtx->recoveryUnit()->getSnapshotId() &&
                !_collection->findDoc(_opCtx, loc, &objToIndex)) {
                // doc was deleted so don't index it.
                retries = 0;
                continue;
            }

            // Done before insert so we can retry document if it WCEs.
            progress->setTotalWhileRunning(_collection->numRecords(_opCtx));

            WriteUnitOfWork wunit(_opCtx);
            Status ret = insert(objToIndex.value(), loc);
            if (_buildInBackground)
                exec->saveState();
            if (ret.isOK()) {
                wunit.commit();
            } else if (dupsOut && ret.code() == ErrorCodes::DuplicateKey) {
                // If dupsOut is non-null, we should only fail the specific insert that
                // led to a DuplicateKey rather than the whole index build.
                dupsOut->insert(loc);
            } else {
                // Fail the index build hard.
                return ret;
            }
            if (_buildInBackground) {
                auto restoreStatus = exec->restoreState();  // Handles any WCEs internally.
                if (!restoreStatus.isOK()) {
                    return restoreStatus;
                }
            }

            // Go to the next document
            progress->hit();
            n++;
            retries = 0;
        } catch (const WriteConflictException&) {
            CurOp::get(_opCtx)->debug().writeConflicts++;
            retries++;  // logAndBackoff expects this to be 1 on first call.
            WriteConflictException::logAndBackoff(
                retries, "index creation", _collection->ns().ns());

            // Can't use writeConflictRetry since we need to save/restore exec around call to
            // abandonSnapshot.
            exec->saveState();
            _opCtx->recoveryUnit()->abandonSnapshot();
            auto restoreStatus = exec->restoreState();  // Handles any WCEs internally.
            if (!restoreStatus.isOK()) {
                return restoreStatus;
            }
        }
    }

    uassert(28550,
            "Unable to complete index build due to collection scan failure: " +
                WorkingSetCommon::toStatusString(objToIndex.value()),
            state == PlanExecutor::IS_EOF);

    if (MONGO_FAIL_POINT(hangAfterStartingIndexBuildUnlocked)) {
        // Unlock before hanging so replication recognizes we've completed.
        Locker::LockSnapshot lockInfo;
        _opCtx->lockState()->saveLockStateAndUnlock(&lockInfo);
        while (MONGO_FAIL_POINT(hangAfterStartingIndexBuildUnlocked)) {
            log() << "Hanging index build with no locks due to "
                     "'hangAfterStartingIndexBuildUnlocked' failpoint";
            sleepmillis(1000);
        }
        // If we want to support this, we'd need to regrab the lock and be sure that all callers are
        // ok with us yielding. They should be for BG indexes, but not for foreground.
        invariant(!"the hangAfterStartingIndexBuildUnlocked failpoint can't be turned off");
    }

    progress->finished();

    Status ret = doneInserting(dupsOut);
    if (!ret.isOK())
        return ret;

    log() << "build index done.  scanned " << n << " total records. " << t.seconds() << " secs";

    return Status::OK();
}
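
The scan loop above has one subtle property: after a WriteConflictException it retries the same document instead of advancing, using the retries counter as the flag. Stripped of storage-engine details, the retry skeleton looks like this (names and the simulated conflict are invented):

#include <iostream>
#include <stdexcept>
#include <vector>

struct WriteConflict : std::runtime_error {
    WriteConflict() : std::runtime_error("write conflict") {}
};

int main() {
    std::vector<int> docs = {1, 2, 3};
    size_t i = 0;
    int retries = 0;  // non-zero when retrying the last document, as above

    while (i < docs.size()) {
        try {
            // Hypothetical insert that conflicts once on the second document.
            if (docs[i] == 2 && retries == 0) throw WriteConflict();
            std::cout << "indexed " << docs[i] << "\n";
            ++i;
            retries = 0;
        } catch (const WriteConflict&) {
            ++retries;  // back off and retry the same document
        }
    }
}
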
Ejemplo n.º 26
0
StatusWith<Shard::CommandResponse> ShardingCatalogManagerImpl::_runCommandForAddShard(
    OperationContext* opCtx,
    RemoteCommandTargeter* targeter,
    const std::string& dbName,
    const BSONObj& cmdObj) {
    auto swHost = targeter->findHost(opCtx, ReadPreferenceSetting{ReadPreference::PrimaryOnly});
    if (!swHost.isOK()) {
        return swHost.getStatus();
    }
    auto host = std::move(swHost.getValue());

    executor::RemoteCommandRequest request(
        host, dbName, cmdObj, rpc::makeEmptyMetadata(), nullptr, Seconds(30));

    executor::RemoteCommandResponse response =
        Status(ErrorCodes::InternalError, "Internal error running command");

    auto swCallbackHandle = _executorForAddShard->scheduleRemoteCommand(
        request, [&response](const executor::TaskExecutor::RemoteCommandCallbackArgs& args) {
            response = args.response;
        });
    if (!swCallbackHandle.isOK()) {
        return swCallbackHandle.getStatus();
    }

    // Block until the command is carried out
    _executorForAddShard->wait(swCallbackHandle.getValue());

    if (response.status == ErrorCodes::ExceededTimeLimit) {
        LOG(0) << "Operation timed out with status " << redact(response.status);
    }

    if (!response.isOK()) {
        if (!Shard::shouldErrorBePropagated(response.status.code())) {
            return {ErrorCodes::OperationFailed,
                    str::stream() << "failed to run command " << cmdObj
                                  << " when attempting to add shard "
                                  << targeter->connectionString().toString()
                                  << causedBy(response.status)};
        }
        return response.status;
    }

    BSONObj result = response.data.getOwned();

    Status commandStatus = getStatusFromCommandResult(result);
    if (!Shard::shouldErrorBePropagated(commandStatus.code())) {
        commandStatus = {ErrorCodes::OperationFailed,
                         str::stream() << "failed to run command " << cmdObj
                                       << " when attempting to add shard "
                                       << targeter->connectionString().toString()
                                       << causedBy(commandStatus)};
    }

    Status writeConcernStatus = getWriteConcernStatusFromCommandResult(result);
    if (!Shard::shouldErrorBePropagated(writeConcernStatus.code())) {
        writeConcernStatus = {ErrorCodes::OperationFailed,
                              str::stream() << "failed to satisfy writeConcern for command "
                                            << cmdObj
                                            << " when attempting to add shard "
                                            << targeter->connectionString().toString()
                                            << causedBy(writeConcernStatus)};
    }

    return Shard::CommandResponse(std::move(host),
                                  std::move(result),
                                  response.metadata.getOwned(),
                                  std::move(commandStatus),
                                  std::move(writeConcernStatus));
}
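
The tail of this function rewrites any error that should not be propagated into a uniform OperationFailed message with context. A standalone sketch of that wrap-or-propagate decision (Status, the numeric codes, and shouldErrorBePropagated are simplified stand-ins):

#include <iostream>
#include <string>

// Hypothetical minimal Status.
struct Status {
    int code;  // 0 == OK
    std::string reason;
    bool isOK() const { return code == 0; }
};

// Stand-in for Shard::shouldErrorBePropagated: only a whitelist of codes
// (here, just a made-up "network error" code 6) passes through unchanged.
bool shouldErrorBePropagated(int code) {
    return code == 0 || code == 6;
}

Status wrapForAddShard(Status s, const std::string& cmd, const std::string& host) {
    if (shouldErrorBePropagated(s.code))
        return s;
    return {96,  // stand-in code for OperationFailed
            "failed to run command " + cmd + " when attempting to add shard " + host +
                " :: caused by :: " + s.reason};
}

int main() {
    Status raw{11600, "interrupted at shutdown"};
    std::cout << wrapForAddShard(raw, "isMaster", "rs0/a:27017").reason << "\n";
}
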
Ejemplo n.º 27
0
Status ModifierSet::prepare(mutablebson::Element root,
                            const StringData& matchedField,
                            ExecInfo* execInfo) {
    _preparedState.reset(new PreparedState(&root.getDocument()));

    // If we have a $-positional field, it is time to bind it to an actual field part.
    if (_posDollar) {
        if (matchedField.empty()) {
            return Status(ErrorCodes::BadValue,
                          str::stream() << "The positional operator did not find the match "
                                           "needed from the query. Unexpanded update: "
                                        << _fieldRef.dottedField());
        }
        _fieldRef.setPart(_posDollar, matchedField);
    }

    // Locate the field name in 'root'. Note that we may not have all the parts in the path
    // in the doc -- which is fine. Our goal now is merely to reason about whether this mod's
    // apply is a noOp or whether it can be done in place. The remaining path, if missing, will
    // be created during the apply.
    Status status = pathsupport::findLongestPrefix(
        _fieldRef, root, &_preparedState->idxFound, &_preparedState->elemFound);

    // FindLongestPrefix may say the path does not exist at all, which is fine here, or
    // that the path was not viable or otherwise wrong, in which case, the mod cannot
    // proceed.
    if (status.code() == ErrorCodes::NonExistentPath) {
        _preparedState->elemFound = root.getDocument().end();
    } else if (_modOptions.fromReplication && status.code() == ErrorCodes::PathNotViable) {
        // If we are coming from replication and it is an invalid path,
        // then push on indicating that we had a blocking element, which we stopped at
        _preparedState->elemIsBlocking = true;
    } else if (!status.isOK()) {
        return status;
    }

    if (_setMode == SET_ON_INSERT) {
        execInfo->context = ModifierInterface::ExecInfo::INSERT_CONTEXT;
    }

    // We register interest in the field name. The driver needs this info to sort out if
    // there is any conflict among mods.
    execInfo->fieldRef[0] = &_fieldRef;

    //
    // in-place and no-op logic
    //

    // If the field path is not fully present, then this mod cannot be in place, nor is a
    // noOp.
    if (!_preparedState->elemFound.ok() || _preparedState->idxFound < (_fieldRef.numParts() - 1)) {
        return Status::OK();
    }

    // If the value being $set is the same as the one already in the doc, then this is a
    // noOp.
    if (_preparedState->elemFound.ok() && _preparedState->idxFound == (_fieldRef.numParts() - 1) &&
        _preparedState->elemFound.compareWithBSONElement(_val, false /*ignore field*/) == 0) {
        execInfo->noOp = _preparedState->noOp = true;
    }

    return Status::OK();
}
Ejemplo n.º 28
0
StatusWith<ShardType> ShardingCatalogManagerImpl::_validateHostAsShard(
    OperationContext* opCtx,
    std::shared_ptr<RemoteCommandTargeter> targeter,
    const std::string* shardProposedName,
    const ConnectionString& connectionString) {

    // Check if the node being added is a mongos or a version of mongod too old to speak the current
    // communication protocol.
    auto swCommandResponse =
        _runCommandForAddShard(opCtx, targeter.get(), "admin", BSON("isMaster" << 1));
    if (!swCommandResponse.isOK()) {
        if (swCommandResponse.getStatus() == ErrorCodes::RPCProtocolNegotiationFailed) {
            // Mongos to mongos commands are no longer supported in the wire protocol
            // (because mongos does not support OP_COMMAND), similarly for a new mongos
            // and an old mongod. So the call will fail in such cases.
            // TODO: If/When mongos ever supports opCommands, this logic will break because
            // cmdStatus will be OK.
            return {ErrorCodes::RPCProtocolNegotiationFailed,
                    str::stream() << targeter->connectionString().toString()
                                  << " does not recognize the RPC protocol being used. This is"
                                  << " likely because it contains a node that is a mongos or an old"
                                  << " version of mongod."};
        } else {
            return swCommandResponse.getStatus();
        }
    }

    // Check for a command response error
    auto resIsMasterStatus = std::move(swCommandResponse.getValue().commandStatus);
    if (!resIsMasterStatus.isOK()) {
        return {resIsMasterStatus.code(),
                str::stream() << "Error running isMaster against "
                              << targeter->connectionString().toString()
                              << ": "
                              << causedBy(resIsMasterStatus)};
    }

    auto resIsMaster = std::move(swCommandResponse.getValue().response);

    // Check that the node being added is a new enough version.
    // If we're running this code, that means the mongos that the addShard request originated from
    // must be at least version 3.4 (since 3.2 mongoses don't know about the _configsvrAddShard
    // command).  Since it is illegal to have v3.4 mongoses with v3.2 shards, we should reject
    // adding any shards that are not v3.4.  We can determine this by checking that the
    // maxWireVersion reported in isMaster is at least COMMANDS_ACCEPT_WRITE_CONCERN.
    // TODO(SERVER-25623): This approach won't work to prevent v3.6 mongoses from adding v3.4
    // shards, so we'll have to rethink this during the 3.5 development cycle.

    long long maxWireVersion;
    Status status = bsonExtractIntegerField(resIsMaster, "maxWireVersion", &maxWireVersion);
    if (!status.isOK()) {
        return Status(status.code(),
                      str::stream() << "isMaster returned invalid 'maxWireVersion' "
                                    << "field when attempting to add "
                                    << connectionString.toString()
                                    << " as a shard: "
                                    << status.reason());
    }
    if (maxWireVersion < WireVersion::COMMANDS_ACCEPT_WRITE_CONCERN) {
        return Status(ErrorCodes::IncompatibleServerVersion,
                      str::stream() << "Cannot add " << connectionString.toString()
                                    << " as a shard because we detected a mongod with server "
                                       "version older than 3.4.0.  It is invalid to add v3.2 and "
                                       "older shards through a v3.4 mongos.");
    }

    // Check whether there is a master. If there isn't, the replica set may not have been
    // initiated. If the connection is a standalone, it will return true for isMaster.
    bool isMaster;
    status = bsonExtractBooleanField(resIsMaster, "ismaster", &isMaster);
    if (!status.isOK()) {
        return Status(status.code(),
                      str::stream() << "isMaster returned invalid 'ismaster' "
                                    << "field when attempting to add "
                                    << connectionString.toString()
                                    << " as a shard: "
                                    << status.reason());
    }
    if (!isMaster) {
        return {ErrorCodes::NotMaster,
                str::stream()
                    << connectionString.toString()
                    << " does not have a master. If this is a replica set, ensure that it has a"
                    << " healthy primary and that the set has been properly initiated."};
    }

    const std::string providedSetName = connectionString.getSetName();
    const std::string foundSetName = resIsMaster["setName"].str();

    // Make sure the specified replica set name (if any) matches the actual shard's replica set
    if (providedSetName.empty() && !foundSetName.empty()) {
        return {ErrorCodes::OperationFailed,
                str::stream() << "host is part of set " << foundSetName << "; "
                              << "use replica set url format "
                              << "<setname>/<server1>,<server2>, ..."};
    }

    if (!providedSetName.empty() && foundSetName.empty()) {
        return {ErrorCodes::OperationFailed,
                str::stream() << "host did not return a set name; "
                              << "is the replica set still initializing? "
                              << resIsMaster};
    }

    // Make sure the set name specified in the connection string matches the one that its
    // hosts actually belong to
    if (!providedSetName.empty() && (providedSetName != foundSetName)) {
        return {ErrorCodes::OperationFailed,
                str::stream() << "the provided connection string (" << connectionString.toString()
                              << ") does not match the actual set name "
                              << foundSetName};
    }

    // Is it a config server?
    if (resIsMaster.hasField("configsvr")) {
        return {ErrorCodes::OperationFailed,
                str::stream() << "Cannot add " << connectionString.toString()
                              << " as a shard since it is a config server"};
    }

    // If the shard is part of a replica set, make sure all the hosts mentioned in the connection
    // string are part of the set. It is fine if not all members of the set are mentioned in the
    // connection string, though.
    if (!providedSetName.empty()) {
        std::set<std::string> hostSet;

        BSONObjIterator iter(resIsMaster["hosts"].Obj());
        while (iter.more()) {
            hostSet.insert(iter.next().String());  // host:port
        }

        if (resIsMaster["passives"].isABSONObj()) {
            BSONObjIterator piter(resIsMaster["passives"].Obj());
            while (piter.more()) {
                hostSet.insert(piter.next().String());  // host:port
            }
        }

        if (resIsMaster["arbiters"].isABSONObj()) {
            BSONObjIterator piter(resIsMaster["arbiters"].Obj());
            while (piter.more()) {
                hostSet.insert(piter.next().String());  // host:port
            }
        }

        for (const auto& hostEntry : connectionString.getServers()) {
            const auto& host = hostEntry.toString();  // host:port
            if (hostSet.find(host) == hostSet.end()) {
                return {ErrorCodes::OperationFailed,
                        str::stream() << "in seed list " << connectionString.toString() << ", host "
                                      << host
                                      << " does not belong to replica set "
                                      << foundSetName
                                      << "; found "
                                      << resIsMaster.toString()};
            }
        }
    }

    std::string actualShardName;

    if (shardProposedName) {
        actualShardName = *shardProposedName;
    } else if (!foundSetName.empty()) {
        // Default it to the name of the replica set
        actualShardName = foundSetName;
    }

    // Disallow adding shard replica set with name 'config'
    if (actualShardName == NamespaceString::kConfigDb) {
        return {ErrorCodes::BadValue, "use of shard replica set with name 'config' is not allowed"};
    }

    // Retrieve the most up to date connection string that we know from the replica set monitor (if
    // this is a replica set shard, otherwise it will be the same value as connectionString).
    ConnectionString actualShardConnStr = targeter->connectionString();

    ShardType shard;
    shard.setName(actualShardName);
    shard.setHost(actualShardConnStr.toString());
    shard.setState(ShardType::ShardState::kShardAware);

    return shard;
}
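
The membership validation above folds hosts, passives, and arbiters into one std::set and then requires every seed-list host to be present. The same check in a standalone form, with made-up host names:

#include <iostream>
#include <set>
#include <string>
#include <vector>

int main() {
    // Hypothetical stand-ins for the isMaster response fields and the connection string.
    std::vector<std::string> hosts = {"a:27017", "b:27017"};
    std::vector<std::string> arbiters = {"c:27017"};
    std::vector<std::string> seedList = {"a:27017", "d:27017"};

    std::set<std::string> hostSet(hosts.begin(), hosts.end());
    hostSet.insert(arbiters.begin(), arbiters.end());

    for (const auto& host : seedList) {
        if (hostSet.find(host) == hostSet.end()) {
            std::cout << "host " << host << " does not belong to the replica set\n";
            return 1;
        }
    }
    std::cout << "all seed-list hosts are set members\n";
}
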
Ejemplo n.º 29
0
void Strategy::getMore(OperationContext* txn, Request& r) {
    Timer getMoreTimer;

    const char* ns = r.getns();
    const int ntoreturn = r.d().pullInt();
    const long long id = r.d().pullInt64();

    // TODO:  Handle stale config exceptions here from coll being dropped or sharded during op
    // for now has same semantics as legacy request
    const NamespaceString nss(ns);
    auto statusGetDb = grid.catalogCache()->getDatabase(txn, nss.db().toString());
    if (statusGetDb == ErrorCodes::DatabaseNotFound) {
        cursorCache.remove(id);
        replyToQuery(ResultFlag_CursorNotFound, r.p(), r.m(), 0, 0, 0);
        return;
    }

    uassertStatusOK(statusGetDb);

    shared_ptr<DBConfig> config = statusGetDb.getValue();

    ShardPtr primary;
    ChunkManagerPtr info;
    config->getChunkManagerOrPrimary(ns, info, primary);

    //
    // TODO: Cleanup cursor cache, consolidate into single codepath
    //
    const string host = cursorCache.getRef(id);
    ShardedClientCursorPtr cursor = cursorCache.get(id);
    int cursorMaxTimeMS = cursorCache.getMaxTimeMS(id);

    // Cursor ids should not overlap between sharded and unsharded cursors
    massert(17012,
            str::stream() << "duplicate sharded and unsharded cursor id " << id << " detected for "
                          << ns << ", duplicated on host " << host,
            NULL == cursorCache.get(id).get() || host.empty());

    ClientBasic* client = ClientBasic::getCurrent();
    NamespaceString nsString(ns);
    AuthorizationSession* authSession = AuthorizationSession::get(client);
    Status status = authSession->checkAuthForGetMore(nsString, id);
    audit::logGetMoreAuthzCheck(client, nsString, id, status.code());
    uassertStatusOK(status);

    if (!host.empty()) {
        LOG(3) << "single getmore: " << ns;

        // We use ScopedDbConnection because we don't care about config versions here
        // (not deleting data is handled elsewhere) and we don't want to call
        // setShardVersion.
        ScopedDbConnection conn(host);

        Message response;
        bool ok = conn->callRead(r.m(), response);
        uassert(10204, "dbgrid: getmore: error calling db", ok);

        bool hasMore = (response.singleData().getCursor() != 0);

        if (!hasMore) {
            cursorCache.removeRef(id);
        }

        r.reply(response, "" /*conn->getServerAddress() */);
        conn.done();
        return;
    } else if (cursor) {
        if (cursorMaxTimeMS == kMaxTimeCursorTimeLimitExpired) {
            cursorCache.remove(id);
            uasserted(ErrorCodes::ExceededTimeLimit, "operation exceeded time limit");
        }

        // TODO: Try to match logic of mongod, where on subsequent getMore() we pull lots more data?
        BufBuilder buffer(ShardedClientCursor::INIT_REPLY_BUFFER_SIZE);
        int docCount = 0;
        const int startFrom = cursor->getTotalSent();
        bool hasMore = cursor->sendNextBatch(ntoreturn, buffer, docCount);

        if (hasMore) {
            // still more data
            cursor->accessed();

            if (cursorMaxTimeMS != kMaxTimeCursorNoTimeLimit) {
                // Update remaining amount of time in cursor cache.
                int cursorLeftoverMillis = cursorMaxTimeMS - getMoreTimer.millis();
                if (cursorLeftoverMillis <= 0) {
                    cursorLeftoverMillis = kMaxTimeCursorTimeLimitExpired;
                }
                cursorCache.updateMaxTimeMS(id, cursorLeftoverMillis);
            }
        } else {
            // we've exhausted the cursor
            cursorCache.remove(id);
        }

        replyToQuery(0,
                     r.p(),
                     r.m(),
                     buffer.buf(),
                     buffer.len(),
                     docCount,
                     startFrom,
                     hasMore ? cursor->getId() : 0);
        return;
    } else {
        LOG(3) << "could not find cursor " << id << " in cache for " << ns;

        replyToQuery(ResultFlag_CursorNotFound, r.p(), r.m(), 0, 0, 0);
        return;
    }
}
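
The maxTimeMS bookkeeping above subtracts this getMore's elapsed time from the cursor's remaining budget and pins the result to an expired sentinel at or below zero. The arithmetic in isolation (the sentinel values here are hypothetical, not the real constants):

#include <iostream>

// Hypothetical sentinels mirroring kMaxTimeCursorNoTimeLimit / kMaxTimeCursorTimeLimitExpired.
const int kNoTimeLimit = 0;
const int kTimeLimitExpired = -1;

int updateLeftoverMillis(int cursorMaxTimeMS, int elapsedMillis) {
    if (cursorMaxTimeMS == kNoTimeLimit)
        return kNoTimeLimit;  // untimed cursors stay untimed
    int leftover = cursorMaxTimeMS - elapsedMillis;
    return leftover <= 0 ? kTimeLimitExpired : leftover;
}

int main() {
    std::cout << updateLeftoverMillis(500, 120) << "\n";  // 380
    std::cout << updateLeftoverMillis(500, 600) << "\n";  // -1 (expired)
}
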
Ejemplo n.º 30
0
Status IndexAccessMethod::commitBulk(OperationContext* opCtx,
                                     std::unique_ptr<BulkBuilder> bulk,
                                     bool mayInterrupt,
                                     bool dupsAllowed,
                                     set<RecordId>* dupsToDrop,
                                     bool assignTimestamp) {
    // Do not track multikey path info for index builds.
    ScopeGuard restartTracker =
        MakeGuard([opCtx] { MultikeyPathTracker::get(opCtx).startTrackingMultikeyPathInfo(); });
    if (!MultikeyPathTracker::get(opCtx).isTrackingMultikeyPathInfo()) {
        restartTracker.Dismiss();
    }
    MultikeyPathTracker::get(opCtx).stopTrackingMultikeyPathInfo();
    Timer timer;

    std::unique_ptr<BulkBuilder::Sorter::Iterator> i(bulk->_sorter->done());

    stdx::unique_lock<Client> lk(*opCtx->getClient());
    ProgressMeterHolder pm(
        CurOp::get(opCtx)->setMessage_inlock("Index Bulk Build: (2/3) btree bottom up",
                                             "Index: (2/3) BTree Bottom Up Progress",
                                             bulk->_keysInserted,
                                             10));
    lk.unlock();

    std::unique_ptr<SortedDataBuilderInterface> builder;

    writeConflictRetry(opCtx, "setting index multikey flag", "", [&] {
        WriteUnitOfWork wunit(opCtx);

        if (bulk->_everGeneratedMultipleKeys || isMultikeyFromPaths(bulk->_indexMultikeyPaths)) {
            _btreeState->setMultikey(opCtx, bulk->_indexMultikeyPaths);
        }

        builder.reset(_newInterface->getBulkBuilder(opCtx, dupsAllowed));
        if (assignTimestamp) {
            fassertStatusOK(50705,
                            opCtx->recoveryUnit()->setTimestamp(
                                LogicalClock::get(opCtx)->getClusterTime().asTimestamp()));
        }
        wunit.commit();
    });

    while (i->more()) {
        if (mayInterrupt) {
            opCtx->checkForInterrupt();
        }

        WriteUnitOfWork wunit(opCtx);
        // Improve performance in the btree-building phase by disabling rollback tracking.
        // This avoids copying all the written bytes to a buffer that is only used to roll back.
        // Note that this is safe to do, as this entire index-build-in-progress will be cleaned
        // up by the index system.
        opCtx->recoveryUnit()->setRollbackWritesDisabled();

        // Get the next datum and add it to the builder.
        BulkBuilder::Sorter::Data d = i->next();
        Status status = builder->addKey(d.first, d.second);

        if (!status.isOK()) {
            // Overlong key that's OK to skip?
            if (status.code() == ErrorCodes::KeyTooLong && ignoreKeyTooLong(opCtx)) {
                continue;
            }

            // Check if this is a duplicate that's OK to skip
            if (status.code() == ErrorCodes::DuplicateKey) {
                invariant(!dupsAllowed);  // shouldn't be getting DupKey errors if dupsAllowed.

                if (dupsToDrop) {
                    dupsToDrop->insert(d.second);
                    continue;
                }
            }

            return status;
        }

        // If we're here either it's a dup and we're cool with it or the addKey went just
        // fine.
        pm.hit();
        if (assignTimestamp) {
            fassertStatusOK(50704,
                            opCtx->recoveryUnit()->setTimestamp(
                                LogicalClock::get(opCtx)->getClusterTime().asTimestamp()));
        }
        wunit.commit();
    }

    pm.finished();

    {
        stdx::lock_guard<Client> lk(*opCtx->getClient());
        CurOp::get(opCtx)->setMessage_inlock("Index Bulk Build: (3/3) btree-middle",
                                             "Index: (3/3) BTree Middle Progress");
    }

    LOG(timer.seconds() > 10 ? 0 : 1) << "\t done building bottom layer, going to commit";

    std::unique_ptr<TimestampBlock> tsBlock;
    if (assignTimestamp) {
        tsBlock = stdx::make_unique<TimestampBlock>(
            opCtx, LogicalClock::get(opCtx)->getClusterTime().asTimestamp());
    }
    builder->commit(mayInterrupt);
    return Status::OK();
}
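
The commit loop above drains a sorted iterator into the bottom-up builder, tolerating specific errors (over-long keys, and duplicates when a dups set is supplied) and failing hard on anything else. A standalone skeleton of that drain-with-tolerated-errors loop, with duplicate detection faked by comparing adjacent keys:

#include <iostream>
#include <set>
#include <utility>
#include <vector>

enum class Code { OK, DuplicateKey };  // the KeyTooLong case is elided for brevity

int main() {
    // Hypothetical sorted (key, recordId) stream; equal keys are adjacent.
    std::vector<std::pair<int, int>> sorted = {{1, 10}, {2, 11}, {2, 12}, {3, 13}};
    std::set<int> dupsToDrop;
    int lastKey = -1;

    for (const auto& d : sorted) {
        Code status = (d.first == lastKey) ? Code::DuplicateKey : Code::OK;
        lastKey = d.first;

        if (status == Code::DuplicateKey) {
            dupsToDrop.insert(d.second);  // tolerated: record the dup for later deletion
            continue;
        }
        std::cout << "added key " << d.first << "\n";
    }
    std::cout << dupsToDrop.size() << " dup(s) to drop\n";  // prints 1
}
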