Пример #1
0
// Checks that the shard version carried by a write request is write-compatible with the
// version this shard holds for the targeted collection.
//
// A request without a shard version is treated as ChunkVersion::IGNORED() and is accepted, as is
// any request when sharding is not enabled. On a mismatch, *error receives a newly allocated
// WriteErrorDetail filled in by buildStaleError() and false is returned.
static bool checkShardVersion( ShardingState* shardingState,
                               const BatchedCommandRequest& request,
                               WriteErrorDetail** error ) {

    const NamespaceString nss( request.getTargetingNS() );
    Lock::assertWriteLocked( nss.ns() );

    const bool versionProvided =
        request.isMetadataSet() && request.getMetadata()->isShardVersionSet();
    ChunkVersion requestedVersion = versionProvided ?
        request.getMetadata()->getShardVersion() : ChunkVersion::IGNORED();

    if ( !shardingState->enabled() )
        return true;

    CollectionMetadataPtr metadata = shardingState->getCollectionMetadata( nss.ns() );

    // Requests that opted out of versioning are never rejected
    if ( ChunkVersion::isIgnoredVersion( requestedVersion ) )
        return true;

    // No metadata means the collection is unsharded from this shard's point of view
    ChunkVersion currentVersion =
        metadata ? metadata->getShardVersion() : ChunkVersion::UNSHARDED();

    if ( requestedVersion.isWriteCompatibleWith( currentVersion ) )
        return true;

    *error = new WriteErrorDetail;
    buildStaleError( requestedVersion, currentVersion, *error );
    return false;
}
Пример #2
0
// Runs a single-op batch write against the config server, re-sending the command while the
// failure is classified as retriable by 'retryPolicy' and attempts remain (at most
// kOnErrorNumRetries attempts in total). The response parsed from the final attempt is returned.
BatchedCommandResponse Shard::runBatchWriteCommandOnConfig(
    OperationContext* txn, const BatchedCommandRequest& batchRequest, RetryPolicy retryPolicy) {
    invariant(isConfig());

    const std::string dbname = batchRequest.getNS().db().toString();
    invariant(batchRequest.sizeWriteOps() == 1);

    const BSONObj cmdObj = batchRequest.toBSON();

    for (int attempt = 1; attempt <= kOnErrorNumRetries; ++attempt) {
        auto cmdResult = _runCommand(txn,
                                     ReadPreferenceSetting{ReadPreference::PrimaryOnly},
                                     dbname,
                                     kDefaultConfigCommandTimeout,
                                     cmdObj);

        BatchedCommandResponse parsedResponse;
        Status outcome =
            CommandResponse::processBatchWriteResponse(cmdResult.commandResponse, &parsedResponse);

        // Report failures against the host that served the command, when one is known
        if (cmdResult.host && !outcome.isOK()) {
            updateReplSetMonitor(cmdResult.host.get(), outcome);
        }

        const bool willRetry =
            attempt < kOnErrorNumRetries && isRetriableError(outcome.code(), retryPolicy);
        if (!willRetry) {
            return parsedResponse;
        }

        LOG(2) << "Batch write command failed with retriable error and will be retried"
               << causedBy(redact(outcome));
    }
    MONGO_UNREACHABLE;
}
Пример #3
0
// Checks that a unique-index request is compatible with the key pattern of the collection
// metadata held by this shard.
//
// Requests that are not unique-index requests always pass, as do requests made while sharding
// is disabled or while no metadata exists for the namespace. On an incompatibility, *error
// receives a newly allocated WriteErrorDetail filled in by buildUniqueIndexError() and false is
// returned.
static bool checkIndexConstraints( ShardingState* shardingState,
                                   const BatchedCommandRequest& request,
                                   WriteErrorDetail** error ) {

    const NamespaceString nss( request.getTargetingNS() );
    Lock::assertWriteLocked( nss.ns() );

    if ( !request.isUniqueIndexRequest() || !shardingState->enabled() )
        return true;

    CollectionMetadataPtr metadata = shardingState->getCollectionMetadata( nss.ns() );
    if ( !metadata )
        return true;

    if ( isUniqueIndexCompatible( metadata->getKeyPattern(),
                                  request.getIndexKeyPattern() ) )
        return true;

    *error = new WriteErrorDetail;
    buildUniqueIndexError( metadata->getKeyPattern(),
                           request.getIndexKeyPattern(),
                           *error );
    return false;
}
Пример #4
0
    // Checks that the shard version carried by a write request is write-compatible with the
    // version this shard holds for the targeted collection.
    //
    // A request without a shard version is treated as ChunkVersion::IGNORED() and is accepted, as
    // is any request when sharding is not enabled. On a mismatch, a newly allocated
    // WriteErrorDetail is stored on 'result', filled in by buildStaleError(), and false is
    // returned.
    static bool checkShardVersion(OperationContext* txn,
                                  ShardingState* shardingState,
                                  const BatchedCommandRequest& request,
                                  WriteOpResult* result) {

        const NamespaceString nss( request.getTargetingNS() );
        txn->lockState()->assertWriteLocked( nss.ns() );

        const bool versionProvided =
            request.isMetadataSet() && request.getMetadata()->isShardVersionSet();
        ChunkVersion requestedVersion = versionProvided ?
            request.getMetadata()->getShardVersion() : ChunkVersion::IGNORED();

        if ( !shardingState->enabled() )
            return true;

        CollectionMetadataPtr metadata = shardingState->getCollectionMetadata( nss.ns() );

        // Requests that opted out of versioning are never rejected
        if ( ChunkVersion::isIgnoredVersion( requestedVersion ) )
            return true;

        // No metadata means the collection is unsharded from this shard's point of view
        ChunkVersion currentVersion =
            metadata ? metadata->getShardVersion() : ChunkVersion::UNSHARDED();

        if ( requestedVersion.isWriteCompatibleWith( currentVersion ) )
            return true;

        result->setError(new WriteErrorDetail);
        buildStaleError(requestedVersion, currentVersion, result->getError());
        return false;
    }
Пример #5
0
    // Translates an update Message into an update batch request holding exactly one update
    // document. The returned request is allocated with new; presumably the caller takes
    // ownership (confirm against callers).
    BatchedCommandRequest* msgToBatchUpdate( const Message& updateMsg ) {

        // Parsing DbMessage throws
        DbMessage parsedMsg( updateMsg );
        NamespaceString targetNss( parsedMsg.getns() );
        const int optionFlags = parsedMsg.pullInt();
        const BSONObj query = parsedMsg.nextJsObj();
        const BSONObj updateExpr = parsedMsg.nextJsObj();

        // No exceptions from here on
        BatchedUpdateDocument* singleUpdate = new BatchedUpdateDocument;
        singleUpdate->setQuery( query );
        singleUpdate->setUpdateExpr( updateExpr );
        singleUpdate->setUpsert( ( optionFlags & UpdateOption_Upsert ) != 0 );
        singleUpdate->setMulti( ( optionFlags & UpdateOption_Multi ) != 0 );

        BatchedCommandRequest* batch =
            new BatchedCommandRequest( BatchedCommandRequest::BatchType_Update );
        batch->setNS( targetNss.ns() );
        batch->getUpdateRequest()->addToUpdates( singleUpdate );

        return batch;
    }
Пример #6
0
    // Checks that a unique-index request is compatible with the key pattern of the collection
    // metadata held by this shard.
    //
    // Requests that are not unique-index requests always pass, as do requests made while
    // sharding is disabled or while no metadata exists for the namespace. On an incompatibility,
    // a newly allocated WriteErrorDetail is stored on 'result', filled in by
    // buildUniqueIndexError(), and false is returned.
    static bool checkIndexConstraints(OperationContext* txn,
                                      ShardingState* shardingState,
                                      const BatchedCommandRequest& request,
                                      WriteOpResult* result) {

        const NamespaceString nss( request.getTargetingNS() );
        txn->lockState()->assertWriteLocked( nss.ns() );

        if ( !request.isUniqueIndexRequest() || !shardingState->enabled() )
            return true;

        CollectionMetadataPtr metadata = shardingState->getCollectionMetadata( nss.ns() );
        if ( !metadata )
            return true;

        if ( isUniqueIndexCompatible( metadata->getKeyPattern(),
                                      request.getIndexKeyPattern() ) )
            return true;

        result->setError(new WriteErrorDetail);
        buildUniqueIndexError(metadata->getKeyPattern(),
                              request.getIndexKeyPattern(),
                              result->getError());
        return false;
    }
Пример #7
0
// Dispatches a batch write. Writes to the config or admin database go straight to the config
// server through the catalog client; every other write is targeted with a ChunkManagerTargeter,
// executed through BatchWriteExec, and followed by a splitIfNeeded() call. LastError is disabled
// for the duration of the call.
void ClusterWriter::write(OperationContext* opCtx,
                          const BatchedCommandRequest& request,
                          BatchWriteExecStats* stats,
                          BatchedCommandResponse* response) {
    const NamespaceString& nss = request.getNS();

    LastError::Disabled disableLastError(&LastError::get(opCtx->getClient()));

    // Config writes and shard writes are done differently
    const bool isConfigWrite =
        nss.db() == NamespaceString::kConfigDb || nss.db() == NamespaceString::kAdminDb;
    if (isConfigWrite) {
        Grid::get(opCtx)->catalogClient()->writeConfigServerDirect(opCtx, request, response);
        return;
    }

    TargeterStats targeterStats;

    {
        // The targeter is scoped so that it is destroyed before the split check below
        ChunkManagerTargeter targeter(request.getTargetingNS(), &targeterStats);

        Status initStatus = targeter.init(opCtx);
        if (!initStatus.isOK()) {
            toBatchError({initStatus.code(),
                          str::stream() << "unable to target"
                                        << (request.isInsertIndexRequest() ? " index" : "")
                                        << " write op for collection "
                                        << request.getTargetingNS().ns()
                                        << causedBy(initStatus)},
                         response);
            return;
        }

        BatchWriteExec::executeBatch(opCtx, targeter, request, response, stats);
    }

    splitIfNeeded(opCtx, request.getNS(), targeterStats);
}
Пример #8
0
    void ClusterWriter::shardWrite( const BatchedCommandRequest& request,
                                    BatchedCommandResponse* response ) {

        ChunkManagerTargeter targeter;
        Status targetInitStatus = targeter.init( NamespaceString( request.getTargetingNS() ) );

        if ( !targetInitStatus.isOK() ) {

            warning() << "could not initialize targeter for"
                      << ( request.isInsertIndexRequest() ? " index" : "" )
                      << " write op in collection " << request.getTargetingNS() << endl;

            // Errors will be reported in response if we are unable to target
        }

        DBClientShardResolver resolver;
        DBClientMultiCommand dispatcher;
        BatchWriteExec exec( &targeter, &resolver, &dispatcher );
        exec.executeBatch( request, response );

        if ( _autoSplit )
            splitIfNeeded( request.getNS(), *targeter.getStats() );

        _stats->setShardStats( exec.releaseStats() );
    }
Пример #9
0
// Runs a batch write command against this shard's primary, re-sending it while the failure is
// classified as retriable by 'retryPolicy' and attempts remain (at most kOnErrorNumRetries
// attempts in total). The response parsed from the final attempt is returned.
BatchedCommandResponse Shard::runBatchWriteCommand(OperationContext* opCtx,
                                                   const Milliseconds maxTimeMS,
                                                   const BatchedCommandRequest& batchRequest,
                                                   RetryPolicy retryPolicy) {
    const std::string dbname = batchRequest.getNS().db().toString();

    const BSONObj cmdObj = batchRequest.toBSON();

    for (int attempt = 1; attempt <= kOnErrorNumRetries; ++attempt) {
        // Note: write commands can only be issued against a primary.
        auto swCmdResult = _runCommand(
            opCtx, ReadPreferenceSetting{ReadPreference::PrimaryOnly}, dbname, maxTimeMS, cmdObj);

        BatchedCommandResponse parsedResponse;
        auto outcome = CommandResponse::processBatchWriteResponse(swCmdResult, &parsedResponse);

        const bool willRetry =
            attempt < kOnErrorNumRetries && isRetriableError(outcome.code(), retryPolicy);
        if (!willRetry) {
            return parsedResponse;
        }

        LOG(2) << "Batch write command to " << getId()
               << " failed with retriable error and will be retried"
               << causedBy(redact(outcome));
    }
    MONGO_UNREACHABLE;
}
Пример #10
0
    // Translates an insert Message into a single insert batch request containing every document
    // read from the message. The batch is ordered unless the continue-on-error flag is set. The
    // returned request is allocated with new; presumably the caller takes ownership (confirm
    // against callers).
    BatchedCommandRequest* msgToBatchInsert( const Message& insertMsg ) {

        // Parsing DbMessage throws
        DbMessage parsedMsg( insertMsg );
        NamespaceString targetNss( parsedMsg.getns() );
        const bool continueOnError =
            parsedMsg.reservedField() & Reserved_InsertOption_ContinueOnError;

        // At least one document is always read from the message
        vector<BSONObj> docs;
        for ( ;; ) {
            docs.push_back( parsedMsg.nextJsObj() );
            if ( !parsedMsg.moreJSObjs() )
                break;
        }

        // No exceptions from here on
        BatchedCommandRequest* batch =
            new BatchedCommandRequest( BatchedCommandRequest::BatchType_Insert );
        batch->setNS( targetNss.ns() );
        for ( size_t docIndex = 0; docIndex < docs.size(); ++docIndex ) {
            batch->getInsertRequest()->addToDocuments( docs[docIndex] );
        }

        // Continue-on-error == unordered
        batch->setOrdered( !continueOnError );

        return batch;
    }
Пример #11
0
    // Executes each item of a batch through _safeWriter, one at a time, and assembles a batch
    // response from the per-item LastError results: the affected-object count is accumulated in
    // N, item errors and upserted ids are recorded with their batch index, and an ordered batch
    // stops at the first failed item. For single-item batches the lone error and/or upsert is
    // promoted to the top level of the response.
    void BatchSafeWriter::safeWriteBatch( DBClientBase* conn,
                                          const BatchedCommandRequest& request,
                                          BatchedCommandResponse* response ) {

        // N starts at zero, and we add to it for each item
        response->setN( 0 );

        for ( size_t opIndex = 0; opIndex < request.sizeWriteOps(); ++opIndex ) {

            BatchItemRef currentItem( &request, static_cast<int>( opIndex ) );
            LastError opResult;

            _safeWriter->safeWrite( conn, currentItem, &opResult );

            // Only items that produced an error get an error detail
            BatchedErrorDetail* opError = lastErrorToBatchError( opResult );
            if ( opError != NULL ) {
                opError->setIndex( opIndex );
                response->addToErrDetails( opError );
            }

            response->setN( response->getN() + opResult.nObjects );

            if ( !opResult.upsertedId.isEmpty() ) {
                BatchedUpsertDetail* upsertDetail = new BatchedUpsertDetail;
                upsertDetail->setIndex( opIndex );
                upsertDetail->setUpsertedID( opResult.upsertedId );
                response->addToUpsertDetails( upsertDetail );
            }

            // Break on first error if we're ordered
            if ( request.getOrdered() && BatchSafeWriter::isFailedOp( opResult ) ) {
                break;
            }
        }

        if ( request.sizeWriteOps() == 1 ) {
            if ( response->isErrDetailsSet() && !response->isErrCodeSet() ) {

                // Promote single error to batch error
                const BatchedErrorDetail* soleError = response->getErrDetailsAt( 0 );
                response->setErrCode( soleError->getErrCode() );
                if ( soleError->isErrInfoSet() ) {
                    response->setErrInfo( soleError->getErrInfo() );
                }
                response->setErrMessage( soleError->getErrMessage() );

                response->unsetErrDetails();
            }
        }

        if ( request.sizeWriteOps() == 1 ) {
            if ( response->isUpsertDetailsSet() ) {

                // Promote single upsert to batch upsert
                const BatchedUpsertDetail* soleUpsert = response->getUpsertDetailsAt( 0 );
                response->setSingleUpserted( soleUpsert->getUpsertedID() );

                response->unsetUpsertDetails();
            }
        }

        // The batch is ok exactly when no top-level error code was set
        response->setOk( !response->isErrCodeSet() );
        dassert( response->isValid( NULL ) );
    }
Пример #12
0
    // Goes over the request and preprocesses normalized versions of all the inserts in the request
    static void normalizeInserts( const BatchedCommandRequest& request,
                                  vector<StatusWith<BSONObj> >* normalInserts ) {

        for ( size_t i = 0; i < request.sizeWriteOps(); ++i ) {
            StatusWith<BSONObj> normalInsert = normalizeInsert( BatchItemRef( &request, i ) );
            normalInserts->push_back( normalInsert );
            if ( request.getOrdered() && !normalInsert.isOK() )
                break;
        }
    }
Пример #13
0
// Goes over the request and preprocesses normalized versions of all the inserts in the request
static void normalizeInserts( const BatchedCommandRequest& request,
                              vector<StatusWith<BSONObj> >* normalInserts ) {

    for ( size_t i = 0; i < request.sizeWriteOps(); ++i ) {
        BSONObj insertDoc = request.getInsertRequest()->getDocumentsAt( i );
        StatusWith<BSONObj> normalInsert = fixDocumentForInsert( insertDoc );
        normalInserts->push_back( normalInsert );
        if ( request.getOrdered() && !normalInsert.isOK() )
            break;
    }
}
Пример #14
0
    // Executes every write op of the batch according to its batch type.
    //
    // Inserts are delegated as a whole to execInserts(); updates and deletes are executed one
    // item at a time via execUpdate() / execRemove(). Errors are appended to 'errors' and
    // upserted ids (updates only) to 'upsertedIds'; both vectors receive newly allocated
    // objects. For ordered update/delete batches, execution stops at the first error.
    void WriteBatchExecutor::bulkExecute( const BatchedCommandRequest& request,
                                          std::vector<BatchedUpsertDetail*>* upsertedIds,
                                          std::vector<WriteErrorDetail*>* errors ) {

        if ( request.getBatchType() == BatchedCommandRequest::BatchType_Insert ) {
            execInserts( request, errors );
        }
        else if ( request.getBatchType() == BatchedCommandRequest::BatchType_Update ) {
            for ( size_t i = 0; i < request.sizeWriteOps(); i++ ) {

                WriteErrorDetail* error = NULL;
                BSONObj upsertedId;
                execUpdate( BatchItemRef( &request, i ), &upsertedId, &error );

                // A non-empty upsertedId means this item produced an upsert; record its index
                if ( !upsertedId.isEmpty() ) {
                    BatchedUpsertDetail* batchUpsertedId = new BatchedUpsertDetail;
                    batchUpsertedId->setIndex( i );
                    batchUpsertedId->setUpsertedID( upsertedId );
                    upsertedIds->push_back( batchUpsertedId );
                }

                if ( error ) {
                    errors->push_back( error );
                    if ( request.getOrdered() )
                        break;
                }
            }
        }
        else {
            // Anything that is neither an insert nor an update must be a delete
            dassert( request.getBatchType() == BatchedCommandRequest::BatchType_Delete );
            for ( size_t i = 0; i < request.sizeWriteOps(); i++ ) {

                WriteErrorDetail* error = NULL;
                execRemove( BatchItemRef( &request, i ), &error );

                if ( error ) {
                    errors->push_back( error );
                    if ( request.getOrdered() )
                        break;
                }
            }
        }

        // Fill in stale version errors for unordered batches (update/delete can't do this on own)
        if ( !errors->empty() && !request.getOrdered() ) {

            const WriteErrorDetail* finalError = errors->back();

            // When the last recorded error is a stale shard version, one copy of it is appended
            // for each op index that follows the failing op.
            // NOTE(review): the copies come from cloneTo() and no setIndex( i ) is called here, so
            // they appear to keep finalError's index rather than i - confirm this is intended.
            if ( finalError->getErrCode() == ErrorCodes::StaleShardVersion ) {
                for ( size_t i = finalError->getIndex() + 1; i < request.sizeWriteOps(); i++ ) {
                    WriteErrorDetail* dupStaleError = new WriteErrorDetail;
                    finalError->cloneTo( dupStaleError );
                    errors->push_back( dupStaleError );
                }
            }
        }
    }
Пример #15
0
bool BatchedCommandRequest::containsNoIDUpsert(const BatchedCommandRequest& request) {
    if (request.getBatchType() != BatchedCommandRequest::BatchType_Update)
        return false;

    const vector<BatchedUpdateDocument*>& updates = request.getUpdateRequest()->getUpdates();

    for (vector<BatchedUpdateDocument*>::const_iterator it = updates.begin(); it != updates.end();
         ++it) {
        const BatchedUpdateDocument* updateDoc = *it;
        if (updateDoc->getUpsert() && updateDoc->getQuery()["_id"].eoo())
            return true;
    }

    return false;
}
Пример #16
0
// Converts a write message into batch command requests, runs each through the registered
// write command, and maps every command response back onto the client's LastError. Processing
// stops after the first ordered batch that reports an error.
void Strategy::writeOp(OperationContext* txn, int op, Request& request) {
    // make sure we have a last error
    dassert(&LastError::get(cc()));

    OwnedPointerVector<BatchedCommandRequest> ownedBatches;
    vector<BatchedCommandRequest*>& batches = ownedBatches.mutableVector();

    msgToBatchRequests(request.m(), &batches);

    for (size_t batchIdx = 0; batchIdx < batches.size(); ++batchIdx) {
        // Multiple commands registered to last error as multiple requests
        if (batchIdx > 0)
            LastError::get(cc()).startRequest();

        BatchedCommandRequest* batch = batches[batchIdx];

        // Derive the command namespace and re-apply the parsed namespace to the request
        NamespaceString fullNS(batch->getNS());
        string cmdNS = fullNS.getCommandNS();
        batch->setNS(fullNS);

        BSONObjBuilder replyBuilder;
        BSONObj requestBSON = batch->toBSON();

        {
            // Disable the last error object for the duration of the write cmd
            LastError::Disabled disableLastError(&LastError::get(cc()));
            Command::runAgainstRegistered(txn, cmdNS.c_str(), requestBSON, replyBuilder, 0);
        }

        BatchedCommandResponse batchReply;
        bool parsed = batchReply.parseBSON(replyBuilder.done(), NULL);
        (void)parsed;  // for compile
        dassert(parsed && batchReply.isValid(NULL));

        // Populate the lastError object based on the write response
        LastError::get(cc()).reset();
        bool hadError = batchErrorToLastError(*batch, batchReply, &LastError::get(cc()));

        // Check if this is an ordered batch and we had an error which should stop processing
        if (hadError && batch->getOrdered())
            return;
    }
}
Пример #17
0
    // Splits an insert Message into one or more insert batch requests and appends them to
    // 'insertRequests'. Each batch holds at most BatchedCommandRequest::kMaxWriteBatchSize
    // documents and, except for the single-oversized-document case described below, at most
    // BSONObjMaxUserSize bytes of documents. Every request is allocated with new; presumably the
    // caller takes ownership of the appended pointers (confirm against callers).
    //
    // Batches are ordered unless the continue-on-error flag is set on the message, and each is
    // given the WriteConcernOptions::Acknowledged write concern.
    void msgToBatchInserts( const Message& insertMsg,
                            vector<BatchedCommandRequest*>* insertRequests ) {

        // Parsing DbMessage throws
        DbMessage dbMsg( insertMsg );
        NamespaceString nss( dbMsg.getns() );

        // Continue-on-error == unordered
        bool coe = dbMsg.reservedField() & Reserved_InsertOption_ContinueOnError;
        bool ordered = !coe;

        while ( insertRequests->empty() || dbMsg.moreJSObjs() ) {

            // Collect docs for next batch, but don't exceed maximum size
            int totalInsertSize = 0;
            vector<BSONObj> docs;
            do {
                const char* prevObjMark = dbMsg.markGet();
                BSONObj nextObj = dbMsg.nextJsObj();

                // The first document of a batch is always accepted, even when it alone exceeds
                // the size limit. Otherwise the read position would be rolled back with nothing
                // collected and the outer loop would never make progress. Rejecting the oversized
                // document is left to whatever validation runs on the batch later.
                if ( docs.empty()
                     || totalInsertSize + nextObj.objsize() <= BSONObjMaxUserSize ) {
                    docs.push_back( nextObj );
                    totalInsertSize += docs.back().objsize();
                }
                else {
                    // Size limit exceeded, rollback to previous insert position
                    dbMsg.markReset( prevObjMark );
                    break;
                }
            }
            while ( docs.size() < BatchedCommandRequest::kMaxWriteBatchSize
                    && dbMsg.moreJSObjs() );

            dassert( !docs.empty() );

            // No exceptions from here on
            BatchedCommandRequest* request =
                new BatchedCommandRequest( BatchedCommandRequest::BatchType_Insert );
            request->setNSS( nss );
            for ( vector<BSONObj>::const_iterator it = docs.begin(); it != docs.end(); ++it ) {
                request->getInsertRequest()->addToDocuments( *it );
            }
            request->setOrdered( ordered );
            request->setWriteConcern( WriteConcernOptions::Acknowledged );

            insertRequests->push_back( request );
        }
    }
Пример #18
0
    // Executes all inserts of a batch, one document per loop iteration, appending any per-item
    // error (tagged with the item's index) to 'errors'. For an ordered batch, execution returns
    // at the first error.
    void WriteBatchExecutor::execInserts( const BatchedCommandRequest& request,
                                          std::vector<WriteErrorDetail*>* errors ) {

        // Theory of operation:
        //
        // Instantiates an ExecInsertsState, which represents all of the state involved in the batch
        // insert execution algorithm.  Most importantly, encapsulates the lock state.
        //
        // Every iteration of the loop in execInserts() processes one document insertion, by calling
        // insertOne() exactly once for a given value of state.currIndex.
        //
        // If the ExecInsertsState indicates that the requisite write locks are not held, insertOne
        // acquires them and performs lock-acquisition-time checks.  However, on non-error
        // execution, it does not release the locks.  Therefore, the yielding logic in the
        // loop in execInserts() is solely responsible for lock release in the non-error case.
        //
        // Internally, insertOne loops performing the single insert until it completes without a
        // PageFaultException, or until it fails with some kind of error.  Errors are mostly
        // propagated via the request->error field, but DBExceptions or std::exceptions may escape,
        // particularly on operation interruption.  These kinds of errors necessarily prevent
        // further insertOne calls, and stop the batch.  As a result, the only expected source of
        // such exceptions are interruptions.
        //
        // NOTE(review): the text above refers to insertOne(), while the loop below calls
        // execOneInsert(); presumably insertOne() is reached through it - confirm.
        ExecInsertsState state(&request);
        // Normalize all documents (and pregenerate keys) up front, before the insert loop
        normalizeInserts(request, &state.normalizedInserts, &state.pregeneratedKeys);

        ElapsedTracker elapsedTracker(128, 10); // 128 hits or 10 ms, matching RunnerYieldPolicy's

        for (state.currIndex = 0;
             state.currIndex < state.request->sizeWriteOps();
             ++state.currIndex) {

            if (elapsedTracker.intervalHasElapsed()) {
                // Consider yielding between inserts.

                if (state.hasLock()) {
                    // Only give up the lock when a positive yield duration is suggested
                    int micros = ClientCursor::suggestYieldMicros();
                    if (micros > 0) {
                        state.unlock();
                        // Check for interruption after unlocking and before sleeping
                        killCurrentOp.checkForInterrupt();
                        sleepmicros(micros);
                    }
                }
                // Checked on every elapsed interval, whether or not a yield happened
                killCurrentOp.checkForInterrupt();
                elapsedTracker.resetLastTime();
            }

            WriteErrorDetail* error = NULL;
            execOneInsert(&state, &error);
            if (error) {
                errors->push_back(error);
                error->setIndex(state.currIndex);
                // An ordered batch stops at its first failed insert
                if (request.getOrdered())
                    return;
            }
        }
    }
Пример #19
0
void CatalogManagerReplicaSet::writeConfigServerDirect(const BatchedCommandRequest& batchRequest,
                                                       BatchedCommandResponse* batchResponse) {
    std::string dbname = batchRequest.getNS().db().toString();
    invariant(dbname == "config" || dbname == "admin");
    const BSONObj cmdObj = batchRequest.toBSON();

    auto response = _runConfigServerCommandWithNotMasterRetries(dbname, cmdObj);
    if (!response.isOK()) {
        _toBatchError(response.getStatus(), batchResponse);
        return;
    }

    string errmsg;
    if (!batchResponse->parseBSON(response.getValue(), &errmsg)) {
        _toBatchError(Status(ErrorCodes::FailedToParse,
                             str::stream() << "Failed to parse config server response: " << errmsg),
                      batchResponse);
    }
}
Пример #20
0
    // Translates a delete Message into a delete batch request holding exactly one delete
    // document. The limit is 1 when the just-one flag is set on the message and 0 otherwise.
    // The returned request is allocated with new; presumably the caller takes ownership
    // (confirm against callers).
    BatchedCommandRequest* msgToBatchDelete( const Message& deleteMsg ) {

        // Parsing DbMessage throws
        DbMessage parsedMsg( deleteMsg );
        NamespaceString targetNss( parsedMsg.getns() );
        const int optionFlags = parsedMsg.pullInt();
        const BSONObj query = parsedMsg.nextJsObj();

        int limit = 0;
        if ( optionFlags & RemoveOption_JustOne ) {
            limit = 1;
        }

        // No exceptions from here on
        BatchedDeleteDocument* singleDelete = new BatchedDeleteDocument;
        singleDelete->setLimit( limit );
        singleDelete->setQuery( query );

        BatchedCommandRequest* batch =
            new BatchedCommandRequest( BatchedCommandRequest::BatchType_Delete );
        batch->setNS( targetNss.ns() );
        batch->getDeleteRequest()->addToDeletes( singleDelete );

        return batch;
    }
Пример #21
0
    // Goes over the request and preprocesses normalized versions of all the inserts in the request
    static void normalizeInserts( const BatchedCommandRequest& request,
                                  vector<StatusWith<BSONObj> >* normalizedInserts,
                                  vector<PregeneratedKeys>* pregen ) {

        normalizedInserts->reserve(request.sizeWriteOps());
        for ( size_t i = 0; i < request.sizeWriteOps(); ++i ) {
            BSONObj insertDoc = request.getInsertRequest()->getDocumentsAt( i );
            StatusWith<BSONObj> normalInsert = fixDocumentForInsert( insertDoc );
            normalizedInserts->push_back( normalInsert );
            if ( request.getOrdered() && !normalInsert.isOK() )
                break;

            if ( !normalInsert.getValue().isEmpty() )
                insertDoc = normalInsert.getValue();

            pregen->push_back( PregeneratedKeys() );
            GeneratorHolder::getInstance()->prepare( request.getTargetingNS(),
                                                     insertDoc,
                                                     &pregen->back() );
        }
    }
Пример #22
0
    // Executes each item of a batch through _safeWriter, one at a time, and records the error
    // detail derived from each item's LastError, tagged with the item's batch index, on
    // 'response'. An ordered batch stops at the first failed item.
    void BatchSafeWriter::safeWriteBatch( DBClientBase* conn,
                                          const BatchedCommandRequest& request,
                                          BatchedCommandResponse* response ) {

        for ( size_t i = 0; i < request.sizeWriteOps(); ++i ) {

            BatchItemRef itemRef( &request, static_cast<int>( i ) );
            LastError lastError;

            _safeWriter->safeWrite( conn, itemRef, &lastError );

            // Register the error if we need to. The conversion may yield no detail at all, so
            // guard against a null result before dereferencing it.
            BatchedErrorDetail* batchError = lastErrorToBatchError( lastError );
            if ( batchError ) {
                batchError->setIndex( i );
                response->addToErrDetails( batchError );
            }

            // TODO: Other stats, etc.

            // Break on first error if we're ordered
            if ( request.getOrdered() && BatchSafeWriter::isFailedOp( lastError ) ) break;
        }
    }
Пример #23
0
    // static
    // Validates a batch request before execution: the namespace must be valid, must be writable
    // according to userAllowedWriteNS(), and an insert-index request must be a valid index
    // request. Returns Status::OK() when every check passes, otherwise the first failure.
    Status WriteBatchExecutor::validateBatch( const BatchedCommandRequest& request ) {

        // Validate namespace
        const NamespaceString nss( request.getNS() );
        if ( !nss.isValid() ) {
            return Status( ErrorCodes::InvalidNamespace,
                           nss.ns() + " is not a valid namespace" );
        }

        // Make sure we can write to the namespace
        Status writableStatus = userAllowedWriteNS( nss );
        if ( !writableStatus.isOK() ) {
            return writableStatus;
        }

        // Validate insert index requests
        // TODO: Push insert index requests through createIndex once all upgrade paths support it
        string indexErrMsg;
        if ( request.isInsertIndexRequest() ) {
            if ( !request.isValidIndexRequest( &indexErrMsg ) ) {
                return Status( ErrorCodes::InvalidOptions, indexErrMsg );
            }
        }

        return Status::OK();
    }
Пример #24
0
    void clusterWrite( const BatchedCommandRequest& request,
                       BatchedCommandResponse* response,
                       bool autoSplit ) {

        // App-level validation of a create index insert
        if ( request.isInsertIndexRequest() ) {
            if ( request.sizeWriteOps() != 1 || request.isWriteConcernSet() ) {

                // Invalid request to create index
                response->setOk( false );
                response->setErrCode( ErrorCodes::InvalidOptions );
                response->setErrMessage( "invalid batch request for index creation" );

                dassert( response->isValid( NULL ) );
                return;
            }
        }

        // Config writes and shard writes are done differently
        string dbName = NamespaceString( request.getNS() ).db().toString();
        if ( dbName == "config" || dbName == "admin" ) {

            bool verboseWC = request.isVerboseWC();

            // We only support batch sizes of one and {w:0} write concern for config writes
            if ( request.sizeWriteOps() != 1 || ( verboseWC && request.isWriteConcernSet() ) ) {
                // Invalid config server write
                response->setOk( false );
                response->setErrCode( ErrorCodes::InvalidOptions );
                response->setErrMessage( "invalid batch request for config write" );

                dassert( response->isValid( NULL ) );
                return;
            }

            // We need to support "best-effort" writes for pings to the config server.
            // {w:0} (!verbose) writes are interpreted as best-effort in this case - they may still
            // error, but do not do the initial fsync check.
            configWrite( request, response, verboseWC );
        }
        else {
            shardWrite( request, response, autoSplit );
        }
    }
Пример #25
0
    void WriteBatchExecutor::executeBatch( const BatchedCommandRequest& request,
                                           BatchedCommandResponse* response ) {

        // Validate namespace
        const NamespaceString nss = NamespaceString( request.getNS() );
        if ( !nss.isValid() ) {
            toBatchError( Status( ErrorCodes::InvalidNamespace,
                                  nss.ns() + " is not a valid namespace" ),
                          response );
            return;
        }

        // Make sure we can write to the namespace
        Status allowedStatus = userAllowedWriteNS( nss );
        if ( !allowedStatus.isOK() ) {
            toBatchError( allowedStatus, response );
            return;
        }

        // Validate insert index requests
        // TODO: Push insert index requests through createIndex once all upgrade paths support it
        string errMsg;
        if ( request.isInsertIndexRequest() && !request.isValidIndexRequest( &errMsg ) ) {
            toBatchError( Status( ErrorCodes::InvalidOptions, errMsg ), response );
            return;
        }

        // Validate write concern
        // TODO: Lift write concern parsing out of this entirely
        WriteConcernOptions writeConcern;

        BSONObj wcDoc;
        if ( request.isWriteConcernSet() ) {
            wcDoc = request.getWriteConcern();
        }

        Status wcStatus = Status::OK();
        if ( wcDoc.isEmpty() ) {

            // The default write concern if empty is w : 1
            // Specifying w : 0 is/was allowed, but is interpreted identically to w : 1

            wcStatus = writeConcern.parse(
                _defaultWriteConcern.isEmpty() ?
                    WriteConcernOptions::Acknowledged : _defaultWriteConcern );

            if ( writeConcern.wNumNodes == 0 && writeConcern.wMode.empty() ) {
                writeConcern.wNumNodes = 1;
            }
        }
        else {
            wcStatus = writeConcern.parse( wcDoc );
        }

        if ( wcStatus.isOK() ) {
            wcStatus = validateWriteConcern( writeConcern );
        }

        if ( !wcStatus.isOK() ) {
            toBatchError( wcStatus, response );
            return;
        }

        if ( request.sizeWriteOps() == 0u ) {
            toBatchError( Status( ErrorCodes::InvalidLength,
                                  "no write ops were included in the batch" ),
                          response );
            return;
        }

        // Validate batch size
        if ( request.sizeWriteOps() > BatchedCommandRequest::kMaxWriteBatchSize ) {
            toBatchError( Status( ErrorCodes::InvalidLength,
                                  stream() << "exceeded maximum write batch size of "
                                           << BatchedCommandRequest::kMaxWriteBatchSize ),
                          response );
            return;
        }

        //
        // End validation
        //

        bool silentWC = writeConcern.wMode.empty() && writeConcern.wNumNodes == 0
                        && writeConcern.syncMode == WriteConcernOptions::NONE;

        Timer commandTimer;

        OwnedPointerVector<WriteErrorDetail> writeErrorsOwned;
        vector<WriteErrorDetail*>& writeErrors = writeErrorsOwned.mutableVector();

        OwnedPointerVector<BatchedUpsertDetail> upsertedOwned;
        vector<BatchedUpsertDetail*>& upserted = upsertedOwned.mutableVector();

        //
        // Apply each batch item, possibly bulking some items together in the write lock.
        // Stops on error if batch is ordered.
        //

        bulkExecute( request, &upserted, &writeErrors );

        //
        // Try to enforce the write concern if everything succeeded (unordered or ordered)
        // OR if something succeeded and we're unordered.
        //

        auto_ptr<WCErrorDetail> wcError;
        bool needToEnforceWC = writeErrors.empty()
                               || ( !request.getOrdered()
                                    && writeErrors.size() < request.sizeWriteOps() );

        if ( needToEnforceWC ) {

            _client->curop()->setMessage( "waiting for write concern" );

            WriteConcernResult res;
            Status status = waitForWriteConcern( _txn, writeConcern, _client->getLastOp(), &res );

            if ( !status.isOK() ) {
                wcError.reset( toWriteConcernError( status, res ) );
            }
        }

        //
        // Refresh metadata if needed
        //

        bool staleBatch = !writeErrors.empty()
                          && writeErrors.back()->getErrCode() == ErrorCodes::StaleShardVersion;

        if ( staleBatch ) {

            const BatchedRequestMetadata* requestMetadata = request.getMetadata();
            dassert( requestMetadata );

            // Make sure our shard name is set or is the same as what was set previously
            if ( shardingState.setShardName( requestMetadata->getShardName() ) ) {

                //
                // First, we refresh metadata if we need to based on the requested version.
                //

                ChunkVersion latestShardVersion;
                shardingState.refreshMetadataIfNeeded( request.getTargetingNS(),
                                                       requestMetadata->getShardVersion(),
                                                       &latestShardVersion );

                // Report if we're still changing our metadata
                // TODO: Better reporting per-collection
                if ( shardingState.inCriticalMigrateSection() ) {
                    noteInCriticalSection( writeErrors.back() );
                }

                if ( queueForMigrationCommit ) {

                    //
                    // Queue up for migration to end - this allows us to be sure that clients will
                    // not repeatedly try to refresh metadata that is not yet written to the config
                    // server.  Not necessary for correctness.
                    // Exposed as optional parameter to allow testing of queuing behavior with
                    // different network timings.
                    //

                    const ChunkVersion& requestShardVersion = requestMetadata->getShardVersion();

                    //
                    // Only wait if we're an older version (in the current collection epoch) and
                    // we're not write compatible, implying that the current migration is affecting
                    // writes.
                    //

                    if ( requestShardVersion.isOlderThan( latestShardVersion ) &&
                         !requestShardVersion.isWriteCompatibleWith( latestShardVersion ) ) {

                        while ( shardingState.inCriticalMigrateSection() ) {

                            log() << "write request to old shard version "
                                  << requestMetadata->getShardVersion().toString()
                                  << " waiting for migration commit" << endl;

                            shardingState.waitTillNotInCriticalSection( 10 /* secs */);
                        }
                    }
                }
            }
            else {
                // If our shard name is stale, our version must have been stale as well
                dassert( writeErrors.size() == request.sizeWriteOps() );
            }
        }

        //
        // Construct response
        //

        response->setOk( true );

        if ( !silentWC ) {

            if ( upserted.size() ) {
                response->setUpsertDetails( upserted );
            }

            if ( writeErrors.size() ) {
                response->setErrDetails( writeErrors );
            }

            if ( wcError.get() ) {
                response->setWriteConcernError( wcError.release() );
            }

            const repl::ReplicationCoordinator::Mode replMode =
                    repl::getGlobalReplicationCoordinator()->getReplicationMode();
            if (replMode != repl::ReplicationCoordinator::modeNone) {
                response->setLastOp( _client->getLastOp() );
                if (replMode == repl::ReplicationCoordinator::modeReplSet) {
                    response->setElectionId(repl::theReplSet->getElectionId());
                }
            }

            // Set the stats for the response
            response->setN( _stats->numInserted + _stats->numUpserted + _stats->numMatched
                            + _stats->numDeleted );
            if ( request.getBatchType() == BatchedCommandRequest::BatchType_Update )
                response->setNModified( _stats->numModified );
        }

        dassert( response->isValid( NULL ) );
    }
Пример #26
0
    void BatchWriteExec::executeBatch( const BatchedCommandRequest& clientRequest,
                                       BatchedCommandResponse* clientResponse ) {

        BatchWriteOp batchOp;
        batchOp.initClientRequest( &clientRequest );

        // Current batch status
        bool refreshedTargeter = false;
        int rounds = 0;
        int numCompletedOps = 0;
        int numRoundsWithoutProgress = 0;

        while ( !batchOp.isFinished() ) {

            //
            // Get child batches to send using the targeter
            //
            // Targeting errors can be caused by remote metadata changing (the collection could have
            // been dropped and recreated, for example with a new shard key).  If a remote metadata
            // change occurs *before* a client sends us a batch, we need to make sure that we don't
            // error out just because we're staler than the client - otherwise mongos will be have
            // unpredictable behavior.
            //
            // (If a metadata change happens *during* or *after* a client sends us a batch, however,
            // we make no guarantees about delivery.)
            //
            // For this reason, we don't record targeting errors until we've refreshed our targeting
            // metadata at least once *after* receiving the client batch - at that point, we know:
            //
            // 1) our new metadata is the same as the metadata when the client sent a batch, and so
            //    targeting errors are real.
            // OR
            // 2) our new metadata is a newer version than when the client sent a batch, and so
            //    the metadata must have changed after the client batch was sent.  We don't need to
            //    deliver in this case, since for all the client knows we may have gotten the batch
            //    exactly when the metadata changed.
            //

            vector<TargetedWriteBatch*> childBatches;

            // If we've already had a targeting error, we've refreshed the metadata once and can
            // record target errors definitively.
            bool recordTargetErrors = refreshedTargeter;
            Status targetStatus = batchOp.targetBatch( *_targeter,
                                                       recordTargetErrors,
                                                       &childBatches );
            if ( !targetStatus.isOK() ) {
                // Don't do anything until a targeter refresh
                _targeter->noteCouldNotTarget();
                refreshedTargeter = true;
                ++_stats->numTargetErrors;
                dassert( childBatches.size() == 0u );
            }

            //
            // Send all child batches
            //

            size_t numSent = 0;
            size_t numToSend = childBatches.size();
            bool remoteMetadataChanging = false;
            while ( numSent != numToSend ) {

                // Collect batches out on the network, mapped by endpoint
                HostBatchMap pendingBatches;

                //
                // Send side
                //

                // Get as many batches as we can at once
                for ( vector<TargetedWriteBatch*>::iterator it = childBatches.begin();
                    it != childBatches.end(); ++it ) {

                    //
                    // Collect the info needed to dispatch our targeted batch
                    //

                    TargetedWriteBatch* nextBatch = *it;
                    // If the batch is NULL, we sent it previously, so skip
                    if ( nextBatch == NULL ) continue;

                    // Figure out what host we need to dispatch our targeted batch
                    ConnectionString shardHost;
                    Status resolveStatus = _resolver->chooseWriteHost( nextBatch->getEndpoint()
                                                                           .shardName,
                                                                       &shardHost );
                    if ( !resolveStatus.isOK() ) {

                        ++_stats->numResolveErrors;

                        // Record a resolve failure
                        // TODO: It may be necessary to refresh the cache if stale, or maybe just
                        // cancel and retarget the batch
                        WriteErrorDetail error;
                        buildErrorFrom( resolveStatus, &error );
                        batchOp.noteBatchError( *nextBatch, error );

                        // We're done with this batch
                        *it = NULL;
                        --numToSend;
                        continue;
                    }

                    // If we already have a batch for this host, wait until the next time
                    HostBatchMap::iterator pendingIt = pendingBatches.find( shardHost );
                    if ( pendingIt != pendingBatches.end() ) continue;

                    //
                    // We now have all the info needed to dispatch the batch
                    //

                    BatchedCommandRequest request( clientRequest.getBatchType() );
                    batchOp.buildBatchRequest( *nextBatch, &request );

                    // Internally we use full namespaces for request/response, but we send the
                    // command to a database with the collection name in the request.
                    NamespaceString nss( request.getNS() );
                    request.setNS( nss.coll() );

                    _dispatcher->addCommand( shardHost, nss.db(), request );

                    // Indicate we're done by setting the batch to NULL
                    // We'll only get duplicate hostEndpoints if we have broadcast and non-broadcast
                    // endpoints for the same host, so this should be pretty efficient without
                    // moving stuff around.
                    *it = NULL;

                    // Recv-side is responsible for cleaning up the nextBatch when used
                    pendingBatches.insert( make_pair( shardHost, nextBatch ) );
                }

                // Send them all out
                _dispatcher->sendAll();
                numSent += pendingBatches.size();

                //
                // Recv side
                //

                while ( _dispatcher->numPending() > 0 ) {

                    // Get the response
                    ConnectionString shardHost;
                    BatchedCommandResponse response;
                    Status dispatchStatus = _dispatcher->recvAny( &shardHost, &response );

                    // Get the TargetedWriteBatch to find where to put the response
                    dassert( pendingBatches.find( shardHost ) != pendingBatches.end() );
                    TargetedWriteBatch* batchRaw = pendingBatches.find( shardHost )->second;
                    scoped_ptr<TargetedWriteBatch> batch( batchRaw );

                    if ( dispatchStatus.isOK() ) {

                        TrackedErrors trackedErrors;
                        trackedErrors.startTracking( ErrorCodes::StaleShardVersion );

                        // Dispatch was ok, note response
                        batchOp.noteBatchResponse( *batch, response, &trackedErrors );

                        // Note if anything was stale
                        const vector<ShardError*>& staleErrors =
                            trackedErrors.getErrors( ErrorCodes::StaleShardVersion );

                        if ( staleErrors.size() > 0 ) {
                            noteStaleResponses( staleErrors, _targeter );
                            ++_stats->numStaleBatches;
                        }

                        // Remember if the shard is actively changing metadata right now
                        if ( isShardMetadataChanging( staleErrors ) ) {
                            remoteMetadataChanging = true;
                        }

                        // Remember that we successfully wrote to this shard
                        // NOTE: This will record lastOps for shards where we actually didn't update
                        // or delete any documents, which preserves old behavior but is conservative
                        _stats->noteWriteAt( shardHost,
                                             response.isLastOpSet() ? 
                                             response.getLastOp() : OpTime(),
                                             response.isElectionIdSet() ?
                                             response.getElectionId() : OID());
                    }
                    else {

                        // Error occurred dispatching, note it
                        WriteErrorDetail error;
                        buildErrorFrom( dispatchStatus, &error );
                        batchOp.noteBatchError( *batch, error );
                    }
                }
            }

            ++rounds;
            ++_stats->numRounds;

            // If we're done, get out
            if ( batchOp.isFinished() )
                break;

            // MORE WORK TO DO

            //
            // Refresh the targeter if we need to (no-op if nothing stale)
            //

            bool targeterChanged = false;
            Status refreshStatus = _targeter->refreshIfNeeded( &targeterChanged );

            if ( !refreshStatus.isOK() ) {

                // It's okay if we can't refresh, we'll just record errors for the ops if
                // needed.
                warning() << "could not refresh targeter" << causedBy( refreshStatus.reason() )
                          << endl;
            }

            //
            // Ensure progress is being made toward completing the batch op
            //

            int currCompletedOps = batchOp.numWriteOpsIn( WriteOpState_Completed );
            if ( currCompletedOps == numCompletedOps && !targeterChanged
                 && !remoteMetadataChanging ) {
                ++numRoundsWithoutProgress;
            }
            else {
                numRoundsWithoutProgress = 0;
            }
            numCompletedOps = currCompletedOps;

            if ( numRoundsWithoutProgress > kMaxRoundsWithoutProgress ) {

                stringstream msg;
                msg << "no progress was made executing batch write op in " << clientRequest.getNS()
                    << " after " << kMaxRoundsWithoutProgress << " rounds (" << numCompletedOps
                    << " ops completed in " << rounds << " rounds total)";

                WriteErrorDetail error;
                buildErrorFrom( Status( ErrorCodes::NoProgressMade, msg.str() ), &error );
                batchOp.setBatchError( error );
                break;
            }
        }

        batchOp.buildClientResponse( clientResponse );
    }
Пример #27
0
// Applies the insert (or insert-as-create-index) items of 'request' in index order.
//
// Documents are normalized up front, outside any lock.  The outer loop then takes the write
// lock for the namespace, runs the master / shard version / index constraint checks, and the
// inner loop applies as many consecutive items as it can under that one lock.  The inner loop
// is left on the first page fault or error; the outer loop records the error (if any) and
// either retries the same item after touching the faulted page, or moves on to the next item.
//
// request - the insert batch to apply
// errors  - out-parameter; one WriteErrorDetail is appended per failed item, with its index set
//           to the item's position in the batch.  The pointers are obtained from
//           currResult.releaseError() - presumably the caller owns them; confirm against caller.
//
// For ordered batches processing stops at the first error.
void WriteBatchExecutor::execInserts( const BatchedCommandRequest& request,
                                      std::vector<WriteErrorDetail*>* errors ) {

    // Bulk insert is a bit different from other bulk operations in that multiple request docs
    // can be processed at once inside the write lock.

    const NamespaceString nss( request.getTargetingNS() );
    // Cursor over the batch; replaced with a fresh BatchItemRef each time we advance
    scoped_ptr<BatchItemRef> currInsertItem( new BatchItemRef( &request, 0 ) );

    // Go through our request and do some preprocessing on insert documents outside the lock to
    // validate and put them in a normalized form - i.e. put _id in front and fill in
    // timestamps.  The insert document may also be invalid.
    // TODO:  Might be more efficient to do in batches.
    vector<StatusWith<BSONObj> > normalInserts;
    normalizeInserts( request, &normalInserts );

    // Outer loop: one iteration per lock acquisition (or per item rejected during normalization)
    while ( currInsertItem->getItemIndex() < static_cast<int>( request.sizeWriteOps() ) ) {

        WriteOpResult currResult;

        // Don't (re-)acquire locks and create database until it's necessary
        if ( !normalInserts[currInsertItem->getItemIndex()].isOK() ) {
            // The item failed normalization: report that status without taking the lock
            currResult.error =
                toWriteError( normalInserts[currInsertItem->getItemIndex()].getStatus() );
        }
        else {

            PageFaultRetryableSection pFaultSection;

            ////////////////////////////////////
            Lock::DBWrite writeLock( nss.ns() );
            ////////////////////////////////////

            // Check version inside of write lock

            // The checks short-circuit; each receives &currResult.error to report its failure
            if ( checkIsMasterForCollection( nss, &currResult.error )
                    && checkShardVersion( &shardingState, request, &currResult.error )
                    && checkIndexConstraints( &shardingState, request, &currResult.error ) ) {

                //
                // Get the collection for the insert
                //

                scoped_ptr<Client::Context> writeContext;
                Collection* collection = NULL;

                try {
                    // Context once we're locked, to set more details in currentOp()
                    // TODO: better constructor?
                    writeContext.reset( new Client::Context( request.getNS(),
                                        storageGlobalParams.dbpath,
                                        false /* don't check version */) );

                    Database* database = writeContext->db();
                    dassert( database );
                    collection = database->getCollection( nss.ns() );

                    if ( !collection ) {
                        // Implicitly create if it doesn't exist
                        collection = database->createCollection( nss.ns() );
                        if ( !collection ) {
                            currResult.error =
                                toWriteError( Status( ErrorCodes::InternalError,
                                                      "could not create collection" ) );
                        }
                    }
                }
                catch ( const DBException& ex ) {
                    // Interruptions are rethrown to the caller; any other DBException becomes a
                    // write error for the current item.
                    Status status(ex.toStatus());
                    if (ErrorCodes::isInterruption(status.code())) {
                        throw;
                    }
                    currResult.error = toWriteError(status);
                }

                //
                // Perform writes inside write lock
                //

                // Skipped entirely when 'collection' is still NULL after the block above
                while ( collection
                        && currInsertItem->getItemIndex()
                        < static_cast<int>( request.sizeWriteOps() ) ) {

                    //
                    // BEGIN CURRENT OP
                    //

                    scoped_ptr<CurOp> currentOp( beginCurrentOp( _client, *currInsertItem ) );
                    incOpStats( *currInsertItem );

                    // Get the actual document we want to write, assuming it's valid
                    const StatusWith<BSONObj>& normalInsert = //
                        normalInserts[currInsertItem->getItemIndex()];

                    // An empty normalized value means the original request document is used.
                    // NOTE(review): getValue() is read here before isOK() is checked below -
                    // confirm StatusWith allows getValue() on a non-OK result.
                    const BSONObj& normalInsertDoc =
                        normalInsert.getValue().isEmpty() ?
                        currInsertItem->getDocument() : normalInsert.getValue();

                    if ( !normalInsert.isOK() ) {
                        // This insert failed on preprocessing
                        currResult.error = toWriteError( normalInsert.getStatus() );
                    }
                    else if ( !request.isInsertIndexRequest() ) {
                        // Try the insert
                        singleInsert( *currInsertItem,
                                      normalInsertDoc,
                                      collection,
                                      &currResult );
                    }
                    else {
                        // Try the create index
                        singleCreateIndex( *currInsertItem,
                                           normalInsertDoc,
                                           collection,
                                           &currResult );
                    }

                    //
                    // END CURRENT OP
                    //

                    finishCurrentOp( _client, currentOp.get(), currResult.error );

                    // Faults release the write lock
                    // (stats are not recorded for a faulted attempt; the outer loop does not
                    // advance on a fault, so the same item is attempted again)
                    if ( currResult.fault )
                        break;

                    // In general, we might have stats and errors
                    incWriteStats( *currInsertItem,
                                   currResult.stats,
                                   currResult.error,
                                   currentOp.get() );

                    // Errors release the write lock
                    if ( currResult.error )
                        break;

                    // Increment in the write lock and reset the stats for next time
                    currInsertItem.reset( new BatchItemRef( &request,
                                                            currInsertItem->getItemIndex()
                                                            + 1 ) );
                    currResult.reset();

                    // Destruct curop so that our parent curop is restored, so that we
                    // record the yield count in the parent.
                    currentOp.reset(NULL);

                    // yield sometimes
                    int micros = ClientCursor::suggestYieldMicros();
                    if (micros > 0) {
                        ClientCursor::staticYield(micros, "", NULL);
                    }
                }
            }

        } // END WRITE LOCK

        //
        // Store the current error if it exists
        //

        if ( currResult.error ) {

            // Record the failure against the index of the item that was being processed
            errors->push_back( currResult.releaseError() );
            errors->back()->setIndex( currInsertItem->getItemIndex() );

            // Break early for ordered batches
            if ( request.getOrdered() )
                break;
        }

        //
        // Fault or increment
        //

        if ( currResult.fault ) {
            // Check page fault out of lock
            currResult.fault->touch();
        }
        else {
            // Increment if not a fault
            // (this also runs after the inner loop has consumed every item, pushing the index
            // one past the batch size; the outer loop condition then ends the loop)
            currInsertItem.reset( new BatchItemRef( &request,
                                                    currInsertItem->getItemIndex() + 1 ) );
        }
    }

}
Пример #28
0
bool batchErrorToLastError(const BatchedCommandRequest& request,
                           const BatchedCommandResponse& response,
                           LastError* error) {
    unique_ptr<WriteErrorDetail> commandError;
    WriteErrorDetail* lastBatchError = NULL;

    if (!response.getOk()) {
        // Command-level error, all writes failed

        commandError.reset(new WriteErrorDetail);
        buildErrorFromResponse(response, commandError.get());
        lastBatchError = commandError.get();
    } else if (response.isErrDetailsSet()) {
        // The last error in the batch is always reported - this matches expected COE
        // semantics for insert batches. For updates and deletes, error is only reported
        // if the error was on the last item.

        const bool lastOpErrored = response.getErrDetails().back()->getIndex() ==
            static_cast<int>(request.sizeWriteOps() - 1);
        if (request.getBatchType() == BatchedCommandRequest::BatchType_Insert || lastOpErrored) {
            lastBatchError = response.getErrDetails().back();
        }
    } else {
        // We don't care about write concern errors, these happen in legacy mode in GLE.
    }

    // Record an error if one exists
    if (lastBatchError) {
        string errMsg = lastBatchError->getErrMessage();
        error->setLastError(lastBatchError->getErrCode(),
                            errMsg.empty() ? "see code for details" : errMsg.c_str());
        return true;
    }

    // Record write stats otherwise
    // NOTE: For multi-write batches, our semantics change a little because we don't have
    // un-aggregated "n" stats.
    if (request.getBatchType() == BatchedCommandRequest::BatchType_Update) {
        BSONObj upsertedId;
        if (response.isUpsertDetailsSet()) {
            // Only report the very last item's upserted id if applicable
            if (response.getUpsertDetails().back()->getIndex() + 1 ==
                static_cast<int>(request.sizeWriteOps())) {
                upsertedId = response.getUpsertDetails().back()->getUpsertedID();
            }
        }

        int numUpserted = 0;
        if (response.isUpsertDetailsSet())
            numUpserted = response.sizeUpsertDetails();

        int numMatched = response.getN() - numUpserted;
        dassert(numMatched >= 0);

        // Wrap upserted id in "upserted" field
        BSONObj leUpsertedId;
        if (!upsertedId.isEmpty())
            leUpsertedId = upsertedId.firstElement().wrap(kUpsertedFieldName);

        error->recordUpdate(numMatched > 0, response.getN(), leUpsertedId);
    } else if (request.getBatchType() == BatchedCommandRequest::BatchType_Delete) {
        error->recordDelete(response.getN());
    }

    return false;
}
Пример #29
0
    void WriteBatchExecutor::executeBatch( const BatchedCommandRequest& request,
                                           BatchedCommandResponse* response ) {

        Timer commandTimer;

        WriteStats stats;
        std::auto_ptr<BatchedErrorDetail> error( new BatchedErrorDetail );
        BSONObj upsertedID = BSONObj();
        bool batchSuccess = true;
        bool staleBatch = false;

        // Apply each batch item, stopping on an error if we were asked to apply the batch
        // sequentially.
        size_t numBatchOps = request.sizeWriteOps();
        bool verbose = request.isVerboseWC();
        for ( size_t i = 0; i < numBatchOps; i++ ) {

            if ( applyWriteItem( BatchItemRef( &request, i ),
                                 &stats,
                                 &upsertedID,
                                 error.get() ) ) {

                // In case updates turned out to be upserts, the callers may be interested
                // in learning what _id was used for that document.
                if ( !upsertedID.isEmpty() ) {
                    if ( numBatchOps == 1 ) {
                        response->setSingleUpserted(upsertedID);
                    }
                    else if ( verbose ) {
                        std::auto_ptr<BatchedUpsertDetail> upsertDetail(new BatchedUpsertDetail);
                        upsertDetail->setIndex(i);
                        upsertDetail->setUpsertedID(upsertedID);
                        response->addToUpsertDetails(upsertDetail.release());
                    }
                    upsertedID = BSONObj();
                }

            }
            else {

                // The applyWriteItem did not go thgrou
                // If the error is sharding related, we'll have to investigate whether we
                // have a stale view of sharding state.
                if ( error->getErrCode() == ErrorCodes::StaleShardVersion ) staleBatch = true;

                // Don't bother recording if the user doesn't want a verbose answer. We want to
                // keep the error if this is a one-item batch, since we already compact the
                // response for those.
                if (verbose || numBatchOps == 1) {
                    error->setIndex( static_cast<int>( i ) );
                    response->addToErrDetails( error.release() );
                }

                batchSuccess = false;

                if ( request.getOrdered() ) break;

                error.reset( new BatchedErrorDetail );
            }
        }

        // So far, we may have failed some of the batch's items. So we record
        // that. Rergardless, we still need to apply the write concern.  If that generates a
        // more specific error, we'd replace for the intermediate error here. Note that we
        // "compatct" the error messge if this is an one-item batch. (See rationale later in
        // this file.)
        if ( !batchSuccess ) {

            if (numBatchOps > 1) {
                // TODO
                // Define the final error code here.
                // Might be used as a final error, depending on write concern success.
                response->setErrCode( 99999 );
                response->setErrMessage( "batch op errors occurred" );
            }
            else {
                // Promote the single error.
                const BatchedErrorDetail* error = response->getErrDetailsAt( 0 );
                response->setErrCode( error->getErrCode() );
                if ( error->isErrInfoSet() ) response->setErrInfo( error->getErrInfo() );
                response->setErrMessage( error->getErrMessage() );
                response->unsetErrDetails();
                error = NULL;
            }
        }

        // Apply write concern. Note, again, that we're only assembling a full response if the
        // user is interested in it.
        BSONObj writeConcern;
        if ( request.isWriteConcernSet() ) {
            writeConcern = request.getWriteConcern();
        }
        else {
            writeConcern = _defaultWriteConcern;
        }

        string errMsg;
        BSONObjBuilder wcResultsB;
        if ( !waitForWriteConcern( writeConcern, !batchSuccess, &wcResultsB, &errMsg ) ) {

            // TODO Revisit when user visible family error codes are set
            response->setErrCode( ErrorCodes::WriteConcernFailed );
            response->setErrMessage( errMsg );

            if ( verbose ) {
                response->setErrInfo( wcResultsB.obj() );
            }
        }

        // TODO: Audit where we want to queue here
        if ( staleBatch ) {
            ChunkVersion latestShardVersion;
            shardingState.refreshMetadataIfNeeded( request.getTargetingNS(),
                                                   request.getShardVersion(),
                                                   &latestShardVersion );
        }

        // Set the main body of the response. We assume that, if there was an error, the error
        // code would already be set.
        response->setOk( !response->isErrCodeSet() );
        response->setN( stats.numInserted + stats.numUpserted + stats.numUpdated
                        + stats.numDeleted );
        dassert( response->isValid( NULL ) );
    }
Пример #30
0
    /**
     * Executes a client write batch by repeatedly targeting it into per-endpoint child
     * batches, dispatching those, and feeding the responses back into a BatchWriteOp until
     * the op reports that it is finished.  The aggregated result is then written to
     * 'clientResponse'.
     *
     * @param clientRequest   the batch received from the client.
     * @param clientResponse  out-parameter receiving the response built by the BatchWriteOp.
     */
    void BatchWriteExec::executeBatch( const BatchedCommandRequest& clientRequest,
                                       BatchedCommandResponse* clientResponse ) {

        BatchWriteOp batchOp;
        batchOp.initClientRequest( &clientRequest );

        int numTargetErrors = 0;
        int numStaleBatches = 0;

        for ( int rounds = 0; !batchOp.isFinished(); rounds++ ) {

            //
            // Refresh the targeter if we need to (no-op if nothing stale)
            //

            Status refreshStatus = _targeter->refreshIfNeeded();

            if ( !refreshStatus.isOK() ) {

                // It's okay if we can't refresh, we'll just record errors for the ops if
                // needed.
                warning() << "could not refresh targeter" << causedBy( refreshStatus.reason() )
                          << endl;
            }

            //
            // Get child batches to send
            //

            vector<TargetedWriteBatch*> childBatches;

            //
            // Targeting errors can be caused by remote metadata changing (the collection could have
            // been dropped and recreated, for example with a new shard key).  If a remote metadata
            // change occurs *before* a client sends us a batch, we need to make sure that we don't
            // error out just because we're staler than the client - otherwise mongos will have
            // unpredictable behavior.
            //
            // (If a metadata change happens *during* or *after* a client sends us a batch, however,
            // we make no guarantees about delivery.)
            //
            // For this reason, we don't record targeting errors until we've refreshed our targeting
            // metadata at least once *after* receiving the client batch - at that point, we know:
            //
            // 1) our new metadata is the same as the metadata when the client sent a batch, and so
            //    targeting errors are real.
            // OR
            // 2) our new metadata is a newer version than when the client sent a batch, and so
            //    the metadata must have changed after the client batch was sent.  We don't need to
            //    deliver in this case, since for all the client knows we may have gotten the batch
            //    exactly when the metadata changed.
            //
            // If we've had a targeting error or stale error, we've refreshed the metadata once and
            // can record target errors.
            bool recordTargetErrors = numTargetErrors > 0 || numStaleBatches > 0;

            Status targetStatus = batchOp.targetBatch( *_targeter,
                                                       recordTargetErrors,
                                                       &childBatches );
            if ( !targetStatus.isOK() ) {
                // Tell the targeter about the failure and retry in the next round.
                _targeter->noteCouldNotTarget();
                ++numTargetErrors;
                continue;
            }

            //
            // Send all child batches
            //

            size_t numSent = 0;
            while ( numSent != childBatches.size() ) {

                // Collect batches out on the network, mapped by endpoint
                EndpointBatchMap pendingBatches;

                //
                // Send side
                //

                // Get as many batches as we can at once
                for ( vector<TargetedWriteBatch*>::iterator it = childBatches.begin();
                    it != childBatches.end(); ++it ) {

                    TargetedWriteBatch* nextBatch = *it;
                    // If the batch is NULL, we sent it previously, so skip
                    if ( nextBatch == NULL ) continue;
                    const ConnectionString& hostEndpoint = nextBatch->getEndpoint().shardHost;

                    EndpointBatchMap::iterator pendingIt = pendingBatches.find( &hostEndpoint );

                    // If we already have a batch for this endpoint, continue
                    if ( pendingIt != pendingBatches.end() ) continue;

                    // Otherwise send it out to the endpoint via a command to a database

                    BatchedCommandRequest request( clientRequest.getBatchType() );
                    batchOp.buildBatchRequest( *nextBatch, &request );

                    // Internally we use full namespaces for request/response, but we send the
                    // command to a database with the collection name in the request.
                    NamespaceString nss( request.getNS() );
                    request.setNS( nss.coll() );

                    _dispatcher->addCommand( hostEndpoint, nss.db(), request );

                    // Indicate we're done by setting the batch to NULL
                    // We'll only get duplicate hostEndpoints if we have broadcast and non-broadcast
                    // endpoints for the same host, so this should be pretty efficient without
                    // moving stuff around.
                    *it = NULL;

                    // Recv-side is responsible for cleaning up the nextBatch when used.
                    // Note that the map key is the address of the endpoint obtained from the
                    // batch itself.
                    pendingBatches.insert( make_pair( &hostEndpoint, nextBatch ) );
                }

                // Send them all out
                _dispatcher->sendAll();
                numSent += pendingBatches.size();

                //
                // Recv side
                //

                while ( _dispatcher->numPending() > 0 ) {

                    // Get the response
                    ConnectionString endpoint;
                    BatchedCommandResponse response;
                    Status dispatchStatus = _dispatcher->recvAny( &endpoint, &response );

                    // Get the TargetedWriteBatch to find where to put the response.
                    // NOTE(review): the lookup result is not checked against end(); this
                    // assumes recvAny() only reports endpoints that were sent above - confirm.
                    EndpointBatchMap::iterator recvIt = pendingBatches.find( &endpoint );

                    // Take ownership so the batch is cleaned up when this iteration ends.
                    scoped_ptr<TargetedWriteBatch> batch( recvIt->second );

                    // Drop the map entry before the batch goes away: its key points at the
                    // endpoint taken from the batch, so leaving the entry behind would let a
                    // later find() compare against a dangling key.  Erasing by iterator does
                    // not need to look at any key.
                    pendingBatches.erase( recvIt );

                    if ( dispatchStatus.isOK() ) {

                        TrackedErrors trackedErrors;
                        trackedErrors.startTracking( ErrorCodes::StaleShardVersion );

                        // Dispatch was ok, note response
                        batchOp.noteBatchResponse( *batch, response, &trackedErrors );

                        // Note if anything was stale
                        const vector<ShardError*>& staleErrors =
                            trackedErrors.getErrors( ErrorCodes::StaleShardVersion );

                        if ( staleErrors.size() > 0 ) {
                            noteStaleResponses( staleErrors, _targeter );
                            ++numStaleBatches;
                        }
                    }
                    else {

                        // Error occurred dispatching, note it
                        BatchedErrorDetail error;
                        buildErrorFrom( dispatchStatus, &error );
                        batchOp.noteBatchError( *batch, error );
                    }
                }
            }
        }

        batchOp.buildClientResponse( clientResponse );
    }