Beispiel #1
0
void WriteBatchExecutor::bulkExecute( const BatchedCommandRequest& request,
                                      std::vector<BatchedUpsertDetail*>* upsertedIds,
                                      std::vector<WriteErrorDetail*>* errors ) {

    if ( request.getBatchType() == BatchedCommandRequest::BatchType_Insert ) {
        execInserts( request, errors );
    }
    else if ( request.getBatchType() == BatchedCommandRequest::BatchType_Update ) {
        for ( size_t i = 0; i < request.sizeWriteOps(); i++ ) {

            WriteErrorDetail* error = NULL;
            BSONObj upsertedId;
            execUpdate( BatchItemRef( &request, i ), &upsertedId, &error );

            if ( !upsertedId.isEmpty() ) {
                BatchedUpsertDetail* batchUpsertedId = new BatchedUpsertDetail;
                batchUpsertedId->setIndex( i );
                batchUpsertedId->setUpsertedID( upsertedId );
                upsertedIds->push_back( batchUpsertedId );
            }

            if ( error ) {
                errors->push_back( error );
                if ( request.getOrdered() )
                    break;
            }
        }
    }
    else {
        dassert( request.getBatchType() == BatchedCommandRequest::BatchType_Delete );
        for ( size_t i = 0; i < request.sizeWriteOps(); i++ ) {

            WriteErrorDetail* error = NULL;
            execRemove( BatchItemRef( &request, i ), &error );

            if ( error ) {
                errors->push_back( error );
                if ( request.getOrdered() )
                    break;
            }
        }
    }

    // Fill in stale version errors for unordered batches (update/delete can't do this on own)
    if ( !errors->empty() && !request.getOrdered() ) {

        const WriteErrorDetail* finalError = errors->back();

        if ( finalError->getErrCode() == ErrorCodes::StaleShardVersion ) {
            for ( size_t i = finalError->getIndex() + 1; i < request.sizeWriteOps(); i++ ) {
                WriteErrorDetail* dupStaleError = new WriteErrorDetail;
                finalError->cloneTo( dupStaleError );
                errors->push_back( dupStaleError );
            }
        }
    }
}
Beispiel #2
0
    void BatchSafeWriter::safeWriteBatch( DBClientBase* conn,
                                          const BatchedCommandRequest& request,
                                          BatchedCommandResponse* response ) {

        // N starts at zero, and we add to it for each item
        response->setN( 0 );

        for ( size_t i = 0; i < request.sizeWriteOps(); ++i ) {

            BatchItemRef itemRef( &request, static_cast<int>( i ) );
            LastError lastError;

            _safeWriter->safeWrite( conn, itemRef, &lastError );

            // Register the error if we need to
            BatchedErrorDetail* batchError = lastErrorToBatchError( lastError );
            if ( batchError ) {
                batchError->setIndex( i );
                response->addToErrDetails( batchError );
            }

            response->setN( response->getN() + lastError.nObjects );

            if ( !lastError.upsertedId.isEmpty() ) {
                BatchedUpsertDetail* upsertedId = new BatchedUpsertDetail;
                upsertedId->setIndex( i );
                upsertedId->setUpsertedID( lastError.upsertedId );
                response->addToUpsertDetails( upsertedId );
            }

            // Break on first error if we're ordered
            if ( request.getOrdered() && BatchSafeWriter::isFailedOp( lastError ) ) break;
        }

        if ( request.sizeWriteOps() == 1 && response->isErrDetailsSet()
             && !response->isErrCodeSet() ) {

            // Promote single error to batch error
            const BatchedErrorDetail* error = response->getErrDetailsAt( 0 );
            response->setErrCode( error->getErrCode() );
            if ( error->isErrInfoSet() ) response->setErrInfo( error->getErrInfo() );
            response->setErrMessage( error->getErrMessage() );

            response->unsetErrDetails();
        }

        if ( request.sizeWriteOps() == 1 && response->isUpsertDetailsSet() ) {

            // Promote single upsert to batch upsert
            const BatchedUpsertDetail* upsertedId = response->getUpsertDetailsAt( 0 );
            response->setSingleUpserted( upsertedId->getUpsertedID() );

            response->unsetUpsertDetails();
        }

        response->setOk( !response->isErrCodeSet() );
        dassert( response->isValid( NULL ) );
    }
    void WriteBatchExecutor::execInserts( const BatchedCommandRequest& request,
                                          std::vector<WriteErrorDetail*>* errors ) {

        // Theory of operation:
        //
        // Instantiates an ExecInsertsState, which represents all of the state involved in the batch
        // insert execution algorithm.  Most importantly, encapsulates the lock state.
        //
        // Every iteration of the loop in execInserts() processes one document insertion, by calling
        // insertOne() exactly once for a given value of state.currIndex.
        //
        // If the ExecInsertsState indicates that the requisite write locks are not held, insertOne
        // acquires them and performs lock-acquisition-time checks.  However, on non-error
        // execution, it does not release the locks.  Therefore, the yielding logic in the while
        // loop in execInserts() is solely responsible for lock release in the non-error case.
        //
        // Internally, insertOne loops performing the single insert until it completes without a
        // PageFaultException, or until it fails with some kind of error.  Errors are mostly
        // propagated via the request->error field, but DBExceptions or std::exceptions may escape,
        // particularly on operation interruption.  These kinds of errors necessarily prevent
        // further insertOne calls, and stop the batch.  As a result, the only expected source of
        // such exceptions are interruptions.
        ExecInsertsState state(&request);
        normalizeInserts(request, &state.normalizedInserts, &state.pregeneratedKeys);

        ElapsedTracker elapsedTracker(128, 10); // 128 hits or 10 ms, matching RunnerYieldPolicy's

        for (state.currIndex = 0;
             state.currIndex < state.request->sizeWriteOps();
             ++state.currIndex) {

            if (elapsedTracker.intervalHasElapsed()) {
                // Consider yielding between inserts.

                if (state.hasLock()) {
                    int micros = ClientCursor::suggestYieldMicros();
                    if (micros > 0) {
                        state.unlock();
                        killCurrentOp.checkForInterrupt();
                        sleepmicros(micros);
                    }
                }
                killCurrentOp.checkForInterrupt();
                elapsedTracker.resetLastTime();
            }

            WriteErrorDetail* error = NULL;
            execOneInsert(&state, &error);
            if (error) {
                errors->push_back(error);
                error->setIndex(state.currIndex);
                if (request.getOrdered())
                    return;
            }
        }
    }
Beispiel #4
0
    // Goes over the request and preprocesses normalized versions of all the inserts in the request
    static void normalizeInserts( const BatchedCommandRequest& request,
                                  vector<StatusWith<BSONObj> >* normalInserts ) {

        for ( size_t i = 0; i < request.sizeWriteOps(); ++i ) {
            StatusWith<BSONObj> normalInsert = normalizeInsert( BatchItemRef( &request, i ) );
            normalInserts->push_back( normalInsert );
            if ( request.getOrdered() && !normalInsert.isOK() )
                break;
        }
    }
Beispiel #5
0
// Goes over the request and preprocesses normalized versions of all the inserts in the request
static void normalizeInserts( const BatchedCommandRequest& request,
                              vector<StatusWith<BSONObj> >* normalInserts ) {

    for ( size_t i = 0; i < request.sizeWriteOps(); ++i ) {
        BSONObj insertDoc = request.getInsertRequest()->getDocumentsAt( i );
        StatusWith<BSONObj> normalInsert = fixDocumentForInsert( insertDoc );
        normalInserts->push_back( normalInsert );
        if ( request.getOrdered() && !normalInsert.isOK() )
            break;
    }
}
Beispiel #6
0
void Strategy::writeOp(OperationContext* txn, int op, Request& request) {
    // make sure we have a last error
    dassert(&LastError::get(cc()));

    OwnedPointerVector<BatchedCommandRequest> commandRequestsOwned;
    vector<BatchedCommandRequest*>& commandRequests = commandRequestsOwned.mutableVector();

    msgToBatchRequests(request.m(), &commandRequests);

    for (vector<BatchedCommandRequest*>::iterator it = commandRequests.begin();
         it != commandRequests.end();
         ++it) {
        // Multiple commands registered to last error as multiple requests
        if (it != commandRequests.begin())
            LastError::get(cc()).startRequest();

        BatchedCommandRequest* commandRequest = *it;

        // Adjust namespaces for command
        NamespaceString fullNS(commandRequest->getNS());
        string cmdNS = fullNS.getCommandNS();
        // We only pass in collection name to command
        commandRequest->setNS(fullNS);

        BSONObjBuilder builder;
        BSONObj requestBSON = commandRequest->toBSON();

        {
            // Disable the last error object for the duration of the write cmd
            LastError::Disabled disableLastError(&LastError::get(cc()));
            Command::runAgainstRegistered(txn, cmdNS.c_str(), requestBSON, builder, 0);
        }

        BatchedCommandResponse commandResponse;
        bool parsed = commandResponse.parseBSON(builder.done(), NULL);
        (void)parsed;  // for compile
        dassert(parsed && commandResponse.isValid(NULL));

        // Populate the lastError object based on the write response
        LastError::get(cc()).reset();
        bool hadError =
            batchErrorToLastError(*commandRequest, commandResponse, &LastError::get(cc()));

        // Check if this is an ordered batch and we had an error which should stop processing
        if (commandRequest->getOrdered() && hadError)
            break;
    }
}
    // Goes over the request and preprocesses normalized versions of all the inserts in the request
    static void normalizeInserts( const BatchedCommandRequest& request,
                                  vector<StatusWith<BSONObj> >* normalizedInserts,
                                  vector<PregeneratedKeys>* pregen ) {

        normalizedInserts->reserve(request.sizeWriteOps());
        for ( size_t i = 0; i < request.sizeWriteOps(); ++i ) {
            BSONObj insertDoc = request.getInsertRequest()->getDocumentsAt( i );
            StatusWith<BSONObj> normalInsert = fixDocumentForInsert( insertDoc );
            normalizedInserts->push_back( normalInsert );
            if ( request.getOrdered() && !normalInsert.isOK() )
                break;

            if ( !normalInsert.getValue().isEmpty() )
                insertDoc = normalInsert.getValue();

            pregen->push_back( PregeneratedKeys() );
            GeneratorHolder::getInstance()->prepare( request.getTargetingNS(),
                                                     insertDoc,
                                                     &pregen->back() );
        }
    }
Beispiel #8
0
    void BatchSafeWriter::safeWriteBatch( DBClientBase* conn,
                                          const BatchedCommandRequest& request,
                                          BatchedCommandResponse* response ) {

        for ( size_t i = 0; i < request.sizeWriteOps(); ++i ) {

            BatchItemRef itemRef( &request, static_cast<int>( i ) );
            LastError lastError;

            _safeWriter->safeWrite( conn, itemRef, &lastError );

            // Register the error if we need to
            BatchedErrorDetail* batchError = lastErrorToBatchError( lastError );
            batchError->setIndex( i );
            response->addToErrDetails( batchError );

            // TODO: Other stats, etc.

            // Break on first error if we're ordered
            if ( request.getOrdered() && BatchSafeWriter::isFailedOp( lastError ) ) break;
        }
    }
Beispiel #9
0
    void WriteBatchExecutor::executeBatch( const BatchedCommandRequest& request,
                                           BatchedCommandResponse* response ) {

        Timer commandTimer;

        WriteStats stats;
        std::auto_ptr<BatchedErrorDetail> error( new BatchedErrorDetail );
        BSONObj upsertedID = BSONObj();
        bool batchSuccess = true;
        bool staleBatch = false;

        // Apply each batch item, stopping on an error if we were asked to apply the batch
        // sequentially.
        size_t numBatchOps = request.sizeWriteOps();
        bool verbose = request.isVerboseWC();
        for ( size_t i = 0; i < numBatchOps; i++ ) {

            if ( applyWriteItem( BatchItemRef( &request, i ),
                                 &stats,
                                 &upsertedID,
                                 error.get() ) ) {

                // In case updates turned out to be upserts, the callers may be interested
                // in learning what _id was used for that document.
                if ( !upsertedID.isEmpty() ) {
                    if ( numBatchOps == 1 ) {
                        response->setSingleUpserted(upsertedID);
                    }
                    else if ( verbose ) {
                        std::auto_ptr<BatchedUpsertDetail> upsertDetail(new BatchedUpsertDetail);
                        upsertDetail->setIndex(i);
                        upsertDetail->setUpsertedID(upsertedID);
                        response->addToUpsertDetails(upsertDetail.release());
                    }
                    upsertedID = BSONObj();
                }

            }
            else {

                // The applyWriteItem did not go thgrou
                // If the error is sharding related, we'll have to investigate whether we
                // have a stale view of sharding state.
                if ( error->getErrCode() == ErrorCodes::StaleShardVersion ) staleBatch = true;

                // Don't bother recording if the user doesn't want a verbose answer. We want to
                // keep the error if this is a one-item batch, since we already compact the
                // response for those.
                if (verbose || numBatchOps == 1) {
                    error->setIndex( static_cast<int>( i ) );
                    response->addToErrDetails( error.release() );
                }

                batchSuccess = false;

                if ( request.getOrdered() ) break;

                error.reset( new BatchedErrorDetail );
            }
        }

        // So far, we may have failed some of the batch's items. So we record
        // that. Rergardless, we still need to apply the write concern.  If that generates a
        // more specific error, we'd replace for the intermediate error here. Note that we
        // "compatct" the error messge if this is an one-item batch. (See rationale later in
        // this file.)
        if ( !batchSuccess ) {

            if (numBatchOps > 1) {
                // TODO
                // Define the final error code here.
                // Might be used as a final error, depending on write concern success.
                response->setErrCode( 99999 );
                response->setErrMessage( "batch op errors occurred" );
            }
            else {
                // Promote the single error.
                const BatchedErrorDetail* error = response->getErrDetailsAt( 0 );
                response->setErrCode( error->getErrCode() );
                if ( error->isErrInfoSet() ) response->setErrInfo( error->getErrInfo() );
                response->setErrMessage( error->getErrMessage() );
                response->unsetErrDetails();
                error = NULL;
            }
        }

        // Apply write concern. Note, again, that we're only assembling a full response if the
        // user is interested in it.
        BSONObj writeConcern;
        if ( request.isWriteConcernSet() ) {
            writeConcern = request.getWriteConcern();
        }
        else {
            writeConcern = _defaultWriteConcern;
        }

        string errMsg;
        BSONObjBuilder wcResultsB;
        if ( !waitForWriteConcern( writeConcern, !batchSuccess, &wcResultsB, &errMsg ) ) {

            // TODO Revisit when user visible family error codes are set
            response->setErrCode( ErrorCodes::WriteConcernFailed );
            response->setErrMessage( errMsg );

            if ( verbose ) {
                response->setErrInfo( wcResultsB.obj() );
            }
        }

        // TODO: Audit where we want to queue here
        if ( staleBatch ) {
            ChunkVersion latestShardVersion;
            shardingState.refreshMetadataIfNeeded( request.getTargetingNS(),
                                                   request.getShardVersion(),
                                                   &latestShardVersion );
        }

        // Set the main body of the response. We assume that, if there was an error, the error
        // code would already be set.
        response->setOk( !response->isErrCodeSet() );
        response->setN( stats.numInserted + stats.numUpserted + stats.numUpdated
                        + stats.numDeleted );
        dassert( response->isValid( NULL ) );
    }
Beispiel #10
0
    void WriteBatchExecutor::executeBatch( const BatchedCommandRequest& request,
                                           BatchedCommandResponse* response ) {

        // Validate namespace
        const NamespaceString nss = NamespaceString( request.getNS() );
        if ( !nss.isValid() ) {
            toBatchError( Status( ErrorCodes::InvalidNamespace,
                                  nss.ns() + " is not a valid namespace" ),
                          response );
            return;
        }

        // Make sure we can write to the namespace
        Status allowedStatus = userAllowedWriteNS( nss );
        if ( !allowedStatus.isOK() ) {
            toBatchError( allowedStatus, response );
            return;
        }

        // Validate insert index requests
        // TODO: Push insert index requests through createIndex once all upgrade paths support it
        string errMsg;
        if ( request.isInsertIndexRequest() && !request.isValidIndexRequest( &errMsg ) ) {
            toBatchError( Status( ErrorCodes::InvalidOptions, errMsg ), response );
            return;
        }

        // Validate write concern
        // TODO: Lift write concern parsing out of this entirely
        WriteConcernOptions writeConcern;

        BSONObj wcDoc;
        if ( request.isWriteConcernSet() ) {
            wcDoc = request.getWriteConcern();
        }

        Status wcStatus = Status::OK();
        if ( wcDoc.isEmpty() ) {

            // The default write concern if empty is w : 1
            // Specifying w : 0 is/was allowed, but is interpreted identically to w : 1

            wcStatus = writeConcern.parse(
                _defaultWriteConcern.isEmpty() ?
                    WriteConcernOptions::Acknowledged : _defaultWriteConcern );

            if ( writeConcern.wNumNodes == 0 && writeConcern.wMode.empty() ) {
                writeConcern.wNumNodes = 1;
            }
        }
        else {
            wcStatus = writeConcern.parse( wcDoc );
        }

        if ( wcStatus.isOK() ) {
            wcStatus = validateWriteConcern( writeConcern );
        }

        if ( !wcStatus.isOK() ) {
            toBatchError( wcStatus, response );
            return;
        }

        if ( request.sizeWriteOps() == 0u ) {
            toBatchError( Status( ErrorCodes::InvalidLength,
                                  "no write ops were included in the batch" ),
                          response );
            return;
        }

        // Validate batch size
        if ( request.sizeWriteOps() > BatchedCommandRequest::kMaxWriteBatchSize ) {
            toBatchError( Status( ErrorCodes::InvalidLength,
                                  stream() << "exceeded maximum write batch size of "
                                           << BatchedCommandRequest::kMaxWriteBatchSize ),
                          response );
            return;
        }

        //
        // End validation
        //

        bool silentWC = writeConcern.wMode.empty() && writeConcern.wNumNodes == 0
                        && writeConcern.syncMode == WriteConcernOptions::NONE;

        Timer commandTimer;

        OwnedPointerVector<WriteErrorDetail> writeErrorsOwned;
        vector<WriteErrorDetail*>& writeErrors = writeErrorsOwned.mutableVector();

        OwnedPointerVector<BatchedUpsertDetail> upsertedOwned;
        vector<BatchedUpsertDetail*>& upserted = upsertedOwned.mutableVector();

        //
        // Apply each batch item, possibly bulking some items together in the write lock.
        // Stops on error if batch is ordered.
        //

        bulkExecute( request, &upserted, &writeErrors );

        //
        // Try to enforce the write concern if everything succeeded (unordered or ordered)
        // OR if something succeeded and we're unordered.
        //

        auto_ptr<WCErrorDetail> wcError;
        bool needToEnforceWC = writeErrors.empty()
                               || ( !request.getOrdered()
                                    && writeErrors.size() < request.sizeWriteOps() );

        if ( needToEnforceWC ) {

            _client->curop()->setMessage( "waiting for write concern" );

            WriteConcernResult res;
            Status status = waitForWriteConcern( _txn, writeConcern, _client->getLastOp(), &res );

            if ( !status.isOK() ) {
                wcError.reset( toWriteConcernError( status, res ) );
            }
        }

        //
        // Refresh metadata if needed
        //

        bool staleBatch = !writeErrors.empty()
                          && writeErrors.back()->getErrCode() == ErrorCodes::StaleShardVersion;

        if ( staleBatch ) {

            const BatchedRequestMetadata* requestMetadata = request.getMetadata();
            dassert( requestMetadata );

            // Make sure our shard name is set or is the same as what was set previously
            if ( shardingState.setShardName( requestMetadata->getShardName() ) ) {

                //
                // First, we refresh metadata if we need to based on the requested version.
                //

                ChunkVersion latestShardVersion;
                shardingState.refreshMetadataIfNeeded( request.getTargetingNS(),
                                                       requestMetadata->getShardVersion(),
                                                       &latestShardVersion );

                // Report if we're still changing our metadata
                // TODO: Better reporting per-collection
                if ( shardingState.inCriticalMigrateSection() ) {
                    noteInCriticalSection( writeErrors.back() );
                }

                if ( queueForMigrationCommit ) {

                    //
                    // Queue up for migration to end - this allows us to be sure that clients will
                    // not repeatedly try to refresh metadata that is not yet written to the config
                    // server.  Not necessary for correctness.
                    // Exposed as optional parameter to allow testing of queuing behavior with
                    // different network timings.
                    //

                    const ChunkVersion& requestShardVersion = requestMetadata->getShardVersion();

                    //
                    // Only wait if we're an older version (in the current collection epoch) and
                    // we're not write compatible, implying that the current migration is affecting
                    // writes.
                    //

                    if ( requestShardVersion.isOlderThan( latestShardVersion ) &&
                         !requestShardVersion.isWriteCompatibleWith( latestShardVersion ) ) {

                        while ( shardingState.inCriticalMigrateSection() ) {

                            log() << "write request to old shard version "
                                  << requestMetadata->getShardVersion().toString()
                                  << " waiting for migration commit" << endl;

                            shardingState.waitTillNotInCriticalSection( 10 /* secs */);
                        }
                    }
                }
            }
            else {
                // If our shard name is stale, our version must have been stale as well
                dassert( writeErrors.size() == request.sizeWriteOps() );
            }
        }

        //
        // Construct response
        //

        response->setOk( true );

        if ( !silentWC ) {

            if ( upserted.size() ) {
                response->setUpsertDetails( upserted );
            }

            if ( writeErrors.size() ) {
                response->setErrDetails( writeErrors );
            }

            if ( wcError.get() ) {
                response->setWriteConcernError( wcError.release() );
            }

            const repl::ReplicationCoordinator::Mode replMode =
                    repl::getGlobalReplicationCoordinator()->getReplicationMode();
            if (replMode != repl::ReplicationCoordinator::modeNone) {
                response->setLastOp( _client->getLastOp() );
                if (replMode == repl::ReplicationCoordinator::modeReplSet) {
                    response->setElectionId(repl::theReplSet->getElectionId());
                }
            }

            // Set the stats for the response
            response->setN( _stats->numInserted + _stats->numUpserted + _stats->numMatched
                            + _stats->numDeleted );
            if ( request.getBatchType() == BatchedCommandRequest::BatchType_Update )
                response->setNModified( _stats->numModified );
        }

        dassert( response->isValid( NULL ) );
    }
Beispiel #11
0
void WriteBatchExecutor::execInserts( const BatchedCommandRequest& request,
                                      std::vector<WriteErrorDetail*>* errors ) {

    // Bulk insert is a bit different from other bulk operations in that multiple request docs
    // can be processed at once inside the write lock.

    const NamespaceString nss( request.getTargetingNS() );
    scoped_ptr<BatchItemRef> currInsertItem( new BatchItemRef( &request, 0 ) );

    // Go through our request and do some preprocessing on insert documents outside the lock to
    // validate and put them in a normalized form - i.e. put _id in front and fill in
    // timestamps.  The insert document may also be invalid.
    // TODO:  Might be more efficient to do in batches.
    vector<StatusWith<BSONObj> > normalInserts;
    normalizeInserts( request, &normalInserts );

    while ( currInsertItem->getItemIndex() < static_cast<int>( request.sizeWriteOps() ) ) {

        WriteOpResult currResult;

        // Don't (re-)acquire locks and create database until it's necessary
        if ( !normalInserts[currInsertItem->getItemIndex()].isOK() ) {
            currResult.error =
                toWriteError( normalInserts[currInsertItem->getItemIndex()].getStatus() );
        }
        else {

            PageFaultRetryableSection pFaultSection;

            ////////////////////////////////////
            Lock::DBWrite writeLock( nss.ns() );
            ////////////////////////////////////

            // Check version inside of write lock

            if ( checkIsMasterForCollection( nss, &currResult.error )
                    && checkShardVersion( &shardingState, request, &currResult.error )
                    && checkIndexConstraints( &shardingState, request, &currResult.error ) ) {

                //
                // Get the collection for the insert
                //

                scoped_ptr<Client::Context> writeContext;
                Collection* collection = NULL;

                try {
                    // Context once we're locked, to set more details in currentOp()
                    // TODO: better constructor?
                    writeContext.reset( new Client::Context( request.getNS(),
                                        storageGlobalParams.dbpath,
                                        false /* don't check version */) );

                    Database* database = writeContext->db();
                    dassert( database );
                    collection = database->getCollection( nss.ns() );

                    if ( !collection ) {
                        // Implicitly create if it doesn't exist
                        collection = database->createCollection( nss.ns() );
                        if ( !collection ) {
                            currResult.error =
                                toWriteError( Status( ErrorCodes::InternalError,
                                                      "could not create collection" ) );
                        }
                    }
                }
                catch ( const DBException& ex ) {
                    Status status(ex.toStatus());
                    if (ErrorCodes::isInterruption(status.code())) {
                        throw;
                    }
                    currResult.error = toWriteError(status);
                }

                //
                // Perform writes inside write lock
                //

                while ( collection
                        && currInsertItem->getItemIndex()
                        < static_cast<int>( request.sizeWriteOps() ) ) {

                    //
                    // BEGIN CURRENT OP
                    //

                    scoped_ptr<CurOp> currentOp( beginCurrentOp( _client, *currInsertItem ) );
                    incOpStats( *currInsertItem );

                    // Get the actual document we want to write, assuming it's valid
                    const StatusWith<BSONObj>& normalInsert = //
                        normalInserts[currInsertItem->getItemIndex()];

                    const BSONObj& normalInsertDoc =
                        normalInsert.getValue().isEmpty() ?
                        currInsertItem->getDocument() : normalInsert.getValue();

                    if ( !normalInsert.isOK() ) {
                        // This insert failed on preprocessing
                        currResult.error = toWriteError( normalInsert.getStatus() );
                    }
                    else if ( !request.isInsertIndexRequest() ) {
                        // Try the insert
                        singleInsert( *currInsertItem,
                                      normalInsertDoc,
                                      collection,
                                      &currResult );
                    }
                    else {
                        // Try the create index
                        singleCreateIndex( *currInsertItem,
                                           normalInsertDoc,
                                           collection,
                                           &currResult );
                    }

                    //
                    // END CURRENT OP
                    //

                    finishCurrentOp( _client, currentOp.get(), currResult.error );

                    // Faults release the write lock
                    if ( currResult.fault )
                        break;

                    // In general, we might have stats and errors
                    incWriteStats( *currInsertItem,
                                   currResult.stats,
                                   currResult.error,
                                   currentOp.get() );

                    // Errors release the write lock
                    if ( currResult.error )
                        break;

                    // Increment in the write lock and reset the stats for next time
                    currInsertItem.reset( new BatchItemRef( &request,
                                                            currInsertItem->getItemIndex()
                                                            + 1 ) );
                    currResult.reset();

                    // Destruct curop so that our parent curop is restored, so that we
                    // record the yield count in the parent.
                    currentOp.reset(NULL);

                    // yield sometimes
                    int micros = ClientCursor::suggestYieldMicros();
                    if (micros > 0) {
                        ClientCursor::staticYield(micros, "", NULL);
                    }
                }
            }

        } // END WRITE LOCK

        //
        // Store the current error if it exists
        //

        if ( currResult.error ) {

            errors->push_back( currResult.releaseError() );
            errors->back()->setIndex( currInsertItem->getItemIndex() );

            // Break early for ordered batches
            if ( request.getOrdered() )
                break;
        }

        //
        // Fault or increment
        //

        if ( currResult.fault ) {
            // Check page fault out of lock
            currResult.fault->touch();
        }
        else {
            // Increment if not a fault
            currInsertItem.reset( new BatchItemRef( &request,
                                                    currInsertItem->getItemIndex() + 1 ) );
        }
    }

}
Beispiel #12
0
    void WriteBatchExecutor::executeBatch( const BatchedCommandRequest& request,
                                           BatchedCommandResponse* response ) {

        // TODO: Lift write concern parsing out of this entirely.
        WriteConcernOptions writeConcern;
        Status status = Status::OK();

        BSONObj wcDoc;
        if ( request.isWriteConcernSet() ) {
            wcDoc = request.getWriteConcern();
        }

        if ( wcDoc.isEmpty() ) {
            status = writeConcern.parse( _defaultWriteConcern );
        }
        else {
            status = writeConcern.parse( wcDoc );
        }

        if ( status.isOK() ) {
            status = validateWriteConcern( writeConcern );
        }

        if ( !status.isOK() ) {
            response->setErrCode( status.code() );
            response->setErrMessage( status.reason() );
            response->setOk( false );
            dassert( response->isValid(NULL) );
            return;
        }

        bool silentWC = writeConcern.wMode.empty() && writeConcern.wNumNodes == 0
                        && writeConcern.syncMode == WriteConcernOptions::NONE;

        Timer commandTimer;

        OwnedPointerVector<WriteErrorDetail> writeErrorsOwned;
        vector<WriteErrorDetail*>& writeErrors = writeErrorsOwned.mutableVector();

        OwnedPointerVector<BatchedUpsertDetail> upsertedOwned;
        vector<BatchedUpsertDetail*>& upserted = upsertedOwned.mutableVector();

        //
        // Apply each batch item, possibly bulking some items together in the write lock.
        // Stops on error if batch is ordered.
        //

        bulkExecute( request, &upserted, &writeErrors );

        //
        // Try to enforce the write concern if everything succeeded (unordered or ordered)
        // OR if something succeeded and we're unordered.
        //

        auto_ptr<WCErrorDetail> wcError;
        bool needToEnforceWC = writeErrors.empty()
                               || ( !request.getOrdered()
                                    && writeErrors.size() < request.sizeWriteOps() );

        if ( needToEnforceWC ) {

            _client->curop()->setMessage( "waiting for write concern" );

            WriteConcernResult res;
            status = waitForWriteConcern( writeConcern, _client->getLastOp(), &res );

            if ( !status.isOK() ) {
                wcError.reset( toWriteConcernError( status, res ) );
            }
        }

        //
        // Refresh metadata if needed
        //

        bool staleBatch = !writeErrors.empty()
                          && writeErrors.back()->getErrCode() == ErrorCodes::StaleShardVersion;

        if ( staleBatch ) {

            const BatchedRequestMetadata* requestMetadata = request.getMetadata();
            dassert( requestMetadata );

            // Make sure our shard name is set or is the same as what was set previously
            if ( shardingState.setShardName( requestMetadata->getShardName() ) ) {

                //
                // First, we refresh metadata if we need to based on the requested version.
                //

                ChunkVersion latestShardVersion;
                shardingState.refreshMetadataIfNeeded( request.getTargetingNS(),
                                                       requestMetadata->getShardVersion(),
                                                       &latestShardVersion );

                // Report if we're still changing our metadata
                // TODO: Better reporting per-collection
                if ( shardingState.inCriticalMigrateSection() ) {
                    noteInCriticalSection( writeErrors.back() );
                }

                if ( queueForMigrationCommit ) {

                    //
                    // Queue up for migration to end - this allows us to be sure that clients will
                    // not repeatedly try to refresh metadata that is not yet written to the config
                    // server.  Not necessary for correctness.
                    // Exposed as optional parameter to allow testing of queuing behavior with
                    // different network timings.
                    //

                    const ChunkVersion& requestShardVersion = requestMetadata->getShardVersion();

                    //
                    // Only wait if we're an older version (in the current collection epoch) and
                    // we're not write compatible, implying that the current migration is affecting
                    // writes.
                    //

                    if ( requestShardVersion.isOlderThan( latestShardVersion ) &&
                         !requestShardVersion.isWriteCompatibleWith( latestShardVersion ) ) {

                        while ( shardingState.inCriticalMigrateSection() ) {

                            log() << "write request to old shard version "
                                  << requestMetadata->getShardVersion().toString()
                                  << " waiting for migration commit" << endl;

                            shardingState.waitTillNotInCriticalSection( 10 /* secs */);
                        }
                    }
                }
            }
            else {
                // If our shard name is stale, our version must have been stale as well
                dassert( writeErrors.size() == request.sizeWriteOps() );
            }
        }

        //
        // Construct response
        //

        response->setOk( true );

        if ( !silentWC ) {

            if ( upserted.size() ) {
                response->setUpsertDetails( upserted );
                upserted.clear();
            }

            if ( writeErrors.size() ) {
                response->setErrDetails( writeErrors );
                writeErrors.clear();
            }

            if ( wcError.get() ) {
                response->setWriteConcernError( wcError.release() );
            }

            if ( anyReplEnabled() ) {
                response->setLastOp( _client->getLastOp() );
                if (theReplSet) {
                    response->setElectionId( theReplSet->getElectionId() );
                }
            }

            // Set the stats for the response
            response->setN( _stats->numInserted + _stats->numUpserted + _stats->numMatched
                            + _stats->numDeleted );
            if ( request.getBatchType() == BatchedCommandRequest::BatchType_Update )
                response->setNModified( _stats->numModified );
        }

        dassert( response->isValid( NULL ) );
    }
Beispiel #13
0
    void BatchSafeWriter::safeWriteBatch( DBClientBase* conn,
                                          const BatchedCommandRequest& request,
                                          BatchedCommandResponse* response ) {

        const NamespaceString nss( request.getNS() );

        // N starts at zero, and we add to it for each item
        response->setN( 0 );

        for ( size_t i = 0; i < request.sizeWriteOps(); ++i ) {

            // Break on first error if we're ordered
            if ( request.getOrdered() && response->isErrDetailsSet() )
                break;

            BatchItemRef itemRef( &request, static_cast<int>( i ) );
            bool isLastItem = ( i == request.sizeWriteOps() - 1 );

            BSONObj writeConcern;
            if ( isLastItem && request.isWriteConcernSet() ) {
                writeConcern = request.getWriteConcern();
                // Pre-2.4.2 mongods react badly to 'w' being set on config servers
                if ( nss.db() == "config" )
                    writeConcern = fixWCForConfig( writeConcern );
            }

            BSONObj gleResult;
            GLEErrors errors;
            Status status = _safeWriter->safeWrite( conn, itemRef, writeConcern, &gleResult );
            if ( status.isOK() ) {
                status = extractGLEErrors( gleResult, &errors );
            }

            if ( !status.isOK() ) {
                response->clear();
                response->setOk( false );
                response->setErrCode( status.code() );
                response->setErrMessage( status.reason() );
                return;
            }

            //
            // STATS HANDLING
            //

            GLEStats stats;
            extractGLEStats( gleResult, &stats );

            // Special case for making legacy "n" field result for insert match the write
            // command result.
            if ( request.getBatchType() == BatchedCommandRequest::BatchType_Insert
                 && !errors.writeError.get() ) {
                // n is always 0 for legacy inserts.
                dassert( stats.n == 0 );
                stats.n = 1;
            }

            response->setN( response->getN() + stats.n );

            if ( !stats.upsertedId.isEmpty() ) {
                BatchedUpsertDetail* upsertedId = new BatchedUpsertDetail;
                upsertedId->setIndex( i );
                upsertedId->setUpsertedID( stats.upsertedId );
                response->addToUpsertDetails( upsertedId );
            }

            response->setLastOp( stats.lastOp );

            //
            // WRITE ERROR HANDLING
            //

            // If any error occurs (except stale config) the previous GLE was not enforced
            bool enforcedWC = !errors.writeError.get()
                              || errors.writeError->getErrCode() == ErrorCodes::StaleShardVersion;

            // Save write error
            if ( errors.writeError.get() ) {
                errors.writeError->setIndex( i );
                response->addToErrDetails( errors.writeError.release() );
            }

            //
            // WRITE CONCERN ERROR HANDLING
            //

            // The last write is weird, since we enforce write concern and check the error through
            // the same GLE if possible.  If the last GLE was an error, the write concern may not
            // have been enforced in that same GLE, so we need to send another after resetting the
            // error.
            if ( isLastItem ) {

                // Try to enforce the write concern if everything succeeded (unordered or ordered)
                // OR if something succeeded and we're unordered.
                bool needToEnforceWC =
                    !response->isErrDetailsSet()
                    || ( !request.getOrdered()
                         && response->sizeErrDetails() < request.sizeWriteOps() );

                if ( !enforcedWC && needToEnforceWC ) {
                    dassert( !errors.writeError.get() ); // emptied above

                    // Might have gotten a write concern validity error earlier, these are
                    // enforced even if the wc isn't applied, so we ignore.
                    errors.wcError.reset();

                    Status status = _safeWriter->enforceWriteConcern( conn,
                                                                      nss.db().toString(),
                                                                      writeConcern,
                                                                      &gleResult );

                    if ( status.isOK() ) {
                        status = extractGLEErrors( gleResult, &errors );
                    }

                    if ( !status.isOK() ) {
                        response->clear();
                        response->setOk( false );
                        response->setErrCode( status.code() );
                        response->setErrMessage( status.reason() );
                        return;
                    }
                }
                // END Write concern retry

                if ( errors.wcError.get() ) {
                    response->setWriteConcernError( errors.wcError.release() );
                }
            }
        }

        response->setOk( true );
        dassert( response->isValid( NULL ) );
    }
    void BatchSafeWriter::safeWriteBatch( DBClientBase* conn,
                                          const BatchedCommandRequest& request,
                                          BatchedCommandResponse* response ) {

        const NamespaceString nss( request.getNS() );

        // N starts at zero, and we add to it for each item
        response->setN( 0 );

        // GLE path always sets nModified to -1 (sentinel) to indicate we should omit it later.
        response->setNModified(-1);

        for ( size_t i = 0; i < request.sizeWriteOps(); ++i ) {

            // Break on first error if we're ordered
            if ( request.getOrdered() && response->isErrDetailsSet() )
                break;

            BatchItemRef itemRef( &request, static_cast<int>( i ) );

            BSONObj gleResult;
            GLEErrors errors;
            Status status = _safeWriter->safeWrite( conn,
                                                    itemRef,
                                                    WriteConcernOptions::Acknowledged,
                                                    &gleResult );

            if ( status.isOK() ) {
                status = extractGLEErrors( gleResult, &errors );
            }

            if ( !status.isOK() ) {
                response->clear();
                response->setOk( false );
                response->setErrCode( ErrorCodes::RemoteResultsUnavailable );
                
                StringBuilder builder;
                builder << "could not get write error from safe write";
                builder << causedBy( status.toString() );
                response->setErrMessage( builder.str() );
                return;
            }

            if ( errors.wcError.get() ) {
                response->setWriteConcernError( errors.wcError.release() );
            }

            //
            // STATS HANDLING
            //

            GLEStats stats;
            extractGLEStats( gleResult, &stats );

            // Special case for making legacy "n" field result for insert match the write
            // command result.
            if ( request.getBatchType() == BatchedCommandRequest::BatchType_Insert
                 && !errors.writeError.get() ) {
                // n is always 0 for legacy inserts.
                dassert( stats.n == 0 );
                stats.n = 1;
            }

            response->setN( response->getN() + stats.n );

            if ( !stats.upsertedId.isEmpty() ) {
                BatchedUpsertDetail* upsertedId = new BatchedUpsertDetail;
                upsertedId->setIndex( i );
                upsertedId->setUpsertedID( stats.upsertedId );
                response->addToUpsertDetails( upsertedId );
            }

            response->setLastOp( stats.lastOp );

            // Save write error
            if ( errors.writeError.get() ) {
                errors.writeError->setIndex( i );
                response->addToErrDetails( errors.writeError.release() );
            }
        }

        //
        // WRITE CONCERN ERROR HANDLING
        //

        // The last write is weird, since we enforce write concern and check the error through
        // the same GLE if possible.  If the last GLE was an error, the write concern may not
        // have been enforced in that same GLE, so we need to send another after resetting the
        // error.

        BSONObj writeConcern;
        if ( request.isWriteConcernSet() ) {
            writeConcern = request.getWriteConcern();
            // Pre-2.4.2 mongods react badly to 'w' being set on config servers
            if ( nss.db() == "config" )
                writeConcern = fixWCForConfig( writeConcern );
        }

        bool needToEnforceWC = WriteConcernOptions::Acknowledged.woCompare(writeConcern) != 0 &&
                WriteConcernOptions::Unacknowledged.woCompare(writeConcern) != 0;

        if ( needToEnforceWC &&
                ( !response->isErrDetailsSet() ||
                        ( !request.getOrdered() &&
                                // Not all errored. Note: implicit response->isErrDetailsSet().
                                response->sizeErrDetails() < request.sizeWriteOps() ))) {

            // Might have gotten a write concern validity error earlier, these are
            // enforced even if the wc isn't applied, so we ignore.
            response->unsetWriteConcernError();

            const string dbName( nss.db().toString() );

            Status status( Status::OK() );

            if ( response->isErrDetailsSet() ) {
                const WriteErrorDetail* lastError = response->getErrDetails().back();

                // If last write op was an error.
                if ( lastError->getIndex() == static_cast<int>( request.sizeWriteOps() - 1 )) {
                    // Reset previous errors so we can apply the write concern no matter what
                    // as long as it is valid.
                    status = _safeWriter->clearErrors( conn, dbName );
                }
            }

            BSONObj gleResult;
            if ( status.isOK() ) {
                status = _safeWriter->enforceWriteConcern( conn,
                                                           dbName,
                                                           writeConcern,
                                                           &gleResult );
            }

            GLEErrors errors;
            if ( status.isOK() ) {
                status = extractGLEErrors( gleResult, &errors );
            }
            
            if ( !status.isOK() ) {
                auto_ptr<WCErrorDetail> wcError( new WCErrorDetail );
                wcError->setErrCode( status.code() );
                wcError->setErrMessage( status.reason() );
                response->setWriteConcernError( wcError.release() ); 
            }
            else if ( errors.wcError.get() ) {
                response->setWriteConcernError( errors.wcError.release() );
            }
        }

        response->setOk( true );
        dassert( response->isValid( NULL ) );
    }