void WriteBatchExecutor::bulkExecute( const BatchedCommandRequest& request, std::vector<BatchedUpsertDetail*>* upsertedIds, std::vector<WriteErrorDetail*>* errors ) { if ( request.getBatchType() == BatchedCommandRequest::BatchType_Insert ) { execInserts( request, errors ); } else if ( request.getBatchType() == BatchedCommandRequest::BatchType_Update ) { for ( size_t i = 0; i < request.sizeWriteOps(); i++ ) { WriteErrorDetail* error = NULL; BSONObj upsertedId; execUpdate( BatchItemRef( &request, i ), &upsertedId, &error ); if ( !upsertedId.isEmpty() ) { BatchedUpsertDetail* batchUpsertedId = new BatchedUpsertDetail; batchUpsertedId->setIndex( i ); batchUpsertedId->setUpsertedID( upsertedId ); upsertedIds->push_back( batchUpsertedId ); } if ( error ) { errors->push_back( error ); if ( request.getOrdered() ) break; } } } else { dassert( request.getBatchType() == BatchedCommandRequest::BatchType_Delete ); for ( size_t i = 0; i < request.sizeWriteOps(); i++ ) { WriteErrorDetail* error = NULL; execRemove( BatchItemRef( &request, i ), &error ); if ( error ) { errors->push_back( error ); if ( request.getOrdered() ) break; } } } // Fill in stale version errors for unordered batches (update/delete can't do this on own) if ( !errors->empty() && !request.getOrdered() ) { const WriteErrorDetail* finalError = errors->back(); if ( finalError->getErrCode() == ErrorCodes::StaleShardVersion ) { for ( size_t i = finalError->getIndex() + 1; i < request.sizeWriteOps(); i++ ) { WriteErrorDetail* dupStaleError = new WriteErrorDetail; finalError->cloneTo( dupStaleError ); errors->push_back( dupStaleError ); } } } }
void BatchSafeWriter::safeWriteBatch( DBClientBase* conn, const BatchedCommandRequest& request, BatchedCommandResponse* response ) { // N starts at zero, and we add to it for each item response->setN( 0 ); for ( size_t i = 0; i < request.sizeWriteOps(); ++i ) { BatchItemRef itemRef( &request, static_cast<int>( i ) ); LastError lastError; _safeWriter->safeWrite( conn, itemRef, &lastError ); // Register the error if we need to BatchedErrorDetail* batchError = lastErrorToBatchError( lastError ); if ( batchError ) { batchError->setIndex( i ); response->addToErrDetails( batchError ); } response->setN( response->getN() + lastError.nObjects ); if ( !lastError.upsertedId.isEmpty() ) { BatchedUpsertDetail* upsertedId = new BatchedUpsertDetail; upsertedId->setIndex( i ); upsertedId->setUpsertedID( lastError.upsertedId ); response->addToUpsertDetails( upsertedId ); } // Break on first error if we're ordered if ( request.getOrdered() && BatchSafeWriter::isFailedOp( lastError ) ) break; } if ( request.sizeWriteOps() == 1 && response->isErrDetailsSet() && !response->isErrCodeSet() ) { // Promote single error to batch error const BatchedErrorDetail* error = response->getErrDetailsAt( 0 ); response->setErrCode( error->getErrCode() ); if ( error->isErrInfoSet() ) response->setErrInfo( error->getErrInfo() ); response->setErrMessage( error->getErrMessage() ); response->unsetErrDetails(); } if ( request.sizeWriteOps() == 1 && response->isUpsertDetailsSet() ) { // Promote single upsert to batch upsert const BatchedUpsertDetail* upsertedId = response->getUpsertDetailsAt( 0 ); response->setSingleUpserted( upsertedId->getUpsertedID() ); response->unsetUpsertDetails(); } response->setOk( !response->isErrCodeSet() ); dassert( response->isValid( NULL ) ); }
void WriteBatchExecutor::execInserts( const BatchedCommandRequest& request, std::vector<WriteErrorDetail*>* errors ) { // Theory of operation: // // Instantiates an ExecInsertsState, which represents all of the state involved in the batch // insert execution algorithm. Most importantly, encapsulates the lock state. // // Every iteration of the loop in execInserts() processes one document insertion, by calling // insertOne() exactly once for a given value of state.currIndex. // // If the ExecInsertsState indicates that the requisite write locks are not held, insertOne // acquires them and performs lock-acquisition-time checks. However, on non-error // execution, it does not release the locks. Therefore, the yielding logic in the while // loop in execInserts() is solely responsible for lock release in the non-error case. // // Internally, insertOne loops performing the single insert until it completes without a // PageFaultException, or until it fails with some kind of error. Errors are mostly // propagated via the request->error field, but DBExceptions or std::exceptions may escape, // particularly on operation interruption. These kinds of errors necessarily prevent // further insertOne calls, and stop the batch. As a result, the only expected source of // such exceptions are interruptions. ExecInsertsState state(&request); normalizeInserts(request, &state.normalizedInserts, &state.pregeneratedKeys); ElapsedTracker elapsedTracker(128, 10); // 128 hits or 10 ms, matching RunnerYieldPolicy's for (state.currIndex = 0; state.currIndex < state.request->sizeWriteOps(); ++state.currIndex) { if (elapsedTracker.intervalHasElapsed()) { // Consider yielding between inserts. if (state.hasLock()) { int micros = ClientCursor::suggestYieldMicros(); if (micros > 0) { state.unlock(); killCurrentOp.checkForInterrupt(); sleepmicros(micros); } } killCurrentOp.checkForInterrupt(); elapsedTracker.resetLastTime(); } WriteErrorDetail* error = NULL; execOneInsert(&state, &error); if (error) { errors->push_back(error); error->setIndex(state.currIndex); if (request.getOrdered()) return; } } }
// Goes over the request and preprocesses normalized versions of all the inserts in the request static void normalizeInserts( const BatchedCommandRequest& request, vector<StatusWith<BSONObj> >* normalInserts ) { for ( size_t i = 0; i < request.sizeWriteOps(); ++i ) { StatusWith<BSONObj> normalInsert = normalizeInsert( BatchItemRef( &request, i ) ); normalInserts->push_back( normalInsert ); if ( request.getOrdered() && !normalInsert.isOK() ) break; } }
// Goes over the request and preprocesses normalized versions of all the inserts in the request static void normalizeInserts( const BatchedCommandRequest& request, vector<StatusWith<BSONObj> >* normalInserts ) { for ( size_t i = 0; i < request.sizeWriteOps(); ++i ) { BSONObj insertDoc = request.getInsertRequest()->getDocumentsAt( i ); StatusWith<BSONObj> normalInsert = fixDocumentForInsert( insertDoc ); normalInserts->push_back( normalInsert ); if ( request.getOrdered() && !normalInsert.isOK() ) break; } }
void Strategy::writeOp(OperationContext* txn, int op, Request& request) { // make sure we have a last error dassert(&LastError::get(cc())); OwnedPointerVector<BatchedCommandRequest> commandRequestsOwned; vector<BatchedCommandRequest*>& commandRequests = commandRequestsOwned.mutableVector(); msgToBatchRequests(request.m(), &commandRequests); for (vector<BatchedCommandRequest*>::iterator it = commandRequests.begin(); it != commandRequests.end(); ++it) { // Multiple commands registered to last error as multiple requests if (it != commandRequests.begin()) LastError::get(cc()).startRequest(); BatchedCommandRequest* commandRequest = *it; // Adjust namespaces for command NamespaceString fullNS(commandRequest->getNS()); string cmdNS = fullNS.getCommandNS(); // We only pass in collection name to command commandRequest->setNS(fullNS); BSONObjBuilder builder; BSONObj requestBSON = commandRequest->toBSON(); { // Disable the last error object for the duration of the write cmd LastError::Disabled disableLastError(&LastError::get(cc())); Command::runAgainstRegistered(txn, cmdNS.c_str(), requestBSON, builder, 0); } BatchedCommandResponse commandResponse; bool parsed = commandResponse.parseBSON(builder.done(), NULL); (void)parsed; // for compile dassert(parsed && commandResponse.isValid(NULL)); // Populate the lastError object based on the write response LastError::get(cc()).reset(); bool hadError = batchErrorToLastError(*commandRequest, commandResponse, &LastError::get(cc())); // Check if this is an ordered batch and we had an error which should stop processing if (commandRequest->getOrdered() && hadError) break; } }
// Goes over the request and preprocesses normalized versions of all the inserts in the request static void normalizeInserts( const BatchedCommandRequest& request, vector<StatusWith<BSONObj> >* normalizedInserts, vector<PregeneratedKeys>* pregen ) { normalizedInserts->reserve(request.sizeWriteOps()); for ( size_t i = 0; i < request.sizeWriteOps(); ++i ) { BSONObj insertDoc = request.getInsertRequest()->getDocumentsAt( i ); StatusWith<BSONObj> normalInsert = fixDocumentForInsert( insertDoc ); normalizedInserts->push_back( normalInsert ); if ( request.getOrdered() && !normalInsert.isOK() ) break; if ( !normalInsert.getValue().isEmpty() ) insertDoc = normalInsert.getValue(); pregen->push_back( PregeneratedKeys() ); GeneratorHolder::getInstance()->prepare( request.getTargetingNS(), insertDoc, &pregen->back() ); } }
void BatchSafeWriter::safeWriteBatch( DBClientBase* conn, const BatchedCommandRequest& request, BatchedCommandResponse* response ) { for ( size_t i = 0; i < request.sizeWriteOps(); ++i ) { BatchItemRef itemRef( &request, static_cast<int>( i ) ); LastError lastError; _safeWriter->safeWrite( conn, itemRef, &lastError ); // Register the error if we need to BatchedErrorDetail* batchError = lastErrorToBatchError( lastError ); batchError->setIndex( i ); response->addToErrDetails( batchError ); // TODO: Other stats, etc. // Break on first error if we're ordered if ( request.getOrdered() && BatchSafeWriter::isFailedOp( lastError ) ) break; } }
void WriteBatchExecutor::executeBatch( const BatchedCommandRequest& request, BatchedCommandResponse* response ) { Timer commandTimer; WriteStats stats; std::auto_ptr<BatchedErrorDetail> error( new BatchedErrorDetail ); BSONObj upsertedID = BSONObj(); bool batchSuccess = true; bool staleBatch = false; // Apply each batch item, stopping on an error if we were asked to apply the batch // sequentially. size_t numBatchOps = request.sizeWriteOps(); bool verbose = request.isVerboseWC(); for ( size_t i = 0; i < numBatchOps; i++ ) { if ( applyWriteItem( BatchItemRef( &request, i ), &stats, &upsertedID, error.get() ) ) { // In case updates turned out to be upserts, the callers may be interested // in learning what _id was used for that document. if ( !upsertedID.isEmpty() ) { if ( numBatchOps == 1 ) { response->setSingleUpserted(upsertedID); } else if ( verbose ) { std::auto_ptr<BatchedUpsertDetail> upsertDetail(new BatchedUpsertDetail); upsertDetail->setIndex(i); upsertDetail->setUpsertedID(upsertedID); response->addToUpsertDetails(upsertDetail.release()); } upsertedID = BSONObj(); } } else { // The applyWriteItem did not go thgrou // If the error is sharding related, we'll have to investigate whether we // have a stale view of sharding state. if ( error->getErrCode() == ErrorCodes::StaleShardVersion ) staleBatch = true; // Don't bother recording if the user doesn't want a verbose answer. We want to // keep the error if this is a one-item batch, since we already compact the // response for those. if (verbose || numBatchOps == 1) { error->setIndex( static_cast<int>( i ) ); response->addToErrDetails( error.release() ); } batchSuccess = false; if ( request.getOrdered() ) break; error.reset( new BatchedErrorDetail ); } } // So far, we may have failed some of the batch's items. So we record // that. Rergardless, we still need to apply the write concern. If that generates a // more specific error, we'd replace for the intermediate error here. Note that we // "compatct" the error messge if this is an one-item batch. (See rationale later in // this file.) if ( !batchSuccess ) { if (numBatchOps > 1) { // TODO // Define the final error code here. // Might be used as a final error, depending on write concern success. response->setErrCode( 99999 ); response->setErrMessage( "batch op errors occurred" ); } else { // Promote the single error. const BatchedErrorDetail* error = response->getErrDetailsAt( 0 ); response->setErrCode( error->getErrCode() ); if ( error->isErrInfoSet() ) response->setErrInfo( error->getErrInfo() ); response->setErrMessage( error->getErrMessage() ); response->unsetErrDetails(); error = NULL; } } // Apply write concern. Note, again, that we're only assembling a full response if the // user is interested in it. BSONObj writeConcern; if ( request.isWriteConcernSet() ) { writeConcern = request.getWriteConcern(); } else { writeConcern = _defaultWriteConcern; } string errMsg; BSONObjBuilder wcResultsB; if ( !waitForWriteConcern( writeConcern, !batchSuccess, &wcResultsB, &errMsg ) ) { // TODO Revisit when user visible family error codes are set response->setErrCode( ErrorCodes::WriteConcernFailed ); response->setErrMessage( errMsg ); if ( verbose ) { response->setErrInfo( wcResultsB.obj() ); } } // TODO: Audit where we want to queue here if ( staleBatch ) { ChunkVersion latestShardVersion; shardingState.refreshMetadataIfNeeded( request.getTargetingNS(), request.getShardVersion(), &latestShardVersion ); } // Set the main body of the response. We assume that, if there was an error, the error // code would already be set. response->setOk( !response->isErrCodeSet() ); response->setN( stats.numInserted + stats.numUpserted + stats.numUpdated + stats.numDeleted ); dassert( response->isValid( NULL ) ); }
void WriteBatchExecutor::executeBatch( const BatchedCommandRequest& request, BatchedCommandResponse* response ) { // Validate namespace const NamespaceString nss = NamespaceString( request.getNS() ); if ( !nss.isValid() ) { toBatchError( Status( ErrorCodes::InvalidNamespace, nss.ns() + " is not a valid namespace" ), response ); return; } // Make sure we can write to the namespace Status allowedStatus = userAllowedWriteNS( nss ); if ( !allowedStatus.isOK() ) { toBatchError( allowedStatus, response ); return; } // Validate insert index requests // TODO: Push insert index requests through createIndex once all upgrade paths support it string errMsg; if ( request.isInsertIndexRequest() && !request.isValidIndexRequest( &errMsg ) ) { toBatchError( Status( ErrorCodes::InvalidOptions, errMsg ), response ); return; } // Validate write concern // TODO: Lift write concern parsing out of this entirely WriteConcernOptions writeConcern; BSONObj wcDoc; if ( request.isWriteConcernSet() ) { wcDoc = request.getWriteConcern(); } Status wcStatus = Status::OK(); if ( wcDoc.isEmpty() ) { // The default write concern if empty is w : 1 // Specifying w : 0 is/was allowed, but is interpreted identically to w : 1 wcStatus = writeConcern.parse( _defaultWriteConcern.isEmpty() ? WriteConcernOptions::Acknowledged : _defaultWriteConcern ); if ( writeConcern.wNumNodes == 0 && writeConcern.wMode.empty() ) { writeConcern.wNumNodes = 1; } } else { wcStatus = writeConcern.parse( wcDoc ); } if ( wcStatus.isOK() ) { wcStatus = validateWriteConcern( writeConcern ); } if ( !wcStatus.isOK() ) { toBatchError( wcStatus, response ); return; } if ( request.sizeWriteOps() == 0u ) { toBatchError( Status( ErrorCodes::InvalidLength, "no write ops were included in the batch" ), response ); return; } // Validate batch size if ( request.sizeWriteOps() > BatchedCommandRequest::kMaxWriteBatchSize ) { toBatchError( Status( ErrorCodes::InvalidLength, stream() << "exceeded maximum write batch size of " << BatchedCommandRequest::kMaxWriteBatchSize ), response ); return; } // // End validation // bool silentWC = writeConcern.wMode.empty() && writeConcern.wNumNodes == 0 && writeConcern.syncMode == WriteConcernOptions::NONE; Timer commandTimer; OwnedPointerVector<WriteErrorDetail> writeErrorsOwned; vector<WriteErrorDetail*>& writeErrors = writeErrorsOwned.mutableVector(); OwnedPointerVector<BatchedUpsertDetail> upsertedOwned; vector<BatchedUpsertDetail*>& upserted = upsertedOwned.mutableVector(); // // Apply each batch item, possibly bulking some items together in the write lock. // Stops on error if batch is ordered. // bulkExecute( request, &upserted, &writeErrors ); // // Try to enforce the write concern if everything succeeded (unordered or ordered) // OR if something succeeded and we're unordered. // auto_ptr<WCErrorDetail> wcError; bool needToEnforceWC = writeErrors.empty() || ( !request.getOrdered() && writeErrors.size() < request.sizeWriteOps() ); if ( needToEnforceWC ) { _client->curop()->setMessage( "waiting for write concern" ); WriteConcernResult res; Status status = waitForWriteConcern( _txn, writeConcern, _client->getLastOp(), &res ); if ( !status.isOK() ) { wcError.reset( toWriteConcernError( status, res ) ); } } // // Refresh metadata if needed // bool staleBatch = !writeErrors.empty() && writeErrors.back()->getErrCode() == ErrorCodes::StaleShardVersion; if ( staleBatch ) { const BatchedRequestMetadata* requestMetadata = request.getMetadata(); dassert( requestMetadata ); // Make sure our shard name is set or is the same as what was set previously if ( shardingState.setShardName( requestMetadata->getShardName() ) ) { // // First, we refresh metadata if we need to based on the requested version. // ChunkVersion latestShardVersion; shardingState.refreshMetadataIfNeeded( request.getTargetingNS(), requestMetadata->getShardVersion(), &latestShardVersion ); // Report if we're still changing our metadata // TODO: Better reporting per-collection if ( shardingState.inCriticalMigrateSection() ) { noteInCriticalSection( writeErrors.back() ); } if ( queueForMigrationCommit ) { // // Queue up for migration to end - this allows us to be sure that clients will // not repeatedly try to refresh metadata that is not yet written to the config // server. Not necessary for correctness. // Exposed as optional parameter to allow testing of queuing behavior with // different network timings. // const ChunkVersion& requestShardVersion = requestMetadata->getShardVersion(); // // Only wait if we're an older version (in the current collection epoch) and // we're not write compatible, implying that the current migration is affecting // writes. // if ( requestShardVersion.isOlderThan( latestShardVersion ) && !requestShardVersion.isWriteCompatibleWith( latestShardVersion ) ) { while ( shardingState.inCriticalMigrateSection() ) { log() << "write request to old shard version " << requestMetadata->getShardVersion().toString() << " waiting for migration commit" << endl; shardingState.waitTillNotInCriticalSection( 10 /* secs */); } } } } else { // If our shard name is stale, our version must have been stale as well dassert( writeErrors.size() == request.sizeWriteOps() ); } } // // Construct response // response->setOk( true ); if ( !silentWC ) { if ( upserted.size() ) { response->setUpsertDetails( upserted ); } if ( writeErrors.size() ) { response->setErrDetails( writeErrors ); } if ( wcError.get() ) { response->setWriteConcernError( wcError.release() ); } const repl::ReplicationCoordinator::Mode replMode = repl::getGlobalReplicationCoordinator()->getReplicationMode(); if (replMode != repl::ReplicationCoordinator::modeNone) { response->setLastOp( _client->getLastOp() ); if (replMode == repl::ReplicationCoordinator::modeReplSet) { response->setElectionId(repl::theReplSet->getElectionId()); } } // Set the stats for the response response->setN( _stats->numInserted + _stats->numUpserted + _stats->numMatched + _stats->numDeleted ); if ( request.getBatchType() == BatchedCommandRequest::BatchType_Update ) response->setNModified( _stats->numModified ); } dassert( response->isValid( NULL ) ); }
void WriteBatchExecutor::execInserts( const BatchedCommandRequest& request, std::vector<WriteErrorDetail*>* errors ) { // Bulk insert is a bit different from other bulk operations in that multiple request docs // can be processed at once inside the write lock. const NamespaceString nss( request.getTargetingNS() ); scoped_ptr<BatchItemRef> currInsertItem( new BatchItemRef( &request, 0 ) ); // Go through our request and do some preprocessing on insert documents outside the lock to // validate and put them in a normalized form - i.e. put _id in front and fill in // timestamps. The insert document may also be invalid. // TODO: Might be more efficient to do in batches. vector<StatusWith<BSONObj> > normalInserts; normalizeInserts( request, &normalInserts ); while ( currInsertItem->getItemIndex() < static_cast<int>( request.sizeWriteOps() ) ) { WriteOpResult currResult; // Don't (re-)acquire locks and create database until it's necessary if ( !normalInserts[currInsertItem->getItemIndex()].isOK() ) { currResult.error = toWriteError( normalInserts[currInsertItem->getItemIndex()].getStatus() ); } else { PageFaultRetryableSection pFaultSection; //////////////////////////////////// Lock::DBWrite writeLock( nss.ns() ); //////////////////////////////////// // Check version inside of write lock if ( checkIsMasterForCollection( nss, &currResult.error ) && checkShardVersion( &shardingState, request, &currResult.error ) && checkIndexConstraints( &shardingState, request, &currResult.error ) ) { // // Get the collection for the insert // scoped_ptr<Client::Context> writeContext; Collection* collection = NULL; try { // Context once we're locked, to set more details in currentOp() // TODO: better constructor? writeContext.reset( new Client::Context( request.getNS(), storageGlobalParams.dbpath, false /* don't check version */) ); Database* database = writeContext->db(); dassert( database ); collection = database->getCollection( nss.ns() ); if ( !collection ) { // Implicitly create if it doesn't exist collection = database->createCollection( nss.ns() ); if ( !collection ) { currResult.error = toWriteError( Status( ErrorCodes::InternalError, "could not create collection" ) ); } } } catch ( const DBException& ex ) { Status status(ex.toStatus()); if (ErrorCodes::isInterruption(status.code())) { throw; } currResult.error = toWriteError(status); } // // Perform writes inside write lock // while ( collection && currInsertItem->getItemIndex() < static_cast<int>( request.sizeWriteOps() ) ) { // // BEGIN CURRENT OP // scoped_ptr<CurOp> currentOp( beginCurrentOp( _client, *currInsertItem ) ); incOpStats( *currInsertItem ); // Get the actual document we want to write, assuming it's valid const StatusWith<BSONObj>& normalInsert = // normalInserts[currInsertItem->getItemIndex()]; const BSONObj& normalInsertDoc = normalInsert.getValue().isEmpty() ? currInsertItem->getDocument() : normalInsert.getValue(); if ( !normalInsert.isOK() ) { // This insert failed on preprocessing currResult.error = toWriteError( normalInsert.getStatus() ); } else if ( !request.isInsertIndexRequest() ) { // Try the insert singleInsert( *currInsertItem, normalInsertDoc, collection, &currResult ); } else { // Try the create index singleCreateIndex( *currInsertItem, normalInsertDoc, collection, &currResult ); } // // END CURRENT OP // finishCurrentOp( _client, currentOp.get(), currResult.error ); // Faults release the write lock if ( currResult.fault ) break; // In general, we might have stats and errors incWriteStats( *currInsertItem, currResult.stats, currResult.error, currentOp.get() ); // Errors release the write lock if ( currResult.error ) break; // Increment in the write lock and reset the stats for next time currInsertItem.reset( new BatchItemRef( &request, currInsertItem->getItemIndex() + 1 ) ); currResult.reset(); // Destruct curop so that our parent curop is restored, so that we // record the yield count in the parent. currentOp.reset(NULL); // yield sometimes int micros = ClientCursor::suggestYieldMicros(); if (micros > 0) { ClientCursor::staticYield(micros, "", NULL); } } } } // END WRITE LOCK // // Store the current error if it exists // if ( currResult.error ) { errors->push_back( currResult.releaseError() ); errors->back()->setIndex( currInsertItem->getItemIndex() ); // Break early for ordered batches if ( request.getOrdered() ) break; } // // Fault or increment // if ( currResult.fault ) { // Check page fault out of lock currResult.fault->touch(); } else { // Increment if not a fault currInsertItem.reset( new BatchItemRef( &request, currInsertItem->getItemIndex() + 1 ) ); } } }
void WriteBatchExecutor::executeBatch( const BatchedCommandRequest& request, BatchedCommandResponse* response ) { // TODO: Lift write concern parsing out of this entirely. WriteConcernOptions writeConcern; Status status = Status::OK(); BSONObj wcDoc; if ( request.isWriteConcernSet() ) { wcDoc = request.getWriteConcern(); } if ( wcDoc.isEmpty() ) { status = writeConcern.parse( _defaultWriteConcern ); } else { status = writeConcern.parse( wcDoc ); } if ( status.isOK() ) { status = validateWriteConcern( writeConcern ); } if ( !status.isOK() ) { response->setErrCode( status.code() ); response->setErrMessage( status.reason() ); response->setOk( false ); dassert( response->isValid(NULL) ); return; } bool silentWC = writeConcern.wMode.empty() && writeConcern.wNumNodes == 0 && writeConcern.syncMode == WriteConcernOptions::NONE; Timer commandTimer; OwnedPointerVector<WriteErrorDetail> writeErrorsOwned; vector<WriteErrorDetail*>& writeErrors = writeErrorsOwned.mutableVector(); OwnedPointerVector<BatchedUpsertDetail> upsertedOwned; vector<BatchedUpsertDetail*>& upserted = upsertedOwned.mutableVector(); // // Apply each batch item, possibly bulking some items together in the write lock. // Stops on error if batch is ordered. // bulkExecute( request, &upserted, &writeErrors ); // // Try to enforce the write concern if everything succeeded (unordered or ordered) // OR if something succeeded and we're unordered. // auto_ptr<WCErrorDetail> wcError; bool needToEnforceWC = writeErrors.empty() || ( !request.getOrdered() && writeErrors.size() < request.sizeWriteOps() ); if ( needToEnforceWC ) { _client->curop()->setMessage( "waiting for write concern" ); WriteConcernResult res; status = waitForWriteConcern( writeConcern, _client->getLastOp(), &res ); if ( !status.isOK() ) { wcError.reset( toWriteConcernError( status, res ) ); } } // // Refresh metadata if needed // bool staleBatch = !writeErrors.empty() && writeErrors.back()->getErrCode() == ErrorCodes::StaleShardVersion; if ( staleBatch ) { const BatchedRequestMetadata* requestMetadata = request.getMetadata(); dassert( requestMetadata ); // Make sure our shard name is set or is the same as what was set previously if ( shardingState.setShardName( requestMetadata->getShardName() ) ) { // // First, we refresh metadata if we need to based on the requested version. // ChunkVersion latestShardVersion; shardingState.refreshMetadataIfNeeded( request.getTargetingNS(), requestMetadata->getShardVersion(), &latestShardVersion ); // Report if we're still changing our metadata // TODO: Better reporting per-collection if ( shardingState.inCriticalMigrateSection() ) { noteInCriticalSection( writeErrors.back() ); } if ( queueForMigrationCommit ) { // // Queue up for migration to end - this allows us to be sure that clients will // not repeatedly try to refresh metadata that is not yet written to the config // server. Not necessary for correctness. // Exposed as optional parameter to allow testing of queuing behavior with // different network timings. // const ChunkVersion& requestShardVersion = requestMetadata->getShardVersion(); // // Only wait if we're an older version (in the current collection epoch) and // we're not write compatible, implying that the current migration is affecting // writes. // if ( requestShardVersion.isOlderThan( latestShardVersion ) && !requestShardVersion.isWriteCompatibleWith( latestShardVersion ) ) { while ( shardingState.inCriticalMigrateSection() ) { log() << "write request to old shard version " << requestMetadata->getShardVersion().toString() << " waiting for migration commit" << endl; shardingState.waitTillNotInCriticalSection( 10 /* secs */); } } } } else { // If our shard name is stale, our version must have been stale as well dassert( writeErrors.size() == request.sizeWriteOps() ); } } // // Construct response // response->setOk( true ); if ( !silentWC ) { if ( upserted.size() ) { response->setUpsertDetails( upserted ); upserted.clear(); } if ( writeErrors.size() ) { response->setErrDetails( writeErrors ); writeErrors.clear(); } if ( wcError.get() ) { response->setWriteConcernError( wcError.release() ); } if ( anyReplEnabled() ) { response->setLastOp( _client->getLastOp() ); if (theReplSet) { response->setElectionId( theReplSet->getElectionId() ); } } // Set the stats for the response response->setN( _stats->numInserted + _stats->numUpserted + _stats->numMatched + _stats->numDeleted ); if ( request.getBatchType() == BatchedCommandRequest::BatchType_Update ) response->setNModified( _stats->numModified ); } dassert( response->isValid( NULL ) ); }
void BatchSafeWriter::safeWriteBatch( DBClientBase* conn, const BatchedCommandRequest& request, BatchedCommandResponse* response ) { const NamespaceString nss( request.getNS() ); // N starts at zero, and we add to it for each item response->setN( 0 ); for ( size_t i = 0; i < request.sizeWriteOps(); ++i ) { // Break on first error if we're ordered if ( request.getOrdered() && response->isErrDetailsSet() ) break; BatchItemRef itemRef( &request, static_cast<int>( i ) ); bool isLastItem = ( i == request.sizeWriteOps() - 1 ); BSONObj writeConcern; if ( isLastItem && request.isWriteConcernSet() ) { writeConcern = request.getWriteConcern(); // Pre-2.4.2 mongods react badly to 'w' being set on config servers if ( nss.db() == "config" ) writeConcern = fixWCForConfig( writeConcern ); } BSONObj gleResult; GLEErrors errors; Status status = _safeWriter->safeWrite( conn, itemRef, writeConcern, &gleResult ); if ( status.isOK() ) { status = extractGLEErrors( gleResult, &errors ); } if ( !status.isOK() ) { response->clear(); response->setOk( false ); response->setErrCode( status.code() ); response->setErrMessage( status.reason() ); return; } // // STATS HANDLING // GLEStats stats; extractGLEStats( gleResult, &stats ); // Special case for making legacy "n" field result for insert match the write // command result. if ( request.getBatchType() == BatchedCommandRequest::BatchType_Insert && !errors.writeError.get() ) { // n is always 0 for legacy inserts. dassert( stats.n == 0 ); stats.n = 1; } response->setN( response->getN() + stats.n ); if ( !stats.upsertedId.isEmpty() ) { BatchedUpsertDetail* upsertedId = new BatchedUpsertDetail; upsertedId->setIndex( i ); upsertedId->setUpsertedID( stats.upsertedId ); response->addToUpsertDetails( upsertedId ); } response->setLastOp( stats.lastOp ); // // WRITE ERROR HANDLING // // If any error occurs (except stale config) the previous GLE was not enforced bool enforcedWC = !errors.writeError.get() || errors.writeError->getErrCode() == ErrorCodes::StaleShardVersion; // Save write error if ( errors.writeError.get() ) { errors.writeError->setIndex( i ); response->addToErrDetails( errors.writeError.release() ); } // // WRITE CONCERN ERROR HANDLING // // The last write is weird, since we enforce write concern and check the error through // the same GLE if possible. If the last GLE was an error, the write concern may not // have been enforced in that same GLE, so we need to send another after resetting the // error. if ( isLastItem ) { // Try to enforce the write concern if everything succeeded (unordered or ordered) // OR if something succeeded and we're unordered. bool needToEnforceWC = !response->isErrDetailsSet() || ( !request.getOrdered() && response->sizeErrDetails() < request.sizeWriteOps() ); if ( !enforcedWC && needToEnforceWC ) { dassert( !errors.writeError.get() ); // emptied above // Might have gotten a write concern validity error earlier, these are // enforced even if the wc isn't applied, so we ignore. errors.wcError.reset(); Status status = _safeWriter->enforceWriteConcern( conn, nss.db().toString(), writeConcern, &gleResult ); if ( status.isOK() ) { status = extractGLEErrors( gleResult, &errors ); } if ( !status.isOK() ) { response->clear(); response->setOk( false ); response->setErrCode( status.code() ); response->setErrMessage( status.reason() ); return; } } // END Write concern retry if ( errors.wcError.get() ) { response->setWriteConcernError( errors.wcError.release() ); } } } response->setOk( true ); dassert( response->isValid( NULL ) ); }
void BatchSafeWriter::safeWriteBatch( DBClientBase* conn, const BatchedCommandRequest& request, BatchedCommandResponse* response ) { const NamespaceString nss( request.getNS() ); // N starts at zero, and we add to it for each item response->setN( 0 ); // GLE path always sets nModified to -1 (sentinel) to indicate we should omit it later. response->setNModified(-1); for ( size_t i = 0; i < request.sizeWriteOps(); ++i ) { // Break on first error if we're ordered if ( request.getOrdered() && response->isErrDetailsSet() ) break; BatchItemRef itemRef( &request, static_cast<int>( i ) ); BSONObj gleResult; GLEErrors errors; Status status = _safeWriter->safeWrite( conn, itemRef, WriteConcernOptions::Acknowledged, &gleResult ); if ( status.isOK() ) { status = extractGLEErrors( gleResult, &errors ); } if ( !status.isOK() ) { response->clear(); response->setOk( false ); response->setErrCode( ErrorCodes::RemoteResultsUnavailable ); StringBuilder builder; builder << "could not get write error from safe write"; builder << causedBy( status.toString() ); response->setErrMessage( builder.str() ); return; } if ( errors.wcError.get() ) { response->setWriteConcernError( errors.wcError.release() ); } // // STATS HANDLING // GLEStats stats; extractGLEStats( gleResult, &stats ); // Special case for making legacy "n" field result for insert match the write // command result. if ( request.getBatchType() == BatchedCommandRequest::BatchType_Insert && !errors.writeError.get() ) { // n is always 0 for legacy inserts. dassert( stats.n == 0 ); stats.n = 1; } response->setN( response->getN() + stats.n ); if ( !stats.upsertedId.isEmpty() ) { BatchedUpsertDetail* upsertedId = new BatchedUpsertDetail; upsertedId->setIndex( i ); upsertedId->setUpsertedID( stats.upsertedId ); response->addToUpsertDetails( upsertedId ); } response->setLastOp( stats.lastOp ); // Save write error if ( errors.writeError.get() ) { errors.writeError->setIndex( i ); response->addToErrDetails( errors.writeError.release() ); } } // // WRITE CONCERN ERROR HANDLING // // The last write is weird, since we enforce write concern and check the error through // the same GLE if possible. If the last GLE was an error, the write concern may not // have been enforced in that same GLE, so we need to send another after resetting the // error. BSONObj writeConcern; if ( request.isWriteConcernSet() ) { writeConcern = request.getWriteConcern(); // Pre-2.4.2 mongods react badly to 'w' being set on config servers if ( nss.db() == "config" ) writeConcern = fixWCForConfig( writeConcern ); } bool needToEnforceWC = WriteConcernOptions::Acknowledged.woCompare(writeConcern) != 0 && WriteConcernOptions::Unacknowledged.woCompare(writeConcern) != 0; if ( needToEnforceWC && ( !response->isErrDetailsSet() || ( !request.getOrdered() && // Not all errored. Note: implicit response->isErrDetailsSet(). response->sizeErrDetails() < request.sizeWriteOps() ))) { // Might have gotten a write concern validity error earlier, these are // enforced even if the wc isn't applied, so we ignore. response->unsetWriteConcernError(); const string dbName( nss.db().toString() ); Status status( Status::OK() ); if ( response->isErrDetailsSet() ) { const WriteErrorDetail* lastError = response->getErrDetails().back(); // If last write op was an error. if ( lastError->getIndex() == static_cast<int>( request.sizeWriteOps() - 1 )) { // Reset previous errors so we can apply the write concern no matter what // as long as it is valid. status = _safeWriter->clearErrors( conn, dbName ); } } BSONObj gleResult; if ( status.isOK() ) { status = _safeWriter->enforceWriteConcern( conn, dbName, writeConcern, &gleResult ); } GLEErrors errors; if ( status.isOK() ) { status = extractGLEErrors( gleResult, &errors ); } if ( !status.isOK() ) { auto_ptr<WCErrorDetail> wcError( new WCErrorDetail ); wcError->setErrCode( status.code() ); wcError->setErrMessage( status.reason() ); response->setWriteConcernError( wcError.release() ); } else if ( errors.wcError.get() ) { response->setWriteConcernError( errors.wcError.release() ); } } response->setOk( true ); dassert( response->isValid( NULL ) ); }