// Helper function to cancel all the write ops of targeted batches in a map static void cancelBatches(const WriteErrorDetail& why, WriteOp* writeOps, TargetedBatchMap* batchMap) { set<WriteOp*> targetedWriteOps; // Collect all the writeOps that are currently targeted for (TargetedBatchMap::iterator it = batchMap->begin(); it != batchMap->end();) { TargetedWriteBatch* batch = it->second; const vector<TargetedWrite*>& writes = batch->getWrites(); for (vector<TargetedWrite*>::const_iterator writeIt = writes.begin(); writeIt != writes.end(); ++writeIt) { TargetedWrite* write = *writeIt; // NOTE: We may repeatedly cancel a write op here, but that's fast and we want to // cancel before erasing the TargetedWrite* (which owns the cancelled targeting // info) for reporting reasons. writeOps[write->writeOpRef.first].cancelWrites(&why); } // Note that we need to *erase* first, *then* delete, since the map keys are ptrs from // the values batchMap->erase(it++); delete batch; } batchMap->clear(); }
void BatchWriteOp::buildBatchRequest(const TargetedWriteBatch& targetedBatch, BatchedCommandRequest* request) const { request->setNS(_clientRequest->getNS()); request->setShouldBypassValidation(_clientRequest->shouldBypassValidation()); const vector<TargetedWrite*>& targetedWrites = targetedBatch.getWrites(); for (vector<TargetedWrite*>::const_iterator it = targetedWrites.begin(); it != targetedWrites.end(); ++it) { const WriteOpRef& writeOpRef = (*it)->writeOpRef; BatchedCommandRequest::BatchType batchType = _clientRequest->getBatchType(); // NOTE: We copy the batch items themselves here from the client request // TODO: This could be inefficient, maybe we want to just reference in the future if (batchType == BatchedCommandRequest::BatchType_Insert) { BatchedInsertRequest* clientInsertRequest = _clientRequest->getInsertRequest(); BSONObj insertDoc = clientInsertRequest->getDocumentsAt(writeOpRef.first); request->getInsertRequest()->addToDocuments(insertDoc); } else if (batchType == BatchedCommandRequest::BatchType_Update) { BatchedUpdateRequest* clientUpdateRequest = _clientRequest->getUpdateRequest(); BatchedUpdateDocument* updateDoc = new BatchedUpdateDocument; clientUpdateRequest->getUpdatesAt(writeOpRef.first)->cloneTo(updateDoc); request->getUpdateRequest()->addToUpdates(updateDoc); } else { dassert(batchType == BatchedCommandRequest::BatchType_Delete); BatchedDeleteRequest* clientDeleteRequest = _clientRequest->getDeleteRequest(); BatchedDeleteDocument* deleteDoc = new BatchedDeleteDocument; clientDeleteRequest->getDeletesAt(writeOpRef.first)->cloneTo(deleteDoc); request->getDeleteRequest()->addToDeletes(deleteDoc); } // TODO: We can add logic here to allow aborting individual ops // if ( NULL == response ) { // ->responses.erase( it++ ); // continue; //} } if (_clientRequest->isWriteConcernSet()) { if (_clientRequest->isVerboseWC()) { request->setWriteConcern(_clientRequest->getWriteConcern()); } else { // Mongos needs to send to the shard with w > 0 so it 
will be able to // see the writeErrors. request->setWriteConcern(upgradeWriteConcern(_clientRequest->getWriteConcern())); } } if (!request->isOrderedSet()) { request->setOrdered(_clientRequest->getOrdered()); } unique_ptr<BatchedRequestMetadata> requestMetadata(new BatchedRequestMetadata()); requestMetadata->setShardVersion( ChunkVersionAndOpTime(targetedBatch.getEndpoint().shardVersion)); requestMetadata->setSession(0); request->setMetadata(requestMetadata.release()); }
// Helper function to cancel all the write ops of targeted batches in a map static void cancelBatches( const BatchedErrorDetail& why, WriteOp* writeOps, TargetedBatchMap* batchMap ) { set<WriteOp*> targetedWriteOps; // Collect all the writeOps that are currently targeted for ( TargetedBatchMap::iterator it = batchMap->begin(); it != batchMap->end(); ) { TargetedWriteBatch* batch = it->second; const vector<TargetedWrite*>& writes = batch->getWrites(); for ( vector<TargetedWrite*>::const_iterator writeIt = writes.begin(); writeIt != writes.end(); ++writeIt ) { TargetedWrite* write = *writeIt; targetedWriteOps.insert( &writeOps[write->writeOpRef.first] ); } // Note that we need to *erase* first, *then* delete, since the map keys are ptrs from // the values batchMap->erase( it++ ); delete batch; } batchMap->clear(); // Cancel all the write ops we found above for ( set<WriteOp*>::iterator it = targetedWriteOps.begin(); it != targetedWriteOps.end(); ++it ) { WriteOp* writeOp = *it; writeOp->cancelWrites( &why ); } }
void BatchWriteOp::noteBatchError(const TargetedWriteBatch& targetedBatch, const WriteErrorDetail& error) { // Treat errors to get a batch response as failures of the contained writes BatchedCommandResponse emulatedResponse; toWriteErrorResponse( error, _clientRequest->getOrdered(), targetedBatch.getWrites().size(), &emulatedResponse); noteBatchResponse(targetedBatch, emulatedResponse, NULL); }
// Helper function to cancel all the write ops of targeted batch. static void cancelBatch( const TargetedWriteBatch& targetedBatch, WriteOp* writeOps, const WriteErrorDetail& why ) { const vector<TargetedWrite*>& writes = targetedBatch.getWrites(); for ( vector<TargetedWrite*>::const_iterator writeIt = writes.begin(); writeIt != writes.end(); ++writeIt ) { TargetedWrite* write = *writeIt; // NOTE: We may repeatedly cancel a write op here, but that's fast. writeOps[write->writeOpRef.first].cancelWrites( &why ); } }
void BatchWriteOp::buildBatchRequest( const TargetedWriteBatch& targetedBatch, BatchedCommandRequest* request ) const { request->setNS( _clientRequest->getNS() ); request->setShardVersion( targetedBatch.getEndpoint().shardVersion ); const vector<TargetedWrite*>& targetedWrites = targetedBatch.getWrites(); for ( vector<TargetedWrite*>::const_iterator it = targetedWrites.begin(); it != targetedWrites.end(); ++it ) { const WriteOpRef& writeOpRef = ( *it )->writeOpRef; BatchedCommandRequest::BatchType batchType = _clientRequest->getBatchType(); // NOTE: We copy the batch items themselves here from the client request // TODO: This could be inefficient, maybe we want to just reference in the future if ( batchType == BatchedCommandRequest::BatchType_Insert ) { BatchedInsertRequest* clientInsertRequest = _clientRequest->getInsertRequest(); BSONObj insertDoc = clientInsertRequest->getDocumentsAt( writeOpRef.first ); request->getInsertRequest()->addToDocuments( insertDoc ); } else if ( batchType == BatchedCommandRequest::BatchType_Update ) { BatchedUpdateRequest* clientUpdateRequest = _clientRequest->getUpdateRequest(); BatchedUpdateDocument* updateDoc = new BatchedUpdateDocument; clientUpdateRequest->getUpdatesAt( writeOpRef.first )->cloneTo( updateDoc ); request->getUpdateRequest()->addToUpdates( updateDoc ); } else { dassert( batchType == BatchedCommandRequest::BatchType_Delete ); BatchedDeleteRequest* clientDeleteRequest = _clientRequest->getDeleteRequest(); BatchedDeleteDocument* deleteDoc = new BatchedDeleteDocument; clientDeleteRequest->getDeletesAt( writeOpRef.first )->cloneTo( deleteDoc ); request->getDeleteRequest()->addToDeletes( deleteDoc ); } // TODO: We can add logic here to allow aborting individual ops //if ( NULL == response ) { // ->responses.erase( it++ ); // continue; //} } if ( _clientRequest->isWriteConcernSet() ) { request->setWriteConcern( _clientRequest->getWriteConcern() ); } if ( _clientRequest->isContinueOnErrorSet() ) { 
request->setContinueOnError( _clientRequest->getContinueOnError() ); } request->setSession( 0 ); }
/**
 * Processes a shard's response for 'targetedBatch'.
 *
 * A command-level failure is converted into per-write failures via noteBatchError().
 * Otherwise: the batch is untracked, stats are incremented, any write-concern error is
 * saved for later, per-item errors are matched (by sorted index) to the batch's writes,
 * each write is completed or failed accordingly, errors of interest are recorded in
 * 'trackedErrors' (may be NULL), and upserted-id indexes are remapped from child-batch
 * positions back to client-batch positions.
 */
void BatchWriteOp::noteBatchResponse(const TargetedWriteBatch& targetedBatch,
                                     const BatchedCommandResponse& response,
                                     TrackedErrors* trackedErrors) {
    if (!response.getOk()) {
        WriteErrorDetail error;
        cloneCommandErrorTo(response, &error);

        // Treat command errors exactly like other failures of the batch
        // Note that no errors will be tracked from these failures - as-designed
        noteBatchError(targetedBatch, error);
        return;
    }

    dassert(response.getOk());

    // Stop tracking targeted batch
    _targeted.erase(&targetedBatch);

    // Increment stats for this batch
    incBatchStats(_clientRequest->getBatchType(), response, _stats.get());

    //
    // Assign errors to particular items.
    // Write Concern errors are stored and handled later.
    //

    // Special handling for write concern errors, save for later
    if (response.isWriteConcernErrorSet()) {
        unique_ptr<ShardWCError> wcError(
            new ShardWCError(targetedBatch.getEndpoint(), *response.getWriteConcernError()));
        _wcErrors.mutableVector().push_back(wcError.release());
    }

    vector<WriteErrorDetail*> itemErrors;

    // Handle batch and per-item errors
    if (response.isErrDetailsSet()) {
        // Per-item errors were set
        itemErrors.insert(
            itemErrors.begin(), response.getErrDetails().begin(), response.getErrDetails().end());

        // Sort per-item errors by index so they can be matched against the writes in order
        std::sort(itemErrors.begin(), itemErrors.end(), WriteErrorDetailComp());
    }

    //
    // Go through all pending responses of the op and sorted remote reponses, populate errors
    // This will either set all errors to the batch error or apply per-item errors as-needed
    //
    // If the batch is ordered, cancel all writes after the first error for retargeting.
    //

    bool ordered = _clientRequest->getOrdered();

    vector<WriteErrorDetail*>::iterator itemErrorIt = itemErrors.begin();
    int index = 0;
    WriteErrorDetail* lastError = NULL;
    for (vector<TargetedWrite*>::const_iterator it = targetedBatch.getWrites().begin();
         it != targetedBatch.getWrites().end();
         ++it, ++index) {
        const TargetedWrite* write = *it;
        WriteOp& writeOp = _writeOps[write->writeOpRef.first];

        // Every write in this batch should still be awaiting its response
        dassert(writeOp.getWriteState() == WriteOpState_Pending);

        // See if we have an error for the write
        WriteErrorDetail* writeError = NULL;

        if (itemErrorIt != itemErrors.end() && (*itemErrorIt)->getIndex() == index) {
            // We have an per-item error for this write op's index
            writeError = *itemErrorIt;
            ++itemErrorIt;
        }

        // Finish the response (with error, if needed)
        if (NULL == writeError) {
            if (!ordered || !lastError) {
                writeOp.noteWriteComplete(*write);
            } else {
                // We didn't actually apply this write - cancel so we can retarget
                dassert(writeOp.getNumTargeted() == 1u);
                writeOp.cancelWrites(lastError);
            }
        } else {
            writeOp.noteWriteError(*write, *writeError);
            lastError = writeError;
        }
    }

    // Track errors we care about, whether batch or individual errors
    if (NULL != trackedErrors) {
        trackErrors(targetedBatch.getEndpoint(), itemErrors, trackedErrors);
    }

    // Track upserted ids if we need to
    if (response.isUpsertDetailsSet()) {
        const vector<BatchedUpsertDetail*>& upsertedIds = response.getUpsertDetails();
        for (vector<BatchedUpsertDetail*>::const_iterator it = upsertedIds.begin();
             it != upsertedIds.end();
             ++it) {
            // The child upserted details don't have the correct index for the full batch
            const BatchedUpsertDetail* childUpsertedId = *it;

            // Work backward from the child batch item index to the batch item index
            int childBatchIndex = childUpsertedId->getIndex();
            int batchIndex = targetedBatch.getWrites()[childBatchIndex]->writeOpRef.first;

            // Push the upserted id with the correct index into the batch upserted ids
            BatchedUpsertDetail* upsertedId = new BatchedUpsertDetail;
            upsertedId->setIndex(batchIndex);
            upsertedId->setUpsertedID(childUpsertedId->getUpsertedID());
            _upsertedIds.mutableVector().push_back(upsertedId);
        }
    }
}
/**
 * Targets all _Ready write ops against 'targeter', grouping the resulting TargetedWrites
 * into per-endpoint TargetedWriteBatches which are handed back via 'targetedBatches'
 * (ownership of the batches passes to the caller; they are also tracked in _targeted).
 *
 * If 'recordTargetErrors' is false, any targeting failure cancels all current batches
 * and returns the failing status; otherwise targeting errors are recorded on the
 * individual write ops and targeting continues (or stops early for ordered batches).
 */
Status BatchWriteOp::targetBatch(OperationContext* txn,
                                 const NSTargeter& targeter,
                                 bool recordTargetErrors,
                                 vector<TargetedWriteBatch*>* targetedBatches) {
    //
    // Targeting of unordered batches is fairly simple - each remaining write op is targeted,
    // and each of those targeted writes are grouped into a batch for a particular shard
    // endpoint.
    //
    // Targeting of ordered batches is a bit more complex - to respect the ordering of the
    // batch, we can only send:
    // A) a single targeted batch to one shard endpoint
    // B) multiple targeted batches, but only containing targeted writes for a single write op
    //
    // This means that any multi-shard write operation must be targeted and sent one-by-one.
    // Subsequent single-shard write operations can be batched together if they go to the same
    // place.
    //
    // Ex: ShardA : { skey : a->k }, ShardB : { skey : k->z }
    //
    // Ordered insert batch of: [{ skey : a }, { skey : b }, { skey : x }]
    // broken into:
    //  [{ skey : a }, { skey : b }],
    //  [{ skey : x }]
    //
    // Ordered update Batch of :
    //  [{ skey : a }{ $push },
    //   { skey : b }{ $push },
    //   { skey : [c, x] }{ $push },
    //   { skey : y }{ $push },
    //   { skey : z }{ $push }]
    // broken into:
    //  [{ skey : a }, { skey : b }],
    //  [{ skey : [c,x] }],
    //  [{ skey : y }, { skey : z }]
    //

    const bool ordered = _clientRequest->getOrdered();

    TargetedBatchMap batchMap;
    TargetedBatchSizeMap batchSizes;

    int numTargetErrors = 0;

    size_t numWriteOps = _clientRequest->sizeWriteOps();
    for (size_t i = 0; i < numWriteOps; ++i) {
        WriteOp& writeOp = _writeOps[i];

        // Only target _Ready ops
        if (writeOp.getWriteState() != WriteOpState_Ready)
            continue;

        //
        // Get TargetedWrites from the targeter for the write operation
        //
        // TargetedWrites need to be owned once returned
        OwnedPointerVector<TargetedWrite> writesOwned;
        vector<TargetedWrite*>& writes = writesOwned.mutableVector();

        Status targetStatus = writeOp.targetWrites(txn, targeter, &writes);

        if (!targetStatus.isOK()) {
            WriteErrorDetail targetError;
            buildTargetError(targetStatus, &targetError);

            if (!recordTargetErrors) {
                // Cancel current batch state with an error
                cancelBatches(targetError, _writeOps, &batchMap);
                dassert(batchMap.empty());
                return targetStatus;
            } else if (!ordered || batchMap.empty()) {
                // Record an error for this batch
                writeOp.setOpError(targetError);
                ++numTargetErrors;

                // An ordered batch must stop at the first error; unordered keeps going
                if (ordered)
                    return Status::OK();

                continue;
            } else {
                dassert(ordered && !batchMap.empty());

                // Send out what we have, but don't record an error yet, since there may be an
                // error in the writes before this point.
                writeOp.cancelWrites(&targetError);
                break;
            }
        }

        //
        // If ordered and we have a previous endpoint, make sure we don't need to send these
        // targeted writes to any other endpoints.
        //
        if (ordered && !batchMap.empty()) {
            dassert(batchMap.size() == 1u);
            if (isNewBatchRequired(writes, batchMap)) {
                writeOp.cancelWrites(NULL);
                break;
            }
        }

        //
        // If this write will push us over some sort of size limit, stop targeting
        //
        int writeSizeBytes = getWriteSizeBytes(writeOp);
        if (wouldMakeBatchesTooBig(writes, writeSizeBytes, batchSizes)) {
            invariant(!batchMap.empty());
            writeOp.cancelWrites(NULL);
            break;
        }

        //
        // Targeting went ok, add to appropriate TargetedBatch
        //
        for (vector<TargetedWrite*>::iterator it = writes.begin(); it != writes.end(); ++it) {
            TargetedWrite* write = *it;

            TargetedBatchMap::iterator batchIt = batchMap.find(&write->endpoint);
            TargetedBatchSizeMap::iterator batchSizeIt = batchSizes.find(&write->endpoint);

            if (batchIt == batchMap.end()) {
                // First write for this endpoint: create a new batch (and size entry),
                // keyed by the endpoint owned by the batch itself
                TargetedWriteBatch* newBatch = new TargetedWriteBatch(write->endpoint);
                batchIt = batchMap.insert(make_pair(&newBatch->getEndpoint(), newBatch)).first;
                batchSizeIt =
                    batchSizes.insert(make_pair(&newBatch->getEndpoint(), BatchSize())).first;
            }

            TargetedWriteBatch* batch = batchIt->second;
            BatchSize& batchSize = batchSizeIt->second;

            ++batchSize.numOps;
            batchSize.sizeBytes += writeSizeBytes;
            batch->addWrite(write);
        }

        // Relinquish ownership of TargetedWrites, now the TargetedBatches own them
        writesOwned.mutableVector().clear();

        //
        // Break if we're ordered and we have more than one endpoint - later writes cannot be
        // enforced as ordered across multiple shard endpoints.
        //
        if (ordered && batchMap.size() > 1u)
            break;
    }

    //
    // Send back our targeted batches
    //
    for (TargetedBatchMap::iterator it = batchMap.begin(); it != batchMap.end(); ++it) {
        TargetedWriteBatch* batch = it->second;

        if (batch->getWrites().empty())
            continue;

        // Remember targeted batch for reporting
        _targeted.insert(batch);

        // Send the handle back to caller
        targetedBatches->push_back(batch);
    }

    return Status::OK();
}
/**
 * Processes a shard's response for 'targetedBatch' (older error model).
 *
 * Classifies a failed response into a write-concern error (saved for later), per-item
 * errors (sorted by index and matched to the batch's writes), or a single full-batch
 * error applied to every write. Each write is then completed or failed accordingly,
 * errors of interest are recorded in 'trackedErrors' (may be NULL), and the batch is
 * removed from _targeted.
 */
void BatchWriteOp::noteBatchResponse( const TargetedWriteBatch& targetedBatch,
                                      const BatchedCommandResponse& response,
                                      TrackedErrors* trackedErrors ) {

    //
    // Organize errors based on error code.
    // We may have *either* a batch error or errors per-item.
    // (Write Concern errors are stored and handled later.)
    //

    vector<BatchedErrorDetail*> itemErrors;
    scoped_ptr<BatchedErrorDetail> batchError;

    if ( !response.getOk() ) {

        int errCode = response.getErrCode();
        bool isWCError = isWCErrCode( errCode );

        // Special handling for write concern errors, save for later
        if ( isWCError ) {
            BatchedErrorDetail error;
            cloneBatchErrorTo( response, &error );
            ShardError* wcError = new ShardError( targetedBatch.getEndpoint(), error );
            _wcErrors.mutableVector().push_back( wcError );
        }

        // Handle batch and per-item errors
        if ( response.isErrDetailsSet() ) {

            // Per-item errors were set
            itemErrors.insert( itemErrors.begin(),
                               response.getErrDetails().begin(),
                               response.getErrDetails().end() );

            // Sort per-item errors by index so they can be matched against writes in order
            std::sort( itemErrors.begin(), itemErrors.end(), BatchedErrorDetailComp() );
        }
        else if ( !isWCError ) {

            // Per-item errors were not set and this error is not a WC error
            // => this is a full-batch error
            batchError.reset( new BatchedErrorDetail );
            cloneBatchErrorTo( response, batchError.get() );
        }
    }

    // We can't have both a batch error and per-item errors
    dassert( !( batchError && !itemErrors.empty() ) );

    //
    // Go through all pending responses of the op and sorted remote reponses, populate errors
    // This will either set all errors to the batch error or apply per-item errors as-needed
    //

    vector<BatchedErrorDetail*>::iterator itemErrorIt = itemErrors.begin();
    int index = 0;
    for ( vector<TargetedWrite*>::const_iterator it = targetedBatch.getWrites().begin();
          it != targetedBatch.getWrites().end(); ++it, ++index ) {

        const TargetedWrite* write = *it;
        WriteOp& writeOp = _writeOps[write->writeOpRef.first];

        // Every write in this batch should still be awaiting its response
        dassert( writeOp.getWriteState() == WriteOpState_Pending );

        // See if we have an error for the write
        BatchedErrorDetail* writeError = NULL;

        if ( batchError ) {
            // Default to batch error, if it exists
            writeError = batchError.get();
        }
        else if ( itemErrorIt != itemErrors.end() && ( *itemErrorIt )->getIndex() == index ) {
            // We have an per-item error for this write op's index
            writeError = *itemErrorIt;
            ++itemErrorIt;
        }

        // Finish the response (with error, if needed)
        if ( NULL == writeError ) {
            writeOp.noteWriteComplete( *write );
        }
        else {
            writeOp.noteWriteError( *write, *writeError );
        }
    }

    // Track errors we care about, whether batch or individual errors
    if ( NULL != trackedErrors ) {
        trackErrors( targetedBatch.getEndpoint(), batchError.get(), itemErrors, trackedErrors );
    }

    // Stop tracking targeted batch
    _targeted.erase( &targetedBatch );
}