Example #1
0
// Helper function to cancel all the write ops of targeted batches in a map.
//
// For every batch in 'batchMap', each contained targeted write has its owning write op
// (looked up in 'writeOps' by writeOpRef.first) cancelled with 'why'.  The batch is then
// erased from the map and deleted, so 'batchMap' is empty on return.
static void cancelBatches(const WriteErrorDetail& why,
                          WriteOp* writeOps,
                          TargetedBatchMap* batchMap) {
    // Cancel the write ops of each batch, then dispose of the batch itself
    for (TargetedBatchMap::iterator it = batchMap->begin(); it != batchMap->end();) {
        TargetedWriteBatch* batch = it->second;
        const vector<TargetedWrite*>& writes = batch->getWrites();

        for (vector<TargetedWrite*>::const_iterator writeIt = writes.begin();
             writeIt != writes.end();
             ++writeIt) {
            TargetedWrite* write = *writeIt;

            // NOTE: We may repeatedly cancel a write op here, but that's fast and we want to
            // cancel before erasing the TargetedWrite* (which owns the cancelled targeting
            // info) for reporting reasons.
            writeOps[write->writeOpRef.first].cancelWrites(&why);
        }

        // Note that we need to *erase* first, *then* delete, since the map keys are ptrs from
        // the values.  The post-increment advances 'it' before its entry is removed.
        batchMap->erase(it++);
        delete batch;
    }

    // Every entry was erased above; this is kept only as a defensive no-op.
    batchMap->clear();
}
Example #2
0
void BatchWriteOp::buildBatchRequest(const TargetedWriteBatch& targetedBatch,
                                     BatchedCommandRequest* request) const {
    request->setNS(_clientRequest->getNS());
    request->setShouldBypassValidation(_clientRequest->shouldBypassValidation());

    const vector<TargetedWrite*>& targetedWrites = targetedBatch.getWrites();

    for (vector<TargetedWrite*>::const_iterator it = targetedWrites.begin();
         it != targetedWrites.end();
         ++it) {
        const WriteOpRef& writeOpRef = (*it)->writeOpRef;
        BatchedCommandRequest::BatchType batchType = _clientRequest->getBatchType();

        // NOTE:  We copy the batch items themselves here from the client request
        // TODO: This could be inefficient, maybe we want to just reference in the future
        if (batchType == BatchedCommandRequest::BatchType_Insert) {
            BatchedInsertRequest* clientInsertRequest = _clientRequest->getInsertRequest();
            BSONObj insertDoc = clientInsertRequest->getDocumentsAt(writeOpRef.first);
            request->getInsertRequest()->addToDocuments(insertDoc);
        } else if (batchType == BatchedCommandRequest::BatchType_Update) {
            BatchedUpdateRequest* clientUpdateRequest = _clientRequest->getUpdateRequest();
            BatchedUpdateDocument* updateDoc = new BatchedUpdateDocument;
            clientUpdateRequest->getUpdatesAt(writeOpRef.first)->cloneTo(updateDoc);
            request->getUpdateRequest()->addToUpdates(updateDoc);
        } else {
            dassert(batchType == BatchedCommandRequest::BatchType_Delete);
            BatchedDeleteRequest* clientDeleteRequest = _clientRequest->getDeleteRequest();
            BatchedDeleteDocument* deleteDoc = new BatchedDeleteDocument;
            clientDeleteRequest->getDeletesAt(writeOpRef.first)->cloneTo(deleteDoc);
            request->getDeleteRequest()->addToDeletes(deleteDoc);
        }

        // TODO: We can add logic here to allow aborting individual ops
        // if ( NULL == response ) {
        //    ->responses.erase( it++ );
        //    continue;
        //}
    }

    if (_clientRequest->isWriteConcernSet()) {
        if (_clientRequest->isVerboseWC()) {
            request->setWriteConcern(_clientRequest->getWriteConcern());
        } else {
            // Mongos needs to send to the shard with w > 0 so it will be able to
            // see the writeErrors.
            request->setWriteConcern(upgradeWriteConcern(_clientRequest->getWriteConcern()));
        }
    }

    if (!request->isOrderedSet()) {
        request->setOrdered(_clientRequest->getOrdered());
    }

    unique_ptr<BatchedRequestMetadata> requestMetadata(new BatchedRequestMetadata());
    requestMetadata->setShardVersion(
        ChunkVersionAndOpTime(targetedBatch.getEndpoint().shardVersion));
    requestMetadata->setSession(0);

    request->setMetadata(requestMetadata.release());
}
Example #3
0
    // Helper function to cancel all the write ops of targeted batches in a map
    static void cancelBatches( const BatchedErrorDetail& why,
                               WriteOp* writeOps,
                               TargetedBatchMap* batchMap ) {

        set<WriteOp*> targetedWriteOps;

        // Collect all the writeOps that are currently targeted
        for ( TargetedBatchMap::iterator it = batchMap->begin(); it != batchMap->end(); ) {

            TargetedWriteBatch* batch = it->second;
            const vector<TargetedWrite*>& writes = batch->getWrites();

            for ( vector<TargetedWrite*>::const_iterator writeIt = writes.begin();
                writeIt != writes.end(); ++writeIt ) {

                TargetedWrite* write = *writeIt;
                targetedWriteOps.insert( &writeOps[write->writeOpRef.first] );
            }

            // Note that we need to *erase* first, *then* delete, since the map keys are ptrs from
            // the values
            batchMap->erase( it++ );
            delete batch;
        }
        batchMap->clear();

        // Cancel all the write ops we found above
        for ( set<WriteOp*>::iterator it = targetedWriteOps.begin(); it != targetedWriteOps.end();
            ++it ) {
            WriteOp* writeOp = *it;
            writeOp->cancelWrites( &why );
        }
    }
Example #4
0
void BatchWriteOp::noteBatchError(const TargetedWriteBatch& targetedBatch,
                                  const WriteErrorDetail& error) {
    // Treat errors to get a batch response as failures of the contained writes
    BatchedCommandResponse emulatedResponse;
    toWriteErrorResponse(
        error, _clientRequest->getOrdered(), targetedBatch.getWrites().size(), &emulatedResponse);

    noteBatchResponse(targetedBatch, emulatedResponse, NULL);
}
Example #5
0
    // Helper function to cancel all the write ops of targeted batch.
    static void cancelBatch( const TargetedWriteBatch& targetedBatch,
                             WriteOp* writeOps,
                             const WriteErrorDetail& why ) {
        const vector<TargetedWrite*>& writes = targetedBatch.getWrites();

        for ( vector<TargetedWrite*>::const_iterator writeIt = writes.begin();
            writeIt != writes.end(); ++writeIt ) {

            TargetedWrite* write = *writeIt;
            // NOTE: We may repeatedly cancel a write op here, but that's fast.
            writeOps[write->writeOpRef.first].cancelWrites( &why );
        }
    }
Example #6
0
    void BatchWriteOp::buildBatchRequest( const TargetedWriteBatch& targetedBatch,
                                          BatchedCommandRequest* request ) const {

        request->setNS( _clientRequest->getNS() );
        request->setShardVersion( targetedBatch.getEndpoint().shardVersion );

        const vector<TargetedWrite*>& targetedWrites = targetedBatch.getWrites();

        for ( vector<TargetedWrite*>::const_iterator it = targetedWrites.begin();
            it != targetedWrites.end(); ++it ) {

            const WriteOpRef& writeOpRef = ( *it )->writeOpRef;
            BatchedCommandRequest::BatchType batchType = _clientRequest->getBatchType();

            // NOTE:  We copy the batch items themselves here from the client request
            // TODO: This could be inefficient, maybe we want to just reference in the future
            if ( batchType == BatchedCommandRequest::BatchType_Insert ) {
                BatchedInsertRequest* clientInsertRequest = _clientRequest->getInsertRequest();
                BSONObj insertDoc = clientInsertRequest->getDocumentsAt( writeOpRef.first );
                request->getInsertRequest()->addToDocuments( insertDoc );
            }
            else if ( batchType == BatchedCommandRequest::BatchType_Update ) {
                BatchedUpdateRequest* clientUpdateRequest = _clientRequest->getUpdateRequest();
                BatchedUpdateDocument* updateDoc = new BatchedUpdateDocument;
                clientUpdateRequest->getUpdatesAt( writeOpRef.first )->cloneTo( updateDoc );
                request->getUpdateRequest()->addToUpdates( updateDoc );
            }
            else {
                dassert( batchType == BatchedCommandRequest::BatchType_Delete );
                BatchedDeleteRequest* clientDeleteRequest = _clientRequest->getDeleteRequest();
                BatchedDeleteDocument* deleteDoc = new BatchedDeleteDocument;
                clientDeleteRequest->getDeletesAt( writeOpRef.first )->cloneTo( deleteDoc );
                request->getDeleteRequest()->addToDeletes( deleteDoc );
            }

            // TODO: We can add logic here to allow aborting individual ops
            //if ( NULL == response ) {
            //    ->responses.erase( it++ );
            //    continue;
            //}
        }

        if ( _clientRequest->isWriteConcernSet() ) {
            request->setWriteConcern( _clientRequest->getWriteConcern() );
        }
        if ( _clientRequest->isContinueOnErrorSet() ) {
            request->setContinueOnError( _clientRequest->getContinueOnError() );
        }
        request->setSession( 0 );
    }
Example #7
0
void BatchWriteOp::noteBatchResponse(const TargetedWriteBatch& targetedBatch,
                                     const BatchedCommandResponse& response,
                                     TrackedErrors* trackedErrors) {
    if (!response.getOk()) {
        WriteErrorDetail error;
        cloneCommandErrorTo(response, &error);

        // Treat command errors exactly like other failures of the batch
        // Note that no errors will be tracked from these failures - as-designed
        noteBatchError(targetedBatch, error);
        return;
    }

    dassert(response.getOk());

    // Stop tracking targeted batch
    _targeted.erase(&targetedBatch);

    // Increment stats for this batch
    incBatchStats(_clientRequest->getBatchType(), response, _stats.get());

    //
    // Assign errors to particular items.
    // Write Concern errors are stored and handled later.
    //

    // Special handling for write concern errors, save for later
    if (response.isWriteConcernErrorSet()) {
        unique_ptr<ShardWCError> wcError(
            new ShardWCError(targetedBatch.getEndpoint(), *response.getWriteConcernError()));
        _wcErrors.mutableVector().push_back(wcError.release());
    }

    vector<WriteErrorDetail*> itemErrors;

    // Handle batch and per-item errors
    if (response.isErrDetailsSet()) {
        // Per-item errors were set
        itemErrors.insert(
            itemErrors.begin(), response.getErrDetails().begin(), response.getErrDetails().end());

        // Sort per-item errors by index
        std::sort(itemErrors.begin(), itemErrors.end(), WriteErrorDetailComp());
    }

    //
    // Go through all pending responses of the op and sorted remote reponses, populate errors
    // This will either set all errors to the batch error or apply per-item errors as-needed
    //
    // If the batch is ordered, cancel all writes after the first error for retargeting.
    //

    bool ordered = _clientRequest->getOrdered();

    vector<WriteErrorDetail*>::iterator itemErrorIt = itemErrors.begin();
    int index = 0;
    WriteErrorDetail* lastError = NULL;
    for (vector<TargetedWrite*>::const_iterator it = targetedBatch.getWrites().begin();
         it != targetedBatch.getWrites().end();
         ++it, ++index) {
        const TargetedWrite* write = *it;
        WriteOp& writeOp = _writeOps[write->writeOpRef.first];

        dassert(writeOp.getWriteState() == WriteOpState_Pending);

        // See if we have an error for the write
        WriteErrorDetail* writeError = NULL;

        if (itemErrorIt != itemErrors.end() && (*itemErrorIt)->getIndex() == index) {
            // We have an per-item error for this write op's index
            writeError = *itemErrorIt;
            ++itemErrorIt;
        }

        // Finish the response (with error, if needed)
        if (NULL == writeError) {
            if (!ordered || !lastError) {
                writeOp.noteWriteComplete(*write);
            } else {
                // We didn't actually apply this write - cancel so we can retarget
                dassert(writeOp.getNumTargeted() == 1u);
                writeOp.cancelWrites(lastError);
            }
        } else {
            writeOp.noteWriteError(*write, *writeError);
            lastError = writeError;
        }
    }

    // Track errors we care about, whether batch or individual errors
    if (NULL != trackedErrors) {
        trackErrors(targetedBatch.getEndpoint(), itemErrors, trackedErrors);
    }

    // Track upserted ids if we need to
    if (response.isUpsertDetailsSet()) {
        const vector<BatchedUpsertDetail*>& upsertedIds = response.getUpsertDetails();
        for (vector<BatchedUpsertDetail*>::const_iterator it = upsertedIds.begin();
             it != upsertedIds.end();
             ++it) {
            // The child upserted details don't have the correct index for the full batch
            const BatchedUpsertDetail* childUpsertedId = *it;

            // Work backward from the child batch item index to the batch item index
            int childBatchIndex = childUpsertedId->getIndex();
            int batchIndex = targetedBatch.getWrites()[childBatchIndex]->writeOpRef.first;

            // Push the upserted id with the correct index into the batch upserted ids
            BatchedUpsertDetail* upsertedId = new BatchedUpsertDetail;
            upsertedId->setIndex(batchIndex);
            upsertedId->setUpsertedID(childUpsertedId->getUpsertedID());
            _upsertedIds.mutableVector().push_back(upsertedId);
        }
    }
}
Example #8
0
// Targets the write ops that are currently in the Ready state and groups the resulting
// TargetedWrites into one TargetedWriteBatch per shard endpoint.
//
// Parameters:
//   txn                 - passed through to WriteOp::targetWrites().
//   targeter            - passed through to WriteOp::targetWrites().
//   recordTargetErrors  - if false, the first targeting failure cancels every batch built so
//                         far and its Status is returned.  If true, the failure is either
//                         recorded on the write op itself or (ordered, with a batch already
//                         built) the op is cancelled and targeting stops so the existing batch
//                         can be sent first.
//   targetedBatches     - output; receives a pointer to every non-empty batch built here.  The
//                         same pointers are also inserted into _targeted.
//
// Returns Status::OK() unless targeting fails while recordTargetErrors is false.
//
// NOTE(review): the batches are allocated with 'new' here and are not deleted in this function
// on the success path; who frees them is not visible from this function - confirm with callers.
Status BatchWriteOp::targetBatch(OperationContext* txn,
                                 const NSTargeter& targeter,
                                 bool recordTargetErrors,
                                 vector<TargetedWriteBatch*>* targetedBatches) {
    //
    // Targeting of unordered batches is fairly simple - each remaining write op is targeted,
    // and each of those targeted writes are grouped into a batch for a particular shard
    // endpoint.
    //
    // Targeting of ordered batches is a bit more complex - to respect the ordering of the
    // batch, we can only send:
    // A) a single targeted batch to one shard endpoint
    // B) multiple targeted batches, but only containing targeted writes for a single write op
    //
    // This means that any multi-shard write operation must be targeted and sent one-by-one.
    // Subsequent single-shard write operations can be batched together if they go to the same
    // place.
    //
    // Ex: ShardA : { skey : a->k }, ShardB : { skey : k->z }
    //
    // Ordered insert batch of: [{ skey : a }, { skey : b }, { skey : x }]
    // broken into:
    //  [{ skey : a }, { skey : b }],
    //  [{ skey : x }]
    //
    // Ordered update Batch of :
    //  [{ skey : a }{ $push },
    //   { skey : b }{ $push },
    //   { skey : [c, x] }{ $push },
    //   { skey : y }{ $push },
    //   { skey : z }{ $push }]
    // broken into:
    //  [{ skey : a }, { skey : b }],
    //  [{ skey : [c,x] }],
    //  [{ skey : y }, { skey : z }]
    //

    const bool ordered = _clientRequest->getOrdered();

    // Both maps are keyed by endpoint pointer; batchSizes tracks the op count and byte size
    // accumulated so far for the batch with the same key in batchMap.
    TargetedBatchMap batchMap;
    TargetedBatchSizeMap batchSizes;

    // NOTE(review): this counter is incremented below but never read within this function.
    int numTargetErrors = 0;

    size_t numWriteOps = _clientRequest->sizeWriteOps();
    for (size_t i = 0; i < numWriteOps; ++i) {
        WriteOp& writeOp = _writeOps[i];

        // Only target _Ready ops
        if (writeOp.getWriteState() != WriteOpState_Ready)
            continue;

        //
        // Get TargetedWrites from the targeter for the write operation
        //

        // TargetedWrites need to be owned once returned
        OwnedPointerVector<TargetedWrite> writesOwned;
        vector<TargetedWrite*>& writes = writesOwned.mutableVector();

        Status targetStatus = writeOp.targetWrites(txn, targeter, &writes);

        if (!targetStatus.isOK()) {
            WriteErrorDetail targetError;
            buildTargetError(targetStatus, &targetError);

            if (!recordTargetErrors) {
                // Cancel current batch state with an error

                cancelBatches(targetError, _writeOps, &batchMap);
                dassert(batchMap.empty());
                return targetStatus;
            } else if (!ordered || batchMap.empty()) {
                // Record an error for this batch

                writeOp.setOpError(targetError);
                ++numTargetErrors;

                // In the ordered case this branch is only reached with an empty batchMap, so
                // returning here does not drop any batch that was already built.
                if (ordered)
                    return Status::OK();

                continue;
            } else {
                dassert(ordered && !batchMap.empty());

                // Send out what we have, but don't record an error yet, since there may be an
                // error in the writes before this point.

                writeOp.cancelWrites(&targetError);
                break;
            }
        }

        //
        // If ordered and we have a previous endpoint, make sure we don't need to send these
        // targeted writes to any other endpoints.
        //

        if (ordered && !batchMap.empty()) {
            dassert(batchMap.size() == 1u);
            if (isNewBatchRequired(writes, batchMap)) {
                // Presumably leaves the op re-targetable in a later round - confirm against
                // WriteOp::cancelWrites().
                writeOp.cancelWrites(NULL);
                break;
            }
        }

        //
        // If this write will push us over some sort of size limit, stop targeting
        //

        int writeSizeBytes = getWriteSizeBytes(writeOp);
        if (wouldMakeBatchesTooBig(writes, writeSizeBytes, batchSizes)) {
            // The invariant asserts that at least one batch already exists whenever the size
            // limit trips, i.e. the limit is never expected to trip on the first write.
            invariant(!batchMap.empty());
            writeOp.cancelWrites(NULL);
            break;
        }

        //
        // Targeting went ok, add to appropriate TargetedBatch
        //

        for (vector<TargetedWrite*>::iterator it = writes.begin(); it != writes.end(); ++it) {
            TargetedWrite* write = *it;

            TargetedBatchMap::iterator batchIt = batchMap.find(&write->endpoint);
            TargetedBatchSizeMap::iterator batchSizeIt = batchSizes.find(&write->endpoint);

            if (batchIt == batchMap.end()) {
                // First write for this endpoint: create its batch and size entry.  Both maps
                // are keyed by the address of the new batch's own endpoint, not the write's.
                TargetedWriteBatch* newBatch = new TargetedWriteBatch(write->endpoint);
                batchIt = batchMap.insert(make_pair(&newBatch->getEndpoint(), newBatch)).first;
                batchSizeIt =
                    batchSizes.insert(make_pair(&newBatch->getEndpoint(), BatchSize())).first;
            }

            TargetedWriteBatch* batch = batchIt->second;
            BatchSize& batchSize = batchSizeIt->second;

            // The full size of the write op is charged to every endpoint it is targeted at.
            ++batchSize.numOps;
            batchSize.sizeBytes += writeSizeBytes;
            batch->addWrite(write);
        }

        // Relinquish ownership of TargetedWrites, now the TargetedBatches own them
        writesOwned.mutableVector().clear();

        //
        // Break if we're ordered and we have more than one endpoint - later writes cannot be
        // enforced as ordered across multiple shard endpoints.
        //

        if (ordered && batchMap.size() > 1u)
            break;
    }

    //
    // Send back our targeted batches
    //

    for (TargetedBatchMap::iterator it = batchMap.begin(); it != batchMap.end(); ++it) {
        TargetedWriteBatch* batch = it->second;

        // A batch is only created above together with its first write, so an empty batch is
        // not expected here; this is a defensive skip.
        if (batch->getWrites().empty())
            continue;

        // Remember targeted batch for reporting
        _targeted.insert(batch);
        // Send the handle back to caller
        targetedBatches->push_back(batch);
    }

    return Status::OK();
}
Example #9
0
    void BatchWriteOp::noteBatchResponse( const TargetedWriteBatch& targetedBatch,
                                          const BatchedCommandResponse& response,
                                          TrackedErrors* trackedErrors ) {

        //
        // Organize errors based on error code.
        // We may have *either* a batch error or errors per-item.
        // (Write Concern errors are stored and handled later.)
        //

        vector<BatchedErrorDetail*> itemErrors;
        scoped_ptr<BatchedErrorDetail> batchError;

        if ( !response.getOk() ) {

            int errCode = response.getErrCode();
            bool isWCError = isWCErrCode( errCode );

            // Special handling for write concern errors, save for later
            if ( isWCError ) {
                BatchedErrorDetail error;
                cloneBatchErrorTo( response, &error );
                ShardError* wcError = new ShardError( targetedBatch.getEndpoint(), error );
                _wcErrors.mutableVector().push_back( wcError );
            }

            // Handle batch and per-item errors
            if ( response.isErrDetailsSet() ) {

                // Per-item errors were set
                itemErrors.insert( itemErrors.begin(),
                                   response.getErrDetails().begin(),
                                   response.getErrDetails().end() );

                // Sort per-item errors by index
                std::sort( itemErrors.begin(), itemErrors.end(), BatchedErrorDetailComp() );
            }
            else if ( !isWCError ) {

                // Per-item errors were not set and this error is not a WC error
                // => this is a full-batch error
                batchError.reset( new BatchedErrorDetail );
                cloneBatchErrorTo( response, batchError.get() );
            }
        }

        // We can't have both a batch error and per-item errors
        dassert( !( batchError && !itemErrors.empty() ) );

        //
        // Go through all pending responses of the op and sorted remote reponses, populate errors
        // This will either set all errors to the batch error or apply per-item errors as-needed
        //

        vector<BatchedErrorDetail*>::iterator itemErrorIt = itemErrors.begin();
        int index = 0;
        for ( vector<TargetedWrite*>::const_iterator it = targetedBatch.getWrites().begin();
            it != targetedBatch.getWrites().end(); ++it, ++index ) {

            const TargetedWrite* write = *it;
            WriteOp& writeOp = _writeOps[write->writeOpRef.first];

            dassert( writeOp.getWriteState() == WriteOpState_Pending );

            // See if we have an error for the write
            BatchedErrorDetail* writeError = NULL;

            if ( batchError ) {
                // Default to batch error, if it exists
                writeError = batchError.get();
            }
            else if ( itemErrorIt != itemErrors.end() && ( *itemErrorIt )->getIndex() == index ) {
                // We have an per-item error for this write op's index
                writeError = *itemErrorIt;
                ++itemErrorIt;
            }

            // Finish the response (with error, if needed)
            if ( NULL == writeError ) {
                writeOp.noteWriteComplete( *write );
            }
            else {
                writeOp.noteWriteError( *write, *writeError );
            }
        }

        // Track errors we care about, whether batch or individual errors
        if ( NULL != trackedErrors ) {
            trackErrors( targetedBatch.getEndpoint(), batchError.get(), itemErrors, trackedErrors );
        }

        // Stop tracking targeted batch
        _targeted.erase( &targetedBatch );
    }