Code example #1
File: cluster_write.cpp  Project: wzqtony/mongo
void ClusterWriter::write(OperationContext* opCtx,
                          const BatchedCommandRequest& request,
                          BatchWriteExecStats* stats,
                          BatchedCommandResponse* response) {
    const NamespaceString& nss = request.getNS();

    LastError::Disabled disableLastError(&LastError::get(opCtx->getClient()));

    // Config writes and shard writes are done differently
    if (nss.db() == NamespaceString::kConfigDb || nss.db() == NamespaceString::kAdminDb) {
        Grid::get(opCtx)->catalogClient()->writeConfigServerDirect(opCtx, request, response);
    } else {
        TargeterStats targeterStats;

        {
            ChunkManagerTargeter targeter(request.getTargetingNS(), &targeterStats);

            Status targetInitStatus = targeter.init(opCtx);
            if (!targetInitStatus.isOK()) {
                toBatchError({targetInitStatus.code(),
                              str::stream() << "unable to target"
                                            << (request.isInsertIndexRequest() ? " index" : "")
                                            << " write op for collection "
                                            << request.getTargetingNS().ns()
                                            << causedBy(targetInitStatus)},
                             response);
                return;
            }

            BatchWriteExec::executeBatch(opCtx, targeter, request, response, stats);
        }

        splitIfNeeded(opCtx, request.getNS(), targeterStats);
    }
}
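
Every snippet on this page reports validation failures through a toBatchError helper whose definition is not shown. A minimal sketch consistent with its call sites here (an assumption inferred from usage, not the verbatim upstream implementation):

// Sketch (inferred from call sites): turn a Status into a top-level batch
// error on the response, discarding any partial response state.
static void toBatchError( const Status& status, BatchedCommandResponse* response ) {
    response->clear();
    response->setErrCode( status.code() );
    response->setErrMessage( status.reason() );
    response->setOk( false );
    dassert( response->isValid( NULL ) );
}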
Code example #2
File: cluster_write.cpp  Project: 504com/mongo
    void ClusterWriter::shardWrite( const BatchedCommandRequest& request,
                                    BatchedCommandResponse* response ) {

        ChunkManagerTargeter targeter;
        Status targetInitStatus = targeter.init( NamespaceString( request.getTargetingNS() ) );

        if ( !targetInitStatus.isOK() ) {

            warning() << "could not initialize targeter for"
                      << ( request.isInsertIndexRequest() ? " index" : "" )
                      << " write op in collection " << request.getTargetingNS() << endl;

            // Errors will be reported in response if we are unable to target
        }

        DBClientShardResolver resolver;
        DBClientMultiCommand dispatcher;
        BatchWriteExec exec( &targeter, &resolver, &dispatcher );
        exec.executeBatch( request, response );

        if ( _autoSplit )
            splitIfNeeded( request.getNS(), *targeter.getStats() );

        _stats->setShardStats( exec.releaseStats() );
    }
Code example #3
    void clusterWrite( const BatchedCommandRequest& request,
                       BatchedCommandResponse* response,
                       bool autoSplit ) {

        // App-level validation of a create index insert
        if ( request.isInsertIndexRequest() ) {
            if ( request.sizeWriteOps() != 1 || request.isWriteConcernSet() ) {

                // Invalid request to create index
                response->setOk( false );
                response->setErrCode( ErrorCodes::InvalidOptions );
                response->setErrMessage( "invalid batch request for index creation" );

                dassert( response->isValid( NULL ) );
                return;
            }
        }

        // Config writes and shard writes are done differently
        string dbName = NamespaceString( request.getNS() ).db().toString();
        if ( dbName == "config" || dbName == "admin" ) {

            bool verboseWC = request.isVerboseWC();

            // We only support batch sizes of one and {w:0} write concern for config writes
            if ( request.sizeWriteOps() != 1 || ( verboseWC && request.isWriteConcernSet() ) ) {
                // Invalid config server write
                response->setOk( false );
                response->setErrCode( ErrorCodes::InvalidOptions );
                response->setErrMessage( "invalid batch request for config write" );

                dassert( response->isValid( NULL ) );
                return;
            }

            // We need to support "best-effort" writes for pings to the config server.
            // {w:0} (!verbose) writes are interpreted as best-effort in this case - they may still
            // error, but do not do the initial fsync check.
            configWrite( request, response, verboseWC );
        }
        else {
            shardWrite( request, response, autoSplit );
        }
    }
Code example #4
File: batch_executor.cpp  Project: Rockyit/mongo
    // static
    Status WriteBatchExecutor::validateBatch( const BatchedCommandRequest& request ) {

        // Validate namespace
        const NamespaceString nss = NamespaceString( request.getNS() );
        if ( !nss.isValid() ) {
            return Status( ErrorCodes::InvalidNamespace,
                           nss.ns() + " is not a valid namespace" );
        }

        // Make sure we can write to the namespace
        Status allowedStatus = userAllowedWriteNS( nss );
        if ( !allowedStatus.isOK() ) {
            return allowedStatus;
        }

        // Validate insert index requests
        // TODO: Push insert index requests through createIndex once all upgrade paths support it
        string errMsg;
        if ( request.isInsertIndexRequest() && !request.isValidIndexRequest( &errMsg ) ) {
            return Status( ErrorCodes::InvalidOptions, errMsg );
        }

        return Status::OK();
    }
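
Because validateBatch is a side-effect-free check, a caller can route its failures through the same toBatchError path used elsewhere on this page. A hypothetical call site:

// Hypothetical call site: validate up front, report any failure as a batch error.
Status validStatus = WriteBatchExecutor::validateBatch( request );
if ( !validStatus.isOK() ) {
    toBatchError( validStatus, response );
    return;
}
// ... safe to execute the batch from here ...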
Code example #5
File: batch_executor.cpp  Project: AndrewCEmil/mongo
    void WriteBatchExecutor::executeBatch( const BatchedCommandRequest& request,
                                           BatchedCommandResponse* response ) {

        // Validate namespace
        const NamespaceString nss = NamespaceString( request.getNS() );
        if ( !nss.isValid() ) {
            toBatchError( Status( ErrorCodes::InvalidNamespace,
                                  nss.ns() + " is not a valid namespace" ),
                          response );
            return;
        }

        // Make sure we can write to the namespace
        Status allowedStatus = userAllowedWriteNS( nss );
        if ( !allowedStatus.isOK() ) {
            toBatchError( allowedStatus, response );
            return;
        }

        // Validate insert index requests
        // TODO: Push insert index requests through createIndex once all upgrade paths support it
        string errMsg;
        if ( request.isInsertIndexRequest() && !request.isValidIndexRequest( &errMsg ) ) {
            toBatchError( Status( ErrorCodes::InvalidOptions, errMsg ), response );
            return;
        }

        // Validate write concern
        // TODO: Lift write concern parsing out of this entirely
        WriteConcernOptions writeConcern;

        BSONObj wcDoc;
        if ( request.isWriteConcernSet() ) {
            wcDoc = request.getWriteConcern();
        }

        Status wcStatus = Status::OK();
        if ( wcDoc.isEmpty() ) {

            // The default write concern if empty is w : 1
            // Specifying w : 0 is/was allowed, but is interpreted identically to w : 1

            wcStatus = writeConcern.parse(
                _defaultWriteConcern.isEmpty() ?
                    WriteConcernOptions::Acknowledged : _defaultWriteConcern );

            if ( writeConcern.wNumNodes == 0 && writeConcern.wMode.empty() ) {
                writeConcern.wNumNodes = 1;
            }
        }
        else {
            wcStatus = writeConcern.parse( wcDoc );
        }

        if ( wcStatus.isOK() ) {
            wcStatus = validateWriteConcern( writeConcern );
        }

        if ( !wcStatus.isOK() ) {
            toBatchError( wcStatus, response );
            return;
        }

        if ( request.sizeWriteOps() == 0u ) {
            toBatchError( Status( ErrorCodes::InvalidLength,
                                  "no write ops were included in the batch" ),
                          response );
            return;
        }

        // Validate batch size
        if ( request.sizeWriteOps() > BatchedCommandRequest::kMaxWriteBatchSize ) {
            toBatchError( Status( ErrorCodes::InvalidLength,
                                  stream() << "exceeded maximum write batch size of "
                                           << BatchedCommandRequest::kMaxWriteBatchSize ),
                          response );
            return;
        }

        //
        // End validation
        //

        bool silentWC = writeConcern.wMode.empty() && writeConcern.wNumNodes == 0
                        && writeConcern.syncMode == WriteConcernOptions::NONE;

        Timer commandTimer;

        OwnedPointerVector<WriteErrorDetail> writeErrorsOwned;
        vector<WriteErrorDetail*>& writeErrors = writeErrorsOwned.mutableVector();

        OwnedPointerVector<BatchedUpsertDetail> upsertedOwned;
        vector<BatchedUpsertDetail*>& upserted = upsertedOwned.mutableVector();

        //
        // Apply each batch item, possibly bulking some items together in the write lock.
        // Stops on error if batch is ordered.
        //

        bulkExecute( request, &upserted, &writeErrors );

        //
        // Try to enforce the write concern if everything succeeded (unordered or ordered)
        // OR if something succeeded and we're unordered.
        //

        auto_ptr<WCErrorDetail> wcError;
        bool needToEnforceWC = writeErrors.empty()
                               || ( !request.getOrdered()
                                    && writeErrors.size() < request.sizeWriteOps() );

        if ( needToEnforceWC ) {

            _client->curop()->setMessage( "waiting for write concern" );

            WriteConcernResult res;
            Status status = waitForWriteConcern( _txn, writeConcern, _client->getLastOp(), &res );

            if ( !status.isOK() ) {
                wcError.reset( toWriteConcernError( status, res ) );
            }
        }

        //
        // Refresh metadata if needed
        //

        bool staleBatch = !writeErrors.empty()
                          && writeErrors.back()->getErrCode() == ErrorCodes::StaleShardVersion;

        if ( staleBatch ) {

            const BatchedRequestMetadata* requestMetadata = request.getMetadata();
            dassert( requestMetadata );

            // Make sure our shard name is set or is the same as what was set previously
            if ( shardingState.setShardName( requestMetadata->getShardName() ) ) {

                //
                // First, we refresh metadata if we need to based on the requested version.
                //

                ChunkVersion latestShardVersion;
                shardingState.refreshMetadataIfNeeded( request.getTargetingNS(),
                                                       requestMetadata->getShardVersion(),
                                                       &latestShardVersion );

                // Report if we're still changing our metadata
                // TODO: Better reporting per-collection
                if ( shardingState.inCriticalMigrateSection() ) {
                    noteInCriticalSection( writeErrors.back() );
                }

                if ( queueForMigrationCommit ) {

                    //
                    // Queue up for migration to end - this allows us to be sure that clients will
                    // not repeatedly try to refresh metadata that is not yet written to the config
                    // server.  Not necessary for correctness.
                    // Exposed as optional parameter to allow testing of queuing behavior with
                    // different network timings.
                    //

                    const ChunkVersion& requestShardVersion = requestMetadata->getShardVersion();

                    //
                    // Only wait if we're an older version (in the current collection epoch) and
                    // we're not write compatible, implying that the current migration is affecting
                    // writes.
                    //

                    if ( requestShardVersion.isOlderThan( latestShardVersion ) &&
                         !requestShardVersion.isWriteCompatibleWith( latestShardVersion ) ) {

                        while ( shardingState.inCriticalMigrateSection() ) {

                            log() << "write request to old shard version "
                                  << requestMetadata->getShardVersion().toString()
                                  << " waiting for migration commit" << endl;

                            shardingState.waitTillNotInCriticalSection( 10 /* secs */);
                        }
                    }
                }
            }
            else {
                // If our shard name is stale, our version must have been stale as well
                dassert( writeErrors.size() == request.sizeWriteOps() );
            }
        }

        //
        // Construct response
        //

        response->setOk( true );

        if ( !silentWC ) {

            if ( upserted.size() ) {
                response->setUpsertDetails( upserted );
            }

            if ( writeErrors.size() ) {
                response->setErrDetails( writeErrors );
            }

            if ( wcError.get() ) {
                response->setWriteConcernError( wcError.release() );
            }

            const repl::ReplicationCoordinator::Mode replMode =
                    repl::getGlobalReplicationCoordinator()->getReplicationMode();
            if (replMode != repl::ReplicationCoordinator::modeNone) {
                response->setLastOp( _client->getLastOp() );
                if (replMode == repl::ReplicationCoordinator::modeReplSet) {
                    response->setElectionId(repl::theReplSet->getElectionId());
                }
            }

            // Set the stats for the response
            response->setN( _stats->numInserted + _stats->numUpserted + _stats->numMatched
                            + _stats->numDeleted );
            if ( request.getBatchType() == BatchedCommandRequest::BatchType_Update )
                response->setNModified( _stats->numModified );
        }

        dassert( response->isValid( NULL ) );
    }
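
The silentWC flag computed mid-function controls whether the response carries any detail at all: a true {w:0} request with no journal or fsync requirement gets back only {ok:1}. Restated as a standalone predicate (a sketch of the same condition, not upstream code):

// Sketch: a batch response is "silent" when the effective write concern is
// w:0 with no sync requirement, i.e. the client asked for fire-and-forget.
static bool isSilentWriteConcern( const WriteConcernOptions& writeConcern ) {
    return writeConcern.wMode.empty()
           && writeConcern.wNumNodes == 0
           && writeConcern.syncMode == WriteConcernOptions::NONE;
}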
Code example #6
void WriteBatchExecutor::execInserts( const BatchedCommandRequest& request,
                                      std::vector<WriteErrorDetail*>* errors ) {

    // Bulk insert is a bit different from other bulk operations in that multiple request docs
    // can be processed at once inside the write lock.

    const NamespaceString nss( request.getTargetingNS() );
    scoped_ptr<BatchItemRef> currInsertItem( new BatchItemRef( &request, 0 ) );

    // Go through our request and do some preprocessing on insert documents outside the lock to
    // validate and put them in a normalized form - i.e. put _id in front and fill in
    // timestamps.  The insert document may also be invalid.
    // TODO:  Might be more efficient to do in batches.
    vector<StatusWith<BSONObj> > normalInserts;
    normalizeInserts( request, &normalInserts );

    while ( currInsertItem->getItemIndex() < static_cast<int>( request.sizeWriteOps() ) ) {

        WriteOpResult currResult;

        // Don't (re-)acquire locks and create database until it's necessary
        if ( !normalInserts[currInsertItem->getItemIndex()].isOK() ) {
            currResult.error =
                toWriteError( normalInserts[currInsertItem->getItemIndex()].getStatus() );
        }
        else {

            PageFaultRetryableSection pFaultSection;

            ////////////////////////////////////
            Lock::DBWrite writeLock( nss.ns() );
            ////////////////////////////////////

            // Check version inside of write lock

            if ( checkIsMasterForCollection( nss, &currResult.error )
                    && checkShardVersion( &shardingState, request, &currResult.error )
                    && checkIndexConstraints( &shardingState, request, &currResult.error ) ) {

                //
                // Get the collection for the insert
                //

                scoped_ptr<Client::Context> writeContext;
                Collection* collection = NULL;

                try {
                    // Context once we're locked, to set more details in currentOp()
                    // TODO: better constructor?
                    writeContext.reset( new Client::Context( request.getNS(),
                                        storageGlobalParams.dbpath,
                                        false /* don't check version */) );

                    Database* database = writeContext->db();
                    dassert( database );
                    collection = database->getCollection( nss.ns() );

                    if ( !collection ) {
                        // Implicitly create if it doesn't exist
                        collection = database->createCollection( nss.ns() );
                        if ( !collection ) {
                            currResult.error =
                                toWriteError( Status( ErrorCodes::InternalError,
                                                      "could not create collection" ) );
                        }
                    }
                }
                catch ( const DBException& ex ) {
                    Status status(ex.toStatus());
                    if (ErrorCodes::isInterruption(status.code())) {
                        throw;
                    }
                    currResult.error = toWriteError(status);
                }

                //
                // Perform writes inside write lock
                //

                while ( collection
                        && currInsertItem->getItemIndex()
                        < static_cast<int>( request.sizeWriteOps() ) ) {

                    //
                    // BEGIN CURRENT OP
                    //

                    scoped_ptr<CurOp> currentOp( beginCurrentOp( _client, *currInsertItem ) );
                    incOpStats( *currInsertItem );

                    // Get the actual document we want to write, assuming it's valid
                    const StatusWith<BSONObj>& normalInsert = //
                        normalInserts[currInsertItem->getItemIndex()];

                    const BSONObj& normalInsertDoc =
                        normalInsert.getValue().isEmpty() ?
                        currInsertItem->getDocument() : normalInsert.getValue();

                    if ( !normalInsert.isOK() ) {
                        // This insert failed on preprocessing
                        currResult.error = toWriteError( normalInsert.getStatus() );
                    }
                    else if ( !request.isInsertIndexRequest() ) {
                        // Try the insert
                        singleInsert( *currInsertItem,
                                      normalInsertDoc,
                                      collection,
                                      &currResult );
                    }
                    else {
                        // Try the create index
                        singleCreateIndex( *currInsertItem,
                                           normalInsertDoc,
                                           collection,
                                           &currResult );
                    }

                    //
                    // END CURRENT OP
                    //

                    finishCurrentOp( _client, currentOp.get(), currResult.error );

                    // Faults release the write lock
                    if ( currResult.fault )
                        break;

                    // In general, we might have stats and errors
                    incWriteStats( *currInsertItem,
                                   currResult.stats,
                                   currResult.error,
                                   currentOp.get() );

                    // Errors release the write lock
                    if ( currResult.error )
                        break;

                    // Increment in the write lock and reset the stats for next time
                    currInsertItem.reset( new BatchItemRef( &request,
                                                            currInsertItem->getItemIndex()
                                                            + 1 ) );
                    currResult.reset();

                    // Destruct curop so that our parent curop is restored, so that we
                    // record the yield count in the parent.
                    currentOp.reset(NULL);

                    // yield sometimes
                    int micros = ClientCursor::suggestYieldMicros();
                    if (micros > 0) {
                        ClientCursor::staticYield(micros, "", NULL);
                    }
                }
            }

        } // END WRITE LOCK

        //
        // Store the current error if it exists
        //

        if ( currResult.error ) {

            errors->push_back( currResult.releaseError() );
            errors->back()->setIndex( currInsertItem->getItemIndex() );

            // Break early for ordered batches
            if ( request.getOrdered() )
                break;
        }

        //
        // Fault or increment
        //

        if ( currResult.fault ) {
            // Check page fault out of lock
            currResult.fault->touch();
        }
        else {
            // Increment if not a fault
            currInsertItem.reset( new BatchItemRef( &request,
                                                    currInsertItem->getItemIndex() + 1 ) );
        }
    }

}
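
execInserts leans on a per-document contract from normalizeInserts: each input document maps to either an error Status or a normalized BSONObj, where an empty object means the original document was already in normal form. A sketch of that shape, assuming a fixDocumentForInsert-style helper (the helper name is an assumption, not confirmed by this page):

// Sketch (assumed helper fixDocumentForInsert): pre-validate and normalize
// every insert document outside the write lock; an empty result object means
// "use the original document as-is".
void normalizeInserts( const BatchedCommandRequest& request,
                       std::vector<StatusWith<BSONObj> >* normalized ) {
    for ( size_t i = 0; i < request.sizeWriteOps(); ++i ) {
        BatchItemRef item( &request, static_cast<int>( i ) );
        normalized->push_back( fixDocumentForInsert( item.getDocument() ) );
    }
}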
Code example #7
File: cluster_write.cpp  Project: 504com/mongo
    void ClusterWriter::write( const BatchedCommandRequest& request,
                               BatchedCommandResponse* response ) {

        const NamespaceString nss = NamespaceString( request.getNS() );
        if ( !nss.isValid() ) {
            toBatchError( Status( ErrorCodes::InvalidNamespace,
                                  nss.ns() + " is not a valid namespace" ),
                          response );
            return;
        }

        if ( !NamespaceString::validCollectionName( nss.coll() ) ) {
            toBatchError( Status( ErrorCodes::BadValue,
                                  str::stream() << "invalid collection name " << nss.coll() ),
                          response );
            return;
        }

        if ( request.sizeWriteOps() > BatchedCommandRequest::kMaxWriteBatchSize ) {
            toBatchError( Status( ErrorCodes::FailedToParse,
                                  str::stream() << "exceeded maximum write batch size of "
                                                << BatchedCommandRequest::kMaxWriteBatchSize ),
                          response );
            return;
        }

        string errMsg;
        if ( request.isInsertIndexRequest() && !request.isValidIndexRequest( &errMsg ) ) {
            toBatchError( Status( ErrorCodes::InvalidOptions, errMsg ), response );
            return;
        }

        // Config writes and shard writes are done differently
        string dbName = nss.db().toString();
        if ( dbName == "config" || dbName == "admin" ) {

            bool verboseWC = request.isVerboseWC();

            // We only support batch sizes of one for config writes
            if ( request.sizeWriteOps() != 1 ) {
                toBatchError( Status( ErrorCodes::InvalidOptions,
                                      mongoutils::str::stream() << "Writes to config servers must "
                                              "have batch size of 1, found "
                                              << request.sizeWriteOps() ),
                              response );
                return;
            }

            // We only support {w: 0}, {w: 1}, and {w: 'majority'} write concern for config writes
            if ( request.isWriteConcernSet() && !validConfigWC( request.getWriteConcern() )) {
                toBatchError( Status( ErrorCodes::InvalidOptions,
                                      mongoutils::str::stream()
                                          << "Invalid write concern for write to config servers: "
                                          << request.getWriteConcern() ),
                              response );
                return;
            }

            // We need to support "best-effort" writes for pings to the config server.
            // {w:0} (!verbose) writes are interpreted as best-effort in this case - they may still
            // error, but do not do the initial fsync check.
            configWrite( request, response, verboseWC );
        }
        else {
            shardWrite( request, response );
        }
    }
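
validConfigWC is only named in this snippet. A sketch consistent with the comment above, accepting exactly {w:0}, {w:1}, and {w:'majority'} (an assumption based on that comment, not the verbatim implementation):

// Sketch matching the comment above: accept a numeric w <= 1 or w:"majority";
// an absent w field means the default applies and is also accepted.
static bool validConfigWC( const BSONObj& writeConcern ) {
    BSONElement elem( writeConcern["w"] );
    if ( elem.eoo() )
        return true;                                   // no w field: default is fine
    if ( elem.isNumber() && elem.numberInt() <= 1 )
        return true;                                   // w:0 or w:1
    if ( elem.type() == String && elem.str() == "majority" )
        return true;                                   // w:"majority"
    return false;
}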