void ClusterWriter::write(OperationContext* opCtx, const BatchedCommandRequest& request, BatchWriteExecStats* stats, BatchedCommandResponse* response) { const NamespaceString& nss = request.getNS(); LastError::Disabled disableLastError(&LastError::get(opCtx->getClient())); // Config writes and shard writes are done differently if (nss.db() == NamespaceString::kConfigDb || nss.db() == NamespaceString::kAdminDb) { Grid::get(opCtx)->catalogClient()->writeConfigServerDirect(opCtx, request, response); } else { TargeterStats targeterStats; { ChunkManagerTargeter targeter(request.getTargetingNS(), &targeterStats); Status targetInitStatus = targeter.init(opCtx); if (!targetInitStatus.isOK()) { toBatchError({targetInitStatus.code(), str::stream() << "unable to target" << (request.isInsertIndexRequest() ? " index" : "") << " write op for collection " << request.getTargetingNS().ns() << causedBy(targetInitStatus)}, response); return; } BatchWriteExec::executeBatch(opCtx, targeter, request, response, stats); } splitIfNeeded(opCtx, request.getNS(), targeterStats); } }
void ClusterWriter::shardWrite( const BatchedCommandRequest& request, BatchedCommandResponse* response ) { ChunkManagerTargeter targeter; Status targetInitStatus = targeter.init( NamespaceString( request.getTargetingNS() ) ); if ( !targetInitStatus.isOK() ) { warning() << "could not initialize targeter for" << ( request.isInsertIndexRequest() ? " index" : "" ) << " write op in collection " << request.getTargetingNS() << endl; // Errors will be reported in response if we are unable to target } DBClientShardResolver resolver; DBClientMultiCommand dispatcher; BatchWriteExec exec( &targeter, &resolver, &dispatcher ); exec.executeBatch( request, response ); if ( _autoSplit ) splitIfNeeded( request.getNS(), *targeter.getStats() ); _stats->setShardStats( exec.releaseStats() ); }
void clusterWrite( const BatchedCommandRequest& request, BatchedCommandResponse* response, bool autoSplit ) { // App-level validation of a create index insert if ( request.isInsertIndexRequest() ) { if ( request.sizeWriteOps() != 1 || request.isWriteConcernSet() ) { // Invalid request to create index response->setOk( false ); response->setErrCode( ErrorCodes::InvalidOptions ); response->setErrMessage( "invalid batch request for index creation" ); dassert( response->isValid( NULL ) ); return; } } // Config writes and shard writes are done differently string dbName = NamespaceString( request.getNS() ).db().toString(); if ( dbName == "config" || dbName == "admin" ) { bool verboseWC = request.isVerboseWC(); // We only support batch sizes of one and {w:0} write concern for config writes if ( request.sizeWriteOps() != 1 || ( verboseWC && request.isWriteConcernSet() ) ) { // Invalid config server write response->setOk( false ); response->setErrCode( ErrorCodes::InvalidOptions ); response->setErrMessage( "invalid batch request for config write" ); dassert( response->isValid( NULL ) ); return; } // We need to support "best-effort" writes for pings to the config server. // {w:0} (!verbose) writes are interpreted as best-effort in this case - they may still // error, but do not do the initial fsync check. configWrite( request, response, verboseWC ); } else { shardWrite( request, response, autoSplit ); } }
// static Status WriteBatchExecutor::validateBatch( const BatchedCommandRequest& request ) { // Validate namespace const NamespaceString nss = NamespaceString( request.getNS() ); if ( !nss.isValid() ) { return Status( ErrorCodes::InvalidNamespace, nss.ns() + " is not a valid namespace" ); } // Make sure we can write to the namespace Status allowedStatus = userAllowedWriteNS( nss ); if ( !allowedStatus.isOK() ) { return allowedStatus; } // Validate insert index requests // TODO: Push insert index requests through createIndex once all upgrade paths support it string errMsg; if ( request.isInsertIndexRequest() && !request.isValidIndexRequest( &errMsg ) ) { return Status( ErrorCodes::InvalidOptions, errMsg ); } return Status::OK(); }
void WriteBatchExecutor::executeBatch( const BatchedCommandRequest& request, BatchedCommandResponse* response ) { // Validate namespace const NamespaceString nss = NamespaceString( request.getNS() ); if ( !nss.isValid() ) { toBatchError( Status( ErrorCodes::InvalidNamespace, nss.ns() + " is not a valid namespace" ), response ); return; } // Make sure we can write to the namespace Status allowedStatus = userAllowedWriteNS( nss ); if ( !allowedStatus.isOK() ) { toBatchError( allowedStatus, response ); return; } // Validate insert index requests // TODO: Push insert index requests through createIndex once all upgrade paths support it string errMsg; if ( request.isInsertIndexRequest() && !request.isValidIndexRequest( &errMsg ) ) { toBatchError( Status( ErrorCodes::InvalidOptions, errMsg ), response ); return; } // Validate write concern // TODO: Lift write concern parsing out of this entirely WriteConcernOptions writeConcern; BSONObj wcDoc; if ( request.isWriteConcernSet() ) { wcDoc = request.getWriteConcern(); } Status wcStatus = Status::OK(); if ( wcDoc.isEmpty() ) { // The default write concern if empty is w : 1 // Specifying w : 0 is/was allowed, but is interpreted identically to w : 1 wcStatus = writeConcern.parse( _defaultWriteConcern.isEmpty() ? WriteConcernOptions::Acknowledged : _defaultWriteConcern ); if ( writeConcern.wNumNodes == 0 && writeConcern.wMode.empty() ) { writeConcern.wNumNodes = 1; } } else { wcStatus = writeConcern.parse( wcDoc ); } if ( wcStatus.isOK() ) { wcStatus = validateWriteConcern( writeConcern ); } if ( !wcStatus.isOK() ) { toBatchError( wcStatus, response ); return; } if ( request.sizeWriteOps() == 0u ) { toBatchError( Status( ErrorCodes::InvalidLength, "no write ops were included in the batch" ), response ); return; } // Validate batch size if ( request.sizeWriteOps() > BatchedCommandRequest::kMaxWriteBatchSize ) { toBatchError( Status( ErrorCodes::InvalidLength, stream() << "exceeded maximum write batch size of " << BatchedCommandRequest::kMaxWriteBatchSize ), response ); return; } // // End validation // bool silentWC = writeConcern.wMode.empty() && writeConcern.wNumNodes == 0 && writeConcern.syncMode == WriteConcernOptions::NONE; Timer commandTimer; OwnedPointerVector<WriteErrorDetail> writeErrorsOwned; vector<WriteErrorDetail*>& writeErrors = writeErrorsOwned.mutableVector(); OwnedPointerVector<BatchedUpsertDetail> upsertedOwned; vector<BatchedUpsertDetail*>& upserted = upsertedOwned.mutableVector(); // // Apply each batch item, possibly bulking some items together in the write lock. // Stops on error if batch is ordered. // bulkExecute( request, &upserted, &writeErrors ); // // Try to enforce the write concern if everything succeeded (unordered or ordered) // OR if something succeeded and we're unordered. // auto_ptr<WCErrorDetail> wcError; bool needToEnforceWC = writeErrors.empty() || ( !request.getOrdered() && writeErrors.size() < request.sizeWriteOps() ); if ( needToEnforceWC ) { _client->curop()->setMessage( "waiting for write concern" ); WriteConcernResult res; Status status = waitForWriteConcern( _txn, writeConcern, _client->getLastOp(), &res ); if ( !status.isOK() ) { wcError.reset( toWriteConcernError( status, res ) ); } } // // Refresh metadata if needed // bool staleBatch = !writeErrors.empty() && writeErrors.back()->getErrCode() == ErrorCodes::StaleShardVersion; if ( staleBatch ) { const BatchedRequestMetadata* requestMetadata = request.getMetadata(); dassert( requestMetadata ); // Make sure our shard name is set or is the same as what was set previously if ( shardingState.setShardName( requestMetadata->getShardName() ) ) { // // First, we refresh metadata if we need to based on the requested version. // ChunkVersion latestShardVersion; shardingState.refreshMetadataIfNeeded( request.getTargetingNS(), requestMetadata->getShardVersion(), &latestShardVersion ); // Report if we're still changing our metadata // TODO: Better reporting per-collection if ( shardingState.inCriticalMigrateSection() ) { noteInCriticalSection( writeErrors.back() ); } if ( queueForMigrationCommit ) { // // Queue up for migration to end - this allows us to be sure that clients will // not repeatedly try to refresh metadata that is not yet written to the config // server. Not necessary for correctness. // Exposed as optional parameter to allow testing of queuing behavior with // different network timings. // const ChunkVersion& requestShardVersion = requestMetadata->getShardVersion(); // // Only wait if we're an older version (in the current collection epoch) and // we're not write compatible, implying that the current migration is affecting // writes. // if ( requestShardVersion.isOlderThan( latestShardVersion ) && !requestShardVersion.isWriteCompatibleWith( latestShardVersion ) ) { while ( shardingState.inCriticalMigrateSection() ) { log() << "write request to old shard version " << requestMetadata->getShardVersion().toString() << " waiting for migration commit" << endl; shardingState.waitTillNotInCriticalSection( 10 /* secs */); } } } } else { // If our shard name is stale, our version must have been stale as well dassert( writeErrors.size() == request.sizeWriteOps() ); } } // // Construct response // response->setOk( true ); if ( !silentWC ) { if ( upserted.size() ) { response->setUpsertDetails( upserted ); } if ( writeErrors.size() ) { response->setErrDetails( writeErrors ); } if ( wcError.get() ) { response->setWriteConcernError( wcError.release() ); } const repl::ReplicationCoordinator::Mode replMode = repl::getGlobalReplicationCoordinator()->getReplicationMode(); if (replMode != repl::ReplicationCoordinator::modeNone) { response->setLastOp( _client->getLastOp() ); if (replMode == repl::ReplicationCoordinator::modeReplSet) { response->setElectionId(repl::theReplSet->getElectionId()); } } // Set the stats for the response response->setN( _stats->numInserted + _stats->numUpserted + _stats->numMatched + _stats->numDeleted ); if ( request.getBatchType() == BatchedCommandRequest::BatchType_Update ) response->setNModified( _stats->numModified ); } dassert( response->isValid( NULL ) ); }
void WriteBatchExecutor::execInserts( const BatchedCommandRequest& request, std::vector<WriteErrorDetail*>* errors ) { // Bulk insert is a bit different from other bulk operations in that multiple request docs // can be processed at once inside the write lock. const NamespaceString nss( request.getTargetingNS() ); scoped_ptr<BatchItemRef> currInsertItem( new BatchItemRef( &request, 0 ) ); // Go through our request and do some preprocessing on insert documents outside the lock to // validate and put them in a normalized form - i.e. put _id in front and fill in // timestamps. The insert document may also be invalid. // TODO: Might be more efficient to do in batches. vector<StatusWith<BSONObj> > normalInserts; normalizeInserts( request, &normalInserts ); while ( currInsertItem->getItemIndex() < static_cast<int>( request.sizeWriteOps() ) ) { WriteOpResult currResult; // Don't (re-)acquire locks and create database until it's necessary if ( !normalInserts[currInsertItem->getItemIndex()].isOK() ) { currResult.error = toWriteError( normalInserts[currInsertItem->getItemIndex()].getStatus() ); } else { PageFaultRetryableSection pFaultSection; //////////////////////////////////// Lock::DBWrite writeLock( nss.ns() ); //////////////////////////////////// // Check version inside of write lock if ( checkIsMasterForCollection( nss, &currResult.error ) && checkShardVersion( &shardingState, request, &currResult.error ) && checkIndexConstraints( &shardingState, request, &currResult.error ) ) { // // Get the collection for the insert // scoped_ptr<Client::Context> writeContext; Collection* collection = NULL; try { // Context once we're locked, to set more details in currentOp() // TODO: better constructor? writeContext.reset( new Client::Context( request.getNS(), storageGlobalParams.dbpath, false /* don't check version */) ); Database* database = writeContext->db(); dassert( database ); collection = database->getCollection( nss.ns() ); if ( !collection ) { // Implicitly create if it doesn't exist collection = database->createCollection( nss.ns() ); if ( !collection ) { currResult.error = toWriteError( Status( ErrorCodes::InternalError, "could not create collection" ) ); } } } catch ( const DBException& ex ) { Status status(ex.toStatus()); if (ErrorCodes::isInterruption(status.code())) { throw; } currResult.error = toWriteError(status); } // // Perform writes inside write lock // while ( collection && currInsertItem->getItemIndex() < static_cast<int>( request.sizeWriteOps() ) ) { // // BEGIN CURRENT OP // scoped_ptr<CurOp> currentOp( beginCurrentOp( _client, *currInsertItem ) ); incOpStats( *currInsertItem ); // Get the actual document we want to write, assuming it's valid const StatusWith<BSONObj>& normalInsert = // normalInserts[currInsertItem->getItemIndex()]; const BSONObj& normalInsertDoc = normalInsert.getValue().isEmpty() ? currInsertItem->getDocument() : normalInsert.getValue(); if ( !normalInsert.isOK() ) { // This insert failed on preprocessing currResult.error = toWriteError( normalInsert.getStatus() ); } else if ( !request.isInsertIndexRequest() ) { // Try the insert singleInsert( *currInsertItem, normalInsertDoc, collection, &currResult ); } else { // Try the create index singleCreateIndex( *currInsertItem, normalInsertDoc, collection, &currResult ); } // // END CURRENT OP // finishCurrentOp( _client, currentOp.get(), currResult.error ); // Faults release the write lock if ( currResult.fault ) break; // In general, we might have stats and errors incWriteStats( *currInsertItem, currResult.stats, currResult.error, currentOp.get() ); // Errors release the write lock if ( currResult.error ) break; // Increment in the write lock and reset the stats for next time currInsertItem.reset( new BatchItemRef( &request, currInsertItem->getItemIndex() + 1 ) ); currResult.reset(); // Destruct curop so that our parent curop is restored, so that we // record the yield count in the parent. currentOp.reset(NULL); // yield sometimes int micros = ClientCursor::suggestYieldMicros(); if (micros > 0) { ClientCursor::staticYield(micros, "", NULL); } } } } // END WRITE LOCK // // Store the current error if it exists // if ( currResult.error ) { errors->push_back( currResult.releaseError() ); errors->back()->setIndex( currInsertItem->getItemIndex() ); // Break early for ordered batches if ( request.getOrdered() ) break; } // // Fault or increment // if ( currResult.fault ) { // Check page fault out of lock currResult.fault->touch(); } else { // Increment if not a fault currInsertItem.reset( new BatchItemRef( &request, currInsertItem->getItemIndex() + 1 ) ); } } }
void ClusterWriter::write( const BatchedCommandRequest& request, BatchedCommandResponse* response ) { const NamespaceString nss = NamespaceString( request.getNS() ); if ( !nss.isValid() ) { toBatchError( Status( ErrorCodes::InvalidNamespace, nss.ns() + " is not a valid namespace" ), response ); return; } if ( !NamespaceString::validCollectionName( nss.coll() ) ) { toBatchError( Status( ErrorCodes::BadValue, str::stream() << "invalid collection name " << nss.coll() ), response ); return; } if ( request.sizeWriteOps() > BatchedCommandRequest::kMaxWriteBatchSize ) { toBatchError( Status( ErrorCodes::FailedToParse, str::stream() << "exceeded maximum write batch size of " << BatchedCommandRequest::kMaxWriteBatchSize ), response ); return; } string errMsg; if ( request.isInsertIndexRequest() && !request.isValidIndexRequest( &errMsg ) ) { toBatchError( Status( ErrorCodes::InvalidOptions, errMsg ), response ); return; } // Config writes and shard writes are done differently string dbName = nss.db().toString(); if ( dbName == "config" || dbName == "admin" ) { bool verboseWC = request.isVerboseWC(); // We only support batch sizes of one for config writes if ( request.sizeWriteOps() != 1 ) { toBatchError( Status( ErrorCodes::InvalidOptions, mongoutils::str::stream() << "Writes to config servers must " "have batch size of 1, found " << request.sizeWriteOps() ), response ); return; } // We only support {w: 0}, {w: 1}, and {w: 'majority'} write concern for config writes if ( request.isWriteConcernSet() && !validConfigWC( request.getWriteConcern() )) { toBatchError( Status( ErrorCodes::InvalidOptions, mongoutils::str::stream() << "Invalid write concern for write" " to config servers: " << request.getWriteConcern() ), response ); return; } // We need to support "best-effort" writes for pings to the config server. // {w:0} (!verbose) writes are interpreted as best-effort in this case - they may still // error, but do not do the initial fsync check. configWrite( request, response, verboseWC ); } else { shardWrite( request, response ); } }