bool WriteBatchExecutor::ExecInsertsState::_lockAndCheckImpl(WriteOpResult* result) {
    if (hasLock()) {
        txn->getCurOp()->enter(_context.get());
        return true;
    }

    invariant(!_context.get());
    _writeLock.reset(new Lock::DBWrite(txn->lockState(), request->getNS()));
    if (!checkIsMasterForDatabase(request->getNS(), result)) {
        return false;
    }
    if (!checkShardVersion(txn, &shardingState, *request, result)) {
        return false;
    }
    if (!checkIndexConstraints(txn, &shardingState, *request, result)) {
        return false;
    }

    _context.reset(new Client::Context(request->getNS(), false /* don't check version */));
    Database* database = _context->db();
    dassert(database);
    _collection = database->getCollection(txn, request->getTargetingNS());
    if (!_collection) {
        // Implicitly create if it doesn't exist
        _collection = database->createCollection(txn, request->getTargetingNS());
        if (!_collection) {
            result->setError(
                toWriteError(Status(ErrorCodes::InternalError,
                                    "could not create collection " +
                                        request->getTargetingNS())));
            return false;
        }
    }
    return true;
}
bool VersionManager::checkShardVersionCB(OperationContext* opCtx,
                                         ShardConnection* conn_in,
                                         bool authoritative,
                                         int tryNumber) {
    return checkShardVersion(
        opCtx, conn_in->get(), conn_in->getNS(), conn_in->getManager(), authoritative, tryNumber);
}
bool VersionManager::checkShardVersionCB(OperationContext* opCtx,
                                         DBClientBase* conn_in,
                                         const string& ns,
                                         bool authoritative,
                                         int tryNumber) {
    return checkShardVersion(opCtx, conn_in, ns, nullptr, authoritative, tryNumber);
}
void ShardConnection::_init(){
    assert( _addr.size() );
    _conn = ClientConnections::get()->get( _addr );
    if ( _ns.size() ){
        checkShardVersion( *_conn , _ns );
    }
}
void checkShardVersion( DBClientBase& conn , const string& ns , bool authoritative ){
    // TODO: cache, optimize, etc...

    WriteBackListener::init( conn );

    DBConfigPtr conf = grid.getDBConfig( ns );
    if ( ! conf )
        return;

    ShardChunkVersion version = 0;
    unsigned long long officialSequenceNumber = 0;

    ChunkManagerPtr manager;
    const bool isSharded = conf->isSharded( ns );
    if ( isSharded ){
        manager = conf->getChunkManager( ns , authoritative );
        officialSequenceNumber = manager->getSequenceNumber();
    }

    unsigned long long & sequenceNumber = checkShardVersionLastSequence[ make_pair(&conn,ns) ];
    if ( sequenceNumber == officialSequenceNumber )
        return;

    if ( isSharded ){
        version = manager->getVersion( Shard::make( conn.getServerAddress() ) );
    }

    log(2) << " have to set shard version for conn: " << &conn << " ns:" << ns
           << " my last seq: " << sequenceNumber << " current: " << officialSequenceNumber
           << " version: " << version << " manager: " << manager.get() << endl;

    BSONObj result;
    if ( setShardVersion( conn , ns , version , authoritative , result ) ){
        // success!
        log(1) << " setShardVersion success!" << endl;
        sequenceNumber = officialSequenceNumber;
        dassert( sequenceNumber == checkShardVersionLastSequence[ make_pair(&conn,ns) ] );
        return;
    }

    log(1) << " setShardVersion failed!\n" << result << endl;

    if ( result.getBoolField( "need_authoritative" ) )
        massert( 10428 , "need_authoritative set but in authoritative mode already" , ! authoritative );

    if ( ! authoritative ){
        checkShardVersion( conn , ns , 1 );
        return;
    }

    log() << " setShardVersion failed: " << result << endl;
    massert( 10429 , (string)"setShardVersion failed! " + result.jsonString() , 0 );
}
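// Illustrative sketch, not part of the original source: the per-connection cache used by the
// function above can be pictured as a plain map keyed by (connection pointer, namespace).
// All names below are hypothetical stand-ins for checkShardVersionLastSequence.
#include <map>
#include <string>
#include <utility>

namespace example {

    // Remembers the ChunkManager iteration last sent to each connection for each namespace,
    // so setShardVersion is only re-sent after a reload bumps the sequence number.
    std::map<std::pair<const void*, std::string>, unsigned long long> lastSentSequence;

    bool needsVersionRefresh( const void* conn, const std::string& ns,
                              unsigned long long officialSequenceNumber ) {
        // operator[] default-constructs the counter to 0 on first use; an unsharded namespace
        // leaves officialSequenceNumber at 0, so nothing is sent in that case, matching the
        // early return in the function above.
        unsigned long long& sent = lastSentSequence[ std::make_pair( conn, ns ) ];
        return sent != officialSequenceNumber;
    }
}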
static void multiUpdate( OperationContext* txn,
                         const BatchItemRef& updateItem,
                         WriteOpResult* result ) {

    const NamespaceString nsString(updateItem.getRequest()->getNS());
    UpdateRequest request(nsString);
    request.setQuery(updateItem.getUpdate()->getQuery());
    request.setUpdates(updateItem.getUpdate()->getUpdateExpr());
    request.setMulti(updateItem.getUpdate()->getMulti());
    request.setUpsert(updateItem.getUpdate()->getUpsert());
    request.setUpdateOpLog(true);
    UpdateLifecycleImpl updateLifecycle(true, request.getNamespaceString());
    request.setLifecycle(&updateLifecycle);

    UpdateExecutor executor(&request, &txn->getCurOp()->debug());
    Status status = executor.prepare();
    if (!status.isOK()) {
        result->setError(toWriteError(status));
        return;
    }

    ///////////////////////////////////////////
    Lock::DBWrite writeLock(txn->lockState(), nsString.ns());
    ///////////////////////////////////////////

    if (!checkShardVersion(txn, &shardingState, *updateItem.getRequest(), result))
        return;

    Client::Context ctx( nsString.ns(),
                         storageGlobalParams.dbpath,
                         false /* don't check version */ );

    try {
        UpdateResult res = executor.execute(txn, ctx.db());

        const long long numDocsModified = res.numDocsModified;
        const long long numMatched = res.numMatched;
        const BSONObj resUpsertedID = res.upserted;

        // We have an _id from an insert
        const bool didInsert = !resUpsertedID.isEmpty();

        result->getStats().nModified = didInsert ? 0 : numDocsModified;
        result->getStats().n = didInsert ? 1 : numMatched;
        result->getStats().upsertedID = resUpsertedID;
    }
    catch (const DBException& ex) {
        status = ex.toStatus();
        if (ErrorCodes::isInterruption(status.code())) {
            throw;
        }
        result->setError(toWriteError(status));
    }
}
void ShardConnection::_finishInit(){
    if ( _finishedInit )
        return;
    _finishedInit = true;

    if ( _ns.size() ){
        _setVersion = checkShardVersion( *_conn , _ns );
    }
    else {
        _setVersion = false;
    }
}
/**
 * Perform a remove operation, which might remove multiple documents.  Dispatches to the
 * remove code, which currently does most of the work.
 *
 * Might fault or error, otherwise populates the result.
 */
static void multiRemove( const BatchItemRef& removeItem, WriteOpResult* result ) {

    const NamespaceString nss( removeItem.getRequest()->getNS() );
    DeleteRequest request( nss );
    request.setQuery( removeItem.getDelete()->getQuery() );
    request.setMulti( removeItem.getDelete()->getLimit() != 1 );
    request.setUpdateOpLog(true);
    request.setGod( false );
    DeleteExecutor executor( &request );
    Status status = executor.prepare();
    if ( !status.isOK() ) {
        result->error = toWriteError( status );
        return;
    }

    // NOTE: Deletes will not fault outside the lock once any data has been written
    PageFaultRetryableSection pFaultSection;

    ///////////////////////////////////////////
    Lock::DBWrite writeLock( nss.ns() );
    ///////////////////////////////////////////

    // Check version once we're locked

    if ( !checkShardVersion( &shardingState, *removeItem.getRequest(), &result->error ) ) {
        // Version error
        return;
    }

    // Context once we're locked, to set more details in currentOp()
    // TODO: better constructor?
    Client::Context writeContext( nss.ns(),
                                  storageGlobalParams.dbpath,
                                  false /* don't check version */);

    try {
        result->stats.n = executor.execute();
    }
    catch ( const PageFaultException& ex ) {
        // TODO: An actual data structure that's not an exception for this
        result->fault = new PageFaultException( ex );
    }
    catch ( const DBException& ex ) {
        status = ex.toStatus();
        if (ErrorCodes::isInterruption(status.code())) {
            throw;
        }
        result->error = toWriteError(status);
    }
}
void WriteBatchExecutor::execUpdate( const BatchItemRef& updateItem,
                                     BSONObj* upsertedId,
                                     WriteErrorDetail** error ) {

    // Updates currently do a lot of the lock management internally

    const BatchedCommandRequest& request = *updateItem.getRequest();
    const NamespaceString nss( updateItem.getRequest()->getNS() );

    // BEGIN CURRENT OP
    scoped_ptr<CurOp> currentOp( beginCurrentOp( _client, updateItem ) );
    incOpStats( updateItem );

    WriteOpResult result;

    {
        ///////////////////////////////////////////
        Lock::DBWrite writeLock( nss.ns() );
        ///////////////////////////////////////////

        // Check version once we're locked

        if ( checkShardVersion( &shardingState, request, &result.error ) ) {

            // Context once we're locked, to set more details in currentOp()
            // TODO: better constructor?
            Client::Context writeContext( nss.ns(),
                                          storageGlobalParams.dbpath,
                                          false /* don't check version */);

            multiUpdate( updateItem, &result );
            incWriteStats( updateItem, result.stats, result.error, currentOp.get() );

            if ( !result.stats.upsertedID.isEmpty() ) {
                *upsertedId = result.stats.upsertedID.getOwned();
            }
        }
    }

    // END CURRENT OP
    finishCurrentOp( _client, currentOp.get(), result.error );

    if ( result.error ) {
        result.error->setIndex( updateItem.getItemIndex() );
        *error = result.releaseError();
    }
}
/**
 * Perform a remove operation, which might remove multiple documents.  Dispatches to the
 * remove code, which currently does most of the work.
 *
 * Might fault or error, otherwise populates the result.
 */
static void multiRemove( OperationContext* txn,
                         const BatchItemRef& removeItem,
                         WriteOpResult* result ) {

    const NamespaceString nss( removeItem.getRequest()->getNS() );
    DeleteRequest request( nss );
    request.setQuery( removeItem.getDelete()->getQuery() );
    request.setMulti( removeItem.getDelete()->getLimit() != 1 );
    request.setUpdateOpLog(true);
    request.setGod( false );
    DeleteExecutor executor( &request );
    Status status = executor.prepare();
    if ( !status.isOK() ) {
        result->setError(toWriteError(status));
        return;
    }

    ///////////////////////////////////////////
    Lock::DBWrite writeLock(txn->lockState(), nss.ns());
    ///////////////////////////////////////////

    // Check version once we're locked

    if (!checkShardVersion(txn, &shardingState, *removeItem.getRequest(), result)) {
        // Version error
        return;
    }

    // Context once we're locked, to set more details in currentOp()
    // TODO: better constructor?
    Client::Context writeContext( nss.ns(),
                                  storageGlobalParams.dbpath,
                                  false /* don't check version */);

    try {
        result->getStats().n = executor.execute(txn, writeContext.db());
    }
    catch ( const DBException& ex ) {
        status = ex.toStatus();
        if (ErrorCodes::isInterruption(status.code())) {
            throw;
        }
        result->setError(toWriteError(status));
    }
}
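// Illustrative sketch (hypothetical helper, not in the original source): the wire protocol's
// delete "limit" field maps onto the executor's multi flag exactly as in the setMulti() call
// above -- limit == 1 removes at most one matching document, while limit == 0 removes all
// matching documents.
inline bool deleteLimitToMulti( int limit ) {
    return limit != 1;  // mirrors request.setMulti( removeItem.getDelete()->getLimit() != 1 )
}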
bool WriteBatchExecutor::ExecInsertsState::_lockAndCheckImpl(WriteOpResult* result) {
    if (hasLock()) {
        txn->getCurOp()->enter(_context.get());
        return true;
    }

    invariant(!_context.get());
    _writeLock.reset(new Lock::DBLock(txn->lockState(),
                                      nsToDatabase(request->getNS()),
                                      newlm::MODE_X));
    if (!checkIsMasterForDatabase(request->getNS(), result)) {
        return false;
    }
    if (!checkShardVersion(txn, &shardingState, *request, result)) {
        return false;
    }
    if (!checkIndexConstraints(txn, &shardingState, *request, result)) {
        return false;
    }

    _context.reset(new Client::Context(txn, request->getNS(), false));
    Database* database = _context->db();
    dassert(database);
    _collection = database->getCollection(txn, request->getTargetingNS());
    if (!_collection) {
        WriteUnitOfWork wunit(txn);

        // Implicitly create if it doesn't exist
        _collection = database->createCollection(txn, request->getTargetingNS());
        if (!_collection) {
            result->setError(
                toWriteError(Status(ErrorCodes::InternalError,
                                    "could not create collection " +
                                        request->getTargetingNS())));
            return false;
        }
        repl::logOp(txn,
                    "c",
                    (database->name() + ".$cmd").c_str(),
                    BSON("create" << nsToCollectionSubstring(request->getTargetingNS())));
        wunit.commit();
    }
    return true;
}
auto_ptr<DBClientCursor> ShardedCursor::query( const string& server , int num , BSONObj extra ){
    uassert( "cursor already done" , ! _done );

    BSONObj q = _query;
    if ( ! extra.isEmpty() ){
        q = concatQuery( q , extra );
    }

    ScopedDbConnection conn( server );
    checkShardVersion( conn.conn() , _ns );

    log(5) << "ShardedCursor::query server:" << server << " ns:" << _ns
           << " query:" << q << " num:" << num
           << " _fields:" << _fields << " options: " << _options << endl;

    auto_ptr<DBClientCursor> cursor =
        conn->query( _ns.c_str() , q , num , 0 ,
                     ( _fields.isEmpty() ? 0 : &_fields ) , _options );

    if ( cursor->hasResultFlag( QueryResult::ResultFlag_ShardConfigStale ) )
        throw StaleConfigException( _ns , "ShardedCursor::query" );

    conn.done();
    return cursor;
}
void checkVersions( const string& ns ){
    vector<Shard> all;
    Shard::getAllShards( all );

    for ( unsigned i=0; i<all.size(); i++ ){
        Status* &s = _hosts[all[i].getConnString()];
        if ( ! s )
            s = new Status();
    }

    for ( map<string,Status*>::iterator i=_hosts.begin(); i!=_hosts.end(); ++i ){
        if ( ! Shard::isAShard( i->first ) )
            continue;

        Status* ss = i->second;
        assert( ss );
        if ( ! ss->avail )
            ss->avail = pool.get( i->first );
        checkShardVersion( *ss->avail , ns );
    }
}
bool VersionManager::checkShardVersionCB( DBClientBase* conn_in ,
                                          const string& ns ,
                                          bool authoritative ,
                                          int tryNumber ) {
    return checkShardVersion( conn_in, ns, ChunkManagerPtr(), authoritative, tryNumber );
}
/**
 * @return true if had to do something
 */
bool checkShardVersion( DBClientBase * conn_in , const string& ns , ChunkManagerPtr refManager,
                        bool authoritative , int tryNumber ) {
    // TODO: cache, optimize, etc...

    WriteBackListener::init( *conn_in );

    DBConfigPtr conf = grid.getDBConfig( ns );
    if ( ! conf )
        return false;

    DBClientBase* conn = getVersionable( conn_in );
    verify(conn); // errors thrown above

    unsigned long long officialSequenceNumber = 0;

    ChunkManagerPtr manager;
    const bool isSharded = conf->isSharded( ns );
    if ( isSharded ) {
        manager = conf->getChunkManagerIfExists( ns , authoritative );
        // It's possible the chunk manager was reset since we checked whether sharded was true,
        // so must check this here.
        if( manager ) officialSequenceNumber = manager->getSequenceNumber();
    }

    // Check this manager against the reference manager
    if( isSharded && manager ){
        Shard shard = Shard::make( conn->getServerAddress() );
        if( refManager && ! refManager->compatibleWith( manager, shard ) ){
            throw SendStaleConfigException( ns,
                                            str::stream()
                                                << "manager ("
                                                << manager->getVersion( shard ).toString()
                                                << " : " << manager->getSequenceNumber() << ") "
                                                << "not compatible with reference manager ("
                                                << refManager->getVersion( shard ).toString()
                                                << " : " << refManager->getSequenceNumber() << ") "
                                                << "on shard " << shard.getName()
                                                << " (" << shard.getAddress().toString() << ")",
                                            refManager->getVersion( shard ),
                                            manager->getVersion( shard ) );
        }
    }
    else if( refManager ){
        Shard shard = Shard::make( conn->getServerAddress() );
        string msg( str::stream()
                        << "not sharded ("
                        << ( (manager.get() == 0) ? string( "<none>" ) :
                                str::stream() << manager->getSequenceNumber() )
                        << ") but has reference manager ("
                        << refManager->getSequenceNumber() << ") "
                        << "on conn " << conn->getServerAddress() << " ("
                        << conn_in->getServerAddress() << ")" );

        throw SendStaleConfigException( ns, msg,
                                        refManager->getVersion( shard ),
                                        ShardChunkVersion( 0, OID() ));
    }

    // has the ChunkManager been reloaded since the last time we updated the connection-level version?
    // (ie., last time we issued the setShardVersions below)
    unsigned long long sequenceNumber = connectionShardStatus.getSequence(conn,ns);
    if ( sequenceNumber == officialSequenceNumber ) {
        return false;
    }

    ShardChunkVersion version = ShardChunkVersion( 0, OID() );
    if ( isSharded && manager ) {
        version = manager->getVersion( Shard::make( conn->getServerAddress() ) );
    }

    if( ! version.isSet() ){
        LOG(0) << "resetting shard version of " << ns << " on " << conn->getServerAddress()
               << ", " << ( ! isSharded ? "no longer sharded" :
                          ( ! manager ? "no chunk manager found" : "version is zero" ) ) << endl;
    }

    LOG(2) << " have to set shard version for conn: " << conn->getServerAddress() << " ns:" << ns
           << " my last seq: " << sequenceNumber << " current: " << officialSequenceNumber
           << " version: " << version << " manager: " << manager.get() << endl;

    const string versionableServerAddress(conn->getServerAddress());

    BSONObj result;
    if ( setShardVersion( *conn , ns , version , authoritative , result ) ) {
        // success!
        LOG(1) << " setShardVersion success: " << result << endl;
        connectionShardStatus.setSequence( conn , ns , officialSequenceNumber );
        return true;
    }

    LOG(1) << " setShardVersion failed!\n" << result << endl;

    if ( result["need_authoritative"].trueValue() )
        massert( 10428 , "need_authoritative set but in authoritative mode already" , ! authoritative );

    if ( ! authoritative ) {
        // use the original connection and get a fresh versionable connection
        // since conn can be invalidated (or worse, freed) after the failure
        checkShardVersion(conn_in, ns, refManager, 1, tryNumber + 1);
        return true;
    }

    if ( result["reloadConfig"].trueValue() ) {
        if( result["version"].timestampTime() == 0 ){
            warning() << "reloading full configuration for " << conf->getName()
                      << ", connection state indicates significant version changes" << endl;
            // reload db
            conf->reload();
        }
        else {
            // reload config
            conf->getChunkManager( ns , true );
        }
    }

    const int maxNumTries = 7;
    if ( tryNumber < maxNumTries ) {
        LOG( tryNumber < ( maxNumTries / 2 ) ? 1 : 0 )
            << "going to retry checkShardVersion host: " << versionableServerAddress
            << " " << result << endl;
        sleepmillis( 10 * tryNumber );
        // use the original connection and get a fresh versionable connection
        // since conn can be invalidated (or worse, freed) after the failure
        checkShardVersion(conn_in, ns, refManager, true, tryNumber + 1);
        return true;
    }

    string errmsg = str::stream() << "setShardVersion failed host: "
                                  << versionableServerAddress << " " << result;
    log() << " " << errmsg << endl;
    massert( 10429 , errmsg , 0 );
    return true;
}
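// Illustrative sketch, not part of the original source: the retry path above sleeps
// 10 * tryNumber milliseconds before each recursive attempt, so with maxNumTries == 7 the
// total backoff before massert 10429 fires is roughly 10 + 20 + ... + 60 = 210 ms.
#include <iostream>

int totalBackoffMillis( int maxNumTries ) {
    int total = 0;
    for ( int tryNumber = 1; tryNumber < maxNumTries; ++tryNumber )
        total += 10 * tryNumber;  // mirrors sleepmillis( 10 * tryNumber ) above
    return total;
}

int main() {
    std::cout << totalBackoffMillis( 7 ) << std::endl;  // prints 210
    return 0;
}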
void WriteBatchExecutor::execInserts( const BatchedCommandRequest& request,
                                      std::vector<WriteErrorDetail*>* errors ) {

    // Bulk insert is a bit different from other bulk operations in that multiple request docs
    // can be processed at once inside the write lock.

    const NamespaceString nss( request.getTargetingNS() );
    scoped_ptr<BatchItemRef> currInsertItem( new BatchItemRef( &request, 0 ) );

    // Go through our request and do some preprocessing on insert documents outside the lock to
    // validate and put them in a normalized form - i.e. put _id in front and fill in
    // timestamps.  The insert document may also be invalid.
    // TODO: Might be more efficient to do in batches.
    vector<StatusWith<BSONObj> > normalInserts;
    normalizeInserts( request, &normalInserts );

    while ( currInsertItem->getItemIndex() < static_cast<int>( request.sizeWriteOps() ) ) {

        WriteOpResult currResult;

        // Don't (re-)acquire locks and create database until it's necessary
        if ( !normalInserts[currInsertItem->getItemIndex()].isOK() ) {
            currResult.error =
                toWriteError( normalInserts[currInsertItem->getItemIndex()].getStatus() );
        }
        else {

            PageFaultRetryableSection pFaultSection;

            ////////////////////////////////////
            Lock::DBWrite writeLock( nss.ns() );
            ////////////////////////////////////

            // Check version inside of write lock

            if ( checkIsMasterForCollection( nss, &currResult.error )
                 && checkShardVersion( &shardingState, request, &currResult.error )
                 && checkIndexConstraints( &shardingState, request, &currResult.error ) ) {

                //
                // Get the collection for the insert
                //

                scoped_ptr<Client::Context> writeContext;
                Collection* collection = NULL;

                try {
                    // Context once we're locked, to set more details in currentOp()
                    // TODO: better constructor?
                    writeContext.reset( new Client::Context( request.getNS(),
                                                             storageGlobalParams.dbpath,
                                                             false /* don't check version */) );

                    Database* database = writeContext->db();
                    dassert( database );
                    collection = database->getCollection( nss.ns() );

                    if ( !collection ) {
                        // Implicitly create if it doesn't exist
                        collection = database->createCollection( nss.ns() );
                        if ( !collection ) {
                            currResult.error =
                                toWriteError( Status( ErrorCodes::InternalError,
                                                      "could not create collection" ) );
                        }
                    }
                }
                catch ( const DBException& ex ) {
                    Status status(ex.toStatus());
                    if (ErrorCodes::isInterruption(status.code())) {
                        throw;
                    }
                    currResult.error = toWriteError(status);
                }

                //
                // Perform writes inside write lock
                //

                while ( collection
                        && currInsertItem->getItemIndex()
                               < static_cast<int>( request.sizeWriteOps() ) ) {

                    //
                    // BEGIN CURRENT OP
                    //

                    scoped_ptr<CurOp> currentOp( beginCurrentOp( _client, *currInsertItem ) );
                    incOpStats( *currInsertItem );

                    // Get the actual document we want to write, assuming it's valid
                    const StatusWith<BSONObj>& normalInsert = //
                        normalInserts[currInsertItem->getItemIndex()];

                    const BSONObj& normalInsertDoc =
                        normalInsert.getValue().isEmpty() ?
                            currInsertItem->getDocument() : normalInsert.getValue();

                    if ( !normalInsert.isOK() ) {
                        // This insert failed on preprocessing
                        currResult.error = toWriteError( normalInsert.getStatus() );
                    }
                    else if ( !request.isInsertIndexRequest() ) {
                        // Try the insert
                        singleInsert( *currInsertItem, normalInsertDoc, collection, &currResult );
                    }
                    else {
                        // Try the create index
                        singleCreateIndex( *currInsertItem,
                                           normalInsertDoc,
                                           collection,
                                           &currResult );
                    }

                    //
                    // END CURRENT OP
                    //

                    finishCurrentOp( _client, currentOp.get(), currResult.error );

                    // Faults release the write lock
                    if ( currResult.fault )
                        break;

                    // In general, we might have stats and errors
                    incWriteStats( *currInsertItem,
                                   currResult.stats,
                                   currResult.error,
                                   currentOp.get() );

                    // Errors release the write lock
                    if ( currResult.error )
                        break;

                    // Increment in the write lock and reset the stats for next time
                    currInsertItem.reset( new BatchItemRef( &request,
                                                            currInsertItem->getItemIndex() + 1 ) );
                    currResult.reset();

                    // Destruct curop so that our parent curop is restored, so that we
                    // record the yield count in the parent.
                    currentOp.reset(NULL);

                    // yield sometimes
                    int micros = ClientCursor::suggestYieldMicros();
                    if (micros > 0) {
                        ClientCursor::staticYield(micros, "", NULL);
                    }
                }
            }

        } // END WRITE LOCK

        //
        // Store the current error if it exists
        //

        if ( currResult.error ) {

            errors->push_back( currResult.releaseError() );
            errors->back()->setIndex( currInsertItem->getItemIndex() );

            // Break early for ordered batches
            if ( request.getOrdered() )
                break;
        }

        //
        // Fault or increment
        //

        if ( currResult.fault ) {
            // Check page fault out of lock
            currResult.fault->touch();
        }
        else {
            // Increment if not a fault
            currInsertItem.reset( new BatchItemRef( &request,
                                                    currInsertItem->getItemIndex() + 1 ) );
        }
    }
}
/**
 * @return true if had to do something
 */
bool checkShardVersion( DBClientBase& conn_in , const string& ns , bool authoritative , int tryNumber ) {
    // TODO: cache, optimize, etc...

    WriteBackListener::init( conn_in );

    DBConfigPtr conf = grid.getDBConfig( ns );
    if ( ! conf )
        return false;

    DBClientBase* conn = 0;

    switch ( conn_in.type() ) {
        case ConnectionString::INVALID:
            assert(0);
            break;
        case ConnectionString::MASTER:
            // great
            conn = &conn_in;
            break;
        case ConnectionString::PAIR:
            assert( ! "pair not supported for sharding" );
            break;
        case ConnectionString::SYNC:
            // TODO: we should check later that we aren't actually sharded on this
            conn = &conn_in;
            break;
        case ConnectionString::SET: {
            DBClientReplicaSet* set = (DBClientReplicaSet*)&conn_in;
            conn = &(set->masterConn());
            break;
        }
    }

    assert(conn);

    unsigned long long officialSequenceNumber = 0;

    ChunkManagerPtr manager;
    const bool isSharded = conf->isSharded( ns );
    if ( isSharded ) {
        manager = conf->getChunkManagerIfExists( ns , authoritative );
        // It's possible the chunk manager was reset since we checked whether sharded was true,
        // so must check this here.
        if( manager ) officialSequenceNumber = manager->getSequenceNumber();
    }

    // has the ChunkManager been reloaded since the last time we updated the connection-level version?
    // (ie., last time we issued the setShardVersions below)
    unsigned long long sequenceNumber = connectionShardStatus.getSequence(conn,ns);
    if ( sequenceNumber == officialSequenceNumber ) {
        return false;
    }

    ShardChunkVersion version = 0;
    if ( isSharded && manager ) {
        version = manager->getVersion( Shard::make( conn->getServerAddress() ) );
    }

    LOG(2) << " have to set shard version for conn: " << conn << " ns:" << ns
           << " my last seq: " << sequenceNumber << " current: " << officialSequenceNumber
           << " version: " << version << " manager: " << manager.get() << endl;

    BSONObj result;
    if ( setShardVersion( *conn , ns , version , authoritative , result ) ) {
        // success!
        LOG(1) << " setShardVersion success: " << result << endl;
        connectionShardStatus.setSequence( conn , ns , officialSequenceNumber );
        return true;
    }

    LOG(1) << " setShardVersion failed!\n" << result << endl;

    if ( result["need_authoritative"].trueValue() )
        massert( 10428 , "need_authoritative set but in authoritative mode already" , ! authoritative );

    if ( ! authoritative ) {
        checkShardVersion( *conn , ns , 1 , tryNumber + 1 );
        return true;
    }

    if ( result["reloadConfig"].trueValue() ) {
        if( result["version"].timestampTime() == 0 ){
            // reload db
            conf->reload();
        }
        else {
            // reload config
            conf->getChunkManager( ns , true );
        }
    }

    const int maxNumTries = 7;
    if ( tryNumber < maxNumTries ) {
        LOG( tryNumber < ( maxNumTries / 2 ) ? 1 : 0 )
            << "going to retry checkShardVersion host: " << conn->getServerAddress()
            << " " << result << endl;
        sleepmillis( 10 * tryNumber );
        checkShardVersion( *conn , ns , true , tryNumber + 1 );
        return true;
    }

    string errmsg = str::stream() << "setShardVersion failed host: "
                                  << conn->getServerAddress() << " " << result;
    log() << " " << errmsg << endl;
    massert( 10429 , errmsg , 0 );
    return true;
}
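// Illustrative sketch, not from the original source: later revisions in this section call a
// getVersionable() helper instead of the inline switch above. Assuming it mirrors that switch,
// it might look roughly like this -- resolving a replica-set connection to its master and
// rejecting connection types that cannot be versioned. The name below is hypothetical.
DBClientBase* getVersionableSketch( DBClientBase* conn_in ) {
    switch ( conn_in->type() ) {
        case ConnectionString::MASTER:
        case ConnectionString::SYNC:
            // already points at a single versionable host
            return conn_in;
        case ConnectionString::SET:
            // version against the current master of the set
            return &static_cast<DBClientReplicaSet*>( conn_in )->masterConn();
        default:
            // INVALID / PAIR are not versionable
            verify( false );
            return NULL;
    }
}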
/**
 * @return true if had to do something
 */
bool checkShardVersion( DBClientBase& conn , const string& ns , bool authoritative , int tryNumber ) {
    // TODO: cache, optimize, etc...

    WriteBackListener::init( conn );

    DBConfigPtr conf = grid.getDBConfig( ns );
    if ( ! conf )
        return false;

    unsigned long long officialSequenceNumber = 0;

    ChunkManagerPtr manager;
    const bool isSharded = conf->isSharded( ns );
    if ( isSharded ) {
        manager = conf->getChunkManager( ns , authoritative );
        officialSequenceNumber = manager->getSequenceNumber();
    }

    // has the ChunkManager been reloaded since the last time we updated the connection-level version?
    // (ie, last time we issued the setShardVersions below)
    unsigned long long sequenceNumber = connectionShardStatus.getSequence(&conn,ns);
    if ( sequenceNumber == officialSequenceNumber ) {
        return false;
    }

    ShardChunkVersion version = 0;
    if ( isSharded ) {
        version = manager->getVersion( Shard::make( conn.getServerAddress() ) );
    }

    log(2) << " have to set shard version for conn: " << &conn << " ns:" << ns
           << " my last seq: " << sequenceNumber << " current: " << officialSequenceNumber
           << " version: " << version << " manager: " << manager.get() << endl;

    BSONObj result;
    if ( setShardVersion( conn , ns , version , authoritative , result ) ) {
        // success!
        LOG(1) << " setShardVersion success: " << result << endl;
        connectionShardStatus.setSequence( &conn , ns , officialSequenceNumber );
        return true;
    }

    log(1) << " setShardVersion failed!\n" << result << endl;

    if ( result.getBoolField( "need_authoritative" ) )
        massert( 10428 , "need_authoritative set but in authoritative mode already" , ! authoritative );

    if ( ! authoritative ) {
        checkShardVersion( conn , ns , 1 , tryNumber + 1 );
        return true;
    }

    if ( tryNumber < 4 ) {
        log(1) << "going to retry checkShardVersion" << endl;
        sleepmillis( 10 );
        checkShardVersion( conn , ns , 1 , tryNumber + 1 );
        return true;
    }

    log() << " setShardVersion failed: " << result << endl;
    massert( 10429 , (string)"setShardVersion failed! " + result.jsonString() , 0 );
    return true;
}
/**
 * Updates the remote cached version on the remote shard host (primary, in the case of replica
 * sets) if needed with a fully-qualified shard version for the given namespace:
 *   config server(s) + shard name + shard version
 *
 * If no remote cached version has ever been set, an initial shard version is sent.
 *
 * If the namespace is empty and no version has ever been sent, the config server + shard name
 * is sent to the remote shard host to initialize the connection as coming from mongos.
 * NOTE: This initialization is *best-effort only*.  Operations which wish to correctly version
 * must send the namespace.
 *
 * Config servers are special and are not (unless otherwise a shard) kept up to date with this
 * protocol.  This is safe so long as config servers only contain unversioned collections.
 *
 * It is an error to call checkShardVersion with an unversionable connection (isVersionableCB).
 *
 * @return true if we contacted the remote host
 */
bool checkShardVersion(DBClientBase* conn_in,
                       const string& ns,
                       ChunkManagerPtr refManager,
                       bool authoritative,
                       int tryNumber) {
    // TODO: cache, optimize, etc...

    // Empty namespaces are special - we require initialization but not versioning
    if (ns.size() == 0) {
        return initShardVersionEmptyNS(conn_in);
    }

    auto status = grid.catalogCache()->getDatabase(nsToDatabase(ns));
    if (!status.isOK()) {
        return false;
    }

    shared_ptr<DBConfig> conf = status.getValue();

    DBClientBase* conn = getVersionable(conn_in);
    verify(conn);  // errors thrown above

    unsigned long long officialSequenceNumber = 0;

    ShardPtr primary;
    ChunkManagerPtr manager;
    if (authoritative)
        conf->getChunkManagerIfExists(ns, true);

    conf->getChunkManagerOrPrimary(ns, manager, primary);

    if (manager) {
        officialSequenceNumber = manager->getSequenceNumber();
    }

    const auto shard = grid.shardRegistry()->getShard(conn->getServerAddress());
    uassert(ErrorCodes::ShardNotFound,
            str::stream() << conn->getServerAddress() << " is not recognized as a shard",
            shard);

    // Check this manager against the reference manager
    if (manager) {
        if (refManager && !refManager->compatibleWith(*manager, shard->getId())) {
            const ChunkVersion refVersion(refManager->getVersion(shard->getId()));
            const ChunkVersion currentVersion(manager->getVersion(shard->getId()));

            string msg(str::stream() << "manager (" << currentVersion.toString() << " : "
                                     << manager->getSequenceNumber() << ") "
                                     << "not compatible with reference manager ("
                                     << refVersion.toString() << " : "
                                     << refManager->getSequenceNumber() << ") "
                                     << "on shard " << shard->getId() << " ("
                                     << shard->getConnString().toString() << ")");

            throw SendStaleConfigException(ns, msg, refVersion, currentVersion);
        }
    } else if (refManager) {
        string msg(str::stream() << "not sharded ("
                                 << ((manager.get() == 0) ? string("<none>")
                                                          : str::stream()
                                                                << manager->getSequenceNumber())
                                 << ") but has reference manager ("
                                 << refManager->getSequenceNumber() << ") "
                                 << "on conn " << conn->getServerAddress() << " ("
                                 << conn_in->getServerAddress() << ")");

        throw SendStaleConfigException(
            ns, msg, refManager->getVersion(shard->getId()), ChunkVersion::UNSHARDED());
    }

    // Do not send setShardVersion to collections on the config servers - this causes problems
    // when config servers are also shards and get SSV with conflicting names.
    // TODO: Make config servers regular shards
    if (primary && primary->getId() == "config") {
        return false;
    }

    // Has the ChunkManager been reloaded since the last time we updated the shard version over
    // this connection?  If we've never updated the shard version, do so now.
    unsigned long long sequenceNumber = 0;
    if (connectionShardStatus.getSequence(conn, ns, &sequenceNumber)) {
        if (sequenceNumber == officialSequenceNumber) {
            return false;
        }
    }

    ChunkVersion version = ChunkVersion(0, 0, OID());
    if (manager) {
        version = manager->getVersion(shard->getId());
    }

    LOG(1) << "setting shard version of " << version << " for " << ns << " on shard "
           << shard->toString();

    LOG(3) << "last version sent with chunk manager iteration " << sequenceNumber
           << ", current chunk manager iteration is " << officialSequenceNumber;

    BSONObj result;
    if (setShardVersion(*conn,
                        ns,
                        grid.catalogManager()->connectionString().toString(),
                        version,
                        manager.get(),
                        authoritative,
                        result)) {
        LOG(1) << " setShardVersion success: " << result;
        connectionShardStatus.setSequence(conn, ns, officialSequenceNumber);
        return true;
    }

    LOG(1) << " setShardVersion failed!\n" << result << endl;

    if (result["need_authoritative"].trueValue())
        massert(10428, "need_authoritative set but in authoritative mode already", !authoritative);

    if (!authoritative) {
        // use the original connection and get a fresh versionable connection
        // since conn can be invalidated (or worse, freed) after the failure
        checkShardVersion(conn_in, ns, refManager, 1, tryNumber + 1);
        return true;
    }

    if (result["reloadConfig"].trueValue()) {
        if (result["version"].timestampTime() == Date_t()) {
            warning() << "reloading full configuration for " << conf->name()
                      << ", connection state indicates significant version changes";

            // reload db
            conf->reload();
        } else {
            // reload config
            conf->getChunkManager(ns, true);
        }
    }

    const int maxNumTries = 7;
    if (tryNumber < maxNumTries) {
        LOG(tryNumber < (maxNumTries / 2) ? 1 : 0)
            << "going to retry checkShardVersion shard: " << shard->toString() << " " << result;
        sleepmillis(10 * tryNumber);

        // use the original connection and get a fresh versionable connection
        // since conn can be invalidated (or worse, freed) after the failure
        checkShardVersion(conn_in, ns, refManager, true, tryNumber + 1);
        return true;
    }

    string errmsg = str::stream() << "setShardVersion failed shard: " << shard->toString() << " "
                                  << result;
    log() << " " << errmsg << endl;
    massert(10429, errmsg, 0);
    return true;
}
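// Illustrative sketch (hypothetical stand-in, not the real ConnectionShardStatus): the revision
// above distinguishes "no version ever sent on this connection" (getSequence returns false) from
// "a stale version was sent", which the unsigned-return getSequence in the earlier revisions of
// this section could not express -- a fresh connection therefore always gets a setShardVersion.
#include <map>
#include <string>
#include <utility>

class ConnectionShardStatusSketch {
public:
    // returns false if no version was ever sent for (conn, ns) on this connection
    bool getSequence(const void* conn, const std::string& ns, unsigned long long* seq) const {
        auto it = _sequences.find(std::make_pair(conn, ns));
        if (it == _sequences.end())
            return false;
        *seq = it->second;
        return true;
    }

    void setSequence(const void* conn, const std::string& ns, unsigned long long seq) {
        _sequences[std::make_pair(conn, ns)] = seq;
    }

private:
    std::map<std::pair<const void*, std::string>, unsigned long long> _sequences;
};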
void WriteBatchExecutor::execRemove( const BatchItemRef& removeItem,
                                     WriteErrorDetail** error ) {

    // Removes are similar to updates, but page faults are handled externally

    const BatchedCommandRequest& request = *removeItem.getRequest();
    const NamespaceString nss( removeItem.getRequest()->getNS() );

    // BEGIN CURRENT OP
    scoped_ptr<CurOp> currentOp( beginCurrentOp( _client, removeItem ) );
    incOpStats( removeItem );

    WriteOpResult result;

    while ( true ) {
        {
            // NOTE: Deletes will not fault outside the lock once any data has been written
            PageFaultRetryableSection pFaultSection;

            ///////////////////////////////////////////
            Lock::DBWrite writeLock( nss.ns() );
            ///////////////////////////////////////////

            // Check version once we're locked

            if ( !checkShardVersion( &shardingState, request, &result.error ) ) {
                // Version error
                break;
            }

            // Context once we're locked, to set more details in currentOp()
            // TODO: better constructor?
            Client::Context writeContext( nss.ns(),
                                          storageGlobalParams.dbpath,
                                          false /* don't check version */);

            multiRemove( removeItem, &result );

            if ( !result.fault ) {
                incWriteStats( removeItem, result.stats, result.error, currentOp.get() );
                break;
            }
        }

        //
        // Check page fault out of lock
        //

        dassert( result.fault );
        result.fault->touch();
        result.reset();
    }

    // END CURRENT OP
    finishCurrentOp( _client, currentOp.get(), result.error );

    if ( result.error ) {
        result.error->setIndex( removeItem.getItemIndex() );
        *error = result.releaseError();
    }
}
/**
 * @return true if had to do something
 */
bool checkShardVersion( DBClientBase& conn_in , const string& ns , bool authoritative , int tryNumber ) {
    // TODO: cache, optimize, etc...

    WriteBackListener::init( conn_in );

    DBConfigPtr conf = grid.getDBConfig( ns );
    if ( ! conf )
        return false;

    DBClientBase* conn = getVersionable( &conn_in );
    assert(conn); // errors thrown above

    unsigned long long officialSequenceNumber = 0;

    ChunkManagerPtr manager;
    const bool isSharded = conf->isSharded( ns );
    if ( isSharded ) {
        manager = conf->getChunkManagerIfExists( ns , authoritative );
        // It's possible the chunk manager was reset since we checked whether sharded was true,
        // so must check this here.
        if( manager ) officialSequenceNumber = manager->getSequenceNumber();
    }

    // has the ChunkManager been reloaded since the last time we updated the connection-level version?
    // (ie., last time we issued the setShardVersions below)
    unsigned long long sequenceNumber = connectionShardStatus.getSequence(conn,ns);
    if ( sequenceNumber == officialSequenceNumber ) {
        return false;
    }

    ShardChunkVersion version = 0;
    if ( isSharded && manager ) {
        version = manager->getVersion( Shard::make( conn->getServerAddress() ) );
    }

    if( version == 0 ){
        LOG(0) << "resetting shard version of " << ns << " on " << conn->getServerAddress()
               << ", " << ( ! isSharded ? "no longer sharded" :
                          ( ! manager ? "no chunk manager found" : "version is zero" ) ) << endl;
    }

    LOG(2) << " have to set shard version for conn: " << conn << " ns:" << ns
           << " my last seq: " << sequenceNumber << " current: " << officialSequenceNumber
           << " version: " << version << " manager: " << manager.get() << endl;

    BSONObj result;
    if ( setShardVersion( *conn , ns , version , authoritative , result ) ) {
        // success!
        LOG(1) << " setShardVersion success: " << result << endl;
        connectionShardStatus.setSequence( conn , ns , officialSequenceNumber );
        return true;
    }

    LOG(1) << " setShardVersion failed!\n" << result << endl;

    if ( result["need_authoritative"].trueValue() )
        massert( 10428 , "need_authoritative set but in authoritative mode already" , ! authoritative );

    if ( ! authoritative ) {
        checkShardVersion( *conn , ns , 1 , tryNumber + 1 );
        return true;
    }

    if ( result["reloadConfig"].trueValue() ) {
        if( result["version"].timestampTime() == 0 ){
            // reload db
            conf->reload();
        }
        else {
            // reload config
            conf->getChunkManager( ns , true );
        }
    }

    const int maxNumTries = 7;
    if ( tryNumber < maxNumTries ) {
        LOG( tryNumber < ( maxNumTries / 2 ) ? 1 : 0 )
            << "going to retry checkShardVersion host: " << conn->getServerAddress()
            << " " << result << endl;
        sleepmillis( 10 * tryNumber );
        checkShardVersion( *conn , ns , true , tryNumber + 1 );
        return true;
    }

    string errmsg = str::stream() << "setShardVersion failed host: "
                                  << conn->getServerAddress() << " " << result;
    log() << " " << errmsg << endl;
    massert( 10429 , errmsg , 0 );
    return true;
}