void ShardRegistryData::_addShard_inlock(const std::shared_ptr<Shard>& shard) { const ShardId shardId = shard->getId(); const ConnectionString connString = shard->originalConnString(); auto currentShard = _findByShardId_inlock(shardId); if (currentShard) { auto oldConnString = currentShard->originalConnString(); if (oldConnString.toString() != connString.toString()) { log() << "Updating ShardRegistry connection string for shard " << currentShard->getId() << " from: " << oldConnString.toString() << " to: " << connString.toString(); } for (const auto& host : oldConnString.getServers()) { _lookup.erase(host.toString()); _hostLookup.erase(host); } _lookup.erase(oldConnString.toString()); } _lookup[shard->getId()] = shard; if (connString.type() == ConnectionString::SET) { _rsLookup[connString.getSetName()] = shard; } else if (connString.type() == ConnectionString::CUSTOM) { // CUSTOM connection strings (ie "$dummy:10000) become DBDirectClient connections which // always return "localhost" as their response to getServerAddress(). This is just for // making dbtest work. _lookup[ShardId("localhost")] = shard; _hostLookup[HostAndPort("localhost")] = shard; } // TODO: The only reason to have the shard host names in the lookup table is for the // setShardVersion call, which resolves the shard id from the shard address. This is // error-prone and will go away eventually when we switch all communications to go through // the remote command runner and all nodes are sharding aware by default. _lookup[connString.toString()] = shard; for (const HostAndPort& hostAndPort : connString.getServers()) { _lookup[hostAndPort.toString()] = shard; _hostLookup[hostAndPort] = shard; } }
/**
 * Returns the monitor registered under the set name of 'connStr', creating, registering and
 * initialising a new one when no live monitor is found.
 *
 * 'connStr' must be of type SET. When an existing monitor is returned, its original URI's SSL
 * mode is first checked against transport::kGlobalSSLMode via uassertNotMixingSSL.
 */
shared_ptr<ReplicaSetMonitor> ReplicaSetMonitorManager::getOrCreateMonitor(
    const ConnectionString& connStr) {
    invariant(connStr.type() == ConnectionString::SET);

    stdx::lock_guard<stdx::mutex> lk(_mutex);
    _setupTaskExecutorInLock(connStr.toString());

    const auto replSetName = connStr.getSetName();

    if (auto existing = _monitors[replSetName].lock()) {
        uassertNotMixingSSL(existing->getOriginalUri().getSSLMode(), transport::kGlobalSSLMode);
        return existing;
    }

    log() << "Starting new replica set monitor for " << connStr.toString();

    auto created = std::make_shared<ReplicaSetMonitor>(MongoURI(connStr));
    _monitors[replSetName] = created;
    created->init();
    return created;
}
/** * Upgrades v4 to v5. */ bool doUpgradeV4ToV5(const ConnectionString& configLoc, const VersionType& lastVersionInfo, string* errMsg) { string dummy; if (!errMsg) errMsg = &dummy; verify(lastVersionInfo.getCurrentVersion() == UpgradeHistory_MandatoryEpochVersion); Status result = preUpgradeCheck(configLoc, lastVersionInfo, minMongoProcessVersion); if (!result.isOK()) { if (result.code() == ErrorCodes::ManualInterventionRequired) { *errMsg = cannotCleanupMessage; } else { *errMsg = result.toString(); } return false; } // This is not needed because we are not actually going to make any modifications // on the other collections in the config server for this particular upgrade. // startConfigUpgrade(configLoc.toString(), // lastVersionInfo.getCurrentVersion(), // OID::gen()); // If we actually need to modify something in the config servers these need to follow // after calling startConfigUpgrade(...): // // 1. Acquire necessary locks. // 2. Make a backup of the collections we are about to modify. // 3. Perform the upgrade process on the backup collection. // 4. Verify that no changes were made to the collections since the backup was performed. // 5. Call enterConfigUpgradeCriticalSection(configLoc.toString(), // lastVersionInfo.getCurrentVersion()). // 6. Rename the backup collection to the name of the original collection with // dropTarget set to true. // We're only after the version bump in commitConfigUpgrade here since we never // get into the critical section. Status commitStatus = commitConfigUpgrade(configLoc.toString(), lastVersionInfo.getCurrentVersion(), MIN_COMPATIBLE_CONFIG_VERSION, CURRENT_CONFIG_VERSION); if (!commitStatus.isOK()) { *errMsg = commitStatus.toString(); return false; } return true; }
/**
 * Returns the monitor registered under the set name of 'connStr', creating, registering and
 * initialising a new one (seeded with the hosts of 'connStr') when no live monitor is found.
 *
 * 'connStr' must be of type SET.
 */
shared_ptr<ReplicaSetMonitor> ReplicaSetMonitorManager::getOrCreateMonitor(
    const ConnectionString& connStr) {
    invariant(connStr.type() == ConnectionString::SET);

    stdx::lock_guard<stdx::mutex> lk(_mutex);
    _setupTaskExecutorInLock(connStr.toString());
    auto setName = connStr.getSetName();
    auto monitor = _monitors[setName].lock();
    if (monitor) {
        return monitor;
    }

    // Fetch the host list exactly once so that begin() and end() are guaranteed to refer to the
    // same container, whether getServers() hands back a reference or a temporary copy (the
    // const reference keeps a temporary alive for the rest of this scope).
    const auto& seedHosts = connStr.getServers();
    const std::set<HostAndPort> servers(seedHosts.begin(), seedHosts.end());

    log() << "Starting new replica set monitor for " << connStr.toString();

    auto newMonitor = std::make_shared<ReplicaSetMonitor>(setName, servers);
    _monitors[setName] = newMonitor;
    newMonitor->init();
    return newMonitor;
}
void LegacyDistLockPinger::acknowledgeStopPing(const ConnectionString& addr, const string& processId) { { boost::lock_guard<boost::mutex> lk(_mutex); string pingId = pingThreadId(addr, processId); _kill.erase(pingId); _seen.erase(pingId); } try { ScopedDbConnection conn(addr.toString(), 30.0); conn->remove(LockpingsType::ConfigNS, BSON(LockpingsType::process(processId))); } catch (const DBException& ex) { warning() << "Error encountered while stopping ping on " << processId << causedBy(ex); } }
/**
 * Serialises a start-chunk-clone command into 'builder', which must be empty on entry.
 *
 * The namespace is written under kRecvChunkStart, followed by the config server connection
 * string, the donor and recipient shard ids, the chunk bounds, the shard key pattern and
 * finally the secondary throttle options.
 *
 * NOTE(review): 'shardVersion' is not referenced anywhere in this body - confirm whether the
 * caller is expected to serialise it separately.
 */
void StartChunkCloneRequest::appendAsCommand(
    BSONObjBuilder* builder,
    const NamespaceString& nss,
    const MigrationSessionId& shardVersion,
    const ConnectionString& configServerConnectionString,
    const std::string& fromShardId,
    const std::string& toShardId,
    const BSONObj& chunkMinKey,
    const BSONObj& chunkMaxKey,
    const BSONObj& shardKeyPattern,
    const MigrationSecondaryThrottleOptions& secondaryThrottle) {
    invariant(builder->asTempObj().isEmpty());
    invariant(nss.isValid());

    BSONObjBuilder& cmd = *builder;

    // Command name and routing information.
    cmd.append(kRecvChunkStart, nss.ns());
    cmd.append(kConfigServerConnectionString, configServerConnectionString.toString());
    cmd.append(kFromShardId, fromShardId);
    cmd.append(kToShardId, toShardId);

    // Description of the chunk being cloned.
    cmd.append(kChunkMinKey, chunkMinKey);
    cmd.append(kChunkMaxKey, chunkMaxKey);
    cmd.append(kShardKeyPattern, shardKeyPattern);

    secondaryThrottle.append(&cmd);
}
// In order to be accepted as a new shard, that mongod must not have // any database name that exists already in any other shards. // If that test passes, the new shard's databases are going to be entered as // non-sharded db's whose primary is the newly added shard. StatusWith<vector<string>> getDBNames(const ConnectionString& shardConnectionString, ScopedDbConnection& conn) { vector<string> dbNames; BSONObj resListDB; if (!conn->runCommand("admin", BSON("listDatabases" << 1), resListDB)) { return Status(ErrorCodes::OperationFailed, str::stream() << "failed listing " << shardConnectionString.toString() << "'s databases:" << resListDB); } BSONObjIterator i(resListDB["databases"].Obj()); while (i.more()) { BSONObj dbEntry = i.next().Obj(); const string& dbName = dbEntry["name"].String(); if (!(dbName == "local" || dbName == "admin" || dbName == "config")) { dbNames.push_back(dbName); } } return dbNames; }
/**
 * Re-indexes 'shard' in the lookup tables after its connection string changed to
 * 'newConnString'.
 *
 * Every key derived from the shard's current connection string (each host and the full
 * connection string itself) is removed, then the shard is registered again by id, by replica
 * set name (SET connection strings only), by the full new connection string and by each new
 * host.
 */
void ShardRegistry::_updateLookupMapsForShard_inlock(shared_ptr<Shard> shard,
                                                     const ConnectionString& newConnString) {
    auto oldConnString = shard->getConnString();
    for (const auto& host : oldConnString.getServers()) {
        _lookup.erase(host.toString());
    }
    // The full old connection string was inserted as a key as well; drop it so no stale entry
    // survives the update. If it equals the shard id or the new connection string it is simply
    // re-inserted below.
    _lookup.erase(oldConnString.toString());

    _lookup[shard->getId()] = shard;

    if (newConnString.type() == ConnectionString::SET) {
        _rsLookup[newConnString.getSetName()] = shard;
    }

    // TODO: The only reason to have the shard host names in the lookup table is for the
    // setShardVersion call, which resolves the shard id from the shard address. This is
    // error-prone and will go away eventually when we switch all communications to go through
    // the remote command runner and all nodes are sharding aware by default.
    _lookup[newConnString.toString()] = shard;

    for (const HostAndPort& hostAndPort : newConnString.getServers()) {
        _lookup[hostAndPort.toString()] = shard;
    }
}
/**
 * Points this shard at 'cs': stores the connection string together with its string form and
 * then pushes the updated shard into staticShardInfo under this shard's name.
 *
 * The shard must already have a non-empty name.
 */
void Shard::setAddress(const ConnectionString& cs) {
    verify(_name.size());

    // Keep the parsed form and its textual form in step.
    _cs = cs;
    _addr = cs.toString();

    staticShardInfo.set(_name, *this, true, false);
}
/**
 * Executes the write batch in 'clientRequest' and fills in 'clientResponse' with the outcome.
 *
 * Works in rounds until the BatchWriteOp reports it is finished: each round targets the
 * remaining writes into child batches, dispatches them (at most one in-flight batch per
 * resolved host per send pass), collects the responses, and then refreshes the targeter if
 * needed. The whole batch is aborted with NoProgressMade once more than
 * kMaxRoundsWithoutProgress consecutive rounds complete no additional ops while neither the
 * targeter nor the remote metadata changed.
 */
void BatchWriteExec::executeBatch(const BatchedCommandRequest& clientRequest,
                                  BatchedCommandResponse* clientResponse) {
    LOG(4) << "starting execution of write batch of size "
           << static_cast<int>(clientRequest.sizeWriteOps()) << " for " << clientRequest.getNS()
           << endl;

    BatchWriteOp batchOp;
    batchOp.initClientRequest(&clientRequest);

    // Current batch status
    bool refreshedTargeter = false;
    int rounds = 0;
    int numCompletedOps = 0;
    int numRoundsWithoutProgress = 0;

    while (!batchOp.isFinished()) {
        //
        // Get child batches to send using the targeter
        //
        // Targeting errors can be caused by remote metadata changing (the collection could have
        // been dropped and recreated, for example with a new shard key).  If a remote metadata
        // change occurs *before* a client sends us a batch, we need to make sure that we don't
        // error out just because we're staler than the client - otherwise mongos will have
        // unpredictable behavior.
        //
        // (If a metadata change happens *during* or *after* a client sends us a batch, however,
        // we make no guarantees about delivery.)
        //
        // For this reason, we don't record targeting errors until we've refreshed our targeting
        // metadata at least once *after* receiving the client batch - at that point, we know:
        //
        // 1) our new metadata is the same as the metadata when the client sent a batch, and so
        //    targeting errors are real.
        // OR
        // 2) our new metadata is a newer version than when the client sent a batch, and so
        //    the metadata must have changed after the client batch was sent.  We don't need to
        //    deliver in this case, since for all the client knows we may have gotten the batch
        //    exactly when the metadata changed.
        //

        OwnedPointerVector<TargetedWriteBatch> childBatchesOwned;
        vector<TargetedWriteBatch*>& childBatches = childBatchesOwned.mutableVector();

        // If we've already had a targeting error, we've refreshed the metadata once and can
        // record target errors definitively.
        bool recordTargetErrors = refreshedTargeter;
        Status targetStatus = batchOp.targetBatch(*_targeter, recordTargetErrors, &childBatches);
        if (!targetStatus.isOK()) {
            // Don't do anything until a targeter refresh
            _targeter->noteCouldNotTarget();
            refreshedTargeter = true;
            ++_stats->numTargetErrors;
            dassert(childBatches.size() == 0u);
        }

        //
        // Send all child batches
        //

        size_t numSent = 0;
        size_t numToSend = childBatches.size();
        // Reset every round; set when a response in this round reports that the shard's
        // metadata is changing, which exempts the round from the no-progress count below.
        bool remoteMetadataChanging = false;
        while (numSent != numToSend) {
            // Collect batches out on the network, mapped by endpoint
            OwnedHostBatchMap ownedPendingBatches;
            OwnedHostBatchMap::MapType& pendingBatches = ownedPendingBatches.mutableMap();

            //
            // Send side
            //

            // Get as many batches as we can at once
            for (vector<TargetedWriteBatch*>::iterator it = childBatches.begin();
                 it != childBatches.end();
                 ++it) {
                //
                // Collect the info needed to dispatch our targeted batch
                //

                TargetedWriteBatch* nextBatch = *it;
                // If the batch is NULL, we sent it previously, so skip
                if (nextBatch == NULL)
                    continue;

                // Figure out what host we need to dispatch our targeted batch
                ConnectionString shardHost;
                Status resolveStatus =
                    _resolver->chooseWriteHost(nextBatch->getEndpoint().shardName, &shardHost);
                if (!resolveStatus.isOK()) {
                    ++_stats->numResolveErrors;

                    // Record a resolve failure
                    // TODO: It may be necessary to refresh the cache if stale, or maybe just
                    // cancel and retarget the batch
                    WriteErrorDetail error;
                    buildErrorFrom(resolveStatus, &error);

                    LOG(4) << "unable to send write batch to " << shardHost.toString()
                           << causedBy(resolveStatus.toString()) << endl;

                    batchOp.noteBatchError(*nextBatch, error);

                    // We're done with this batch
                    // Clean up when we can't resolve a host; the batch will never be sent, so
                    // it no longer counts towards numToSend.
                    delete *it;
                    *it = NULL;
                    --numToSend;
                    continue;
                }

                // If we already have a batch for this host, wait until the next time
                OwnedHostBatchMap::MapType::iterator pendingIt = pendingBatches.find(shardHost);
                if (pendingIt != pendingBatches.end())
                    continue;

                //
                // We now have all the info needed to dispatch the batch
                //

                BatchedCommandRequest request(clientRequest.getBatchType());
                batchOp.buildBatchRequest(*nextBatch, &request);

                // Internally we use full namespaces for request/response, but we send the
                // command to a database with the collection name in the request.
                NamespaceString nss(request.getNS());
                request.setNS(nss.coll());

                LOG(4) << "sending write batch to " << shardHost.toString() << ": "
                       << request.toString() << endl;

                _dispatcher->addCommand(shardHost, nss.db(), request);

                // Indicate we're done by setting the batch to NULL
                // We'll only get duplicate hostEndpoints if we have broadcast and non-broadcast
                // endpoints for the same host, so this should be pretty efficient without
                // moving stuff around.
                *it = NULL;

                // Recv-side is responsible for cleaning up the nextBatch when used
                pendingBatches.insert(make_pair(shardHost, nextBatch));
            }

            // Send them all out
            _dispatcher->sendAll();
            numSent += pendingBatches.size();

            //
            // Recv side
            //

            while (_dispatcher->numPending() > 0) {
                // Get the response
                ConnectionString shardHost;
                BatchedCommandResponse response;
                Status dispatchStatus = _dispatcher->recvAny(&shardHost, &response);

                // Get the TargetedWriteBatch to find where to put the response
                dassert(pendingBatches.find(shardHost) != pendingBatches.end());
                TargetedWriteBatch* batch = pendingBatches.find(shardHost)->second;

                if (dispatchStatus.isOK()) {
                    TrackedErrors trackedErrors;
                    trackedErrors.startTracking(ErrorCodes::StaleShardVersion);

                    LOG(4) << "write results received from " << shardHost.toString() << ": "
                           << response.toString() << endl;

                    // Dispatch was ok, note response
                    batchOp.noteBatchResponse(*batch, response, &trackedErrors);

                    // Note if anything was stale
                    const vector<ShardError*>& staleErrors =
                        trackedErrors.getErrors(ErrorCodes::StaleShardVersion);

                    if (staleErrors.size() > 0) {
                        noteStaleResponses(staleErrors, _targeter);
                        ++_stats->numStaleBatches;
                    }

                    // Remember if the shard is actively changing metadata right now
                    if (isShardMetadataChanging(staleErrors)) {
                        remoteMetadataChanging = true;
                    }

                    // Remember that we successfully wrote to this shard
                    // NOTE: This will record lastOps for shards where we actually didn't update
                    // or delete any documents, which preserves old behavior but is conservative
                    _stats->noteWriteAt(shardHost,
                                        response.isLastOpSet() ? response.getLastOp() : OpTime(),
                                        response.isElectionIdSet() ? response.getElectionId()
                                                                   : OID());
                } else {
                    // Error occurred dispatching, note it

                    stringstream msg;
                    msg << "write results unavailable from " << shardHost.toString()
                        << causedBy(dispatchStatus.toString());

                    WriteErrorDetail error;
                    buildErrorFrom(Status(ErrorCodes::RemoteResultsUnavailable, msg.str()),
                                   &error);

                    LOG(4) << "unable to receive write results from " << shardHost.toString()
                           << causedBy(dispatchStatus.toString()) << endl;

                    batchOp.noteBatchError(*batch, error);
                }
            }
        }

        ++rounds;
        ++_stats->numRounds;

        // If we're done, get out
        if (batchOp.isFinished())
            break;

        // MORE WORK TO DO

        //
        // Refresh the targeter if we need to (no-op if nothing stale)
        //

        bool targeterChanged = false;
        Status refreshStatus = _targeter->refreshIfNeeded(&targeterChanged);

        if (!refreshStatus.isOK()) {
            // It's okay if we can't refresh, we'll just record errors for the ops if
            // needed.
            warning() << "could not refresh targeter" << causedBy(refreshStatus.reason()) << endl;
        }

        //
        // Ensure progress is being made toward completing the batch op
        //

        int currCompletedOps = batchOp.numWriteOpsIn(WriteOpState_Completed);
        if (currCompletedOps == numCompletedOps && !targeterChanged && !remoteMetadataChanging) {
            ++numRoundsWithoutProgress;
        } else {
            numRoundsWithoutProgress = 0;
        }
        numCompletedOps = currCompletedOps;

        if (numRoundsWithoutProgress > kMaxRoundsWithoutProgress) {
            stringstream msg;
            msg << "no progress was made executing batch write op in " << clientRequest.getNS()
                << " after " << kMaxRoundsWithoutProgress << " rounds (" << numCompletedOps
                << " ops completed in " << rounds << " rounds total)";

            WriteErrorDetail error;
            buildErrorFrom(Status(ErrorCodes::NoProgressMade, msg.str()), &error);
            batchOp.abortBatch(error);
            break;
        }
    }

    batchOp.buildClientResponse(clientResponse);

    LOG(4) << "finished execution of write batch"
           << (clientResponse->isErrDetailsSet() ? " with write errors" : "")
           << (clientResponse->isErrDetailsSet() && clientResponse->isWriteConcernErrorSet()
                   ? " and"
                   : "")
           << (clientResponse->isWriteConcernErrorSet() ? " with write concern error" : "")
           << " for " << clientRequest.getNS() << endl;
}
/**
 * Adds the host(s) described by 'shardConnectionString' to the cluster as a new shard and
 * returns the name of the shard.
 *
 * 'shardProposedName' may be null, in which case the name comes from _validateHostAsShard or,
 * failing that, from generateNewShardName. A 'maxSize' greater than zero is stored on the
 * shard entry via setMaxSizeMB.
 *
 * If _checkIfShardExists reports that the shard is already present, the call is treated as a
 * no-op and the existing shard's name is returned. Otherwise the candidate is validated, its
 * databases are checked against the catalog, the add-shard command and setFCV are sent to it,
 * its entry is inserted into config.shards, its databases are recorded, and the change is
 * logged. Any failing step returns that step's error Status.
 */
StatusWith<std::string> ShardingCatalogManager::addShard(
    OperationContext* opCtx,
    const std::string* shardProposedName,
    const ConnectionString& shardConnectionString,
    const long long maxSize) {
    if (shardConnectionString.type() == ConnectionString::INVALID) {
        return {ErrorCodes::BadValue, "Invalid connection string"};
    }

    if (shardProposedName && shardProposedName->empty()) {
        return {ErrorCodes::BadValue, "shard name cannot be empty"};
    }

    // Only one addShard operation can be in progress at a time.
    Lock::ExclusiveLock lk(opCtx->lockState(), _kShardMembershipLock);

    // Check if this shard has already been added (can happen in the case of a retry after a network
    // error, for example) and thus this addShard request should be considered a no-op.
    auto existingShard =
        _checkIfShardExists(opCtx, shardConnectionString, shardProposedName, maxSize);
    if (!existingShard.isOK()) {
        return existingShard.getStatus();
    }
    if (existingShard.getValue()) {
        // These hosts already belong to an existing shard, so report success and terminate the
        // addShard request.  Make sure to set the last optime for the client to the system last
        // optime so that we'll still wait for replication so that this state is visible in the
        // committed snapshot.
        repl::ReplClientInfo::forClient(opCtx->getClient()).setLastOpToSystemLastOpTime(opCtx);
        return existingShard.getValue()->getName();
    }

    // Force a reload of the ShardRegistry to ensure that, in case this addShard is to re-add a
    // replica set that has recently been removed, we have detached the ReplicaSetMonitor for the
    // set with that setName from the ReplicaSetMonitorManager and will create a new
    // ReplicaSetMonitor when targeting the set below.
    // Note: This is necessary because as of 3.4, removeShard is performed by mongos (unlike
    // addShard), so the ShardRegistry is not synchronously reloaded on the config server when a
    // shard is removed.
    if (!Grid::get(opCtx)->shardRegistry()->reload(opCtx)) {
        // If the first reload joined an existing one, call reload again to ensure the reload is
        // fresh.
        Grid::get(opCtx)->shardRegistry()->reload(opCtx);
    }

    // TODO: Don't create a detached Shard object, create a detached RemoteCommandTargeter instead.
    const std::shared_ptr<Shard> shard{
        Grid::get(opCtx)->shardRegistry()->createConnection(shardConnectionString)};
    invariant(shard);
    auto targeter = shard->getTargeter();

    // Runs on every exit path below except the successful one, where it is dismissed.
    auto stopMonitoringGuard = MakeGuard([&] {
        if (shardConnectionString.type() == ConnectionString::SET) {
            // This is a workaround for the case where we could have some bad shard being
            // requested to be added and we put that bad connection string on the global replica set
            // monitor registry. It needs to be cleaned up so that when a correct replica set is
            // added, it will be recreated.
            ReplicaSetMonitor::remove(shardConnectionString.getSetName());
        }
    });

    // Validate the specified connection string may serve as shard at all
    auto shardStatus =
        _validateHostAsShard(opCtx, targeter, shardProposedName, shardConnectionString);
    if (!shardStatus.isOK()) {
        return shardStatus.getStatus();
    }
    ShardType& shardType = shardStatus.getValue();

    // Check that none of the existing shard candidate's dbs exist already
    auto dbNamesStatus = _getDBNamesListFromShard(opCtx, targeter);
    if (!dbNamesStatus.isOK()) {
        return dbNamesStatus.getStatus();
    }

    for (const auto& dbName : dbNamesStatus.getValue()) {
        auto dbt = Grid::get(opCtx)->catalogClient()->getDatabase(
            opCtx, dbName, repl::ReadConcernLevel::kLocalReadConcern);
        if (dbt.isOK()) {
            const auto& dbDoc = dbt.getValue().value;
            return Status(ErrorCodes::OperationFailed,
                          str::stream() << "can't add shard "
                                        << "'" << shardConnectionString.toString() << "'"
                                        << " because a local database '" << dbName
                                        << "' exists in another " << dbDoc.getPrimary());
        } else if (dbt != ErrorCodes::NamespaceNotFound) {
            // NamespaceNotFound is the expected outcome here; anything else is a real error.
            return dbt.getStatus();
        }
    }

    // Check that the shard candidate does not have a local config.system.sessions collection
    auto res = _dropSessionsCollection(opCtx, targeter);

    if (!res.isOK()) {
        return res.withContext(
            "can't add shard with a local copy of config.system.sessions, please drop this "
            "collection from the shard manually and try again.");
    }

    // If a name for a shard wasn't provided, generate one
    if (shardType.getName().empty()) {
        auto result = generateNewShardName(opCtx);
        if (!result.isOK()) {
            return result.getStatus();
        }
        shardType.setName(result.getValue());
    }

    if (maxSize > 0) {
        shardType.setMaxSizeMB(maxSize);
    }

    // Helper function that runs a command on the to-be shard and returns the status
    auto runCmdOnNewShard = [this, &opCtx, &targeter](const BSONObj& cmd) -> Status {
        auto swCommandResponse =
            _runCommandForAddShard(opCtx, targeter.get(), NamespaceString::kAdminDb, cmd);
        if (!swCommandResponse.isOK()) {
            return swCommandResponse.getStatus();
        }
        // Grabs the underlying status from a StatusWith object by taking the first
        // non-OK status, if there is one. This is needed due to the semantics of
        // _runCommandForAddShard.
        auto commandResponse = std::move(swCommandResponse.getValue());
        BatchedCommandResponse batchResponse;
        return Shard::CommandResponse::processBatchWriteResponse(commandResponse, &batchResponse);
    };

    AddShard addShardCmd = add_shard_util::createAddShardCmd(opCtx, shardType.getName());

    auto addShardCmdBSON = [&]() {
        // In 4.2, use the _addShard command to add the shard, which in turn inserts a
        // shardIdentity document into the shard and triggers sharding state initialization.
        // In the unlikely scenario that there's a downgrade to 4.0 between the
        // construction of this command object and the issuing of the command
        // on the receiving shard, the user will receive a rather harmless
        // CommandNotFound error for _addShard, and can simply retry.
        if (serverGlobalParams.featureCompatibility.getVersion() ==
            ServerGlobalParams::FeatureCompatibility::Version::kFullyUpgradedTo42) {
            // Needed for IDL toBSON method
            BSONObj passthroughFields;
            return addShardCmd.toBSON(passthroughFields);
        } else {
            // To support backwards compatibility with v4.0 shards, insert a shardIdentity document
            // directly.
            return add_shard_util::createShardIdentityUpsertForAddShard(addShardCmd);
        }
    }();

    auto addShardStatus = runCmdOnNewShard(addShardCmdBSON);

    if (!addShardStatus.isOK()) {
        return addShardStatus;
    }

    {
        // Hold the fcvLock across checking the FCV, sending setFCV to the new shard, and
        // writing the entry for the new shard to config.shards. This ensures the FCV doesn't change
        // after we send setFCV to the new shard, but before we write its entry to config.shards.
        // (Note, we don't use a Global IX lock here, because we don't want to hold the global lock
        // while blocking on the network).
        invariant(!opCtx->lockState()->isLocked());
        Lock::SharedLock lk(opCtx->lockState(), FeatureCompatibilityVersion::fcvLock);

        BSONObj setFCVCmd;
        switch (serverGlobalParams.featureCompatibility.getVersion()) {
            case ServerGlobalParams::FeatureCompatibility::Version::kFullyUpgradedTo42:
            case ServerGlobalParams::FeatureCompatibility::Version::kUpgradingTo42:
                setFCVCmd = BSON(FeatureCompatibilityVersionCommandParser::kCommandName
                                 << FeatureCompatibilityVersionParser::kVersion42
                                 << WriteConcernOptions::kWriteConcernField
                                 << opCtx->getWriteConcern().toBSON());
                break;
            default:
                setFCVCmd = BSON(FeatureCompatibilityVersionCommandParser::kCommandName
                                 << FeatureCompatibilityVersionParser::kVersion40
                                 << WriteConcernOptions::kWriteConcernField
                                 << opCtx->getWriteConcern().toBSON());
                break;
        }
        auto versionResponse =
            _runCommandForAddShard(opCtx, targeter.get(), NamespaceString::kAdminDb, setFCVCmd);
        if (!versionResponse.isOK()) {
            return versionResponse.getStatus();
        }

        if (!versionResponse.getValue().commandStatus.isOK()) {
            return versionResponse.getValue().commandStatus;
        }

        log() << "going to insert new entry for shard into config.shards: "
              << shardType.toString();

        Status result = Grid::get(opCtx)->catalogClient()->insertConfigDocument(
            opCtx,
            ShardType::ConfigNS,
            shardType.toBSON(),
            ShardingCatalogClient::kLocalWriteConcern);
        if (!result.isOK()) {
            log() << "error adding shard: " << shardType.toBSON() << " err: " << result.reason();
            return result;
        }
    }

    // Add all databases which were discovered on the new shard
    for (const auto& dbName : dbNamesStatus.getValue()) {
        DatabaseType dbt(dbName, shardType.getName(), false, databaseVersion::makeNew());

        {
            const auto status = Grid::get(opCtx)->catalogClient()->updateConfigDocument(
                opCtx,
                DatabaseType::ConfigNS,
                BSON(DatabaseType::name(dbName)),
                dbt.toBSON(),
                true,
                ShardingCatalogClient::kLocalWriteConcern);
            if (!status.isOK()) {
                // Failing to record a database is only logged; the shard is still added.
                log() << "adding shard " << shardConnectionString.toString()
                      << " even though could not add database " << dbName;
            }
        }
    }

    // Record in changelog
    BSONObjBuilder shardDetails;
    shardDetails.append("name", shardType.getName());
    shardDetails.append("host", shardConnectionString.toString());

    Grid::get(opCtx)->catalogClient()->logChange(
        opCtx, "addShard", "", shardDetails.obj(), ShardingCatalogClient::kMajorityWriteConcern);

    // Ensure the added shard is visible to this process.
    auto shardRegistry = Grid::get(opCtx)->shardRegistry();
    if (!shardRegistry->getShard(opCtx, shardType.getName()).isOK()) {
        return {ErrorCodes::OperationFailed,
                "Could not find shard metadata for shard after adding it. This most likely "
                "indicates that the shard was removed immediately after it was added."};
    }
    // Success: keep the replica set monitor that was created for the new shard.
    stopMonitoringGuard.Dismiss();

    return shardType.getName();
}
/**
 * Checks whether the host(s) behind 'targeter' / 'connectionString' may be added as a shard
 * and, if so, returns the ShardType describing it.
 *
 * Runs isMaster against the candidate and rejects it when: the command fails; the node is a
 * mongos; 'maxWireVersion' or 'ismaster' is missing or malformed; there is no master; the
 * replica set name in the connection string and the one reported by the node disagree; the
 * node is a config server; a host of the connection string is not a member of the reported
 * replica set; or the resulting shard name would be 'config'.
 *
 * The returned ShardType carries the chosen name ('*shardProposedName' if given, otherwise the
 * replica set name, otherwise empty), the targeter's current connection string as host, and
 * the kShardAware state.
 */
StatusWith<ShardType> ShardingCatalogManager::_validateHostAsShard(
    OperationContext* opCtx,
    std::shared_ptr<RemoteCommandTargeter> targeter,
    const std::string* shardProposedName,
    const ConnectionString& connectionString) {
    auto swCommandResponse = _runCommandForAddShard(
        opCtx, targeter.get(), NamespaceString::kAdminDb, BSON("isMaster" << 1));
    if (swCommandResponse.getStatus() == ErrorCodes::IncompatibleServerVersion) {
        return swCommandResponse.getStatus().withReason(
            str::stream() << "Cannot add " << connectionString.toString()
                          << " as a shard because its binary version is not compatible with "
                             "the cluster's featureCompatibilityVersion.");
    } else if (!swCommandResponse.isOK()) {
        return swCommandResponse.getStatus();
    }

    // Check for a command response error
    auto resIsMasterStatus = std::move(swCommandResponse.getValue().commandStatus);
    if (!resIsMasterStatus.isOK()) {
        return resIsMasterStatus.withContext(str::stream()
                                             << "Error running isMaster against "
                                             << targeter->connectionString().toString());
    }

    auto resIsMaster = std::move(swCommandResponse.getValue().response);

    // Fail if the node being added is a mongos.
    const std::string msg = resIsMaster.getStringField("msg");
    if (msg == "isdbgrid") {
        return {ErrorCodes::IllegalOperation, "cannot add a mongos as a shard"};
    }

    // Extract the maxWireVersion so we can verify that the node being added has a binary version
    // greater than or equal to the cluster's featureCompatibilityVersion. We expect an incompatible
    // binary node to be unable to communicate, returning an IncompatibleServerVersion error,
    // because of our internal wire version protocol. So we can safely invariant here that the node
    // is compatible.
    long long maxWireVersion;
    Status status = bsonExtractIntegerField(resIsMaster, "maxWireVersion", &maxWireVersion);
    if (!status.isOK()) {
        return status.withContext(str::stream() << "isMaster returned invalid 'maxWireVersion' "
                                                << "field when attempting to add "
                                                << connectionString.toString()
                                                << " as a shard");
    }
    if (serverGlobalParams.featureCompatibility.getVersion() >
        ServerGlobalParams::FeatureCompatibility::Version::kFullyDowngradedTo40) {
        // If the cluster's FCV is 4.2, or upgrading to / downgrading from, the node being added
        // must be a v4.2 binary.
        invariant(maxWireVersion == WireVersion::LATEST_WIRE_VERSION);
    } else {
        // If the cluster's FCV is 4.0, the node being added must be a v4.0 or v4.2 binary.
        invariant(serverGlobalParams.featureCompatibility.getVersion() ==
                  ServerGlobalParams::FeatureCompatibility::Version::kFullyDowngradedTo40);
        invariant(maxWireVersion >= WireVersion::LATEST_WIRE_VERSION - 1);
    }

    // Check whether there is a master. If there isn't, the replica set may not have been
    // initiated. If the connection is a standalone, it will return true for isMaster.
    bool isMaster;
    status = bsonExtractBooleanField(resIsMaster, "ismaster", &isMaster);
    if (!status.isOK()) {
        return status.withContext(str::stream() << "isMaster returned invalid 'ismaster' "
                                                << "field when attempting to add "
                                                << connectionString.toString()
                                                << " as a shard");
    }
    if (!isMaster) {
        return {ErrorCodes::NotMaster,
                str::stream()
                    << connectionString.toString()
                    << " does not have a master. If this is a replica set, ensure that it has a"
                    << " healthy primary and that the set has been properly initiated."};
    }

    const std::string providedSetName = connectionString.getSetName();
    const std::string foundSetName = resIsMaster["setName"].str();

    // Make sure the specified replica set name (if any) matches the actual shard's replica set
    if (providedSetName.empty() && !foundSetName.empty()) {
        return {ErrorCodes::OperationFailed,
                str::stream() << "host is part of set " << foundSetName << "; "
                              << "use replica set url format "
                              << "<setname>/<server1>,<server2>, ..."};
    }

    if (!providedSetName.empty() && foundSetName.empty()) {
        return {ErrorCodes::OperationFailed,
                str::stream() << "host did not return a set name; "
                              << "is the replica set still initializing? "
                              << resIsMaster};
    }

    // Make sure the set name specified in the connection string matches the one where its hosts
    // belong into
    if (!providedSetName.empty() && (providedSetName != foundSetName)) {
        return {ErrorCodes::OperationFailed,
                str::stream() << "the provided connection string (" << connectionString.toString()
                              << ") does not match the actual set name "
                              << foundSetName};
    }

    // Is it a config server?
    if (resIsMaster.hasField("configsvr")) {
        return {ErrorCodes::OperationFailed,
                str::stream() << "Cannot add " << connectionString.toString()
                              << " as a shard since it is a config server"};
    }

    // If the shard is part of a replica set, make sure all the hosts mentioned in the connection
    // string are part of the set. It is fine if not all members of the set are mentioned in the
    // connection string, though.
    if (!providedSetName.empty()) {
        // Gather every member the node reports. Each list is only read when it is actually an
        // object, so a reply lacking "hosts" ends in the "does not belong to replica set" error
        // below rather than in an exception thrown while reading the field.
        std::set<std::string> hostSet;
        for (const char* memberField : {"hosts", "passives", "arbiters"}) {
            const auto members = resIsMaster[memberField];
            if (!members.isABSONObj()) {
                continue;
            }

            BSONObjIterator memberIter(members.Obj());
            while (memberIter.more()) {
                hostSet.insert(memberIter.next().String());  // host:port
            }
        }

        for (const auto& hostEntry : connectionString.getServers()) {
            const auto& host = hostEntry.toString();  // host:port
            if (hostSet.find(host) == hostSet.end()) {
                return {ErrorCodes::OperationFailed,
                        str::stream() << "in seed list " << connectionString.toString()
                                      << ", host "
                                      << host
                                      << " does not belong to replica set "
                                      << foundSetName
                                      << "; found "
                                      << resIsMaster.toString()};
            }
        }
    }

    std::string actualShardName;

    if (shardProposedName) {
        actualShardName = *shardProposedName;
    } else if (!foundSetName.empty()) {
        // Default it to the name of the replica set
        actualShardName = foundSetName;
    }

    // Disallow adding shard replica set with name 'config'
    if (actualShardName == NamespaceString::kConfigDb) {
        return {ErrorCodes::BadValue, "use of shard replica set with name 'config' is not allowed"};
    }

    // Retrieve the most up to date connection string that we know from the replica set monitor (if
    // this is a replica set shard, otherwise it will be the same value as connectionString).
    ConnectionString actualShardConnStr = targeter->connectionString();

    ShardType shard;
    shard.setName(actualShardName);
    shard.setHost(actualShardConnStr.toString());
    shard.setState(ShardType::ShardState::kShardAware);

    return shard;
}
void Balancer::run() { // this is the body of a BackgroundJob so if we throw here we're basically ending the balancer thread prematurely while ( ! inShutdown() ) { if ( ! _init() ) { log() << "will retry to initialize balancer in one minute" << endl; sleepsecs( 60 ); continue; } break; } int sleepTime = 10; // getConnectioString and dist lock constructor does not throw, which is what we expect on while // on the balancer thread ConnectionString config = configServer.getConnectionString(); DistributedLock balanceLock( config , "balancer" ); while ( ! inShutdown() ) { try { ScopedDbConnection conn(config.toString(), 30); // ping has to be first so we keep things in the config server in sync _ping(); // use fresh shard state Shard::reloadShardInfo(); // refresh chunk size (even though another balancer might be active) Chunk::refreshChunkSize(); SettingsType balancerConfig; string errMsg; if (!grid.getBalancerSettings(&balancerConfig, &errMsg)) { warning() << errMsg; return ; } // now make sure we should even be running if ((balancerConfig.isKeySet() && // balancer config doc exists !grid.shouldBalance(balancerConfig)) || MONGO_FAIL_POINT(skipBalanceRound)) { LOG(1) << "skipping balancing round because balancing is disabled" << endl; // Ping again so scripts can determine if we're active without waiting _ping( true ); conn.done(); sleepsecs( sleepTime ); continue; } uassert( 13258 , "oids broken after resetting!" , _checkOIDs() ); { dist_lock_try lk( &balanceLock , "doing balance round" ); if ( ! lk.got() ) { LOG(1) << "skipping balancing round because another balancer is active" << endl; // Ping again so scripts can determine if we're active without waiting _ping( true ); conn.done(); sleepsecs( sleepTime ); // no need to wake up soon continue; } if ( !isConfigServerConsistent() ) { conn.done(); warning() << "Skipping balancing round because data inconsistency" << " was detected amongst the config servers." 
<< endl; sleepsecs( sleepTime ); continue; } const bool waitForDelete = (balancerConfig.isWaitForDeleteSet() ? balancerConfig.getWaitForDelete() : false); scoped_ptr<WriteConcernOptions> writeConcern; if (balancerConfig.isKeySet()) { // if balancer doc exists. StatusWith<WriteConcernOptions*> extractStatus = balancerConfig.extractWriteConcern(); if (extractStatus.isOK()) { writeConcern.reset(extractStatus.getValue()); } else { warning() << extractStatus.toString(); } } LOG(1) << "*** start balancing round. " << "waitForDelete: " << waitForDelete << ", secondaryThrottle: " << (writeConcern.get() ? writeConcern->toBSON().toString() : "default") << endl; vector<CandidateChunkPtr> candidateChunks; _doBalanceRound( conn.conn() , &candidateChunks ); if ( candidateChunks.size() == 0 ) { LOG(1) << "no need to move any chunk" << endl; _balancedLastTime = 0; } else { _balancedLastTime = _moveChunks(&candidateChunks, writeConcern.get(), waitForDelete ); } LOG(1) << "*** end of balancing round" << endl; } // Ping again so scripts can determine if we're active without waiting _ping( true ); conn.done(); sleepsecs( _balancedLastTime ? sleepTime / 10 : sleepTime ); } catch ( std::exception& e ) { log() << "caught exception while doing balance: " << e.what() << endl; // Just to match the opening statement if in log level 1 LOG(1) << "*** End of balancing round" << endl; sleepsecs( sleepTime ); // sleep a fair amount b/c of error continue; } } }
/**
 * Adds the host or replica set described by 'shardConnectionString' to the cluster.
 *
 * Steps, in order: validate the arguments and the target host, list the databases on the
 * candidate shard and refuse if any is already known to the catalog, pick a shard name if none
 * was determined, upsert the shardIdentity document on the shard, insert the shard document
 * into config.shards, register the discovered databases and finally write a changelog entry.
 *
 * @param shardProposedName optional name for the shard; if non-null it must not be empty.
 * @param maxSize only applied to the shard document when greater than zero.
 * @return the name of the shard on success, otherwise the first error encountered. A
 *         DuplicateKey error from the config.shards insert is reported as success.
 */
StatusWith<string> ShardingCatalogManagerImpl::addShard(
    OperationContext* txn,
    const std::string* shardProposedName,
    const ConnectionString& shardConnectionString,
    const long long maxSize) {
    if (shardConnectionString.type() == ConnectionString::INVALID) {
        return {ErrorCodes::BadValue, "Invalid connection string"};
    }

    if (shardProposedName && shardProposedName->empty()) {
        return {ErrorCodes::BadValue, "shard name cannot be empty"};
    }

    auto shardRegistry = Grid::get(txn)->shardRegistry();

    // TODO: Don't create a detached Shard object, create a detached RemoteCommandTargeter
    // instead.
    const std::shared_ptr<Shard> shard{shardRegistry->createConnection(shardConnectionString)};
    invariant(shard);
    auto targeter = shard->getTargeter();

    // Validate the specified connection string may serve as shard at all
    auto validatedShard =
        _validateHostAsShard(txn, targeter, shardProposedName, shardConnectionString);
    if (!validatedShard.isOK()) {
        // TODO: This is a workaround for the case were we could have some bad shard being
        // requested to be added and we put that bad connection string on the global replica set
        // monitor registry. It needs to be cleaned up so that when a correct replica set is
        // added, it will be recreated.
        ReplicaSetMonitor::remove(shardConnectionString.getSetName());
        return validatedShard.getStatus();
    }
    ShardType& shardType = validatedShard.getValue();

    auto shardDbNames = _getDBNamesListFromShard(txn, targeter);
    if (!shardDbNames.isOK()) {
        return shardDbNames.getStatus();
    }

    // Check that none of the existing shard candidate's dbs exist already
    for (const string& dbName : shardDbNames.getValue()) {
        auto existingDb = _catalogClient->getDatabase(txn, dbName);
        if (existingDb.isOK()) {
            const auto& dbDoc = existingDb.getValue().value;
            return Status(ErrorCodes::OperationFailed,
                          str::stream() << "can't add shard "
                                        << "'"
                                        << shardConnectionString.toString()
                                        << "'"
                                        << " because a local database '"
                                        << dbName
                                        << "' exists in another "
                                        << dbDoc.getPrimary());
        }
        if (existingDb != ErrorCodes::NamespaceNotFound) {
            return existingDb.getStatus();
        }
    }

    // If a name for a shard wasn't provided, generate one
    if (shardType.getName().empty()) {
        StatusWith<string> generatedName = _generateNewShardName(txn);
        if (!generatedName.isOK()) {
            return generatedName.getStatus();
        }
        shardType.setName(generatedName.getValue());
    }

    if (maxSize > 0) {
        shardType.setMaxSizeMB(maxSize);
    }

    // Build and validate the identity document that is written to the shard itself.
    ShardIdentityType shardIdentity;
    shardIdentity.setConfigsvrConnString(shardRegistry->getConfigServerConnectionString());
    shardIdentity.setShardName(shardType.getName());
    shardIdentity.setClusterId(shardRegistry->getClusterId());

    auto identityStatus = shardIdentity.validate();
    if (!identityStatus.isOK()) {
        return identityStatus;
    }

    log() << "going to insert shardIdentity document into shard: " << shardIdentity.toString();

    auto upsertRequest = shardIdentity.createUpsertForAddShard();
    BatchedCommandRequest identityWrite(upsertRequest.release());
    identityWrite.setNS(NamespaceString::kConfigCollectionNamespace);
    identityWrite.setWriteConcern(kMajorityWriteConcern.toBSON());

    auto swIdentityResponse =
        _runCommandForAddShard(txn, targeter.get(), "admin", identityWrite.toBSON());
    if (!swIdentityResponse.isOK()) {
        return swIdentityResponse.getStatus();
    }

    auto identityResponse = std::move(swIdentityResponse.getValue());

    BatchedCommandResponse batchResponse;
    auto batchStatus =
        Shard::CommandResponse::processBatchWriteResponse(identityResponse, &batchResponse);
    if (!batchStatus.isOK()) {
        return batchStatus;
    }

    log() << "going to insert new entry for shard into config.shards: " << shardType.toString();

    Status insertStatus =
        _catalogClient->insertConfigDocument(txn, ShardType::ConfigNS, shardType.toBSON());
    if (!insertStatus.isOK()) {
        log() << "error adding shard: " << shardType.toBSON()
              << " err: " << insertStatus.reason();
        if (insertStatus != ErrorCodes::DuplicateKey) {
            return insertStatus;
        }

        // TODO(SERVER-24213): adding a shard that already exists should be considered success,
        // however this approach does no validation that we are adding the shard with the same
        // options. It also does not protect against adding the same shard with a different
        // shard name and slightly different connection string. This is a temporary hack to
        // get the continuous stepdown suite passing.
        warning() << "Received duplicate key error when inserting new shard with name "
                  << shardType.getName() << " and connection string "
                  << shardConnectionString.toString()
                  << " to config.shards collection. This most likely means that there was an "
                     "attempt to add a shard that already exists in the cluster";
        return shardType.getName();
    }

    // Add all databases which were discovered on the new shard
    for (const string& dbName : shardDbNames.getValue()) {
        DatabaseType dbEntry;
        dbEntry.setName(dbName);
        dbEntry.setPrimary(shardType.getName());
        dbEntry.setSharded(false);

        Status updateStatus = _catalogClient->updateDatabase(txn, dbName, dbEntry);
        if (!updateStatus.isOK()) {
            // Best effort: a failure here does not abort the addShard.
            log() << "adding shard " << shardConnectionString.toString()
                  << " even though could not add database " << dbName;
        }
    }

    // Record in changelog
    BSONObjBuilder shardDetails;
    shardDetails.append("name", shardType.getName());
    shardDetails.append("host", shardConnectionString.toString());

    _catalogClient->logChange(txn, "addShard", "", shardDetails.obj());

    // Ensure the added shard is visible to this process.
    if (!shardRegistry->getShard(txn, shardType.getName())) {
        return {ErrorCodes::OperationFailed,
                "Could not find shard metadata for shard after adding it. This most likely "
                "indicates that the shard was removed immediately after it was added."};
    }

    return shardType.getName();
}
/**
 * Constructs a scoped connection for the given connection string.
 *
 * Stores the string form of 'host', obtains the underlying connection from globalConnPool
 * using 'socketTimeout', remembers the timeout and then calls _setSocketTimeout().
 */
ScopedDbConnection::ScopedDbConnection(const ConnectionString& host, double socketTimeout)
    : _host(host.toString()),
      _conn(globalConnPool.get(host, socketTimeout)),
      _socketTimeout(socketTimeout) {
    _setSocketTimeout();
}
/**
 * Thread body that periodically refreshes this process' entry in the lockpings collection.
 *
 * Runs until shouldStopPinging(addr, process) returns true. Each iteration:
 *   1. upserts the ping document for 'process' with the current time,
 *   2. removes ping documents older than four days whose process holds no non-UNLOCKED lock,
 *   3. under _mutex, retries unlocking every lock id queued in _unlockList.
 * Any failure is logged and the loop waits for the next ping time; nothing is propagated to
 * the caller. On exit the stop request is acknowledged via acknowledgeStopPing().
 *
 * @param addr      config server connection string to ping.
 * @param process   process identifier used as the key of the ping document.
 * @param sleepTime interval passed to waitTillNextPingTime() between iterations.
 */
void LegacyDistLockPinger::_distLockPingThread(ConnectionString addr,
                                               const string& process,
                                               stdx::chrono::milliseconds sleepTime) {
    setThreadName("LockPinger");

    string pingId = pingThreadId(addr, process);

    LOG(0) << "creating distributed lock ping thread for " << addr << " and process " << process
           << " (sleeping for " << sleepTime.count() << "ms)";

    // Number of successful pings made by this thread. Used only to throttle the "pinged
    // successfully" message: every 10th ping is logged at level 0, the rest at level 1.
    // (Previously a function-static that was never incremented, so every ping logged at 0.)
    int loops = 0;
    Date_t lastPingTime = jsTime();
    while (!shouldStopPinging(addr, process)) {
        LOG(3) << "distributed lock pinger '" << pingId << "' about to ping.";

        Date_t pingTime;

        try {
            ScopedDbConnection conn(addr.toString(), 30.0);

            pingTime = jsTime();
            Date_t elapsed(pingTime.millis - lastPingTime.millis);
            if (elapsed.millis > static_cast<unsigned long long>(10 * sleepTime.count())) {
                warning() << "Lock pinger for addr: " << addr << ", proc: " << process
                          << " was inactive for " << elapsed.millis << " ms";
            }

            lastPingTime = pingTime;

            // Refresh the entry corresponding to this process in the lockpings collection.
            conn->update(LockpingsType::ConfigNS,
                         BSON(LockpingsType::process(process)),
                         BSON("$set" << BSON(LockpingsType::ping(pingTime))),
                         true);

            string err = conn->getLastError();
            if (!err.empty()) {
                warning() << "pinging failed for distributed lock pinger '" << pingId << "'."
                          << causedBy(err);
                conn.done();

                if (!shouldStopPinging(addr, process)) {
                    waitTillNextPingTime(sleepTime);
                }
                continue;
            }

            // Remove really old entries from the lockpings collection if they're not
            // holding a lock. This may happen if an instance of a process was taken down
            // and no new instance came up to replace it for a quite a while.
            // NOTE this is NOT the same as the standard take-over mechanism, which forces
            // the lock entry.
            auto activeLocks = conn->query(LocksType::ConfigNS,
                                           BSON(LocksType::state() << NE << LocksType::UNLOCKED));

            uassert(16060,
                    str::stream() << "cannot query locks collection on config server "
                                  << conn.getHost(),
                    activeLocks.get());

            // Processes that currently own a lock; their ping documents must be kept.
            std::set<string> pids;
            while (activeLocks->more()) {
                BSONObj lock = activeLocks->nextSafe();

                if (!lock[LocksType::process()].eoo()) {
                    pids.insert(lock[LocksType::process()].str());
                } else {
                    warning() << "found incorrect lock document during lock ping cleanup: "
                              << lock.toString();
                }
            }

            // This can potentially delete ping entries that are actually active (if the clock
            // of another pinger is too skewed). This is still fine as the lock logic only
            // checks if there is a change in the ping document and the document going away
            // is a valid change.
            Date_t fourDays = pingTime - (4 * 86400 * 1000);  // 4 days
            conn->remove(LockpingsType::ConfigNS,
                         BSON(LockpingsType::process() << NIN << pids << LockpingsType::ping()
                                                       << LT
                                                       << fourDays));
            err = conn->getLastError();

            if (!err.empty()) {
                warning() << "ping cleanup for distributed lock pinger '" << pingId
                          << "' failed." << causedBy(err);
                conn.done();

                if (!shouldStopPinging(addr, process)) {
                    waitTillNextPingTime(sleepTime);
                }
                continue;
            }

            const int pingLogLevel = (loops % 10 == 0) ? 0 : 1;
            ++loops;
            LOG(pingLogLevel) << "cluster " << addr << " pinged successfully at " << pingTime
                              << " by distributed lock pinger '" << pingId
                              << "', sleeping for " << sleepTime.count() << "ms";

            // Remove old locks, if possible
            // Make sure no one else is adding to this list at the same time
            boost::lock_guard<boost::mutex> lk(_mutex);

            int numOldLocks = _unlockList.size();
            if (numOldLocks > 0) {
                LOG(0) << "trying to delete " << _unlockList.size()
                       << " old lock entries for process " << process;
            }

            bool removed = false;
            for (auto iter = _unlockList.begin(); iter != _unlockList.end();
                 iter = (removed ? _unlockList.erase(iter) : ++iter)) {
                removed = false;
                try {
                    // Got DistLockHandle from lock, so we don't need to specify _id again
                    conn->update(LocksType::ConfigNS,
                                 BSON(LocksType::lockID(*iter)),
                                 BSON("$set" << BSON(LocksType::state(LocksType::UNLOCKED))));

                    // Either the update went through or it didn't,
                    // either way we're done trying to unlock.
                    LOG(0) << "handled late remove of old distributed lock with ts " << *iter;
                    removed = true;
                } catch (UpdateNotTheSame&) {
                    LOG(0) << "partially removed old distributed lock with ts " << *iter;
                    removed = true;
                } catch (std::exception& e) {
                    // Entry stays queued and is retried on the next ping.
                    warning() << "could not remove old distributed lock with ts " << *iter
                              << causedBy(e);
                }
            }

            if (numOldLocks > 0 && _unlockList.size() > 0) {
                LOG(0) << "not all old lock entries could be removed for process " << process;
            }

            conn.done();

        } catch (std::exception& e) {
            warning() << "distributed lock pinger '" << pingId
                      << "' detected an exception while pinging." << causedBy(e);
        }

        if (!shouldStopPinging(addr, process)) {
            waitTillNextPingTime(sleepTime);
        }
    }

    warning() << "removing distributed lock ping thread '" << pingId << "'";

    if (shouldStopPinging(addr, process)) {
        acknowledgeStopPing(addr, process);
    }
}
/** * Upgrades v5 to v6. */ bool doUpgradeV5ToV6(const ConnectionString& configLoc, const VersionType& lastVersionInfo, string* errMsg) { string dummy; if (!errMsg) errMsg = &dummy; verify(lastVersionInfo.getCurrentVersion() == UpgradeHistory_DummyBumpPre2_6); Status result = preUpgradeCheck(configLoc, lastVersionInfo, minMongoProcessVersion); if (!result.isOK()) { if (result.code() == ErrorCodes::ManualInterventionRequired) { *errMsg = cannotCleanupMessage; } else { *errMsg = result.toString(); } return false; } // This is not needed because we are not actually going to make any modifications // on the other collections in the config server for this particular upgrade. // startConfigUpgrade(configLoc.toString(), // lastVersionInfo.getCurrentVersion(), // OID::gen()); // If we actually need to modify something in the config servers these need to follow // after calling startConfigUpgrade(...): // // 1. Acquire necessary locks. // 2. Make a backup of the collections we are about to modify. // 3. Perform the upgrade process on the backup collection. // 4. Verify that no changes were made to the collections since the backup was performed. // 5. Call enterConfigUpgradeCriticalSection(configLoc.toString(), // lastVersionInfo.getCurrentVersion()). // 6. Rename the backup collection to the name of the original collection with // dropTarget set to true. // Make sure the { ts: 1 } index is not unique by dropping the existing one // and rebuilding the index with the right specification. 
const BSONObj lockIdxKey = BSON(LocksType::lockID() << 1); const NamespaceString indexNS(LocksType::ConfigNS); bool dropOk = false; try { ScopedDbConnection conn(configLoc); BSONObj dropResponse; dropOk = conn->runCommand(indexNS.db().toString(), BSON("dropIndexes" << indexNS.coll() << "index" << lockIdxKey), dropResponse); conn.done(); } catch (const DBException& ex) { if (ex.getCode() == 13105) { // 13105 is the exception code from SyncClusterConnection::findOne that gets // thrown when one of the command responses has an "ok" field that is not true. dropOk = false; } else { *errMsg = str::stream() << "Failed to drop { ts: 1 } index" << causedBy(ex); return false; } } if (!dropOk && hasBadIndex(configLoc, errMsg)) { // Fail only if the index still exists. return false; } result = clusterCreateIndex(LocksType::ConfigNS, BSON(LocksType::lockID() << 1), false, // unique WriteConcernOptions::AllConfigs, NULL); if (!result.isOK()) { *errMsg = str::stream() << "error while creating { ts: 1 } index on config db" << causedBy(result); return false; } LOG(1) << "Checking to make sure that the right { ts: 1 } index is created..."; if (hasBadIndex(configLoc, errMsg)) { return false; } // We're only after the version bump in commitConfigUpgrade here since we never // get into the critical section. Status commitStatus = commitConfigUpgrade(configLoc.toString(), lastVersionInfo.getCurrentVersion(), MIN_COMPATIBLE_CONFIG_VERSION, CURRENT_CONFIG_VERSION); if (!commitStatus.isOK()) { *errMsg = commitStatus.toString(); return false; } return true; }
StatusWith<string> isValidShard(const string& name, const ConnectionString& shardConnectionString, ScopedDbConnection& conn) { if (conn->type() == ConnectionString::SYNC) { return Status(ErrorCodes::BadValue, "can't use sync cluster as a shard; for a replica set, " "you have to use <setname>/<server1>,<server2>,..."); } BSONObj resIsMongos; // (ok == 0) implies that it is a mongos if (conn->runCommand("admin", BSON("isdbgrid" << 1), resIsMongos)) { return Status(ErrorCodes::BadValue, "can't add a mongos process as a shard"); } BSONObj resIsMaster; if (!conn->runCommand("admin", BSON("isMaster" << 1), resIsMaster)) { return Status(ErrorCodes::OperationFailed, str::stream() << "failed running isMaster: " << resIsMaster); } // if the shard has only one host, make sure it is not part of a replica set string setName = resIsMaster["setName"].str(); string commandSetName = shardConnectionString.getSetName(); if (commandSetName.empty() && !setName.empty()) { return Status(ErrorCodes::BadValue, str::stream() << "host is part of set " << setName << "; " << "use replica set url format " << "<setname>/<server1>,<server2>, ..."); } if (!commandSetName.empty() && setName.empty()) { return Status(ErrorCodes::OperationFailed, str::stream() << "host did not return a set name; " << "is the replica set still initializing? 
" << resIsMaster); } // if the shard is part of replica set, make sure it is the right one if (!commandSetName.empty() && (commandSetName != setName)) { return Status(ErrorCodes::OperationFailed, str::stream() << "host is part of a different set: " << setName); } if (setName.empty()) { // check this isn't a --configsvr BSONObj res; bool ok = conn->runCommand("admin", BSON("replSetGetStatus" << 1), res); if(!ok && res["info"].type() == String && res["info"].String() == "configsvr") { return Status(ErrorCodes::BadValue, "the specified mongod is a --configsvr and " "should thus not be a shard server"); } } // if the shard is part of a replica set, // make sure all the hosts mentioned in 'shardConnectionString' are part of // the set. It is fine if not all members of the set are present in 'shardConnectionString'. bool foundAll = true; string offendingHost; if (!commandSetName.empty()) { set<string> hostSet; BSONObjIterator iter(resIsMaster["hosts"].Obj()); while (iter.more()) { hostSet.insert(iter.next().String()); // host:port } if (resIsMaster["passives"].isABSONObj()) { BSONObjIterator piter(resIsMaster["passives"].Obj()); while (piter.more()) { hostSet.insert(piter.next().String()); // host:port } } if (resIsMaster["arbiters"].isABSONObj()) { BSONObjIterator piter(resIsMaster["arbiters"].Obj()); while (piter.more()) { hostSet.insert(piter.next().String()); // host:port } } vector<HostAndPort> hosts = shardConnectionString.getServers(); for (size_t i = 0; i < hosts.size(); i++) { if (!hosts[i].hasPort()) { hosts[i] = HostAndPort(hosts[i].host(), hosts[i].port()); } string host = hosts[i].toString(); // host:port if (hostSet.find(host) == hostSet.end()) { offendingHost = host; foundAll = false; break; } } } if (!foundAll) { return Status(ErrorCodes::OperationFailed, str::stream() << "in seed list " << shardConnectionString.toString() << ", host " << offendingHost << " does not belong to replica set " << setName); } string shardName(name); // shard name defaults to 
the name of the replica set if (name.empty() && !setName.empty()) { shardName = setName; } // disallow adding shard replica set with name 'config' if (shardName == "config") { return Status(ErrorCodes::BadValue, "use of shard replica set with name 'config' is not allowed"); } return shardName; }
/**
 * Adds the host or replica set described by 'shardConnectionString' as a new shard.
 *
 * Validates the host, lists its databases and refuses when any of them is already present in
 * the catalog, generates a shard name if validation did not yield one, inserts the shard
 * document, reloads the shard registry, registers the discovered databases with the new shard
 * as primary and writes an "addShard" changelog entry.
 *
 * @param maxSize only stored on the shard document when greater than zero.
 * @return the name of the new shard, or the error that stopped the operation.
 */
StatusWith<string> CatalogManagerCommon::addShard(OperationContext* txn,
                                                  const std::string* shardProposedName,
                                                  const ConnectionString& shardConnectionString,
                                                  const long long maxSize) {
    // Validate the specified connection string may serve as shard at all
    auto validatedShard =
        validateHostAsShard(txn, grid.shardRegistry(), shardConnectionString, shardProposedName);
    if (!validatedShard.isOK()) {
        // TODO: This is a workaround for the case were we could have some bad shard being
        // requested to be added and we put that bad connection string on the global replica set
        // monitor registry. It needs to be cleaned up so that when a correct replica set is
        // added, it will be recreated.
        ReplicaSetMonitor::remove(shardConnectionString.getSetName());
        return validatedShard.getStatus();
    }
    ShardType& shardType = validatedShard.getValue();

    auto shardDbNames = getDBNamesListFromShard(txn, grid.shardRegistry(), shardConnectionString);
    if (!shardDbNames.isOK()) {
        return shardDbNames.getStatus();
    }

    // Check that none of the existing shard candidate's dbs exist already
    for (const string& dbName : shardDbNames.getValue()) {
        auto existingDb = getDatabase(txn, dbName);
        if (existingDb.isOK()) {
            const auto& dbDoc = existingDb.getValue().value;
            return Status(ErrorCodes::OperationFailed,
                          str::stream() << "can't add shard "
                                        << "'" << shardConnectionString.toString() << "'"
                                        << " because a local database '" << dbName
                                        << "' exists in another " << dbDoc.getPrimary());
        }
        if (existingDb != ErrorCodes::NamespaceNotFound) {
            return existingDb.getStatus();
        }
    }

    // If a name for a shard wasn't provided, generate one
    if (shardType.getName().empty()) {
        StatusWith<string> generatedName = _generateNewShardName(txn);
        if (!generatedName.isOK()) {
            return Status(ErrorCodes::OperationFailed, "error generating new shard name");
        }
        shardType.setName(generatedName.getValue());
    }

    if (maxSize > 0) {
        shardType.setMaxSizeMB(maxSize);
    }

    log() << "going to add shard: " << shardType.toString();

    Status insertStatus = insert(txn, ShardType::ConfigNS, shardType.toBSON(), NULL);
    if (!insertStatus.isOK()) {
        log() << "error adding shard: " << shardType.toBSON()
              << " err: " << insertStatus.reason();
        return insertStatus;
    }

    // Make sure the new shard is visible
    grid.shardRegistry()->reload(txn);

    // Add all databases which were discovered on the new shard
    for (const string& dbName : shardDbNames.getValue()) {
        DatabaseType dbEntry;
        dbEntry.setName(dbName);
        dbEntry.setPrimary(shardType.getName());
        dbEntry.setSharded(false);

        Status updateStatus = updateDatabase(txn, dbName, dbEntry);
        if (!updateStatus.isOK()) {
            // Best effort: the shard is added even if a database entry could not be written.
            log() << "adding shard " << shardConnectionString.toString()
                  << " even though could not add database " << dbName;
        }
    }

    // Record in changelog
    BSONObjBuilder shardDetails;
    shardDetails.append("name", shardType.getName());
    shardDetails.append("host", shardConnectionString.toString());

    logChange(txn, txn->getClient()->clientAddress(true), "addShard", "", shardDetails.obj());

    return shardType.getName();
}
/**
 * Builds the string used to identify 'host': its string form immediately followed by the
 * database name, then "/" and the user name.
 */
std::string MongoConnectionPool::BuildHostString(const ConnectionString& host) {
    const auto hostAndDatabase = host.toString() + host.getDatabase();
    return hostAndDatabase + "/" + host.getUser();
}
bool Grid::addShard( string* name , const ConnectionString& servers , long long maxSize , string& errMsg ) { // name can be NULL, so provide a dummy one here to avoid testing it elsewhere string nameInternal; if ( ! name ) { name = &nameInternal; } ReplicaSetMonitorPtr rsMonitor; // Check whether the host (or set) exists and run several sanity checks on this request. // There are two set of sanity checks: making sure adding this particular shard is consistent // with the replica set state (if it exists) and making sure this shards databases can be // brought into the grid without conflict. if ( servers.type() == ConnectionString::SYNC ) { errMsg = "can't use sync cluster as a shard for replica set, " "have to use <setname>/<server1>,<server2>,..."; return false; } vector<string> dbNames; try { bool ok = false; { ScopedDbConnection newShardConn(servers.toString()); BSONObj resIsMongos; ok = newShardConn->runCommand( "admin", BSON( "isdbgrid" << 1 ), resIsMongos ); newShardConn.done(); } // should return ok=0, cmd not found if it's a normal mongod if ( ok ) { errMsg = "can't add a mongos process as a shard"; return false; } if ( servers.type() == ConnectionString::SET ) { if (!addReplSetShardCheck( servers, &errMsg )) { return false; } // shard name defaults to the name of the replica set if ( name->empty() && !servers.getSetName().empty() ) { *name = servers.getSetName(); } } // In order to be accepted as a new shard, that mongod must not have any database name // that exists already in any other shards. If that test passes, the new shard's // databases are going to be entered as non-sharded db's whose primary is the // newly added shard. 
BSONObj resListDB; { ScopedDbConnection newShardConn(servers.toString()); ok = newShardConn->runCommand( "admin", BSON( "listDatabases" << 1 ), resListDB ); newShardConn.done(); } if ( !ok ) { errMsg = str::stream() << "failed listing " << servers.toString() << "'s databases:" << resListDB;; return false; } BSONObjIterator i( resListDB["databases"].Obj() ); while ( i.more() ) { BSONObj dbEntry = i.next().Obj(); const string& dbName = dbEntry["name"].String(); if ( _isSpecialLocalDB( dbName ) ) { // 'local', 'admin', and 'config' are system DBs and should be excluded here continue; } else { dbNames.push_back( dbName ); } } if ( servers.type() == ConnectionString::SET ) { rsMonitor = ReplicaSetMonitor::get( servers.getSetName() ); } } catch ( DBException& e ) { if ( servers.type() == ConnectionString::SET ) { ReplicaSetMonitor::remove( servers.getSetName() ); } errMsg = str::stream() << "couldn't connect to new shard " << causedBy(e); return false; } // check that none of the existing shard candidate's db's exist elsewhere for ( vector<string>::const_iterator it = dbNames.begin(); it != dbNames.end(); ++it ) { DBConfigPtr config = getDBConfig( *it , false ); if ( config.get() != NULL ) { ostringstream ss; ss << "can't add shard " << servers.toString() << " because a local database '" << *it; ss << "' exists in another " << config->getPrimary().toString(); errMsg = ss.str(); return false; } } // if a name for a shard wasn't provided, pick one. if ( name->empty() && ! _getNewShardName( name ) ) { errMsg = "error generating new shard name"; return false; } // build the ConfigDB shard document BSONObjBuilder b; b.append(ShardType::name(), *name); b.append(ShardType::host(), rsMonitor ? 
rsMonitor->getServerAddress() : servers.toString()); if (maxSize > 0) { b.append(ShardType::maxSize(), maxSize); } BSONObj shardDoc = b.obj(); { ScopedDbConnection conn(configServer.getPrimary().getConnString(), 30); // check whether the set of hosts (or single host) is not an already a known shard BSONObj old = conn->findOne(ShardType::ConfigNS, BSON(ShardType::host(servers.toString()))); if ( ! old.isEmpty() ) { errMsg = "host already used"; conn.done(); return false; } log() << "going to add shard: " << shardDoc << endl; conn->insert(ShardType::ConfigNS , shardDoc); errMsg = conn->getLastError(); if ( ! errMsg.empty() ) { log() << "error adding shard: " << shardDoc << " err: " << errMsg << endl; conn.done(); return false; } conn.done(); } Shard::reloadShardInfo(); // add all databases of the new shard for ( vector<string>::const_iterator it = dbNames.begin(); it != dbNames.end(); ++it ) { DBConfigPtr config = getDBConfig( *it , true , *name ); if ( ! config ) { log() << "adding shard " << servers << " even though could not add database " << *it << endl; } } // Record in changelog BSONObjBuilder shardDetails; shardDetails.append("name", *name); shardDetails.append("host", servers.toString()); configServer.logChange("addShard", "", shardDetails.obj()); return true; }
/** * Performs sanity check on the given connection string on whether the seed list * is consistent with the view of the set using replSetGetStatus. */ bool addReplSetShardCheck( const ConnectionString& servers, string* errMsg ) { bool ok = false; BSONObj replSetStat; try { ScopedDbConnection newShardConn(servers.toString()); ok = newShardConn->runCommand( "admin", BSON( "replSetGetStatus" << 1 ), replSetStat ); newShardConn.done(); } catch ( const DBException& ex ) { *errMsg = str::stream() << "Error encountered while checking status of " << servers.toString() << ": " << causedBy( ex ); } if( !ok ) { if ( replSetStat["info"].str() == "configsvr" ) { *errMsg = "the specified mongod is a --configsvr and " "should thus not be a shard server"; } else { *errMsg = str::stream() << "error encountered calling replSetGetStatus: " << replSetStat; } return false; } // if the shard has only one host, make sure it is not part of a replica set string setName = replSetStat["set"].str(); string commandSetName = servers.getSetName(); if ( commandSetName.empty() && ! setName.empty() ) { *errMsg = str::stream() << "host is part of set: " << setName << " use replica set url format <setname>/<server1>,<server2>,...."; return false; } if ( !commandSetName.empty() && setName.empty() ) { *errMsg = str::stream() << "host did not return a set name, " << "is the replica set still initializing?" << replSetStat; return false; } // if the shard is part of replica set, make sure it is the right one if ( ! commandSetName.empty() && ( commandSetName != setName ) ) { *errMsg = str::stream() << "host is part of a different set: " << setName; return false; } // if the shard is part of a replica set, make sure all the hosts mentioned in // 'servers' are part of the set. It is fine if not all members of the set // are present in 'servers'. bool foundAll = true; string offendingHost; if ( ! 
commandSetName.empty() ) { set<string> hostSet; BSONElement membersElem( replSetStat["members"] ); if ( membersElem.type() == Array ) { BSONArrayIteratorSorted iter( BSONArray( membersElem.Obj() )); while ( iter.more() ) { hostSet.insert( iter.next()["name"].str() ); // host:port } vector<HostAndPort> hosts = servers.getServers(); for ( size_t i = 0 ; i < hosts.size() ; i++ ) { if (!hosts[i].hasPort()) { hosts[i].setPort(CmdLine::DefaultDBPort); } string host = hosts[i].toString(); // host:port if ( hostSet.find( host ) == hostSet.end() ) { offendingHost = host; foundAll = false; break; } } } if ( hostSet.empty() ) { *errMsg = "replSetGetStatus returned an empty set. " " Please wait for the set to initialize and try again."; return false; } } if ( ! foundAll ) { *errMsg = str::stream() << "in seed list " << servers.toString() << ", host " << offendingHost << " does not belong to replica set " << setName; return false; } return true; }
void Balancer::run() { // this is the body of a BackgroundJob so if we throw here we're basically ending the balancer thread prematurely while ( ! inShutdown() ) { if ( ! _init() ) { log() << "will retry to initialize balancer in one minute" << endl; sleepsecs( 60 ); continue; } break; } int sleepTime = 30; // getConnectioString and dist lock constructor does not throw, which is what we expect on while // on the balancer thread ConnectionString config = configServer.getConnectionString(); DistributedLock balanceLock( config , "balancer" ); while ( ! inShutdown() ) { try { scoped_ptr<ScopedDbConnection> connPtr( ScopedDbConnection::getInternalScopedDbConnection( config.toString() ) ); ScopedDbConnection& conn = *connPtr; // ping has to be first so we keep things in the config server in sync _ping( conn.conn() ); // use fresh shard state Shard::reloadShardInfo(); // refresh chunk size (even though another balancer might be active) Chunk::refreshChunkSize(); BSONObj balancerConfig; // now make sure we should even be running if ( ! grid.shouldBalance( "", &balancerConfig ) ) { LOG(1) << "skipping balancing round because balancing is disabled" << endl; // Ping again so scripts can determine if we're active without waiting _ping( conn.conn(), true ); conn.done(); sleepsecs( sleepTime ); continue; } sleepTime = balancerConfig["_nosleep"].trueValue() ? 30 : 6; uassert( 13258 , "oids broken after resetting!" , _checkOIDs() ); { dist_lock_try lk( &balanceLock , "doing balance round" ); if ( ! 
lk.got() ) { LOG(1) << "skipping balancing round because another balancer is active" << endl; // Ping again so scripts can determine if we're active without waiting _ping( conn.conn(), true ); conn.done(); sleepsecs( sleepTime ); // no need to wake up soon continue; } LOG(1) << "*** start balancing round" << endl; vector<CandidateChunkPtr> candidateChunks; _doBalanceRound( conn.conn() , &candidateChunks ); if ( candidateChunks.size() == 0 ) { LOG(1) << "no need to move any chunk" << endl; _balancedLastTime = 0; } else { _balancedLastTime = _moveChunks( &candidateChunks, balancerConfig["_secondaryThrottle"].trueValue() ); } LOG(1) << "*** end of balancing round" << endl; } // Ping again so scripts can determine if we're active without waiting _ping( conn.conn(), true ); conn.done(); sleepsecs( _balancedLastTime ? sleepTime / 6 : sleepTime ); } catch ( std::exception& e ) { log() << "caught exception while doing balance: " << e.what() << endl; // Just to match the opening statement if in log level 1 LOG(1) << "*** End of balancing round" << endl; sleepsecs( sleepTime ); // sleep a fair amount b/c of error continue; } } }
/**
 * Sends a getLastError-style command, built from 'options' plus each host's recorded opTime
 * and election id, to every endpoint in 'hostOpTimes', then collects all the replies.
 *
 * Each reply that parses cleanly is appended to 'legacyWCResponses' (with errToReport set
 * when the reply carries a write concern error). Dispatch and parse failures are gathered
 * and reported together once every pending reply has been received.
 *
 * @return Status::OK() if nothing was sent or nothing failed; otherwise a Status carrying the
 *         single failure's code, or MultipleErrorsOccurred when more than one failure occurred.
 */
Status enforceLegacyWriteConcern( MultiCommandDispatch* dispatcher,
                                  StringData dbName,
                                  const BSONObj& options,
                                  const HostOpTimeMap& hostOpTimes,
                                  vector<LegacyWCResponse>* legacyWCResponses ) {

    if ( hostOpTimes.empty() ) return Status::OK();

    // Queue one command per endpoint.
    HostOpTimeMap::const_iterator hostIt = hostOpTimes.begin();
    for ( ; hostIt != hostOpTimes.end(); ++hostIt ) {

        const ConnectionString& shardEndpoint = hostIt->first;
        const HostOpTime hot = hostIt->second;
        const OpTime& opTime = hot.opTime;
        const OID& electionId = hot.electionId;

        LOG( 3 ) << "enforcing write concern " << options << " on " << shardEndpoint.toString()
                 << " at opTime " << opTime.toStringPretty() << " with electionID "
                 << electionId;

        BSONObj gleCmd = buildGLECmdWithOpTime( options, opTime, electionId );

        RawBSONSerializable gleCmdSerial( gleCmd );
        dispatcher->addCommand( shardEndpoint, dbName, gleCmdSerial );
    }

    dispatcher->sendAll();

    vector<Status> failedStatuses;

    // Every response has to be received before returning, so failures are only recorded here.
    while ( dispatcher->numPending() > 0 ) {

        ConnectionString shardEndpoint;
        RawBSONSerializable gleResponseSerial;

        Status dispatchStatus = dispatcher->recvAny( &shardEndpoint, &gleResponseSerial );
        if ( dispatchStatus.isOK() ) {

            BSONObj gleResponse = stripNonWCInfo( gleResponseSerial.toBSON() );

            // Use the downconversion tools to classify this reply: ok, a write concern
            // error, or an unknown error that counts as a failure.
            GLEErrors errors;
            Status extractStatus = extractGLEErrors( gleResponse, &errors );
            if ( extractStatus.isOK() ) {
                LegacyWCResponse wcResponse;
                wcResponse.shardHost = shardEndpoint.toString();
                wcResponse.gleResponse = gleResponse;
                if ( errors.wcError.get() ) {
                    wcResponse.errToReport = errors.wcError->getErrMessage();
                }

                legacyWCResponses->push_back( wcResponse );
            }
            else {
                failedStatuses.push_back( extractStatus );
            }
        }
        else {
            failedStatuses.push_back( dispatchStatus );
        }
    }

    if ( failedStatuses.empty() ) return Status::OK();

    // Fold all failures into one message.
    StringBuilder builder;
    builder << "could not enforce write concern";

    for ( size_t i = 0; i < failedStatuses.size(); ++i ) {
        if ( i == 0 ) {
            builder << causedBy( failedStatuses[i].toString() );
        }
        else {
            builder << ":: and ::" << failedStatuses[i].toString();
        }
    }

    if ( failedStatuses.size() == 1u ) {
        return Status( failedStatuses.front().code(), builder.str() );
    }

    return Status( ErrorCodes::MultipleErrorsOccurred, builder.str() );
}
/**
 * Returns a copy of the ping info stored for the (connection string, lock name) pair.
 * The lookup runs under _mutex and goes through operator[] on _lastPings.
 * NOTE(review): if _lastPings is a standard map, a missing key gets a default-constructed
 * entry inserted by this call - confirm against the member's declaration.
 */
DistLockPingInfo DistributedLock::LastPings::getLastPing(const ConnectionString& conn,
                                                         const string& lockName) {
    stdx::lock_guard<stdx::mutex> lock(_mutex);

    const auto pingKey = std::make_pair(conn.toString(), lockName);
    return _lastPings[pingKey];
}
StatusWith<ShardType> ShardingCatalogManagerImpl::_validateHostAsShard( OperationContext* txn, std::shared_ptr<RemoteCommandTargeter> targeter, const std::string* shardProposedName, const ConnectionString& connectionString) { // Check whether any host in the connection is already part of the cluster. Grid::get(txn)->shardRegistry()->reload(txn); for (const auto& hostAndPort : connectionString.getServers()) { std::shared_ptr<Shard> shard; shard = Grid::get(txn)->shardRegistry()->getShardNoReload(hostAndPort.toString()); if (shard) { return {ErrorCodes::OperationFailed, str::stream() << "'" << hostAndPort.toString() << "' " << "is already a member of the existing shard '" << shard->getConnString().toString() << "' (" << shard->getId() << ")."}; } } // Check for mongos and older version mongod connections, and whether the hosts // can be found for the user specified replset. auto swCommandResponse = _runCommandForAddShard(txn, targeter.get(), "admin", BSON("isMaster" << 1)); if (!swCommandResponse.isOK()) { if (swCommandResponse.getStatus() == ErrorCodes::RPCProtocolNegotiationFailed) { // Mongos to mongos commands are no longer supported in the wire protocol // (because mongos does not support OP_COMMAND), similarly for a new mongos // and an old mongod. So the call will fail in such cases. // TODO: If/When mongos ever supports opCommands, this logic will break because // cmdStatus will be OK. return {ErrorCodes::RPCProtocolNegotiationFailed, str::stream() << targeter->connectionString().toString() << " does not recognize the RPC protocol being used. 
This is" << " likely because it contains a node that is a mongos or an old" << " version of mongod."}; } else { return swCommandResponse.getStatus(); } } // Check for a command response error auto resIsMasterStatus = std::move(swCommandResponse.getValue().commandStatus); if (!resIsMasterStatus.isOK()) { return {resIsMasterStatus.code(), str::stream() << "Error running isMaster against " << targeter->connectionString().toString() << ": " << causedBy(resIsMasterStatus)}; } auto resIsMaster = std::move(swCommandResponse.getValue().response); // Check whether there is a master. If there isn't, the replica set may not have been // initiated. If the connection is a standalone, it will return true for isMaster. bool isMaster; Status status = bsonExtractBooleanField(resIsMaster, "ismaster", &isMaster); if (!status.isOK()) { return Status(status.code(), str::stream() << "isMaster returned invalid 'ismaster' " << "field when attempting to add " << connectionString.toString() << " as a shard: " << status.reason()); } if (!isMaster) { return {ErrorCodes::NotMaster, str::stream() << connectionString.toString() << " does not have a master. If this is a replica set, ensure that it has a" << " healthy primary and that the set has been properly initiated."}; } const string providedSetName = connectionString.getSetName(); const string foundSetName = resIsMaster["setName"].str(); // Make sure the specified replica set name (if any) matches the actual shard's replica set if (providedSetName.empty() && !foundSetName.empty()) { return {ErrorCodes::OperationFailed, str::stream() << "host is part of set " << foundSetName << "; " << "use replica set url format " << "<setname>/<server1>,<server2>, ..."}; } if (!providedSetName.empty() && foundSetName.empty()) { return {ErrorCodes::OperationFailed, str::stream() << "host did not return a set name; " << "is the replica set still initializing? 
" << resIsMaster}; } // Make sure the set name specified in the connection string matches the one where its hosts // belong into if (!providedSetName.empty() && (providedSetName != foundSetName)) { return {ErrorCodes::OperationFailed, str::stream() << "the provided connection string (" << connectionString.toString() << ") does not match the actual set name " << foundSetName}; } // Is it a config server? if (resIsMaster.hasField("configsvr")) { return {ErrorCodes::OperationFailed, str::stream() << "Cannot add " << connectionString.toString() << " as a shard since it is a config server"}; } // If the shard is part of a replica set, make sure all the hosts mentioned in the connection // string are part of the set. It is fine if not all members of the set are mentioned in the // connection string, though. if (!providedSetName.empty()) { std::set<string> hostSet; BSONObjIterator iter(resIsMaster["hosts"].Obj()); while (iter.more()) { hostSet.insert(iter.next().String()); // host:port } if (resIsMaster["passives"].isABSONObj()) { BSONObjIterator piter(resIsMaster["passives"].Obj()); while (piter.more()) { hostSet.insert(piter.next().String()); // host:port } } if (resIsMaster["arbiters"].isABSONObj()) { BSONObjIterator piter(resIsMaster["arbiters"].Obj()); while (piter.more()) { hostSet.insert(piter.next().String()); // host:port } } vector<HostAndPort> hosts = connectionString.getServers(); for (size_t i = 0; i < hosts.size(); i++) { const string host = hosts[i].toString(); // host:port if (hostSet.find(host) == hostSet.end()) { return {ErrorCodes::OperationFailed, str::stream() << "in seed list " << connectionString.toString() << ", host " << host << " does not belong to replica set " << foundSetName << "; found " << resIsMaster.toString()}; } } } string actualShardName; if (shardProposedName) { actualShardName = *shardProposedName; } else if (!foundSetName.empty()) { // Default it to the name of the replica set actualShardName = foundSetName; } // Disallow adding 
shard replica set with name 'config' if (actualShardName == "config") { return {ErrorCodes::BadValue, "use of shard replica set with name 'config' is not allowed"}; } // Retrieve the most up to date connection string that we know from the replica set monitor (if // this is a replica set shard, otherwise it will be the same value as connectionString). ConnectionString actualShardConnStr = targeter->connectionString(); ShardType shard; shard.setName(actualShardName); shard.setHost(actualShardConnStr.toString()); return shard; }
bool Grid::addShard( string* name , const ConnectionString& servers , long long maxSize , string& errMsg ) { // name can be NULL, so provide a dummy one here to avoid testing it elsewhere string nameInternal; if ( ! name ) { name = &nameInternal; } ReplicaSetMonitorPtr rsMonitor; // Check whether the host (or set) exists and run several sanity checks on this request. // There are two set of sanity checks: making sure adding this particular shard is consistent // with the replica set state (if it exists) and making sure this shards databases can be // brought into the grid without conflict. vector<string> dbNames; try { ScopedDbConnection newShardConn(servers.toString()); newShardConn->getLastError(); if ( newShardConn->type() == ConnectionString::SYNC ) { newShardConn.done(); errMsg = "can't use sync cluster as a shard. for replica set, have to use <setname>/<server1>,<server2>,..."; return false; } BSONObj resIsMongos; bool ok = newShardConn->runCommand( "admin" , BSON( "isdbgrid" << 1 ) , resIsMongos ); // should return ok=0, cmd not found if it's a normal mongod if ( ok ) { errMsg = "can't add a mongos process as a shard"; newShardConn.done(); return false; } BSONObj resIsMaster; ok = newShardConn->runCommand( "admin" , BSON( "isMaster" << 1 ) , resIsMaster ); if ( !ok ) { ostringstream ss; ss << "failed running isMaster: " << resIsMaster; errMsg = ss.str(); newShardConn.done(); return false; } // if the shard has only one host, make sure it is not part of a replica set string setName = resIsMaster["setName"].str(); string commandSetName = servers.getSetName(); if ( commandSetName.empty() && ! setName.empty() ) { ostringstream ss; ss << "host is part of set " << setName << ", use replica set url format <setname>/<server1>,<server2>,...."; errMsg = ss.str(); newShardConn.done(); return false; } if ( !commandSetName.empty() && setName.empty() ) { ostringstream ss; ss << "host did not return a set name, is the replica set still initializing? 
" << resIsMaster; errMsg = ss.str(); newShardConn.done(); return false; } // if the shard is part of replica set, make sure it is the right one if ( ! commandSetName.empty() && ( commandSetName != setName ) ) { ostringstream ss; ss << "host is part of a different set: " << setName; errMsg = ss.str(); newShardConn.done(); return false; } if( setName.empty() ) { // check this isn't a --configsvr BSONObj res; bool ok = newShardConn->runCommand("admin",BSON("replSetGetStatus"<<1),res); ostringstream ss; if( !ok && res["info"].type() == String && res["info"].String() == "configsvr" ) { errMsg = "the specified mongod is a --configsvr and should thus not be a shard server"; newShardConn.done(); return false; } } // if the shard is part of a replica set, make sure all the hosts mentioned in 'servers' are part of // the set. It is fine if not all members of the set are present in 'servers'. bool foundAll = true; string offendingHost; if ( ! commandSetName.empty() ) { set<string> hostSet; BSONObjIterator iter( resIsMaster["hosts"].Obj() ); while ( iter.more() ) { hostSet.insert( iter.next().String() ); // host:port } if ( resIsMaster["passives"].isABSONObj() ) { BSONObjIterator piter( resIsMaster["passives"].Obj() ); while ( piter.more() ) { hostSet.insert( piter.next().String() ); // host:port } } if ( resIsMaster["arbiters"].isABSONObj() ) { BSONObjIterator piter( resIsMaster["arbiters"].Obj() ); while ( piter.more() ) { hostSet.insert( piter.next().String() ); // host:port } } vector<HostAndPort> hosts = servers.getServers(); for ( size_t i = 0 ; i < hosts.size() ; i++ ) { if (!hosts[i].hasPort()) { hosts[i].setPort(ServerGlobalParams::DefaultDBPort); } string host = hosts[i].toString(); // host:port if ( hostSet.find( host ) == hostSet.end() ) { offendingHost = host; foundAll = false; break; } } } if ( ! 
foundAll ) { ostringstream ss; ss << "in seed list " << servers.toString() << ", host " << offendingHost << " does not belong to replica set " << setName; errMsg = ss.str(); newShardConn.done(); return false; } // shard name defaults to the name of the replica set if ( name->empty() && ! setName.empty() ) *name = setName; // In order to be accepted as a new shard, that mongod must not have any database name that exists already // in any other shards. If that test passes, the new shard's databases are going to be entered as // non-sharded db's whose primary is the newly added shard. BSONObj resListDB; ok = newShardConn->runCommand( "admin" , BSON( "listDatabases" << 1 ) , resListDB ); if ( !ok ) { ostringstream ss; ss << "failed listing " << servers.toString() << "'s databases:" << resListDB; errMsg = ss.str(); newShardConn.done(); return false; } BSONObjIterator i( resListDB["databases"].Obj() ); while ( i.more() ) { BSONObj dbEntry = i.next().Obj(); const string& dbName = dbEntry["name"].String(); if ( _isSpecialLocalDB( dbName ) ) { // 'local', 'admin', and 'config' are system DBs and should be excluded here continue; } else { dbNames.push_back( dbName ); } } if ( newShardConn->type() == ConnectionString::SET ) rsMonitor = ReplicaSetMonitor::get( setName ); newShardConn.done(); } catch ( DBException& e ) { if ( servers.type() == ConnectionString::SET ) { ReplicaSetMonitor::remove( servers.getSetName() ); } ostringstream ss; ss << "couldn't connect to new shard "; ss << e.what(); errMsg = ss.str(); return false; } // check that none of the existing shard candidate's db's exist elsewhere for ( vector<string>::const_iterator it = dbNames.begin(); it != dbNames.end(); ++it ) { DBConfigPtr config = getDBConfig( *it , false ); if ( config.get() != NULL ) { ostringstream ss; ss << "can't add shard " << servers.toString() << " because a local database '" << *it; ss << "' exists in another " << config->getPrimary().toString(); errMsg = ss.str(); return false; } } // if a 
name for a shard wasn't provided, pick one. if ( name->empty() && ! _getNewShardName( name ) ) { errMsg = "error generating new shard name"; return false; } // build the ConfigDB shard document BSONObjBuilder b; b.append(ShardType::name(), *name); b.append(ShardType::host(), rsMonitor ? rsMonitor->getServerAddress() : servers.toString()); if (maxSize > 0) { b.append(ShardType::maxSize(), maxSize); } BSONObj shardDoc = b.obj(); { ScopedDbConnection conn(configServer.getPrimary().getConnString(), 30); // check whether the set of hosts (or single host) is not an already a known shard BSONObj old = conn->findOne(ShardType::ConfigNS, BSON(ShardType::host(servers.toString()))); if ( ! old.isEmpty() ) { errMsg = "host already used"; conn.done(); return false; } conn.done(); } log() << "going to add shard: " << shardDoc << endl; Status result = clusterInsert( ShardType::ConfigNS, shardDoc, WriteConcernOptions::AllConfigs, NULL ); if ( !result.isOK() ) { errMsg = result.reason(); log() << "error adding shard: " << shardDoc << " err: " << errMsg << endl; return false; } Shard::reloadShardInfo(); // add all databases of the new shard for ( vector<string>::const_iterator it = dbNames.begin(); it != dbNames.end(); ++it ) { DBConfigPtr config = getDBConfig( *it , true , *name ); if ( ! config ) { log() << "adding shard " << servers << " even though could not add database " << *it << endl; } } // Record in changelog BSONObjBuilder shardDetails; shardDetails.append("name", *name); shardDetails.append("host", servers.toString()); configServer.logChange("addShard", "", shardDetails.obj()); return true; }
/**
 * Stores 'pd' as the ping info for the (connection string, lock name) pair, replacing
 * whatever was recorded before. The update runs under _mutex.
 */
void DistributedLock::LastPings::setLastPing(const ConnectionString& conn,
                                             const string& lockName,
                                             const DistLockPingInfo& pd) {
    stdx::lock_guard<stdx::mutex> lock(_mutex);

    const auto pingKey = std::make_pair(conn.toString(), lockName);
    _lastPings[pingKey] = pd;
}
/**
 * Skews the clocks of a remote cluster by a particular amount, specified by
 * the "skewHosts" element in a BSONObj.
 *
 * The i-th entry of "skewHosts" is sent, via _skewClockCommand, to the i-th host of
 * 'cluster'. Entries beyond the number of hosts in the cluster are ignored (and logged)
 * instead of indexing past the end of the host list. Does nothing when 'cmdObj' has no
 * "skewHosts" field.
 *
 * Throws (uassert 13678) when a host does not accept the command; exceptions raised while
 * talking to a host are rethrown after the connection has been released.
 */
static void skewClocks( ConnectionString& cluster, BSONObj& cmdObj ) {

    vector<long long> skew;
    if ( cmdObj.hasField("skewHosts") ) {
        bsonArrToNumVector<long long>( cmdObj["skewHosts"], skew );
    }
    else {
        LOG( logLvl ) << "No host clocks to skew." << endl;
        return;
    }

    LOG( logLvl ) << "Skewing clocks of hosts " << cluster << endl;

    // Fetch the host list once rather than on every iteration.
    const vector<HostAndPort> servers = cluster.getServers();

    // Never index past the host list: surplus skew values have no host to apply to.
    size_t numToSkew = skew.size();
    if ( numToSkew > servers.size() ) {
        LOG( logLvl ) << "skewHosts has " << skew.size() << " entries but the cluster has only "
                      << servers.size() << " hosts, ignoring the extra entries." << endl;
        numToSkew = servers.size();
    }

    for ( size_t s = 0; s < numToSkew; ++s ) {

        const long long skewAmount = skew[s];

        ConnectionString server( servers[s] );
        ScopedDbConnection conn( server.toString() );

        BSONObj result;
        try {
            bool success = conn->runCommand( string("admin"),
                                             BSON( "_skewClockCommand" << 1
                                                   << "skew" << skewAmount ),
                                             result );

            uassert( 13678, str::stream() << "Could not communicate with server " << server.toString()
                                          << " in cluster " << cluster.toString()
                                          << " to change skew by " << skewAmount,
                     success );

            LOG( logLvl + 1 ) << " Skewed host " << server << " clock by " << skewAmount << endl;
        }
        catch(...) {
            // Release the connection before propagating the failure.
            conn.done();
            throw;
        }

        conn.done();

    }

}
/**
 * Targets 'command' as if it were the write described by 'targetingBatchItem', sends it to
 * every targeted shard's write host and appends one CommandResult per successful reply to
 * 'results'.
 *
 * Note that this implementation will not handle targeting retries and does not completely
 * emulate write behavior.
 *
 * @return the first targeting or host-resolution error if one occurs before sending;
 *         otherwise the status of the last failed receive, or Status::OK() if none failed.
 */
Status Strategy::commandOpWrite(const std::string& dbName,
                                const BSONObj& command,
                                BatchItemRef targetingBatchItem,
                                std::vector<CommandResult>* results) {

    ChunkManagerTargeter targeter(NamespaceString(
                                    targetingBatchItem.getRequest()->getTargetingNS()));
    Status initStatus = targeter.init();
    if (!initStatus.isOK())
        return initStatus;

    // endpointsOwned owns the targeted endpoints; 'endpoints' is its underlying vector.
    OwnedPointerVector<ShardEndpoint> endpointsOwned;
    vector<ShardEndpoint*>& endpoints = endpointsOwned.mutableVector();

    // Work out which shard endpoints the write would go to.
    switch (targetingBatchItem.getOpType()) {
    case BatchedCommandRequest::BatchType_Insert: {
        ShardEndpoint* endpoint;
        Status targetStatus = targeter.targetInsert(targetingBatchItem.getDocument(), &endpoint);
        if (!targetStatus.isOK())
            return targetStatus;
        endpoints.push_back(endpoint);
        break;
    }
    case BatchedCommandRequest::BatchType_Update: {
        Status targetStatus = targeter.targetUpdate(*targetingBatchItem.getUpdate(), &endpoints);
        if (!targetStatus.isOK())
            return targetStatus;
        break;
    }
    default: {
        invariant(targetingBatchItem.getOpType() == BatchedCommandRequest::BatchType_Delete);
        Status targetStatus = targeter.targetDelete(*targetingBatchItem.getDelete(), &endpoints);
        if (!targetStatus.isOK())
            return targetStatus;
        break;
    }
    }

    DBClientShardResolver resolver;
    DBClientMultiCommand dispatcher;

    // Assemble requests: one copy of the command per targeted shard.
    for (size_t idx = 0; idx < endpoints.size(); ++idx) {

        const ShardEndpoint* endpoint = endpoints[idx];

        ConnectionString host;
        Status resolveStatus = resolver.chooseWriteHost(endpoint->shardName, &host);
        if (!resolveStatus.isOK())
            return resolveStatus;

        RawBSONSerializable request(command);
        dispatcher.addCommand(host, dbName, request);
    }

    // Errors reported when recv'ing responses
    dispatcher.sendAll();
    Status dispatchStatus = Status::OK();

    // Recv responses. Every sent operation must be received, so a failure is only recorded
    // and the loop moves on to the next pending response.
    while (dispatcher.numPending() > 0) {

        ConnectionString host;
        RawBSONSerializable response;

        Status recvStatus = dispatcher.recvAny(&host, &response);
        if (recvStatus.isOK()) {
            CommandResult result;
            result.target = host;
            result.shardTarget = Shard::make(host.toString());
            result.result = response.toBSON();

            results->push_back(result);
        }
        else {
            dispatchStatus = recvStatus;
        }
    }

    return dispatchStatus;
}