Status update(const std::string&, const ParserConfig&) override { return Status(0); }
void DatabaseCloner::_listCollectionsCallback(const StatusWith<Fetcher::QueryResponse>& result, Fetcher::NextAction* nextAction, BSONObjBuilder* getMoreBob) { if (!result.isOK()) { _finishCallback(result.getStatus()); return; } auto batchData(result.getValue()); auto&& documents = batchData.documents; // We may be called with multiple batches leading to a need to grow _collectionInfos. _collectionInfos.reserve(_collectionInfos.size() + documents.size()); std::copy_if(documents.begin(), documents.end(), std::back_inserter(_collectionInfos), _listCollectionsPredicate); // The fetcher will continue to call with kGetMore until an error or the last batch. if (*nextAction == Fetcher::NextAction::kGetMore) { invariant(getMoreBob); getMoreBob->append("getMore", batchData.cursorId); getMoreBob->append("collection", batchData.nss.coll()); return; } // Nothing to do for an empty database. if (_collectionInfos.empty()) { _finishCallback(Status::OK()); return; } _collectionNamespaces.reserve(_collectionInfos.size()); std::set<std::string> seen; for (auto&& info : _collectionInfos) { BSONElement nameElement = info.getField(kNameFieldName); if (nameElement.eoo()) { _finishCallback(Status(ErrorCodes::FailedToParse, str::stream() << "collection info must contain '" << kNameFieldName << "' " << "field : " << info)); return; } if (nameElement.type() != mongo::String) { _finishCallback(Status(ErrorCodes::TypeMismatch, str::stream() << "'" << kNameFieldName << "' field must be a string: " << info)); return; } const std::string collectionName = nameElement.String(); if (seen.find(collectionName) != seen.end()) { _finishCallback(Status(ErrorCodes::DuplicateKey, str::stream() << "collection info contains duplicate collection name " << "'" << collectionName << "': " << info)); return; } BSONElement optionsElement = info.getField(kOptionsFieldName); if (optionsElement.eoo()) { _finishCallback(Status(ErrorCodes::FailedToParse, str::stream() << "collection info must contain '" << kOptionsFieldName << "' " << "field : " << info)); return; } if (!optionsElement.isABSONObj()) { _finishCallback(Status(ErrorCodes::TypeMismatch, str::stream() << "'" << kOptionsFieldName << "' field must be an object: " << info)); return; } const BSONObj optionsObj = optionsElement.Obj(); CollectionOptions options; Status parseStatus = options.parse(optionsObj); if (!parseStatus.isOK()) { _finishCallback(parseStatus); return; } seen.insert(collectionName); _collectionNamespaces.emplace_back(_dbname, collectionName); auto&& nss = *_collectionNamespaces.crbegin(); try { _collectionCloners.emplace_back( _executor, _source, nss, options, stdx::bind( &DatabaseCloner::_collectionClonerCallback, this, stdx::placeholders::_1, nss), _storageInterface); } catch (const UserException& ex) { _finishCallback(ex.toStatus()); return; } } for (auto&& collectionCloner : _collectionCloners) { collectionCloner.setScheduleDbWorkFn(_scheduleDbWorkFn); } // Start first collection cloner. _currentCollectionClonerIter = _collectionCloners.begin(); LOG(1) << " cloning collection " << _currentCollectionClonerIter->getSourceNamespace(); Status startStatus = _startCollectionCloner(*_currentCollectionClonerIter); if (!startStatus.isOK()) { LOG(1) << " failed to start collection cloning on " << _currentCollectionClonerIter->getSourceNamespace() << ": " << startStatus; _finishCallback(startStatus); return; } }
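// Hedged illustration (not from the original source) of a listCollections entry that the
// validation in _listCollectionsCallback above accepts; it assumes kNameFieldName and
// kOptionsFieldName resolve to the literal "name" and "options" fields.
BSONObj collInfo = BSON("name" << "events"
                               << "options" << BSON("capped" << true << "size" << 1024));
// A missing or non-string "name", a duplicate name, or a missing/non-object "options" would
// instead finish the callback with FailedToParse, DuplicateKey, or TypeMismatch.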
Status ModifierPull::prepare(mb::Element root, StringData matchedField, ExecInfo* execInfo) { _preparedState.reset(new PreparedState(root.getDocument())); // If we have a $-positional field, it is time to bind it to an actual field part. if (_posDollar) { if (matchedField.empty()) { return Status(ErrorCodes::BadValue, str::stream() << "The positional operator did not find the match " "needed from the query. Unexpanded update: " << _fieldRef.dottedField()); } _fieldRef.setPart(_posDollar, matchedField); } // Locate the field name in 'root'. Status status = pathsupport::findLongestPrefix( _fieldRef, root, &_preparedState->idxFound, &_preparedState->elemFound); // FindLongestPrefix may say the path does not exist at all, which is fine here, or // that the path was not viable or otherwise wrong, in which case, the mod cannot // proceed. if (status.code() == ErrorCodes::NonExistentPath) { _preparedState->elemFound = root.getDocument().end(); } else if (!status.isOK()) { return status; } // We register interest in the field name. The driver needs this info to sort out if // there is any conflict among mods. execInfo->fieldRef[0] = &_fieldRef; if (!_preparedState->elemFound.ok() || _preparedState->idxFound < (_fieldRef.numParts() - 1)) { // If no target element exists, then there is nothing to do here. _preparedState->noOp = execInfo->noOp = true; return Status::OK(); } // This operation only applies to arrays if (_preparedState->elemFound.getType() != mongo::Array) return Status(ErrorCodes::BadValue, "Cannot apply $pull to a non-array value"); // If the array is empty, there is nothing to pull, so this is a noop. if (!_preparedState->elemFound.hasChildren()) { _preparedState->noOp = execInfo->noOp = true; return Status::OK(); } // Walk the values in the array mb::Element cursor = _preparedState->elemFound.leftChild(); while (cursor.ok()) { if (isMatch(cursor)) _preparedState->elementsToRemove.push_back(cursor); cursor = cursor.rightSibling(); } // If we didn't find any elements to add, then this is a no-op, and therefore in place. if (_preparedState->elementsToRemove.empty()) { _preparedState->noOp = execInfo->noOp = true; } return Status::OK(); }
StatusWith<HostAndPort> RemoteCommandTargeterRS::findHost( const ReadPreferenceSetting& readPref) { invariant(false); return Status(ErrorCodes::IllegalOperation, "Not yet implemented"); }
Status resolveFilePattern(const fs::path& fs_path, std::vector<std::string>& results, GlobLimits setting) { genGlobs(fs_path.string(), results, setting); return Status(0, "OK"); }
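// Hedged usage sketch for resolveFilePattern above; the "%" wildcard syntax and the
// GLOB_FILES limit flag are assumptions about the surrounding glob API, not shown here.
std::vector<std::string> paths;
auto glob_status = resolveFilePattern(fs::path("/etc/%"), paths, GLOB_FILES);
if (glob_status.ok()) {
  // 'paths' now holds every matching path produced by genGlobs.
}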
StatusWith<BalancerSettingsType> BalancerSettingsType::fromBSON(const BSONObj& obj) { BalancerSettingsType settings; { bool stopped; Status status = bsonExtractBooleanFieldWithDefault(obj, kStopped, false, &stopped); if (!status.isOK()) return status; if (stopped) { settings._mode = kOff; } else { std::string modeStr; status = bsonExtractStringFieldWithDefault(obj, kMode, kBalancerModes[kFull], &modeStr); if (!status.isOK()) return status; auto it = std::find(std::begin(kBalancerModes), std::end(kBalancerModes), modeStr); if (it == std::end(kBalancerModes)) { return Status(ErrorCodes::BadValue, "Invalid balancer mode"); } settings._mode = static_cast<BalancerMode>(it - std::begin(kBalancerModes)); } } { BSONElement activeWindowElem; Status status = bsonExtractTypedField(obj, kActiveWindow, Object, &activeWindowElem); if (status.isOK()) { const BSONObj balancingWindowObj = activeWindowElem.Obj(); if (balancingWindowObj.isEmpty()) { return Status(ErrorCodes::BadValue, "activeWindow not specified"); } // Check if both 'start' and 'stop' are present const std::string start = balancingWindowObj.getField("start").str(); const std::string stop = balancingWindowObj.getField("stop").str(); if (start.empty() || stop.empty()) { return Status(ErrorCodes::BadValue, str::stream() << "must specify both start and stop of balancing window: " << balancingWindowObj); } // Check that both 'start' and 'stop' are valid time-of-day boost::posix_time::ptime startTime; boost::posix_time::ptime stopTime; if (!toPointInTime(start, &startTime) || !toPointInTime(stop, &stopTime)) { return Status(ErrorCodes::BadValue, str::stream() << kActiveWindow << " format is " << " { start: \"hh:mm\" , stop: \"hh:mm\" }"); } // Check that start and stop designate different time points if (startTime == stopTime) { return Status(ErrorCodes::BadValue, str::stream() << "start and stop times must be different"); } settings._activeWindowStart = startTime; settings._activeWindowStop = stopTime; } else if (status != ErrorCodes::NoSuchKey) { return status; } } { auto secondaryThrottleStatus = MigrationSecondaryThrottleOptions::createFromBalancerConfig(obj); if (!secondaryThrottleStatus.isOK()) { return secondaryThrottleStatus.getStatus(); } settings._secondaryThrottle = std::move(secondaryThrottleStatus.getValue()); } { bool waitForDelete; Status status = bsonExtractBooleanFieldWithDefault(obj, kWaitForDelete, false, &waitForDelete); if (!status.isOK()) return status; settings._waitForDelete = waitForDelete; } return settings; }
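// Hedged sketch of a balancer settings document that fromBSON above should accept, assuming
// kStopped/kMode/kActiveWindow name the literal "stopped", "mode" and "activeWindow" fields
// and that "full" is one of the entries in kBalancerModes.
BSONObj settingsDoc = BSON("stopped" << false
                                     << "mode" << "full"
                                     << "activeWindow" << BSON("start" << "23:00" << "stop" << "06:00"));
auto swSettings = BalancerSettingsType::fromBSON(settingsDoc);
// Identical start/stop times, a missing "stop", or an unknown mode string all come back as
// ErrorCodes::BadValue rather than a parsed BalancerSettingsType.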
Status s = dbCloner ? dbCloner->start() : Status(ErrorCodes::UnknownError, "Bad!"); if (!s.isOK()) { std::string err = str::stream() << "could not create cloner for database: " << name << " due to: " << s.toString(); _setStatus(Status(ErrorCodes::InitialSyncFailure, err)); error() << err; break; // exit for_each loop } // add cloner to list. _databaseCloners.push_back(dbCloner); } } else { _setStatus(Status(ErrorCodes::InitialSyncFailure, "failed to clone databases due to failed server response.")); } // Move on to the next steps in the process. _doNextActions(); } void DatabasesCloner::_onEachDBCloneFinish(const Status& status, const std::string name) { auto clonersLeft = --_clonersActive; if (status.isOK()) { log() << "database clone finished: " << name; } else { log() << "database clone failed due to " << status.toString(); _setStatus(status); }
Status MetadataLoader::initCollection( const string& ns, const string& shard, CollectionMetadata* metadata ) const { // // Bring collection entry from the config server. // BSONObj collDoc; { try { ScopedDbConnection conn( _configLoc.toString(), 30 ); collDoc = conn->findOne( CollectionType::ConfigNS, QUERY(CollectionType::ns()<<ns)); conn.done(); } catch ( const DBException& e ) { string errMsg = str::stream() << "could not query collection metadata" << causedBy( e ); // We deliberately do not return conn to the pool, since it was involved // with the error here. return Status( ErrorCodes::HostUnreachable, errMsg ); } } string errMsg; if ( collDoc.isEmpty() ) { errMsg = str::stream() << "could not load metadata, collection " << ns << " not found"; warning() << errMsg << endl; return Status( ErrorCodes::NamespaceNotFound, errMsg ); } CollectionType collInfo; if ( !collInfo.parseBSON( collDoc, &errMsg ) || !collInfo.isValid( &errMsg ) ) { errMsg = str::stream() << "could not parse metadata for collection " << ns << causedBy( errMsg ); warning() << errMsg << endl; return Status( ErrorCodes::FailedToParse, errMsg ); } log() << "Collection Info: " << collInfo << endl; if ( collInfo.isDroppedSet() && collInfo.getDropped() ) { errMsg = str::stream() << "could not load metadata, collection " << ns << " was dropped"; warning() << errMsg << endl; return Status( ErrorCodes::NamespaceNotFound, errMsg ); } if ( collInfo.isKeyPatternSet() && !collInfo.getKeyPattern().isEmpty() ) { // Sharded collection, need to load chunks metadata->_keyPattern = collInfo.getKeyPattern(); metadata->_shardVersion = ChunkVersion( 0, 0, collInfo.getEpoch() ); metadata->_collVersion = ChunkVersion( 0, 0, collInfo.getEpoch() ); metadata->_linkedNS = collInfo.getLinked(); return Status::OK(); } else if ( collInfo.isPrimarySet() && collInfo.getPrimary() == shard ) { // A collection with a non-default primary // Empty primary field not allowed if set dassert( collInfo.getPrimary() != "" ); metadata->_keyPattern = BSONObj(); metadata->_shardVersion = ChunkVersion( 1, 0, collInfo.getEpoch() ); metadata->_collVersion = metadata->_shardVersion; return Status::OK(); } else { // A collection with a primary that doesn't match this shard or is empty, the primary // may have changed before we loaded. errMsg = // br str::stream() << "collection " << ns << " does not have a shard key " << "and primary " << ( collInfo.isPrimarySet() ? collInfo.getPrimary() : "" ) << " does not match this shard " << shard; warning() << errMsg << endl; metadata->_collVersion = ChunkVersion( 0, 0, OID() ); return Status( ErrorCodes::RemoteChangeDetected, errMsg ); } }
Status MetadataLoader::initChunks( const string& ns, const string& shard, const CollectionMetadata* oldMetadata, CollectionMetadata* metadata ) const { map<string, ChunkVersion> versionMap; // Preserve the epoch versionMap[shard] = metadata->_shardVersion; OID epoch = metadata->getCollVersion().epoch(); bool fullReload = true; // Check to see if we should use the old version or not. if ( oldMetadata ) { // If our epochs are compatible, it's useful to use the old metadata for diffs if ( oldMetadata->getCollVersion().hasCompatibleEpoch( epoch ) ) { fullReload = false; dassert( oldMetadata->isValid() ); versionMap[shard] = oldMetadata->_shardVersion; metadata->_collVersion = oldMetadata->_collVersion; // TODO: This could be made more efficient if copying not required, but // not as frequently reloaded as in mongos. metadata->_chunksMap = oldMetadata->_chunksMap; LOG( 2 ) << "loading new chunks for collection " << ns << " using old metadata w/ version " << oldMetadata->getShardVersion() << " and " << metadata->_chunksMap.size() << " chunks" << endl; } else { warning() << "reloading collection metadata for " << ns << " with new epoch " << epoch.toString() << ", the current epoch is " << oldMetadata->getCollVersion().epoch().toString() << endl; } } // Exposes the new metadata's range map and version to the "differ," who // would ultimately be responsible of filling them up. SCMConfigDiffTracker differ( shard ); log() << "Metadata attach" << endl; differ.attach( ns, metadata->_linkedNS, metadata->_chunksMap, metadata->_collVersion, versionMap ); try { ScopedDbConnection conn( _configLoc.toString(), 30 ); auto_ptr<DBClientCursor> cursor = conn->query( ChunkType::ConfigNS, differ.configDiffQuery() ); if ( !cursor.get() ) { // Make our metadata invalid metadata->_collVersion = ChunkVersion( 0, 0, OID() ); metadata->_chunksMap.clear(); conn.done(); return Status( ErrorCodes::HostUnreachable, "problem opening chunk metadata cursor" ); } // // The diff tracker should always find at least one chunk (the highest chunk we saw // last time). If not, something has changed on the config server (potentially between // when we read the collection data and when we read the chunks data). // int diffsApplied = differ.calculateConfigDiff( *cursor ); if ( diffsApplied > 0 ) { // Chunks found, return ok LOG(2) << "loaded " << diffsApplied << " chunks into new metadata for " << ns << " with version " << metadata->_collVersion << endl; metadata->_shardVersion = versionMap[shard]; metadata->fillRanges(); conn.done(); dassert( metadata->isValid() ); return Status::OK(); } else if ( diffsApplied == 0 ) { // No chunks found, the collection is dropping or we're confused // If this is a full reload, assume it is a drop for backwards compatibility // TODO: drop the config.collections entry *before* the chunks and eliminate this // ambiguity string errMsg = str::stream() << "no chunks found when reloading " << ns << ", previous version was " << metadata->_collVersion.toString() << ( fullReload ? ", this is a drop" : "" ); warning() << errMsg << endl; metadata->_collVersion = ChunkVersion( 0, 0, OID() ); metadata->_chunksMap.clear(); conn.done(); return fullReload ? Status( ErrorCodes::NamespaceNotFound, errMsg ) : Status( ErrorCodes::RemoteChangeDetected, errMsg ); } else { // Invalid chunks found, our epoch may have changed because we dropped/recreated // the collection. 
string errMsg = // br str::stream() << "invalid chunks found when reloading " << ns << ", previous version was " << metadata->_collVersion.toString() << ", this should be rare"; warning() << errMsg << endl; metadata->_collVersion = ChunkVersion( 0, 0, OID() ); metadata->_chunksMap.clear(); conn.done(); return Status( ErrorCodes::RemoteChangeDetected, errMsg ); } } catch ( const DBException& e ) { string errMsg = str::stream() << "problem querying chunks metadata" << causedBy( e ); // We deliberately do not return connPtr to the pool, since it was involved // with the error here. return Status( ErrorCodes::HostUnreachable, errMsg ); } }
Status init() { callback_count_ = 0; return Status(0, "OK"); }
Status SimpleCallback(const INotifyEventContextRef& ec, const void* user_data) { callback_count_ += 1; return Status(0, "OK"); }
Status DatabasePlugin::call(const PluginRequest& request, PluginResponse& response) { if (request.count("action") == 0) { return Status(1, "Database plugin must include a request action"); } // Get a domain/key, which are used for most database plugin actions. auto domain = (request.count("domain") > 0) ? request.at("domain") : ""; auto key = (request.count("key") > 0) ? request.at("key") : ""; if (request.at("action") == "reset") { WriteLock lock(kDatabaseReset); DatabasePlugin::kDBInitialized = false; // Prevent RocksDB reentrancy by logger plugins during plugin setup. VLOG(1) << "Resetting the database plugin: " << getName(); auto status = this->reset(); if (!status.ok()) { // The active database could not be reset, fallback to an ephemeral. Registry::get().setActive("database", "ephemeral"); LOG(WARNING) << "Unable to reset database plugin: " << getName(); } DatabasePlugin::kDBInitialized = true; return status; } // Switch over the possible database plugin actions. ReadLock lock(kDatabaseReset); if (request.at("action") == "get") { std::string value; auto status = this->get(domain, key, value); response.push_back({{"v", value}}); return status; } else if (request.at("action") == "put") { if (request.count("value") == 0) { return Status(1, "Database plugin put action requires a value"); } return this->put(domain, key, request.at("value")); } else if (request.at("action") == "remove") { return this->remove(domain, key); } else if (request.at("action") == "remove_range") { auto key_high = (request.count("key_high") > 0) ? request.at("key_high") : ""; if (!key_high.empty() && !key.empty()) { return this->removeRange(domain, key, key_high); } return Status(1, "Missing range"); } else if (request.at("action") == "scan") { // Accumulate scanned keys into a vector. std::vector<std::string> keys; // Optionally allow the caller to request a max number of keys. size_t max = 0; if (request.count("max") > 0) { max = std::stoul(request.at("max")); } auto status = this->scan(domain, keys, request.at("prefix"), max); for (const auto& k : keys) { response.push_back({{"k", k}}); } return status; } return Status(1, "Unknown database plugin action"); }
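// Hedged usage sketch for DatabasePlugin::call above; it assumes PluginRequest is a
// string-to-string map and PluginResponse a vector of such maps, as the handler implies,
// and that 'plugin' points at some concrete DatabasePlugin instance.
PluginRequest put_req = {
    {"action", "put"}, {"domain", "queries"}, {"key", "pack_a"}, {"value", "{}"}};
PluginResponse resp;
auto put_status = plugin->call(put_req, resp);

PluginRequest get_req = {{"action", "get"}, {"domain", "queries"}, {"key", "pack_a"}};
auto get_status = plugin->call(get_req, resp);
// On success the stored value is returned as resp[0]["v"], per the "get" branch above.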
Status Database::dropCollection( OperationContext* txn, const StringData& fullns ) { LOG(1) << "dropCollection: " << fullns << endl; massertNamespaceNotIndex( fullns, "dropCollection" ); Collection* collection = getCollection( txn, fullns ); if ( !collection ) { // collection doesn't exist return Status::OK(); } { NamespaceString s( fullns ); verify( s.db() == _name ); if( s.isSystem() ) { if( s.coll() == "system.profile" ) { if ( _profile != 0 ) return Status( ErrorCodes::IllegalOperation, "turn off profiling before dropping system.profile collection" ); } else { return Status( ErrorCodes::IllegalOperation, "can't drop system ns" ); } } } BackgroundOperation::assertNoBgOpInProgForNs( fullns ); audit::logDropCollection( currentClient.get(), fullns ); try { Status s = collection->getIndexCatalog()->dropAllIndexes(txn, true); if ( !s.isOK() ) { warning() << "could not drop collection, trying to drop indexes" << fullns << " because of " << s.toString(); return s; } } catch( DBException& e ) { stringstream ss; ss << "drop: dropIndexes for collection failed. cause: " << e.what(); ss << ". See http://dochub.mongodb.org/core/data-recovery"; warning() << ss.str() << endl; return Status( ErrorCodes::InternalError, ss.str() ); } verify( collection->_details->getTotalIndexCount() == 0 ); LOG(1) << "\t dropIndexes done" << endl; Top::global.collectionDropped( fullns ); Status s = _dbEntry->dropCollection( txn, fullns ); _clearCollectionCache( fullns ); // we want to do this always if ( !s.isOK() ) return s; DEV { // check all index collection entries are gone string nstocheck = fullns.toString() + ".$"; scoped_lock lk( _collectionLock ); for ( CollectionMap::const_iterator i = _collections.begin(); i != _collections.end(); ++i ) { string temp = i->first; if ( temp.find( nstocheck ) != 0 ) continue; log() << "after drop, bad cache entries for: " << fullns << " have " << temp; verify(0); } } return Status::OK(); }
void BatchWriteExec::executeBatch( const BatchedCommandRequest& clientRequest, BatchedCommandResponse* clientResponse ) { LOG( 4 ) << "starting execution of write batch of size " << static_cast<int>( clientRequest.sizeWriteOps() ) << " for " << clientRequest.getNS() << endl; BatchWriteOp batchOp; batchOp.initClientRequest( &clientRequest ); // Current batch status bool refreshedTargeter = false; int rounds = 0; int numCompletedOps = 0; int numRoundsWithoutProgress = 0; while ( !batchOp.isFinished() ) { // // Get child batches to send using the targeter // // Targeting errors can be caused by remote metadata changing (the collection could have // been dropped and recreated, for example with a new shard key). If a remote metadata // change occurs *before* a client sends us a batch, we need to make sure that we don't // error out just because we're staler than the client - otherwise mongos will be have // unpredictable behavior. // // (If a metadata change happens *during* or *after* a client sends us a batch, however, // we make no guarantees about delivery.) // // For this reason, we don't record targeting errors until we've refreshed our targeting // metadata at least once *after* receiving the client batch - at that point, we know: // // 1) our new metadata is the same as the metadata when the client sent a batch, and so // targeting errors are real. // OR // 2) our new metadata is a newer version than when the client sent a batch, and so // the metadata must have changed after the client batch was sent. We don't need to // deliver in this case, since for all the client knows we may have gotten the batch // exactly when the metadata changed. // OwnedPointerVector<TargetedWriteBatch> childBatchesOwned; vector<TargetedWriteBatch*>& childBatches = childBatchesOwned.mutableVector(); // If we've already had a targeting error, we've refreshed the metadata once and can // record target errors definitively. 
bool recordTargetErrors = refreshedTargeter; Status targetStatus = batchOp.targetBatch( *_targeter, recordTargetErrors, &childBatches ); if ( !targetStatus.isOK() ) { // Don't do anything until a targeter refresh _targeter->noteCouldNotTarget(); refreshedTargeter = true; ++_stats->numTargetErrors; dassert( childBatches.size() == 0u ); } // // Send all child batches // size_t numSent = 0; size_t numToSend = childBatches.size(); bool remoteMetadataChanging = false; while ( numSent != numToSend ) { // Collect batches out on the network, mapped by endpoint OwnedHostBatchMap ownedPendingBatches; OwnedHostBatchMap::MapType& pendingBatches = ownedPendingBatches.mutableMap(); // // Send side // // Get as many batches as we can at once for ( vector<TargetedWriteBatch*>::iterator it = childBatches.begin(); it != childBatches.end(); ++it ) { // // Collect the info needed to dispatch our targeted batch // TargetedWriteBatch* nextBatch = *it; // If the batch is NULL, we sent it previously, so skip if ( nextBatch == NULL ) continue; // Figure out what host we need to dispatch our targeted batch ConnectionString shardHost; Status resolveStatus = _resolver->chooseWriteHost( nextBatch->getEndpoint() .shardName, &shardHost ); if ( !resolveStatus.isOK() ) { ++_stats->numResolveErrors; // Record a resolve failure // TODO: It may be necessary to refresh the cache if stale, or maybe just // cancel and retarget the batch WriteErrorDetail error; buildErrorFrom( resolveStatus, &error ); LOG( 4 ) << "unable to send write batch to " << shardHost.toString() << causedBy( resolveStatus.toString() ) << endl; batchOp.noteBatchError( *nextBatch, error ); // We're done with this batch // Clean up when we can't resolve a host delete *it; *it = NULL; --numToSend; continue; } // If we already have a batch for this host, wait until the next time OwnedHostBatchMap::MapType::iterator pendingIt = pendingBatches.find( shardHost ); if ( pendingIt != pendingBatches.end() ) continue; // // We now have all the info needed to dispatch the batch // BatchedCommandRequest request( clientRequest.getBatchType() ); batchOp.buildBatchRequest( *nextBatch, &request ); // Internally we use full namespaces for request/response, but we send the // command to a database with the collection name in the request. NamespaceString nss( request.getNS() ); request.setNS( nss.coll() ); LOG( 4 ) << "sending write batch to " << shardHost.toString() << ": " << request.toString() << endl; _dispatcher->addCommand( shardHost, nss.db(), request ); // Indicate we're done by setting the batch to NULL // We'll only get duplicate hostEndpoints if we have broadcast and non-broadcast // endpoints for the same host, so this should be pretty efficient without // moving stuff around. 
*it = NULL; // Recv-side is responsible for cleaning up the nextBatch when used pendingBatches.insert( make_pair( shardHost, nextBatch ) ); } // Send them all out _dispatcher->sendAll(); numSent += pendingBatches.size(); // // Recv side // while ( _dispatcher->numPending() > 0 ) { // Get the response ConnectionString shardHost; BatchedCommandResponse response; Status dispatchStatus = _dispatcher->recvAny( &shardHost, &response ); // Get the TargetedWriteBatch to find where to put the response dassert( pendingBatches.find( shardHost ) != pendingBatches.end() ); TargetedWriteBatch* batch = pendingBatches.find( shardHost )->second; if ( dispatchStatus.isOK() ) { TrackedErrors trackedErrors; trackedErrors.startTracking( ErrorCodes::StaleShardVersion ); LOG( 4 ) << "write results received from " << shardHost.toString() << ": " << response.toString() << endl; // Dispatch was ok, note response batchOp.noteBatchResponse( *batch, response, &trackedErrors ); // Note if anything was stale const vector<ShardError*>& staleErrors = trackedErrors.getErrors( ErrorCodes::StaleShardVersion ); if ( staleErrors.size() > 0 ) { noteStaleResponses( staleErrors, _targeter ); ++_stats->numStaleBatches; } // Remember if the shard is actively changing metadata right now if ( isShardMetadataChanging( staleErrors ) ) { remoteMetadataChanging = true; } // Remember that we successfully wrote to this shard // NOTE: This will record lastOps for shards where we actually didn't update // or delete any documents, which preserves old behavior but is conservative _stats->noteWriteAt( shardHost, response.isLastOpSet() ? response.getLastOp() : OpTime(), response.isElectionIdSet() ? response.getElectionId() : OID()); } else { // Error occurred dispatching, note it stringstream msg; msg << "write results unavailable from " << shardHost.toString() << causedBy( dispatchStatus.toString() ); WriteErrorDetail error; buildErrorFrom( Status( ErrorCodes::RemoteResultsUnavailable, msg.str() ), &error ); LOG( 4 ) << "unable to receive write results from " << shardHost.toString() << causedBy( dispatchStatus.toString() ) << endl; batchOp.noteBatchError( *batch, error ); } } } ++rounds; ++_stats->numRounds; // If we're done, get out if ( batchOp.isFinished() ) break; // MORE WORK TO DO // // Refresh the targeter if we need to (no-op if nothing stale) // bool targeterChanged = false; Status refreshStatus = _targeter->refreshIfNeeded( &targeterChanged ); if ( !refreshStatus.isOK() ) { // It's okay if we can't refresh, we'll just record errors for the ops if // needed. 
warning() << "could not refresh targeter" << causedBy( refreshStatus.reason() ) << endl; } // // Ensure progress is being made toward completing the batch op // int currCompletedOps = batchOp.numWriteOpsIn( WriteOpState_Completed ); if ( currCompletedOps == numCompletedOps && !targeterChanged && !remoteMetadataChanging ) { ++numRoundsWithoutProgress; } else { numRoundsWithoutProgress = 0; } numCompletedOps = currCompletedOps; if ( numRoundsWithoutProgress > kMaxRoundsWithoutProgress ) { stringstream msg; msg << "no progress was made executing batch write op in " << clientRequest.getNS() << " after " << kMaxRoundsWithoutProgress << " rounds (" << numCompletedOps << " ops completed in " << rounds << " rounds total)"; WriteErrorDetail error; buildErrorFrom( Status( ErrorCodes::NoProgressMade, msg.str() ), &error ); batchOp.abortBatch( error ); break; } } batchOp.buildClientResponse( clientResponse ); LOG( 4 ) << "finished execution of write batch" << ( clientResponse->isErrDetailsSet() ? " with write errors" : "") << ( clientResponse->isErrDetailsSet() && clientResponse->isWriteConcernErrorSet() ? " and" : "" ) << ( clientResponse->isWriteConcernErrorSet() ? " with write concern error" : "" ) << " for " << clientRequest.getNS() << endl; }
StatusWith<SharedSemiFuture<ReplIndexBuildState::IndexCatalogStats>> IndexBuildsCoordinatorMongod::startIndexBuild(OperationContext* opCtx, CollectionUUID collectionUUID, const std::vector<BSONObj>& specs, const UUID& buildUUID) { std::vector<std::string> indexNames; for (auto& spec : specs) { std::string name = spec.getStringField(IndexDescriptor::kIndexNameFieldName); if (name.empty()) { return Status( ErrorCodes::CannotCreateIndex, str::stream() << "Cannot create an index for a spec '" << spec << "' without a non-empty string value for the 'name' field"); } indexNames.push_back(name); } auto nss = UUIDCatalog::get(opCtx).lookupNSSByUUID(collectionUUID); auto dbName = nss.db().toString(); auto replIndexBuildState = std::make_shared<ReplIndexBuildState>(buildUUID, collectionUUID, dbName, indexNames, specs); Status status = _registerIndexBuild(opCtx, replIndexBuildState); if (!status.isOK()) { return status; } // Run index build in-line if we are transitioning between replication modes. // While the RSTLExclusive is being held, the async thread in the thread pool is not allowed // to take locks. if (opCtx->lockState()->isRSTLExclusive()) { log() << "Running index build on current thread because we are transitioning between " "replication states: " << buildUUID; // Sets up and runs the index build. Sets result and cleans up index build. _runIndexBuild(opCtx, buildUUID); return replIndexBuildState->sharedPromise.getFuture(); } // Task in thread pool should retain the caller's deadline. auto deadline = opCtx->getDeadline(); auto timeoutError = opCtx->getTimeoutError(); // Task in thread pool should have similar CurOp representation to the caller so that it can be // identified as a createIndexes operation. BSONObj opDesc; { stdx::unique_lock<Client> lk(*opCtx->getClient()); auto curOp = CurOp::get(opCtx); opDesc = curOp->opDescription().getOwned(); } status = _threadPool.schedule([ this, buildUUID, deadline, timeoutError, opDesc ]() noexcept { auto opCtx = Client::getCurrent()->makeOperationContext(); opCtx->setDeadlineByDate(deadline, timeoutError); { stdx::unique_lock<Client> lk(*opCtx->getClient()); auto curOp = CurOp::get(opCtx.get()); curOp->setOpDescription_inlock(opDesc); } // Sets up and runs the index build. Sets result and cleans up index build. _runIndexBuild(opCtx.get(), buildUUID); }); // Clean up the index build if we failed to schedule it. if (!status.isOK()) { stdx::unique_lock<stdx::mutex> lk(_mutex); // Unregister the index build before setting the promises, so callers do not see the build // again. _unregisterIndexBuild(lk, opCtx, replIndexBuildState); // Set the promise in case another thread already joined the index build. replIndexBuildState->sharedPromise.setError(status); return status; } return replIndexBuildState->sharedPromise.getFuture(); }
Status SimpleRecordStoreV1::truncate(OperationContext* txn) { return Status( ErrorCodes::InternalError, "SimpleRecordStoreV1::truncate not implemented" ); }
bool cSmartCardNagra::Init(void) { block=0; isTiger=isT14Nagra=isN3=swapCW=false; caid=SYSTEM_NAGRA; ResetIdSet(); static const unsigned char atrDNASP[] = { 'D','N','A','S','P' }; static const unsigned char atrTIGER[] = { 'T','I','G','E','R' }; static const unsigned char atrNCMED[] = { 'N','C','M','E','D' }; static const unsigned char atrIRDET[] = { 'I','R','D','E','T','O' }; if(!memcmp(atr->hist,atrDNASP,sizeof(atrDNASP))) { if(atr->hist[5]=='2' && atr->hist[6]=='4') isN3=true; PRINTF(L_SC_INIT,"detected native T1 nagra card (N%d Mode)",isN3?3:2); if(!SetIFS(0xFE)) return false; memcpy(rominfo,atr->hist,sizeof(rominfo)); } else if(!memcmp(atr->hist,atrTIGER,sizeof(atrTIGER)) || !memcmp(atr->hist,atrNCMED,sizeof(atrNCMED))) { PRINTF(L_SC_INIT,"detected nagra tiger card"); if(!SetIFS(0xFE)) return false; memcpy(rominfo,atr->hist,sizeof(rominfo)); cardId=0xFFFFFFFF; isTiger=true; } else if(!memcmp(atr->hist,atrIRDET,sizeof(atrIRDET))) { PRINTF(L_SC_INIT,"detected tunneled T14 nagra card"); if(!allowT14) { PRINTF(L_SC_INIT,"Nagra mode for T14 card disabled in setup"); return false; } PRINTF(L_SC_INIT,"using nagra mode"); isT14Nagra=true; if(!DoBlkCmd(0x10,0x02,0x90,0x11)) { PRINTF(L_SC_ERROR,"get rom version failed"); return false; } memcpy(rominfo,&buff[2],15); } else { PRINTF(L_SC_INIT,"doesn't look like a nagra card"); return false; } infoStr.Begin(); infoStr.Strcat("Nagra smartcard\n"); char rom[12], rev[12]; snprintf(rom,sizeof(rom),"%c%c%c%c%c%c%c%c",rominfo[0],rominfo[1],rominfo[2],rominfo[3],rominfo[4],rominfo[5],rominfo[6],rominfo[7]); snprintf(rev,sizeof(rev),"%c%c%c%c%c%c",rominfo[9],rominfo[10],rominfo[11],rominfo[12],rominfo[13],rominfo[14]); PRINTF(L_SC_INIT,"rom version: %s revision: %s",rom,rev); infoStr.Printf("Rom %s Rev %s\n",rom,rev); if(!isTiger) { GetCardStatus(); if(!DoBlkCmd(0x12,0x02,0x92,0x06) || !Status()) return false; cardId=UINT32_BE(buff+5); SetCard(new cCardNagra2(buff+5)); if(!GetDataType(DT01,0x0E)) return false; GetCardStatus(); if(!GetDataType(IRDINFO,0x39)) return false; GetCardStatus(); if(!GetDataType(CAMDATA,0x55)) return false; GetCardStatus(); if(!GetDataType(DT04,0x44)) return false; GetCardStatus(); if(memcmp(rominfo+5,"181",3)!=0) { // not working on ROM181 infoStr.Printf("Tiers\n"); infoStr.Printf("|id |chid| dates |\n"); infoStr.Printf("+----+----+---------------------+\n"); if(!GetDataType(TIERS,0x57)) return false; GetCardStatus(); } if(!GetDataType(DT06,0x16)) return false; GetCardStatus(); } if(!HasCamMod()) { cSmartCardDataNagra cd(cardId,false); cSmartCardDataNagra *entry=(cSmartCardDataNagra *)smartcards.FindCardData(&cd); if(entry) { SetCardData(cardId,entry->bk,entry->exp); SetCamMod(entry->mod); } else { PRINTF(L_SC_ERROR,"can't find CARD modulus"); return false; } } if(!DoCamExchange()) return false; infoStr.Finish(); return true; }
Status repairDatabase( string dbName, bool preserveClonedFilesOnFailure, bool backupOriginalFiles ) { scoped_ptr<RepairFileDeleter> repairFileDeleter; doingRepair dr; dbName = nsToDatabase( dbName ); log() << "repairDatabase " << dbName << endl; invariant( cc().database()->name() == dbName ); invariant( cc().database()->path() == storageGlobalParams.dbpath ); BackgroundOperation::assertNoBgOpInProgForDb(dbName); getDur().syncDataAndTruncateJournal(); // Must be done before and after repair intmax_t totalSize = dbSize( dbName ); intmax_t freeSize = File::freeSpace(storageGlobalParams.repairpath); if ( freeSize > -1 && freeSize < totalSize ) { return Status( ErrorCodes::OutOfDiskSpace, str::stream() << "Cannot repair database " << dbName << " having size: " << totalSize << " (bytes) because free disk space is: " << freeSize << " (bytes)" ); } killCurrentOp.checkForInterrupt(); Path reservedPath = uniqueReservedPath( ( preserveClonedFilesOnFailure || backupOriginalFiles ) ? "backup" : "_tmp" ); MONGO_ASSERT_ON_EXCEPTION( boost::filesystem::create_directory( reservedPath ) ); string reservedPathString = reservedPath.string(); if ( !preserveClonedFilesOnFailure ) repairFileDeleter.reset( new RepairFileDeleter( dbName, reservedPathString, reservedPath ) ); { Database* originalDatabase = dbHolder().get( dbName, storageGlobalParams.dbpath ); if ( originalDatabase == NULL ) return Status( ErrorCodes::NamespaceNotFound, "database does not exist to repair" ); Database* tempDatabase = NULL; { bool justCreated = false; tempDatabase = dbHolderW().getOrCreate( dbName, reservedPathString, justCreated ); invariant( justCreated ); } map<string,CollectionOptions> namespacesToCopy; { string ns = dbName + ".system.namespaces"; Client::Context ctx( ns ); Collection* coll = originalDatabase->getCollection( ns ); if ( coll ) { scoped_ptr<CollectionIterator> it( coll->getIterator( DiskLoc(), false, CollectionScanParams::FORWARD ) ); while ( !it->isEOF() ) { DiskLoc loc = it->getNext(); BSONObj obj = coll->docFor( loc ); string ns = obj["name"].String(); NamespaceString nss( ns ); if ( nss.isSystem() ) { if ( nss.isSystemDotIndexes() ) continue; if ( nss.coll() == "system.namespaces" ) continue; } if ( !nss.isNormal() ) continue; CollectionOptions options; if ( obj["options"].isABSONObj() ) { Status status = options.parse( obj["options"].Obj() ); if ( !status.isOK() ) return status; } namespacesToCopy[ns] = options; } } } for ( map<string,CollectionOptions>::const_iterator i = namespacesToCopy.begin(); i != namespacesToCopy.end(); ++i ) { string ns = i->first; CollectionOptions options = i->second; Collection* tempCollection = NULL; { Client::Context tempContext( ns, tempDatabase ); tempCollection = tempDatabase->createCollection( ns, options, true, false ); } Client::Context readContext( ns, originalDatabase ); Collection* originalCollection = originalDatabase->getCollection( ns ); invariant( originalCollection ); // data MultiIndexBlock indexBlock( tempCollection ); { vector<BSONObj> indexes; IndexCatalog::IndexIterator ii = originalCollection->getIndexCatalog()->getIndexIterator( false ); while ( ii.more() ) { IndexDescriptor* desc = ii.next(); indexes.push_back( desc->infoObj() ); } Client::Context tempContext( ns, tempDatabase ); Status status = indexBlock.init( indexes ); if ( !status.isOK() ) return status; } scoped_ptr<CollectionIterator> iterator( originalCollection->getIterator( DiskLoc(), false, CollectionScanParams::FORWARD ) ); while ( !iterator->isEOF() ) { DiskLoc loc = iterator->getNext(); 
invariant( !loc.isNull() ); BSONObj doc = originalCollection->docFor( loc ); Client::Context tempContext( ns, tempDatabase ); StatusWith<DiskLoc> result = tempCollection->insertDocument( doc, indexBlock ); if ( !result.isOK() ) return result.getStatus(); getDur().commitIfNeeded(); killCurrentOp.checkForInterrupt(false); } { Client::Context tempContext( ns, tempDatabase ); Status status = indexBlock.commit(); if ( !status.isOK() ) return status; } } getDur().syncDataAndTruncateJournal(); MongoFile::flushAll(true); // need both in case journaling is disabled killCurrentOp.checkForInterrupt(false); Client::Context tempContext( dbName, reservedPathString ); Database::closeDatabase( dbName, reservedPathString ); } Client::Context ctx( dbName ); Database::closeDatabase(dbName, storageGlobalParams.dbpath); if ( backupOriginalFiles ) { _renameForBackup( dbName, reservedPath ); } else { _deleteDataFiles( dbName ); MONGO_ASSERT_ON_EXCEPTION( boost::filesystem::create_directory(Path(storageGlobalParams.dbpath) / dbName)); } if ( repairFileDeleter.get() ) repairFileDeleter->success(); _replaceWithRecovered( dbName, reservedPathString.c_str() ); if ( !backupOriginalFiles ) MONGO_ASSERT_ON_EXCEPTION( boost::filesystem::remove_all( reservedPath ) ); return Status::OK(); }
StatusWith<MongosType> MongosType::fromBSON(const BSONObj& source) { MongosType mt; { std::string mtName; Status status = bsonExtractStringField(source, name.name(), &mtName); if (!status.isOK()) return status; mt._name = mtName; } { BSONElement mtPingElem; Status status = bsonExtractTypedField(source, ping.name(), BSONType::Date, &mtPingElem); if (!status.isOK()) return status; mt._ping = mtPingElem.date(); } { long long mtUptime; Status status = bsonExtractIntegerField(source, uptime.name(), &mtUptime); if (!status.isOK()) return status; mt._uptime = mtUptime; } { bool mtWaiting; Status status = bsonExtractBooleanField(source, waiting.name(), &mtWaiting); if (!status.isOK()) return status; mt._waiting = mtWaiting; } if (source.hasField(mongoVersion.name())) { std::string mtMongoVersion; Status status = bsonExtractStringField(source, mongoVersion.name(), &mtMongoVersion); if (!status.isOK()) return status; mt._mongoVersion = mtMongoVersion; } if (source.hasField(configVersion.name())) { long long mtConfigVersion; Status status = bsonExtractIntegerField(source, configVersion.name(), &mtConfigVersion); if (!status.isOK()) return status; mt._configVersion = mtConfigVersion; } if (source.hasField(advisoryHostFQDNs.name())) { mt._advisoryHostFQDNs = std::vector<std::string>(); BSONElement array; Status status = bsonExtractTypedField(source, advisoryHostFQDNs.name(), Array, &array); if (!status.isOK()) return status; BSONObjIterator it(array.Obj()); while (it.more()) { BSONElement arrayElement = it.next(); if (arrayElement.type() != String) { return Status(ErrorCodes::TypeMismatch, str::stream() << "Elements in \"" << advisoryHostFQDNs.name() << "\" array must be strings but found " << typeName(arrayElement.type())); } mt._advisoryHostFQDNs->push_back(arrayElement.String()); } } return mt; }
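// Hedged sketch of a document MongosType::fromBSON above should parse, written against the
// same BSONField constants the parser reads (assumed here to be public statics of MongosType).
BSONObj mongosDoc = BSON(MongosType::name.name() << "host1:27017"
                                                 << MongosType::ping.name() << Date_t::now()
                                                 << MongosType::uptime.name() << 3600LL
                                                 << MongosType::waiting.name() << false);
auto swMongos = MongosType::fromBSON(mongosDoc);
// The optional mongoVersion/configVersion/advisoryHostFQDNs fields may be omitted entirely;
// the fields above them are required and a missing one fails the parse with the extractor's error.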
StatusWith<Shard::CommandResponse> ShardingCatalogManager::_runCommandForAddShard( OperationContext* opCtx, RemoteCommandTargeter* targeter, const std::string& dbName, const BSONObj& cmdObj) { auto swHost = targeter->findHost(opCtx, ReadPreferenceSetting{ReadPreference::PrimaryOnly}); if (!swHost.isOK()) { return swHost.getStatus(); } auto host = std::move(swHost.getValue()); executor::RemoteCommandRequest request( host, dbName, cmdObj, rpc::makeEmptyMetadata(), nullptr, Seconds(30)); executor::RemoteCommandResponse response = Status(ErrorCodes::InternalError, "Internal error running command"); auto swCallbackHandle = _executorForAddShard->scheduleRemoteCommand( request, [&response](const executor::TaskExecutor::RemoteCommandCallbackArgs& args) { response = args.response; }); if (!swCallbackHandle.isOK()) { return swCallbackHandle.getStatus(); } // Block until the command is carried out _executorForAddShard->wait(swCallbackHandle.getValue()); if (response.status == ErrorCodes::ExceededTimeLimit) { LOG(0) << "Operation timed out with status " << redact(response.status); } if (!response.isOK()) { if (!Shard::shouldErrorBePropagated(response.status.code())) { return {ErrorCodes::OperationFailed, str::stream() << "failed to run command " << cmdObj << " when attempting to add shard " << targeter->connectionString().toString() << causedBy(response.status)}; } return response.status; } BSONObj result = response.data.getOwned(); Status commandStatus = getStatusFromCommandResult(result); if (!Shard::shouldErrorBePropagated(commandStatus.code())) { commandStatus = {ErrorCodes::OperationFailed, str::stream() << "failed to run command " << cmdObj << " when attempting to add shard " << targeter->connectionString().toString() << causedBy(commandStatus)}; } Status writeConcernStatus = getWriteConcernStatusFromCommandResult(result); if (!Shard::shouldErrorBePropagated(writeConcernStatus.code())) { writeConcernStatus = {ErrorCodes::OperationFailed, str::stream() << "failed to satisfy writeConcern for command " << cmdObj << " when attempting to add shard " << targeter->connectionString().toString() << causedBy(writeConcernStatus)}; } return Shard::CommandResponse(std::move(host), std::move(result), response.metadata.getOwned(), std::move(commandStatus), std::move(writeConcernStatus)); }
Status explainPlan(const PlanStageStats& stats, TypeExplain** explain, bool fullDetails) { auto_ptr<TypeExplain> res(new TypeExplain); // Descend the plan looking for structural properties: // + are there any 'or's (TODO ands)? if so, prepare to explain each branch recursively // + is it a collection scan or an index scan? // + if the latter, was it covered? // + was a sort necessary? // // TODO: For now, we assume that at most one index is used in a plan bool covered = true; bool sortPresent = false; const PlanStageStats* logicalStage = NULL; const PlanStageStats* root = &stats; const PlanStageStats* leaf = root; while (leaf->children.size() > 0) { // We're failing a plan with multiple children other than OR. // TODO: explain richer plans. if (leaf->children.size() > 1 && !isLogicalStage(leaf->stageType)) { res->setCursor("Complex Plan"); res->setNScanned(0); res->setNScannedObjects(0); *explain = res.release(); return Status::OK(); } if (isLogicalStage(leaf->stageType)) { logicalStage = leaf; break; } if (leaf->stageType == STAGE_FETCH) { covered = false; } if (leaf->stageType == STAGE_SORT) { sortPresent = true; } leaf = leaf->children[0]; } // How many documents did the query return? res->setN(root->common.advanced); // Accounting for 'nscanned' and 'nscannedObjects' is specific to the kind of leaf: // // + on collection scan, both are the same; all the documents retrieved were // fetched in practice. To get how many documents were retrieved, one simply // looks at the number of 'advanced' in the stats. // // + on an index scan, we'd need to look into the index scan cursor to extract the // number of keys that cursor retrieved, and into the stage's stats 'advanced' for // 'nscannedObjects', which would be the number of keys that survived the IXSCAN // filter. Those keys would have been FETCH-ed, if a fetch is present. if (logicalStage != NULL) { uint64_t nScanned = 0; uint64_t nScannedObjects = 0; bool isMultiKey = false; bool isIndexOnly = covered; const std::vector<PlanStageStats*>& children = logicalStage->children; for (std::vector<PlanStageStats*>::const_iterator it = children.begin(); it != children.end(); ++it) { TypeExplain* childExplain = NULL; explainPlan(**it, &childExplain, false /* no full details */); if (childExplain) { res->addToClauses(childExplain); nScanned += childExplain->getNScanned(); // We don't necessarily fetch on a branch, but the old query framework // did. We're still emulating the number it would have produced. nScannedObjects += childExplain->getNScanned(); isMultiKey |= childExplain->getIsMultiKey(); isIndexOnly &= childExplain->getIndexOnly(); } } res->setNScanned(nScanned); res->setNScannedObjects(nScannedObjects); } else if (leaf->stageType == STAGE_COLLSCAN) { CollectionScanStats* csStats = static_cast<CollectionScanStats*>(leaf->specific.get()); res->setCursor("BasicCursor"); res->setNScanned(csStats->docsTested); res->setNScannedObjects(csStats->docsTested); } else if (leaf->stageType == STAGE_GEO_NEAR_2DSPHERE) { // TODO: This is kind of a lie for STAGE_GEO_NEAR_2DSPHERE. res->setCursor("S2NearCursor"); // The first work() is an init. Every subsequent work examines a document. res->setNScanned(leaf->common.works); res->setNScannedObjects(leaf->common.works); // TODO: Could be multikey. res->setIsMultiKey(false); res->setIndexOnly(false); } else if (leaf->stageType == STAGE_GEO_NEAR_2D) { // TODO: This is kind of a lie. res->setCursor("GeoSearchCursor"); // The first work() is an init. Every subsequent work examines a document. 
res->setNScanned(leaf->common.works); res->setNScannedObjects(leaf->common.works); // TODO: Could be multikey. res->setIsMultiKey(false); res->setIndexOnly(false); } else if (leaf->stageType == STAGE_IXSCAN) { IndexScanStats* indexStats = static_cast<IndexScanStats*>(leaf->specific.get()); dassert(indexStats); string direction = indexStats->direction > 0 ? "" : " reverse"; res->setCursor(indexStats->indexType + " " + indexStats->indexName + direction); res->setNScanned(indexStats->keysExamined); // If we're covered, that is, no FETCH is present, then, by definition, // nScannedObject would be zero because no full document would have been fetched // from disk. res->setNScannedObjects(covered ? 0 : leaf->common.advanced); res->setIndexBounds(indexStats->indexBounds); res->setIsMultiKey(indexStats->isMultiKey); res->setIndexOnly(covered); } else { return Status(ErrorCodes::InternalError, "cannot interpret execution plan"); } res->setScanAndOrder(sortPresent); // Statistics for the plan (appear only in a detailed mode) // TODO: if we can get this from the runner, we can kill "detailed mode" if (fullDetails) { res->setNYields(root->common.yields); } *explain = res.release(); return Status::OK(); }
StatusWith<std::string> ShardingCatalogManager::addShard( OperationContext* opCtx, const std::string* shardProposedName, const ConnectionString& shardConnectionString, const long long maxSize) { if (shardConnectionString.type() == ConnectionString::INVALID) { return {ErrorCodes::BadValue, "Invalid connection string"}; } if (shardProposedName && shardProposedName->empty()) { return {ErrorCodes::BadValue, "shard name cannot be empty"}; } // Only one addShard operation can be in progress at a time. Lock::ExclusiveLock lk(opCtx->lockState(), _kShardMembershipLock); // Check if this shard has already been added (can happen in the case of a retry after a network // error, for example) and thus this addShard request should be considered a no-op. auto existingShard = _checkIfShardExists(opCtx, shardConnectionString, shardProposedName, maxSize); if (!existingShard.isOK()) { return existingShard.getStatus(); } if (existingShard.getValue()) { // These hosts already belong to an existing shard, so report success and terminate the // addShard request. Make sure to set the last optime for the client to the system last // optime so that we'll still wait for replication so that this state is visible in the // committed snapshot. repl::ReplClientInfo::forClient(opCtx->getClient()).setLastOpToSystemLastOpTime(opCtx); return existingShard.getValue()->getName(); } // Force a reload of the ShardRegistry to ensure that, in case this addShard is to re-add a // replica set that has recently been removed, we have detached the ReplicaSetMonitor for the // set with that setName from the ReplicaSetMonitorManager and will create a new // ReplicaSetMonitor when targeting the set below. // Note: This is necessary because as of 3.4, removeShard is performed by mongos (unlike // addShard), so the ShardRegistry is not synchronously reloaded on the config server when a // shard is removed. if (!Grid::get(opCtx)->shardRegistry()->reload(opCtx)) { // If the first reload joined an existing one, call reload again to ensure the reload is // fresh. Grid::get(opCtx)->shardRegistry()->reload(opCtx); } // TODO: Don't create a detached Shard object, create a detached RemoteCommandTargeter instead. const std::shared_ptr<Shard> shard{ Grid::get(opCtx)->shardRegistry()->createConnection(shardConnectionString)}; invariant(shard); auto targeter = shard->getTargeter(); auto stopMonitoringGuard = MakeGuard([&] { if (shardConnectionString.type() == ConnectionString::SET) { // This is a workaround for the case were we could have some bad shard being // requested to be added and we put that bad connection string on the global replica set // monitor registry. It needs to be cleaned up so that when a correct replica set is // added, it will be recreated. 
ReplicaSetMonitor::remove(shardConnectionString.getSetName()); } }); // Validate the specified connection string may serve as shard at all auto shardStatus = _validateHostAsShard(opCtx, targeter, shardProposedName, shardConnectionString); if (!shardStatus.isOK()) { return shardStatus.getStatus(); } ShardType& shardType = shardStatus.getValue(); // Check that none of the existing shard candidate's dbs exist already auto dbNamesStatus = _getDBNamesListFromShard(opCtx, targeter); if (!dbNamesStatus.isOK()) { return dbNamesStatus.getStatus(); } for (const auto& dbName : dbNamesStatus.getValue()) { auto dbt = Grid::get(opCtx)->catalogClient()->getDatabase( opCtx, dbName, repl::ReadConcernLevel::kLocalReadConcern); if (dbt.isOK()) { const auto& dbDoc = dbt.getValue().value; return Status(ErrorCodes::OperationFailed, str::stream() << "can't add shard " << "'" << shardConnectionString.toString() << "'" << " because a local database '" << dbName << "' exists in another " << dbDoc.getPrimary()); } else if (dbt != ErrorCodes::NamespaceNotFound) { return dbt.getStatus(); } } // Check that the shard candidate does not have a local config.system.sessions collection auto res = _dropSessionsCollection(opCtx, targeter); if (!res.isOK()) { return res.withContext( "can't add shard with a local copy of config.system.sessions, please drop this " "collection from the shard manually and try again."); } // If a name for a shard wasn't provided, generate one if (shardType.getName().empty()) { auto result = generateNewShardName(opCtx); if (!result.isOK()) { return result.getStatus(); } shardType.setName(result.getValue()); } if (maxSize > 0) { shardType.setMaxSizeMB(maxSize); } // Insert a shardIdentity document onto the shard. This also triggers sharding initialization on // the shard. LOG(2) << "going to insert shardIdentity document into shard: " << shardType; auto commandRequest = createShardIdentityUpsertForAddShard(opCtx, shardType.getName()); auto swCommandResponse = _runCommandForAddShard(opCtx, targeter.get(), "admin", commandRequest); if (!swCommandResponse.isOK()) { return swCommandResponse.getStatus(); } auto commandResponse = std::move(swCommandResponse.getValue()); BatchedCommandResponse batchResponse; auto batchResponseStatus = Shard::CommandResponse::processBatchWriteResponse(commandResponse, &batchResponse); if (!batchResponseStatus.isOK()) { return batchResponseStatus; } // The featureCompatibilityVersion should be the same throughout the cluster. We don't // explicitly send writeConcern majority to the added shard, because a 3.4 mongod will reject // it (setFCV did not support writeConcern until 3.6), and a 3.6 mongod will still default to // majority writeConcern. 
// // TODO SERVER-32045: propagate the user's writeConcern auto versionResponse = _runCommandForAddShard( opCtx, targeter.get(), "admin", BSON(FeatureCompatibilityVersion::kCommandName << FeatureCompatibilityVersion::toString( serverGlobalParams.featureCompatibility.getVersion()))); if (!versionResponse.isOK()) { return versionResponse.getStatus(); } if (!versionResponse.getValue().commandStatus.isOK()) { return versionResponse.getValue().commandStatus; } log() << "going to insert new entry for shard into config.shards: " << shardType.toString(); Status result = Grid::get(opCtx)->catalogClient()->insertConfigDocument( opCtx, ShardType::ConfigNS, shardType.toBSON(), ShardingCatalogClient::kMajorityWriteConcern); if (!result.isOK()) { log() << "error adding shard: " << shardType.toBSON() << " err: " << result.reason(); return result; } // Add all databases which were discovered on the new shard for (const auto& dbName : dbNamesStatus.getValue()) { DatabaseType dbt(dbName, shardType.getName(), false); Status status = Grid::get(opCtx)->catalogClient()->updateDatabase(opCtx, dbName, dbt); if (!status.isOK()) { log() << "adding shard " << shardConnectionString.toString() << " even though could not add database " << dbName; } } // Record in changelog BSONObjBuilder shardDetails; shardDetails.append("name", shardType.getName()); shardDetails.append("host", shardConnectionString.toString()); Grid::get(opCtx) ->catalogClient() ->logChange( opCtx, "addShard", "", shardDetails.obj(), ShardingCatalogClient::kMajorityWriteConcern) .transitional_ignore(); // Ensure the added shard is visible to this process. auto shardRegistry = Grid::get(opCtx)->shardRegistry(); if (!shardRegistry->getShard(opCtx, shardType.getName()).isOK()) { return {ErrorCodes::OperationFailed, "Could not find shard metadata for shard after adding it. This most likely " "indicates that the shard was removed immediately after it was added."}; } stopMonitoringGuard.Dismiss(); return shardType.getName(); }
Status remove(const fs::path& path) { auto status_code = std::remove(path.string().c_str()); return Status(status_code, "N/A"); }
StatusWith<ShardDrainingStatus> ShardingCatalogManager::removeShard(OperationContext* opCtx, const ShardId& shardId) { // Check preconditions for removing the shard std::string name = shardId.toString(); auto countStatus = _runCountCommandOnConfig( opCtx, ShardType::ConfigNS, BSON(ShardType::name() << NE << name << ShardType::draining(true))); if (!countStatus.isOK()) { return countStatus.getStatus(); } if (countStatus.getValue() > 0) { return Status(ErrorCodes::ConflictingOperationInProgress, "Can't have more than one draining shard at a time"); } countStatus = _runCountCommandOnConfig(opCtx, ShardType::ConfigNS, BSON(ShardType::name() << NE << name)); if (!countStatus.isOK()) { return countStatus.getStatus(); } if (countStatus.getValue() == 0) { return Status(ErrorCodes::IllegalOperation, "Can't remove last shard"); } // Figure out if shard is already draining countStatus = _runCountCommandOnConfig( opCtx, ShardType::ConfigNS, BSON(ShardType::name() << name << ShardType::draining(true))); if (!countStatus.isOK()) { return countStatus.getStatus(); } auto* const shardRegistry = Grid::get(opCtx)->shardRegistry(); if (countStatus.getValue() == 0) { log() << "going to start draining shard: " << name; auto updateStatus = Grid::get(opCtx)->catalogClient()->updateConfigDocument( opCtx, ShardType::ConfigNS, BSON(ShardType::name() << name), BSON("$set" << BSON(ShardType::draining(true))), false, ShardingCatalogClient::kLocalWriteConcern); if (!updateStatus.isOK()) { log() << "error starting removeShard: " << name << causedBy(redact(updateStatus.getStatus())); return updateStatus.getStatus(); } shardRegistry->reload(opCtx); // Record start in changelog Grid::get(opCtx) ->catalogClient() ->logChange(opCtx, "removeShard.start", "", BSON("shard" << name), ShardingCatalogClient::kLocalWriteConcern) .transitional_ignore(); return ShardDrainingStatus::STARTED; } // Draining has already started, now figure out how many chunks and databases are still on the // shard. countStatus = _runCountCommandOnConfig(opCtx, ChunkType::ConfigNS, BSON(ChunkType::shard(name))); if (!countStatus.isOK()) { return countStatus.getStatus(); } const long long chunkCount = countStatus.getValue(); countStatus = _runCountCommandOnConfig(opCtx, DatabaseType::ConfigNS, BSON(DatabaseType::primary(name))); if (!countStatus.isOK()) { return countStatus.getStatus(); } const long long databaseCount = countStatus.getValue(); if (chunkCount > 0 || databaseCount > 0) { // Still more draining to do LOG(0) << "chunkCount: " << chunkCount; LOG(0) << "databaseCount: " << databaseCount; return ShardDrainingStatus::ONGOING; } // Draining is done, now finish removing the shard. log() << "going to remove shard: " << name; audit::logRemoveShard(opCtx->getClient(), name); Status status = Grid::get(opCtx)->catalogClient()->removeConfigDocuments( opCtx, ShardType::ConfigNS, BSON(ShardType::name() << name), ShardingCatalogClient::kLocalWriteConcern); if (!status.isOK()) { log() << "Error concluding removeShard operation on: " << name << "; err: " << status.reason(); return status; } shardConnectionPool.removeHost(name); ReplicaSetMonitor::remove(name); shardRegistry->reload(opCtx); // Record finish in changelog Grid::get(opCtx) ->catalogClient() ->logChange(opCtx, "removeShard", "", BSON("shard" << name), ShardingCatalogClient::kLocalWriteConcern) .transitional_ignore(); return ShardDrainingStatus::COMPLETED; }
Status readFile( const fs::path& path, size_t size, size_t block_size, bool dry_run, bool preserve_time, std::function<void(std::string& buffer, size_t size)> predicate) { auto handle = OpenReadableFile(path); if (handle.fd < 0) { return Status(1, "Cannot open file for reading: " + path.string()); } struct stat file; if (fstat(handle.fd, &file) < 0) { return Status(1, "Cannot access path: " + path.string()); } off_t file_size = file.st_size; if (file_size == 0 && size > 0) { file_size = static_cast<off_t>(size); } // Apply the max byte-read based on file/link target ownership. off_t read_max = (file.st_uid == 0) ? FLAGS_read_max : std::min(FLAGS_read_max, FLAGS_read_user_max); if (file_size > read_max) { VLOG(1) << "Cannot read " << path << " size exceeds limit: " << file_size << " > " << read_max; return Status(1, "File exceeds read limits"); } if (dry_run) { // The caller is only interested in performing file read checks. boost::system::error_code ec; return Status(0, fs::canonical(path, ec).string()); } struct timeval times[2]; #if defined(__linux__) TIMESPEC_TO_TIMEVAL(&times[0], &file.st_atim); TIMESPEC_TO_TIMEVAL(&times[1], &file.st_mtim); #else TIMESPEC_TO_TIMEVAL(&times[0], &file.st_atimespec); TIMESPEC_TO_TIMEVAL(&times[1], &file.st_mtimespec); #endif if (file_size == 0) { off_t total_bytes = 0; ssize_t part_bytes = 0; do { auto part = std::string(4096, '\0'); part_bytes = read(handle.fd, &part[0], block_size); if (part_bytes > 0) { total_bytes += part_bytes; if (total_bytes >= read_max) { return Status(1, "File exceeds read limits"); } // content += part.substr(0, part_bytes); predicate(part, part_bytes); } } while (part_bytes > 0); } else { auto content = std::string(file_size, '\0'); read(handle.fd, &content[0], file_size); predicate(content, file_size); } // Attempt to restore the atime and mtime before the file read. if (preserve_time && !FLAGS_disable_forensic) { futimes(handle.fd, times); } return Status(0, "OK"); }
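// A minimal usage sketch for the streaming readFile() above. The lambda collects each block the
// reader hands back. readFileExample and kSamplePath are hypothetical names, and Status::ok() is
// assumed to report a zero status code; everything else follows the signature shown above.
Status readFileExample() {
  const fs::path kSamplePath = "/tmp/example.txt";  // hypothetical input path
  std::string contents;
  // Pass size = 0 so readFile stats the file itself, and a 4 KB block size for the chunked
  // path taken when the reported file size is zero (e.g. procfs entries).
  auto status = readFile(kSamplePath,
                         0,      // size: unknown, let fstat decide
                         4096,   // block_size for the streaming loop
                         false,  // dry_run: actually read the data
                         true,   // preserve_time: restore atime/mtime afterwards
                         [&contents](std::string& buffer, size_t read_size) {
                           // Only the first read_size bytes of the buffer are valid.
                           contents.append(buffer.data(), read_size);
                         });
  if (!status.ok()) {
    return status;
  }
  // contents now holds the file data, bounded by FLAGS_read_max.
  return Status(0, "OK");
}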
Status Element::popFront() { Element left = leftChild(); if (!left.ok()) return Status(ErrorCodes::EmptyArrayOperation, "popFront on empty"); return left.remove(); }
Status cloneCollectionAsCapped( OperationContext* txn, Database* db, const string& shortFrom, const string& shortTo, double size, bool temp, bool logForReplication ) { string fromNs = db->name() + "." + shortFrom; string toNs = db->name() + "." + shortTo; Collection* fromCollection = db->getCollection( txn, fromNs ); if ( !fromCollection ) return Status( ErrorCodes::NamespaceNotFound, str::stream() << "source collection " << fromNs << " does not exist" ); if ( db->getCollection( txn, toNs ) ) return Status( ErrorCodes::NamespaceExists, "to collection already exists" ); // create new collection { Client::Context ctx(txn, toNs ); BSONObjBuilder spec; spec.appendBool( "capped", true ); spec.append( "size", size ); if ( temp ) spec.appendBool( "temp", true ); WriteUnitOfWork wunit(txn); Status status = userCreateNS( txn, ctx.db(), toNs, spec.done(), logForReplication ); if ( !status.isOK() ) return status; wunit.commit(); } Collection* toCollection = db->getCollection( txn, toNs ); invariant( toCollection ); // we created above // how much data to ignore because it won't fit anyway // datasize and extentSize can't be compared exactly, so add some padding to 'size' long long allocatedSpaceGuess = std::max( static_cast<long long>(size * 2), static_cast<long long>(toCollection->getRecordStore()->storageSize(txn) * 2)); long long excessSize = fromCollection->dataSize(txn) - allocatedSpaceGuess; scoped_ptr<PlanExecutor> exec( InternalPlanner::collectionScan(txn, fromNs, fromCollection, InternalPlanner::FORWARD ) ); while ( true ) { BSONObj obj; PlanExecutor::ExecState state = exec->getNext(&obj, NULL); switch( state ) { case PlanExecutor::IS_EOF: return Status::OK(); case PlanExecutor::DEAD: db->dropCollection( txn, toNs ); return Status( ErrorCodes::InternalError, "executor turned dead while iterating" ); case PlanExecutor::EXEC_ERROR: return Status( ErrorCodes::InternalError, "executor error while iterating" ); case PlanExecutor::ADVANCED: if ( excessSize > 0 ) { excessSize -= ( 4 * obj.objsize() ); // 4x is for padding, power of 2, etc... continue; } WriteUnitOfWork wunit(txn); toCollection->insertDocument( txn, obj, true ); if ( logForReplication ) repl::logOp(txn, "i", toNs.c_str(), obj); wunit.commit(); } } invariant( false ); // unreachable }
Status Element::popBack() { Element right = rightChild(); if (!right.ok()) return Status(ErrorCodes::EmptyArrayOperation, "popBack on empty"); return right.remove(); }
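// A small sketch of how a caller might drain an array Element using popFront() above.
// drainArray is a hypothetical helper; it treats the EmptyArrayOperation code as the normal
// termination signal rather than an error, and uses only the Status accessors seen in this code.
Status drainArray(Element array) {
  while (true) {
    Status status = array.popFront();
    if (status.code() == ErrorCodes::EmptyArrayOperation) {
      return Status::OK();  // nothing left to remove
    }
    if (!status.isOK()) {
      return status;  // propagate unexpected failures from remove()
    }
  }
}
// popBack() drains from the tail in the same way.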
Status ReplSetImpl::forceSyncFrom(const string& host, BSONObjBuilder* result) { lock lk(this); // initial sanity check if (iAmArbiterOnly()) { return Status(ErrorCodes::NotSecondary, "arbiters don't sync"); } if (box.getState().primary()) { return Status(ErrorCodes::NotSecondary, "primaries don't sync"); } if (_self != NULL && host == _self->fullName()) { return Status(ErrorCodes::InvalidOptions, "I cannot sync from myself"); } // find the member we want to sync from Member *newTarget = 0; for (Member *m = _members.head(); m; m = m->next()) { if (m->fullName() == host) { newTarget = m; break; } } // do some more sanity checks if (!newTarget) { // this will also catch if someone tries to sync a member from itself, as _self is not // included in the _members list. return Status(ErrorCodes::NodeNotFound, "could not find member in replica set"); } if (newTarget->config().arbiterOnly) { return Status(ErrorCodes::InvalidOptions, "I cannot sync from an arbiter"); } if (!newTarget->config().buildIndexes && myConfig().buildIndexes) { return Status(ErrorCodes::InvalidOptions, "I cannot sync from a member who does not build indexes"); } if (newTarget->hbinfo().authIssue) { return Status(ErrorCodes::Unauthorized, "not authorized to communicate with " + newTarget->fullName()); } if (newTarget->hbinfo().health == 0) { return Status(ErrorCodes::HostUnreachable, "I cannot reach the requested member"); } if (newTarget->hbinfo().opTime.getSecs()+10 < lastOpTimeWritten.getSecs()) { log() << "attempting to sync from " << newTarget->fullName() << ", but its latest opTime is " << newTarget->hbinfo().opTime.getSecs() << " and ours is " << lastOpTimeWritten.getSecs() << " so this may not work" << rsLog; result->append("warning", "requested member is more than 10 seconds behind us"); // not returning false, just warning } // record the previous member we were syncing from const Member *prev = BackgroundSync::get()->getSyncTarget(); if (prev) { result->append("prevSyncTarget", prev->fullName()); } // finally, set the new target _forceSyncTarget = newTarget; return Status::OK(); }
Status verifySystemIndexes(OperationContext* opCtx) { const NamespaceString& systemUsers = AuthorizationManager::usersCollectionNamespace; const NamespaceString& systemRoles = AuthorizationManager::rolesCollectionNamespace; AutoGetDb autoDb(opCtx, systemUsers.db(), MODE_X); if (!autoDb.getDb()) { return Status::OK(); } Collection* collection = autoDb.getDb()->getCollection(opCtx, systemUsers); if (collection) { IndexCatalog* indexCatalog = collection->getIndexCatalog(); invariant(indexCatalog); // Make sure the old unique index from v2.4 on system.users doesn't exist. std::vector<IndexDescriptor*> indexes; indexCatalog->findIndexesByKeyPattern(opCtx, v1SystemUsersKeyPattern, false, &indexes); if (!indexes.empty()) { fassert(ErrorCodes::AmbiguousIndexKeyPattern, indexes.size() == 1); return Status(ErrorCodes::AuthSchemaIncompatible, "Old 2.4 style user index identified. " "The authentication schema needs to be updated by " "running authSchemaUpgrade on a 2.6 server."); } // Ensure that system indexes exist for the user collection indexCatalog->findIndexesByKeyPattern(opCtx, v3SystemUsersKeyPattern, false, &indexes); if (indexes.empty()) { try { generateSystemIndexForExistingCollection( opCtx, collection, systemUsers, v3SystemUsersIndexSpec); } catch (...) { return exceptionToStatus(); } } } // Ensure that system indexes exist for the roles collection, if it exists. collection = autoDb.getDb()->getCollection(opCtx, systemRoles); if (collection) { IndexCatalog* indexCatalog = collection->getIndexCatalog(); invariant(indexCatalog); std::vector<IndexDescriptor*> indexes; indexCatalog->findIndexesByKeyPattern(opCtx, v3SystemRolesKeyPattern, false, &indexes); if (indexes.empty()) { try { generateSystemIndexForExistingCollection( opCtx, collection, systemRoles, v3SystemRolesIndexSpec); } catch (...) { return exceptionToStatus(); } } } // Ensure that system indexes exist for the sessions collection, if it exists. collection = autoDb.getDb()->getCollection(opCtx, sessionCollectionNamespace); if (collection) { IndexCatalog* indexCatalog = collection->getIndexCatalog(); invariant(indexCatalog); std::vector<IndexDescriptor*> indexes; indexCatalog->findIndexesByKeyPattern(opCtx, v1SystemSessionsKeyPattern, false, &indexes); if (indexes.empty()) { try { generateSystemIndexForExistingCollection( opCtx, collection, sessionCollectionNamespace, v1SystemSessionsIndexSpec); } catch (...) { return exceptionToStatus(); } } } return Status::OK(); }