// Handles a `SUBSCRIBE` call received from a resource provider.
//
// A copy of the `ResourceProviderInfo` carried by the call is stamped with
// the ID returned by `newResourceProviderId()`, a `SUBSCRIBED` event carrying
// that ID is sent back over `http`, and the provider is then stored in
// `resourceProviders` keyed by the new ID.
//
// If the `SUBSCRIBED` event cannot be sent, a warning is logged and the
// provider is NOT stored: keeping an entry whose connection is already
// closed would leave a provider in `resourceProviders` that can never be
// reached.
void ResourceProviderManagerProcess::subscribe(
    const HttpConnection& http,
    const Call::Subscribe& subscribe)
{
  ResourceProviderInfo resourceProviderInfo =
    subscribe.resource_provider_info();

  resourceProviderInfo.mutable_id()->CopyFrom(newResourceProviderId());

  ResourceProvider resourceProvider(resourceProviderInfo, http);

  Event event;
  event.set_type(Event::SUBSCRIBED);
  event.mutable_subscribed()->mutable_provider_id()->CopyFrom(
      resourceProvider.info.id());

  if (!resourceProvider.http.send(event)) {
    LOG(WARNING) << "Unable to send event to resource provider "
                 << stringify(resourceProvider.info.id())
                 << ": connection closed";

    // Do not track a provider that never learned its ID.
    return;
  }

  // The key is taken from the local `resourceProviderInfo` rather than from
  // `resourceProvider`, which is being moved from in the same call.
  resourceProviders.put(resourceProviderInfo.id(), std::move(resourceProvider));
}
// Equality for `ResourceProviderInfo`.
//
// Two infos are equal when none of the following differ: their IDs, their
// attributes (compared as `Attributes`), and their resources (compared as
// `Resources`). The checks short-circuit in that order.
bool operator==(
    const ResourceProviderInfo& left,
    const ResourceProviderInfo& right)
{
  const bool anyDifference =
    left.id() != right.id() ||
    Attributes(left.attributes()) != Attributes(right.attributes()) ||
    Resources(left.resources()) != Resources(right.resources());

  return !anyDifference;
}
// This test verifies that, after a master failover, reconciliation of an
// operation that is still pending on an agent results in `OPERATION_PENDING`.
//
// Outline:
//   1. Start a master, an agent and a mock resource provider that exposes a
//      RAW disk resource.
//   2. Subscribe a v1 framework and have it accept an offer with a
//      `CREATE_DISK` operation; the mock provider only captures the
//      `applyOperation` call.
//   3. Restart the master and wait for the agent and the framework to
//      re-register / resubscribe.
//   4. Issue an explicit and then an implicit `RECONCILE_OPERATIONS` call and
//      check that each response contains exactly one status for the
//      operation, in state `OPERATION_PENDING` and without a UUID.
TEST_P(OperationReconciliationTest, AgentPendingOperationAfterMasterFailover)
{
  Clock::pause();

  mesos::internal::master::Flags masterFlags = CreateMasterFlags();
  Try<Owned<cluster::Master>> master = StartMaster(masterFlags);
  ASSERT_SOME(master);

  Future<UpdateSlaveMessage> updateSlaveMessage =
    FUTURE_PROTOBUF(UpdateSlaveMessage(), _, _);

  // The detector is shared between the agent and the scheduler driver below,
  // so that both can be pointed at the new master after the failover.
  auto detector = std::make_shared<StandaloneMasterDetector>(master.get()->pid);

  mesos::internal::slave::Flags slaveFlags = CreateSlaveFlags();
  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), slaveFlags);
  ASSERT_SOME(slave);

  // Advance the clock to trigger agent registration.
  Clock::advance(slaveFlags.registration_backoff_factor);

  // Wait for the agent to register.
  AWAIT_READY(updateSlaveMessage);

  // Start and register a resource provider.
  ResourceProviderInfo resourceProviderInfo;
  resourceProviderInfo.set_type("org.apache.mesos.rp.test");
  resourceProviderInfo.set_name("test");

  // The provider exposes a single RAW disk resource.
  Resource disk = createDiskResource(
      "200", "*", None(), None(), createDiskSourceRaw(None(), "profile"));

  Owned<MockResourceProvider> resourceProvider(
      new MockResourceProvider(
          resourceProviderInfo,
          Resources(disk)));

  // We override the mock resource provider's default action, so the operation
  // will stay in `OPERATION_PENDING`.
  Future<resource_provider::Event::ApplyOperation> applyOperation;
  EXPECT_CALL(*resourceProvider, applyOperation(_))
    .WillOnce(FutureArg<0>(&applyOperation));

  Owned<EndpointDetector> endpointDetector(
      mesos::internal::tests::resource_provider::createEndpointDetector(
          slave.get()->pid));

  // Re-arm the future: the next `UpdateSlaveMessage` is expected once the
  // resource provider has been started.
  updateSlaveMessage = FUTURE_PROTOBUF(UpdateSlaveMessage(), _, _);

  // NOTE: We need to resume the clock so that the resource provider can
  // fully register.
  Clock::resume();

  // The test parameter selects the content type used by both the resource
  // provider and the scheduler driver.
  ContentType contentType = GetParam();

  resourceProvider->start(endpointDetector, contentType);

  // Wait until the agent's resources have been updated to include the
  // resource provider resources.
  AWAIT_READY(updateSlaveMessage);
  ASSERT_TRUE(updateSlaveMessage->has_resource_providers());
  ASSERT_EQ(1, updateSlaveMessage->resource_providers().providers_size());

  Clock::pause();

  // Start a v1 framework.
  auto scheduler = std::make_shared<MockHTTPScheduler>();

  FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;
  frameworkInfo.set_roles(0, DEFAULT_TEST_ROLE);

  EXPECT_CALL(*scheduler, connected(_))
    .WillOnce(scheduler::SendSubscribe(frameworkInfo));

  Future<scheduler::Event::Subscribed> subscribed;
  EXPECT_CALL(*scheduler, subscribed(_, _))
    .WillOnce(FutureArg<1>(&subscribed));

  // Ignore heartbeats.
  EXPECT_CALL(*scheduler, heartbeat(_))
    .WillRepeatedly(Return());

  // Decline offers that do not contain wanted resources.
  EXPECT_CALL(*scheduler, offers(_, _))
    .WillRepeatedly(scheduler::DeclineOffers());

  Future<scheduler::Event::Offers> offers;

  // Predicate matching resources whose disk source type is RAW.
  auto isRaw = [](const Resource& r) {
    return r.has_disk() &&
      r.disk().has_source() &&
      r.disk().source().type() == Resource::DiskInfo::Source::RAW;
  };

  // Capture the first offer that contains a RAW disk.
  EXPECT_CALL(*scheduler, offers(_, scheduler::OffersHaveAnyResource(
      std::bind(isRaw, lambda::_1))))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(scheduler::DeclineOffers()); // Decline successive offers.

  scheduler::TestMesos mesos(
      master.get()->pid,
      contentType,
      scheduler,
      detector);

  AWAIT_READY(subscribed);

  // Remember the framework ID so it can be reused when resubscribing after
  // the master failover.
  FrameworkID frameworkId(subscribed->framework_id());

  // NOTE: If the framework has not declined an unwanted offer yet when
  // the master updates the agent with the RAW disk resource, the new
  // allocation triggered by this update won't generate an allocatable
  // offer due to no CPU and memory resources. So here we first settle
  // the clock to ensure that the unwanted offer has been declined, then
  // advance the clock to trigger another allocation.
  Clock::settle();
  Clock::advance(masterFlags.allocation_interval);

  AWAIT_READY(offers);
  ASSERT_FALSE(offers->offers().empty());

  const Offer& offer = offers->offers(0);
  const AgentID& agentId = offer.agent_id();

  // Pick the first RAW disk resource in the offer, together with the ID of
  // the resource provider it belongs to.
  Option<Resource> source;
  Option<ResourceProviderID> resourceProviderId;
  foreach (const Resource& resource, offer.resources()) {
    if (isRaw(resource)) {
      source = resource;

      ASSERT_TRUE(resource.has_provider_id());
      resourceProviderId = resource.provider_id();

      break;
    }
  }

  ASSERT_SOME(source);
  ASSERT_SOME(resourceProviderId);

  // The operation ID is what the reconciliation calls below refer to.
  OperationID operationId;
  operationId.set_value("operation");

  mesos.send(createCallAccept(
      frameworkId,
      offer,
      {CREATE_DISK(
           source.get(),
           Resource::DiskInfo::Source::MOUNT,
           None(),
           operationId)}));

  // Wait until the operation has reached the mock resource provider, where it
  // is captured by the expectation set up above.
  AWAIT_READY(applyOperation);

  // Simulate master failover.
  EXPECT_CALL(*scheduler, disconnected(_));

  detector->appoint(None());

  // NOTE(review): the replacement master is started with `StartMaster()`,
  // i.e. without passing `masterFlags` — confirm this is intended.
  master->reset();
  master = StartMaster();
  ASSERT_SOME(master);

  // Settle the clock to ensure the master finishes recovering the registry.
  Clock::settle();

  Future<SlaveReregisteredMessage> slaveReregistered = FUTURE_PROTOBUF(
      SlaveReregisteredMessage(), master.get()->pid, slave.get()->pid);

  updateSlaveMessage = FUTURE_PROTOBUF(UpdateSlaveMessage(), _, _);

  // On reconnect the framework subscribes again, this time passing its
  // previously assigned framework ID.
  EXPECT_CALL(*scheduler, connected(_))
    .WillOnce(scheduler::SendSubscribe(frameworkInfo, frameworkId));

  Future<scheduler::Event::Subscribed> frameworkResubscribed;
  EXPECT_CALL(*scheduler, subscribed(_, _))
    .WillOnce(FutureArg<1>(&frameworkResubscribed));

  // Simulate a new master detected event to the agent and the scheduler.
  detector->appoint(master.get()->pid);

  // Advance the clock, so that the agent re-registers.
  Clock::advance(slaveFlags.registration_backoff_factor);

  // Resume the clock to avoid deadlocks related to agent registration.
  // See MESOS-8828.
  Clock::resume();

  // Wait for the framework and agent to re-register.
  AWAIT_READY(slaveReregistered);
  AWAIT_READY(updateSlaveMessage);
  AWAIT_READY(frameworkResubscribed);

  Clock::pause();

  // Test explicit reconciliation: the call names the operation and the agent.
  {
    scheduler::Call::ReconcileOperations::Operation operation;
    operation.mutable_operation_id()->CopyFrom(operationId);
    operation.mutable_agent_id()->CopyFrom(agentId);

    const Future<scheduler::APIResult> result =
      mesos.call({createCallReconcileOperations(frameworkId, {operation})});

    AWAIT_READY(result);

    // The master should respond with '200 OK' and with a `scheduler::Response`.
    ASSERT_EQ(process::http::Status::OK, result->status_code());
    ASSERT_TRUE(result->has_response());

    const scheduler::Response response = result->response();
    ASSERT_EQ(scheduler::Response::RECONCILE_OPERATIONS, response.type());
    ASSERT_TRUE(response.has_reconcile_operations());

    const scheduler::Response::ReconcileOperations& reconcile =
      response.reconcile_operations();
    ASSERT_EQ(1, reconcile.operation_statuses_size());

    // The single returned status must refer to our operation, report it as
    // pending, and carry no UUID.
    const OperationStatus& operationStatus = reconcile.operation_statuses(0);
    EXPECT_EQ(operationId, operationStatus.operation_id());
    EXPECT_EQ(OPERATION_PENDING, operationStatus.state());
    EXPECT_FALSE(operationStatus.has_uuid());
  }

  // Test implicit reconciliation: the call lists no operations.
  {
    const Future<scheduler::APIResult> result =
      mesos.call({createCallReconcileOperations(frameworkId, {})});

    AWAIT_READY(result);

    // The master should respond with '200 OK' and with a `scheduler::Response`.
    ASSERT_EQ(process::http::Status::OK, result->status_code());
    ASSERT_TRUE(result->has_response());

    const scheduler::Response response = result->response();
    ASSERT_EQ(scheduler::Response::RECONCILE_OPERATIONS, response.type());
    ASSERT_TRUE(response.has_reconcile_operations());

    const scheduler::Response::ReconcileOperations& reconcile =
      response.reconcile_operations();
    ASSERT_EQ(1, reconcile.operation_statuses_size());

    // Same expectations as for the explicit case.
    const OperationStatus& operationStatus = reconcile.operation_statuses(0);
    EXPECT_EQ(operationId, operationStatus.operation_id());
    EXPECT_EQ(OPERATION_PENDING, operationStatus.state());
    EXPECT_FALSE(operationStatus.has_uuid());
  }
}
// Equality for `ResourceProviderInfo`.
//
// Two infos are equal when:
//   - their default reservations match element by element, in the same
//     order (the order of reservations is significant);
//   - both have an ID or both lack one, and the IDs match when present;
//   - their attributes compare equal as `Attributes`;
//   - their `type` and `name` match;
//   - both have storage or both lack it, and the storage matches when
//     present.
bool operator==(
    const ResourceProviderInfo& left,
    const ResourceProviderInfo& right)
{
  const int reservationCount = left.default_reservations_size();

  if (reservationCount != right.default_reservations_size()) {
    return false;
  }

  // Positional comparison: order of reservations is important.
  for (int pos = 0; pos < reservationCount; ++pos) {
    if (left.default_reservations(pos) != right.default_reservations(pos)) {
      return false;
    }
  }

  if (left.has_id() != right.has_id()) {
    return false;
  }

  if (left.has_id() && !(left.id() == right.id())) {
    return false;
  }

  if (!(Attributes(left.attributes()) == Attributes(right.attributes()))) {
    return false;
  }

  if (!(left.type() == right.type())) {
    return false;
  }

  if (!(left.name() == right.name())) {
    return false;
  }

  if (left.has_storage() != right.has_storage()) {
    return false;
  }

  // Storage is only compared when it is present on the left-hand side.
  return !left.has_storage() || left.storage() == right.storage();
}