// This test ensures that the failed over scheduler is able to send a message
// to the executor.
//
// Flow: a first scheduler instance subscribes and launches a task; a second
// instance then subscribes with the same framework ID (failover); finally the
// second instance sends a MESSAGE call that must be delivered to the executor.
TEST_F(HttpFaultToleranceTest, SchedulerFailoverFrameworkToExecutorMessage)
{
  master::Flags flags = CreateMasterFlags();
  flags.authenticate_frameworks = false;

  Try<Owned<cluster::Master>> master = StartMaster(flags);
  ASSERT_SOME(master);

  auto scheduler = std::make_shared<v1::MockHTTPScheduler>();
  auto executor = std::make_shared<v1::MockHTTPExecutor>();

  ExecutorID executorId = DEFAULT_EXECUTOR_ID;
  TestContainerizer containerizer(executorId, executor);

  Owned<MasterDetector> detector = master.get()->createDetector();
  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), &containerizer);
  ASSERT_SOME(slave);

  Future<Nothing> connected;
  EXPECT_CALL(*scheduler, connected(_))
    .WillOnce(FutureSatisfy(&connected))
    .WillRepeatedly(Return()); // Ignore future invocations.

  ContentType contentType = ContentType::PROTOBUF;

  v1::scheduler::TestMesos schedulerLibrary(
      master.get()->pid,
      contentType,
      scheduler);

  AWAIT_READY(connected);

  Future<Event::Subscribed> subscribed;
  EXPECT_CALL(*scheduler, subscribed(_, _))
    .WillOnce(FutureArg<1>(&subscribed));

  EXPECT_CALL(*scheduler, heartbeat(_))
    .WillRepeatedly(Return()); // Ignore heartbeats.

  Future<Event::Offers> offers;
  EXPECT_CALL(*scheduler, offers(_, _))
    .WillOnce(FutureArg<1>(&offers));

  // Subscribe the first scheduler instance (no framework ID yet).
  {
    Call call;
    call.set_type(Call::SUBSCRIBE);

    Call::Subscribe* subscribe = call.mutable_subscribe();
    subscribe->mutable_framework_info()->CopyFrom(v1::DEFAULT_FRAMEWORK_INFO);

    schedulerLibrary.send(call);
  }

  AWAIT_READY(subscribed);

  v1::FrameworkID frameworkId(subscribed->framework_id());

  AWAIT_READY(offers);

  // `offers->offers(0)` is dereferenced below, so an empty offer list has to
  // abort the test instead of merely recording a non-fatal failure.
  ASSERT_FALSE(offers->offers().empty());

  EXPECT_CALL(*executor, connected(_))
    .WillOnce(v1::executor::SendSubscribe(frameworkId, evolve(executorId)));

  EXPECT_CALL(*executor, subscribed(_, _));

  Future<Nothing> launch;
  EXPECT_CALL(*executor, launch(_, _))
    .WillOnce(FutureSatisfy(&launch));

  const v1::Offer& offer = offers->offers(0);

  v1::TaskInfo taskInfo =
    evolve(createTask(devolve(offer), "", executorId));

  // Accept the offer and launch a single task on it.
  {
    Call call;
    call.mutable_framework_id()->CopyFrom(frameworkId);
    call.set_type(Call::ACCEPT);

    Call::Accept* accept = call.mutable_accept();
    accept->add_offer_ids()->CopyFrom(offer.id());

    v1::Offer::Operation* operation = accept->add_operations();
    operation->set_type(v1::Offer::Operation::LAUNCH);
    operation->mutable_launch()->add_task_infos()->CopyFrom(taskInfo);

    schedulerLibrary.send(call);
  }

  AWAIT_READY(launch);

  auto scheduler2 = std::make_shared<v1::MockHTTPScheduler>();

  Future<Nothing> connected2;
  EXPECT_CALL(*scheduler2, connected(_))
    .WillOnce(FutureSatisfy(&connected2));

  // Failover to another scheduler instance.
  v1::scheduler::TestMesos schedulerLibrary2(
      master.get()->pid,
      contentType,
      scheduler2);

  AWAIT_READY(connected2);

  // The previously connected scheduler instance should receive an
  // error/disconnected event.
  Future<Nothing> error;
  EXPECT_CALL(*scheduler, error(_, _))
    .WillOnce(FutureSatisfy(&error));

  Future<Nothing> disconnected;
  EXPECT_CALL(*scheduler, disconnected(_))
    .WillOnce(FutureSatisfy(&disconnected));

  // The `subscribed` future is reused for the second instance.
  EXPECT_CALL(*scheduler2, subscribed(_, _))
    .WillOnce(FutureArg<1>(&subscribed));

  EXPECT_CALL(*scheduler2, heartbeat(_))
    .WillRepeatedly(Return()); // Ignore heartbeats.

  // Subscribe the second instance with the framework ID of the first one.
  {
    Call call;
    call.mutable_framework_id()->CopyFrom(frameworkId);
    call.set_type(Call::SUBSCRIBE);

    Call::Subscribe* subscribe = call.mutable_subscribe();
    subscribe->mutable_framework_info()->CopyFrom(v1::DEFAULT_FRAMEWORK_INFO);
    subscribe->mutable_framework_info()->mutable_id()->CopyFrom(frameworkId);

    schedulerLibrary2.send(call);
  }

  AWAIT_READY(error);
  AWAIT_READY(disconnected);
  AWAIT_READY(subscribed);

  EXPECT_EQ(frameworkId, subscribed->framework_id());

  Future<v1::executor::Event::Message> message;
  EXPECT_CALL(*executor, message(_, _))
    .WillOnce(FutureArg<1>(&message));

  // Send a framework-to-executor message through the failed over instance.
  {
    Call call;
    call.mutable_framework_id()->CopyFrom(frameworkId);
    call.set_type(Call::MESSAGE);

    // Named differently from the outer `message` future to avoid shadowing.
    Call::Message* messageCall = call.mutable_message();
    messageCall->mutable_agent_id()->CopyFrom(offer.agent_id());
    messageCall->mutable_executor_id()->CopyFrom(v1::DEFAULT_EXECUTOR_ID);
    messageCall->set_data("hello world");

    schedulerLibrary2.send(call);
  }

  AWAIT_READY(message);
  ASSERT_EQ("hello world", message->data());

  // Tolerate (but do not require) executor teardown callbacks.
  EXPECT_CALL(*executor, shutdown(_))
    .Times(AtMost(1));

  EXPECT_CALL(*executor, disconnected(_))
    .Times(AtMost(1));
}
// This test checks that a scheduler exit shuts down the executor.
//
// The exit is exercised by having the scheduler send a TEARDOWN call after a
// task has been launched; the executor must then get its `shutdown` callback.
TEST_F(HttpFaultToleranceTest, SchedulerExit)
{
  master::Flags flags = CreateMasterFlags();
  flags.authenticate_frameworks = false;

  Try<Owned<cluster::Master>> master = StartMaster(flags);
  ASSERT_SOME(master);

  auto scheduler = std::make_shared<v1::MockHTTPScheduler>();
  auto executor = std::make_shared<v1::MockHTTPExecutor>();

  ExecutorID executorId = DEFAULT_EXECUTOR_ID;
  TestContainerizer containerizer(executorId, executor);

  Owned<MasterDetector> detector = master.get()->createDetector();
  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), &containerizer);
  ASSERT_SOME(slave);

  Future<Nothing> connected;
  EXPECT_CALL(*scheduler, connected(_))
    .WillOnce(FutureSatisfy(&connected))
    .WillRepeatedly(Return()); // Ignore future invocations.

  ContentType contentType = ContentType::PROTOBUF;

  v1::scheduler::TestMesos schedulerLibrary(
      master.get()->pid,
      contentType,
      scheduler);

  AWAIT_READY(connected);

  Future<Event::Subscribed> subscribed;
  EXPECT_CALL(*scheduler, subscribed(_, _))
    .WillOnce(FutureArg<1>(&subscribed));

  EXPECT_CALL(*scheduler, heartbeat(_))
    .WillRepeatedly(Return()); // Ignore heartbeats.

  Future<Event::Offers> offers;
  EXPECT_CALL(*scheduler, offers(_, _))
    .WillOnce(FutureArg<1>(&offers));

  // Subscribe the scheduler.
  {
    Call call;
    call.set_type(Call::SUBSCRIBE);

    Call::Subscribe* subscribe = call.mutable_subscribe();
    subscribe->mutable_framework_info()->CopyFrom(v1::DEFAULT_FRAMEWORK_INFO);

    schedulerLibrary.send(call);
  }

  AWAIT_READY(subscribed);

  v1::FrameworkID frameworkId(subscribed->framework_id());

  AWAIT_READY(offers);

  // `offers->offers(0)` is dereferenced below, so an empty offer list has to
  // abort the test instead of merely recording a non-fatal failure.
  ASSERT_FALSE(offers->offers().empty());

  EXPECT_CALL(*executor, connected(_))
    .WillOnce(v1::executor::SendSubscribe(frameworkId, evolve(executorId)));

  EXPECT_CALL(*executor, subscribed(_, _));

  Future<Nothing> launch;
  EXPECT_CALL(*executor, launch(_, _))
    .WillOnce(FutureSatisfy(&launch));

  const v1::Offer& offer = offers->offers(0);

  // Use the same `executorId` the containerizer was created with, as the
  // sibling tests do (it was initialized from `DEFAULT_EXECUTOR_ID` above).
  v1::TaskInfo taskInfo =
    evolve(createTask(devolve(offer), "", executorId));

  // Accept the offer and launch a single task on it.
  {
    Call call;
    call.mutable_framework_id()->CopyFrom(frameworkId);
    call.set_type(Call::ACCEPT);

    Call::Accept* accept = call.mutable_accept();
    accept->add_offer_ids()->CopyFrom(offer.id());

    v1::Offer::Operation* operation = accept->add_operations();
    operation->set_type(v1::Offer::Operation::LAUNCH);
    operation->mutable_launch()->add_task_infos()->CopyFrom(taskInfo);

    schedulerLibrary.send(call);
  }

  AWAIT_READY(launch);

  EXPECT_CALL(*scheduler, disconnected(_))
    .Times(AtMost(1));

  Future<Nothing> shutdown;
  EXPECT_CALL(*executor, shutdown(_))
    .WillOnce(FutureSatisfy(&shutdown));

  // Tear the framework down.
  {
    Call call;
    call.mutable_framework_id()->CopyFrom(frameworkId);
    call.set_type(Call::TEARDOWN);

    schedulerLibrary.send(call);
  }

  // Ensure that the executor receives a `Event::Shutdown` after the
  // scheduler exit.
  AWAIT_READY(shutdown);
}
// This test checks that a failed over scheduler gets the retried status update
// when the original instance dies without acknowledging the update.
//
// Flow: the first scheduler instance launches a task whose executor reports
// TASK_RUNNING; the instance receives the update but never acknowledges it. A
// second instance then subscribes with the same framework ID, the clock is
// advanced by the status update retry interval, and the second instance must
// receive an update.
TEST_F(HttpFaultToleranceTest, SchedulerFailoverStatusUpdate)
{
  master::Flags flags = CreateMasterFlags();
  flags.authenticate_frameworks = false;

  Try<Owned<cluster::Master>> master = StartMaster(flags);
  ASSERT_SOME(master);

  auto scheduler = std::make_shared<v1::MockHTTPScheduler>();
  auto executor = std::make_shared<v1::MockHTTPExecutor>();

  ExecutorID executorId = DEFAULT_EXECUTOR_ID;
  TestContainerizer containerizer(executorId, executor);

  Owned<MasterDetector> detector = master.get()->createDetector();
  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), &containerizer);
  ASSERT_SOME(slave);

  Future<Nothing> connected;
  EXPECT_CALL(*scheduler, connected(_))
    .WillOnce(FutureSatisfy(&connected))
    .WillRepeatedly(Return()); // Ignore future invocations.

  ContentType contentType = ContentType::PROTOBUF;

  v1::scheduler::TestMesos schedulerLibrary(
      master.get()->pid,
      contentType,
      scheduler);

  AWAIT_READY(connected);

  Future<Event::Subscribed> subscribed;
  EXPECT_CALL(*scheduler, subscribed(_, _))
    .WillOnce(FutureArg<1>(&subscribed));

  EXPECT_CALL(*scheduler, heartbeat(_))
    .WillRepeatedly(Return()); // Ignore heartbeats.

  Future<Event::Offers> offers;
  EXPECT_CALL(*scheduler, offers(_, _))
    .WillOnce(FutureArg<1>(&offers));

  // Subscribe the first scheduler instance (no framework ID yet).
  {
    Call call;
    call.set_type(Call::SUBSCRIBE);

    Call::Subscribe* subscribe = call.mutable_subscribe();
    subscribe->mutable_framework_info()->CopyFrom(v1::DEFAULT_FRAMEWORK_INFO);

    schedulerLibrary.send(call);
  }

  AWAIT_READY(subscribed);

  v1::FrameworkID frameworkId(subscribed->framework_id());

  AWAIT_READY(offers);

  // `offers->offers(0)` is dereferenced below, so an empty offer list has to
  // abort the test instead of merely recording a non-fatal failure.
  ASSERT_FALSE(offers->offers().empty());

  EXPECT_CALL(*executor, connected(_))
    .WillOnce(v1::executor::SendSubscribe(frameworkId, evolve(executorId)));

  EXPECT_CALL(*executor, subscribed(_, _));

  // On launch, the executor reports the task as TASK_RUNNING.
  EXPECT_CALL(*executor, launch(_, _))
    .WillOnce(v1::executor::SendUpdateFromTask(
        frameworkId, evolve(executorId), v1::TASK_RUNNING));

  Future<Nothing> acknowledged;
  EXPECT_CALL(*executor, acknowledged(_, _))
    .WillOnce(FutureSatisfy(&acknowledged));

  Future<Event::Update> update;
  EXPECT_CALL(*scheduler, update(_, _))
    .WillOnce(FutureArg<1>(&update));

  const v1::Offer& offer = offers->offers(0);

  v1::TaskInfo taskInfo =
    evolve(createTask(devolve(offer), "", executorId));

  // Accept the offer and launch a single task on it.
  {
    Call call;
    call.mutable_framework_id()->CopyFrom(frameworkId);
    call.set_type(Call::ACCEPT);

    Call::Accept* accept = call.mutable_accept();
    accept->add_offer_ids()->CopyFrom(offer.id());

    v1::Offer::Operation* operation = accept->add_operations();
    operation->set_type(v1::Offer::Operation::LAUNCH);
    operation->mutable_launch()->add_task_infos()->CopyFrom(taskInfo);

    schedulerLibrary.send(call);
  }

  AWAIT_READY(acknowledged);
  AWAIT_READY(update);

  EXPECT_EQ(v1::TASK_RUNNING, update->status().state());
  EXPECT_EQ(executorId, devolve(update->status().executor_id()));

  EXPECT_TRUE(update->status().has_executor_id());
  EXPECT_TRUE(update->status().has_uuid());

  // Failover the scheduler without acknowledging the status update.

  auto scheduler2 = std::make_shared<v1::MockHTTPScheduler>();

  Future<Nothing> connected2;
  EXPECT_CALL(*scheduler2, connected(_))
    .WillOnce(FutureSatisfy(&connected2));

  // Failover to another scheduler instance.
  v1::scheduler::TestMesos schedulerLibrary2(
      master.get()->pid,
      contentType,
      scheduler2);

  AWAIT_READY(connected2);

  // The previously connected scheduler instance should receive an
  // error/disconnected event.
  Future<Nothing> error;
  EXPECT_CALL(*scheduler, error(_, _))
    .WillOnce(FutureSatisfy(&error));

  Future<Nothing> disconnected;
  EXPECT_CALL(*scheduler, disconnected(_))
    .WillOnce(FutureSatisfy(&disconnected));

  // The `subscribed` future is reused for the second instance.
  EXPECT_CALL(*scheduler2, subscribed(_, _))
    .WillOnce(FutureArg<1>(&subscribed));

  EXPECT_CALL(*scheduler2, heartbeat(_))
    .WillRepeatedly(Return()); // Ignore heartbeats.

  // Scheduler2 should receive the retried status update.
  Future<Nothing> update2;
  EXPECT_CALL(*scheduler2, update(_, _))
    .WillOnce(FutureSatisfy(&update2))
    .WillRepeatedly(Return()); // Ignore subsequent updates.

  // Subscribe the second instance with the framework ID of the first one.
  {
    Call call;
    call.mutable_framework_id()->CopyFrom(frameworkId);
    call.set_type(Call::SUBSCRIBE);

    Call::Subscribe* subscribe = call.mutable_subscribe();
    subscribe->mutable_framework_info()->CopyFrom(v1::DEFAULT_FRAMEWORK_INFO);
    subscribe->mutable_framework_info()->mutable_id()->CopyFrom(frameworkId);

    schedulerLibrary2.send(call);
  }

  AWAIT_READY(error);
  AWAIT_READY(disconnected);
  AWAIT_READY(subscribed);

  EXPECT_EQ(frameworkId, subscribed->framework_id());

  // NOTE(review): the clock stays paused when the test returns; presumably
  // the fixture teardown resumes it -- confirm.
  Clock::pause();

  // Now advance time enough for the reliable timeout to kick in and
  // another status update to be sent.
  Clock::advance(slave::STATUS_UPDATE_RETRY_INTERVAL_MIN);

  AWAIT_READY(update2);

  // Tolerate (but do not require) executor teardown callbacks.
  EXPECT_CALL(*executor, shutdown(_))
    .Times(AtMost(1));

  EXPECT_CALL(*executor, disconnected(_))
    .Times(AtMost(1));
}
// This test verifies that, after a master failover, reconciliation of an
// operation that is still pending on an agent results in `OPERATION_PENDING`.
//
// The test is parameterized over the content type (`GetParam()`), which is
// used both for the mock resource provider and for the scheduler library.
//
// Outline:
//   1. Start a master and an agent, then register a mock resource provider
//      that exposes a RAW disk and never answers `applyOperation`.
//   2. Subscribe a framework and send a CREATE_DISK operation with an
//      operation ID against the RAW disk.
//   3. Replace the master with a fresh one and wait for the agent and the
//      framework to come back.
//   4. Issue an explicit and an implicit operation reconciliation call and
//      check the statuses in both responses.
TEST_P(OperationReconciliationTest, AgentPendingOperationAfterMasterFailover)
{
  // The clock is paused up front; registration and allocation below are
  // driven by explicit `Clock::advance` calls.
  Clock::pause();

  mesos::internal::master::Flags masterFlags = CreateMasterFlags();
  Try<Owned<cluster::Master>> master = StartMaster(masterFlags);
  ASSERT_SOME(master);

  Future<UpdateSlaveMessage> updateSlaveMessage =
    FUTURE_PROTOBUF(UpdateSlaveMessage(), _, _);

  // A standalone detector is used so that the test can appoint and un-appoint
  // the master explicitly during the simulated failover further down.
  auto detector = std::make_shared<StandaloneMasterDetector>(master.get()->pid);

  mesos::internal::slave::Flags slaveFlags = CreateSlaveFlags();
  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), slaveFlags);
  ASSERT_SOME(slave);

  // Advance the clock to trigger agent registration.
  Clock::advance(slaveFlags.registration_backoff_factor);

  // Wait for the agent to register.
  AWAIT_READY(updateSlaveMessage);

  // Start and register a resource provider.
  ResourceProviderInfo resourceProviderInfo;
  resourceProviderInfo.set_type("org.apache.mesos.rp.test");
  resourceProviderInfo.set_name("test");

  Resource disk = createDiskResource(
      "200", "*", None(), None(), createDiskSourceRaw(None(), "profile"));

  Owned<MockResourceProvider> resourceProvider(
      new MockResourceProvider(
          resourceProviderInfo,
          Resources(disk)));

  // We override the mock resource provider's default action, so the operation
  // will stay in `OPERATION_PENDING`.
  Future<resource_provider::Event::ApplyOperation> applyOperation;
  EXPECT_CALL(*resourceProvider, applyOperation(_))
    .WillOnce(FutureArg<0>(&applyOperation));

  Owned<EndpointDetector> endpointDetector(
      mesos::internal::tests::resource_provider::createEndpointDetector(
          slave.get()->pid));

  // Re-arm the future: the next `UpdateSlaveMessage` is the one expected to
  // carry the resource provider.
  updateSlaveMessage = FUTURE_PROTOBUF(UpdateSlaveMessage(), _, _);

  // NOTE: We need to resume the clock so that the resource provider can
  // fully register.
  Clock::resume();

  ContentType contentType = GetParam();

  resourceProvider->start(endpointDetector, contentType);

  // Wait until the agent's resources have been updated to include the
  // resource provider resources.
  AWAIT_READY(updateSlaveMessage);

  ASSERT_TRUE(updateSlaveMessage->has_resource_providers());
  ASSERT_EQ(1, updateSlaveMessage->resource_providers().providers_size());

  Clock::pause();

  // Start a v1 framework.
  auto scheduler = std::make_shared<MockHTTPScheduler>();

  FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;
  frameworkInfo.set_roles(0, DEFAULT_TEST_ROLE);

  EXPECT_CALL(*scheduler, connected(_))
    .WillOnce(scheduler::SendSubscribe(frameworkInfo));

  Future<scheduler::Event::Subscribed> subscribed;
  EXPECT_CALL(*scheduler, subscribed(_, _))
    .WillOnce(FutureArg<1>(&subscribed));

  // Ignore heartbeats.
  EXPECT_CALL(*scheduler, heartbeat(_))
    .WillRepeatedly(Return());

  // Decline offers that do not contain wanted resources.
  EXPECT_CALL(*scheduler, offers(_, _))
    .WillRepeatedly(scheduler::DeclineOffers());

  Future<scheduler::Event::Offers> offers;

  // Predicate selecting disk resources whose source type is RAW.
  auto isRaw = [](const Resource& r) {
    return r.has_disk() &&
      r.disk().has_source() &&
      r.disk().source().type() == Resource::DiskInfo::Source::RAW;
  };

  // Capture the first offer that contains a RAW disk.
  EXPECT_CALL(*scheduler, offers(_, scheduler::OffersHaveAnyResource(
      std::bind(isRaw, lambda::_1))))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(scheduler::DeclineOffers()); // Decline successive offers.

  // The scheduler library shares the standalone detector with the agent.
  scheduler::TestMesos mesos(
      master.get()->pid, contentType, scheduler, detector);

  AWAIT_READY(subscribed);
  FrameworkID frameworkId(subscribed->framework_id());

  // NOTE: If the framework has not declined an unwanted offer yet when
  // the master updates the agent with the RAW disk resource, the new
  // allocation triggered by this update won't generate an allocatable
  // offer due to no CPU and memory resources. So here we first settle
  // the clock to ensure that the unwanted offer has been declined, then
  // advance the clock to trigger another allocation.
  Clock::settle();
  Clock::advance(masterFlags.allocation_interval);

  AWAIT_READY(offers);
  ASSERT_FALSE(offers->offers().empty());

  const Offer& offer = offers->offers(0);
  const AgentID& agentId = offer.agent_id();

  // Locate the RAW disk in the offer; it must carry a resource provider ID.
  Option<Resource> source;
  Option<ResourceProviderID> resourceProviderId;
  foreach (const Resource& resource, offer.resources()) {
    if (isRaw(resource)) {
      source = resource;
      ASSERT_TRUE(resource.has_provider_id());
      resourceProviderId = resource.provider_id();
      break;
    }
  }

  ASSERT_SOME(source);
  ASSERT_SOME(resourceProviderId);

  // The operation ID is what the reconciliation requests refer to later.
  OperationID operationId;
  operationId.set_value("operation");

  mesos.send(createCallAccept(
      frameworkId,
      offer,
      {CREATE_DISK(
           source.get(),
           Resource::DiskInfo::Source::MOUNT,
           None(),
           operationId)}));

  // The operation has reached the resource provider, which does not answer.
  AWAIT_READY(applyOperation);

  // Simulate master failover.
  EXPECT_CALL(*scheduler, disconnected(_));

  detector->appoint(None());

  master->reset();
  master = StartMaster();
  ASSERT_SOME(master);

  // Settle the clock to ensure the master finishes recovering the registry.
  Clock::settle();

  Future<SlaveReregisteredMessage> slaveReregistered = FUTURE_PROTOBUF(
      SlaveReregisteredMessage(), master.get()->pid, slave.get()->pid);

  updateSlaveMessage = FUTURE_PROTOBUF(UpdateSlaveMessage(), _, _);

  // On reconnect the scheduler subscribes again, this time with its ID.
  EXPECT_CALL(*scheduler, connected(_))
    .WillOnce(scheduler::SendSubscribe(frameworkInfo, frameworkId));

  Future<scheduler::Event::Subscribed> frameworkResubscribed;
  EXPECT_CALL(*scheduler, subscribed(_, _))
    .WillOnce(FutureArg<1>(&frameworkResubscribed));

  // Simulate a new master detected event to the agent and the scheduler.
  detector->appoint(master.get()->pid);

  // Advance the clock, so that the agent re-registers.
  Clock::advance(slaveFlags.registration_backoff_factor);

  // Resume the clock to avoid deadlocks related to agent registration.
  // See MESOS-8828.
  Clock::resume();

  // Wait for the framework and agent to re-register.
  AWAIT_READY(slaveReregistered);
  AWAIT_READY(updateSlaveMessage);
  AWAIT_READY(frameworkResubscribed);

  Clock::pause();

  // Test explicit reconciliation: the request names the operation and agent.
  {
    scheduler::Call::ReconcileOperations::Operation operation;
    operation.mutable_operation_id()->CopyFrom(operationId);
    operation.mutable_agent_id()->CopyFrom(agentId);

    const Future<scheduler::APIResult> result =
      mesos.call({createCallReconcileOperations(frameworkId, {operation})});

    AWAIT_READY(result);

    // The master should respond with '200 OK' and with a `scheduler::Response`.
    ASSERT_EQ(process::http::Status::OK, result->status_code());
    ASSERT_TRUE(result->has_response());

    const scheduler::Response response = result->response();
    ASSERT_EQ(scheduler::Response::RECONCILE_OPERATIONS, response.type());
    ASSERT_TRUE(response.has_reconcile_operations());

    const scheduler::Response::ReconcileOperations& reconcile =
      response.reconcile_operations();
    ASSERT_EQ(1, reconcile.operation_statuses_size());

    // Exactly one status is expected: pending, and without a status UUID.
    const OperationStatus& operationStatus = reconcile.operation_statuses(0);
    EXPECT_EQ(operationId, operationStatus.operation_id());
    EXPECT_EQ(OPERATION_PENDING, operationStatus.state());
    EXPECT_FALSE(operationStatus.has_uuid());
  }

  // Test implicit reconciliation: the request lists no operations.
  {
    const Future<scheduler::APIResult> result =
      mesos.call({createCallReconcileOperations(frameworkId, {})});

    AWAIT_READY(result);

    // The master should respond with '200 OK' and with a `scheduler::Response`.
    ASSERT_EQ(process::http::Status::OK, result->status_code());
    ASSERT_TRUE(result->has_response());

    const scheduler::Response response = result->response();
    ASSERT_EQ(scheduler::Response::RECONCILE_OPERATIONS, response.type());
    ASSERT_TRUE(response.has_reconcile_operations());

    const scheduler::Response::ReconcileOperations& reconcile =
      response.reconcile_operations();
    ASSERT_EQ(1, reconcile.operation_statuses_size());

    // Same expectations as for the explicit request above.
    const OperationStatus& operationStatus = reconcile.operation_statuses(0);
    EXPECT_EQ(operationId, operationStatus.operation_id());
    EXPECT_EQ(OPERATION_PENDING, operationStatus.state());
    EXPECT_FALSE(operationStatus.has_uuid());
  }
}
// Verifies that requests to the agent's operator API ("api/v1") and executor
// API ("api/v1/executor") are rejected with '403 Forbidden' when they carry a
// token that is signed with the correct key but whose claims are invalid
// (here: a random `cid` claim).
TEST_F(ExecutorAuthorizationTest, FailedApiCalls)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  // The agent runs with permissive ACLs so that a task can be launched and
  // the local authorizer's implicit executor authorization is performed.
  ACLs acls;
  acls.set_permissive(true);

  slave::Flags flags = CreateSlaveFlags();
  flags.acls = acls;

  Owned<MasterDetector> detector = master.get()->createDetector();

  // Describe a default-type executor with a small resource allotment.
  v1::Resources resources =
    v1::Resources::parse("cpus:0.1;mem:32;disk:32").get();

  v1::ExecutorInfo executorInfo;
  executorInfo.set_type(v1::ExecutorInfo::DEFAULT);
  executorInfo.mutable_executor_id()->CopyFrom(v1::DEFAULT_EXECUTOR_ID);
  executorInfo.mutable_resources()->CopyFrom(resources);

  auto executor = std::make_shared<v1::MockHTTPExecutor>();

  Owned<TestContainerizer> containerizer(new TestContainerizer(
      devolve(executorInfo.executor_id()), executor));

  Try<Owned<cluster::Slave>> slave =
    this->StartSlave(detector.get(), containerizer.get(), flags);
  ASSERT_SOME(slave);

  auto scheduler = std::make_shared<v1::MockHTTPScheduler>();

  Future<Nothing> schedulerConnected;
  EXPECT_CALL(*scheduler, connected(_))
    .WillOnce(FutureSatisfy(&schedulerConnected));

  v1::scheduler::TestMesos mesos(
      master.get()->pid,
      ContentType::PROTOBUF,
      scheduler);

  AWAIT_READY(schedulerConnected);

  Future<v1::scheduler::Event::Subscribed> frameworkSubscribed;
  EXPECT_CALL(*scheduler, subscribed(_, _))
    .WillOnce(FutureArg<1>(&frameworkSubscribed));

  Future<v1::scheduler::Event::Offers> offers;
  EXPECT_CALL(*scheduler, offers(_, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Later offers are ignored.

  EXPECT_CALL(*scheduler, heartbeat(_))
    .WillRepeatedly(Return()); // Heartbeats are ignored.

  mesos.send(v1::createCallSubscribe(v1::DEFAULT_FRAMEWORK_INFO));

  AWAIT_READY(frameworkSubscribed);

  v1::FrameworkID frameworkId(frameworkSubscribed->framework_id());

  executorInfo.mutable_framework_id()->CopyFrom(frameworkId);

  AWAIT_READY(offers);
  ASSERT_FALSE(offers->offers().empty());

  // Capture the executor library handle so the test can subscribe manually.
  Future<v1::executor::Mesos*> executorLib;
  EXPECT_CALL(*executor, connected(_))
    .WillOnce(FutureArg<0>(&executorLib));

  const v1::Offer& offer = offers->offers(0);
  const v1::AgentID& agentId = offer.agent_id();

  // Launch a one-task task group under the executor described above.
  {
    v1::TaskInfo taskInfo =
      v1::createTask(agentId, resources, SLEEP_COMMAND(1000));

    v1::TaskGroupInfo taskGroup;
    taskGroup.add_tasks()->CopyFrom(taskInfo);

    v1::scheduler::Call acceptCall;
    acceptCall.mutable_framework_id()->CopyFrom(frameworkId);
    acceptCall.set_type(v1::scheduler::Call::ACCEPT);

    v1::scheduler::Call::Accept* accept = acceptCall.mutable_accept();
    accept->add_offer_ids()->CopyFrom(offer.id());

    v1::Offer::Operation* operation = accept->add_operations();
    operation->set_type(v1::Offer::Operation::LAUNCH_GROUP);

    v1::Offer::Operation::LaunchGroup* launchGroupOperation =
      operation->mutable_launch_group();

    launchGroupOperation->mutable_executor()->CopyFrom(executorInfo);
    launchGroupOperation->mutable_task_group()->CopyFrom(taskGroup);

    mesos.send(acceptCall);
  }

  AWAIT_READY(executorLib);

  Future<v1::executor::Event::Subscribed> executorSubscribed;
  EXPECT_CALL(*executor, subscribed(_, _))
    .WillOnce(FutureArg<1>(&executorSubscribed));

  Future<Nothing> launchGroup;
  EXPECT_CALL(*executor, launchGroup(_, _))
    .WillOnce(FutureSatisfy(&launchGroup));

  // Subscribe the executor through its own library.
  {
    v1::executor::Call subscribeCall;
    subscribeCall.mutable_framework_id()->CopyFrom(frameworkId);
    subscribeCall.mutable_executor_id()->CopyFrom(v1::DEFAULT_EXECUTOR_ID);
    subscribeCall.set_type(v1::executor::Call::SUBSCRIBE);
    subscribeCall.mutable_subscribe();

    executorLib.get()->send(subscribeCall);
  }

  // Only once the executor is SUBSCRIBED can the UPDATE and MESSAGE executor
  // calls be attempted.
  AWAIT_READY(executorSubscribed);
  AWAIT_READY(launchGroup);

  // Build a principal whose ContainerID claim is a random value.
  hashmap<string, string> tokenClaims;
  tokenClaims["fid"] = frameworkId.value();
  tokenClaims["eid"] = v1::DEFAULT_EXECUTOR_ID.value();
  tokenClaims["cid"] = id::UUID::random().toString();

  Principal bogusPrincipal(None(), tokenClaims);

  // The token is signed with the correct key, yet carries invalid claims.
  Owned<JWTSecretGenerator> tokenGenerator(
      new JWTSecretGenerator(DEFAULT_JWT_SECRET_KEY));

  Future<Secret> forgedToken = tokenGenerator->generate(bogusPrincipal);

  AWAIT_READY(forgedToken);

  // A fresh nested container ID parented under the executor's container.
  v1::ContainerID nestedContainerId;
  nestedContainerId.set_value(id::UUID::random().toString());
  nestedContainerId.mutable_parent()->CopyFrom(
      executorSubscribed->container_id());

  http::Headers headers;
  headers["Authorization"] = "Bearer " + forgedToken->value().data();

  // The executor library was initialized with a valid token, so the calls
  // carrying the invalid token are posted through plain HTTP helpers instead.
  auto postAgentCall = [&](const v1::agent::Call& call) {
    return http::post(
        slave.get()->pid,
        "api/v1",
        headers,
        serialize(ContentType::PROTOBUF, call),
        stringify(ContentType::PROTOBUF));
  };

  auto postExecutorCall = [&](const v1::executor::Call& call) {
    return http::post(
        slave.get()->pid,
        "api/v1/executor",
        headers,
        serialize(ContentType::PROTOBUF, call),
        stringify(ContentType::PROTOBUF));
  };

  // Operator API: LAUNCH_NESTED_CONTAINER.
  {
    v1::agent::Call call;
    call.set_type(v1::agent::Call::LAUNCH_NESTED_CONTAINER);

    call.mutable_launch_nested_container()->mutable_container_id()
      ->CopyFrom(nestedContainerId);

    Future<http::Response> response = postAgentCall(call);

    AWAIT_EXPECT_RESPONSE_STATUS_EQ(http::Forbidden().status, response);
  }

  // Operator API: LAUNCH_NESTED_CONTAINER_SESSION.
  {
    v1::agent::Call call;
    call.set_type(v1::agent::Call::LAUNCH_NESTED_CONTAINER_SESSION);

    call.mutable_launch_nested_container_session()->mutable_container_id()
      ->CopyFrom(nestedContainerId);

    call.mutable_launch_nested_container_session()->mutable_command()
      ->set_value("sleep 120");

    Future<http::Response> response = postAgentCall(call);

    AWAIT_EXPECT_RESPONSE_STATUS_EQ(http::Forbidden().status, response);
  }

  // Operator API: WAIT_NESTED_CONTAINER.
  {
    v1::agent::Call call;
    call.set_type(v1::agent::Call::WAIT_NESTED_CONTAINER);

    call.mutable_wait_nested_container()->mutable_container_id()
      ->CopyFrom(nestedContainerId);

    Future<http::Response> response = postAgentCall(call);

    AWAIT_EXPECT_RESPONSE_STATUS_EQ(http::Forbidden().status, response);
  }

  // Operator API: KILL_NESTED_CONTAINER.
  {
    v1::agent::Call call;
    call.set_type(v1::agent::Call::KILL_NESTED_CONTAINER);

    call.mutable_kill_nested_container()->mutable_container_id()
      ->CopyFrom(nestedContainerId);

    Future<http::Response> response = postAgentCall(call);

    AWAIT_EXPECT_RESPONSE_STATUS_EQ(http::Forbidden().status, response);
  }

  // Operator API: REMOVE_NESTED_CONTAINER.
  {
    v1::agent::Call call;
    call.set_type(v1::agent::Call::REMOVE_NESTED_CONTAINER);

    call.mutable_remove_nested_container()->mutable_container_id()
      ->CopyFrom(nestedContainerId);

    Future<http::Response> response = postAgentCall(call);

    AWAIT_EXPECT_RESPONSE_STATUS_EQ(http::Forbidden().status, response);
  }

  // Operator API: ATTACH_CONTAINER_OUTPUT.
  {
    v1::agent::Call call;
    call.set_type(v1::agent::Call::ATTACH_CONTAINER_OUTPUT);

    call.mutable_attach_container_output()->mutable_container_id()
      ->CopyFrom(nestedContainerId);

    Future<http::Response> response = postAgentCall(call);

    AWAIT_EXPECT_RESPONSE_STATUS_EQ(http::Forbidden().status, response);
  }

  // Substring expected in the body of rejected executor API responses.
  const string failureMessage =
    "does not contain a 'cid' claim with the correct active ContainerID";

  // Executor API: UPDATE.
  {
    v1::TaskStatus status;
    status.mutable_task_id()->set_value(id::UUID::random().toString());
    status.set_state(v1::TASK_RUNNING);
    status.set_uuid(id::UUID::random().toBytes());
    status.set_source(v1::TaskStatus::SOURCE_EXECUTOR);

    v1::executor::Call call;
    call.set_type(v1::executor::Call::UPDATE);
    call.mutable_framework_id()->CopyFrom(frameworkId);
    call.mutable_executor_id()->CopyFrom(v1::DEFAULT_EXECUTOR_ID);
    call.mutable_update()->mutable_status()->CopyFrom(status);

    Future<http::Response> response = postExecutorCall(call);

    AWAIT_EXPECT_RESPONSE_STATUS_EQ(http::Forbidden().status, response);
    EXPECT_TRUE(strings::contains(response->body, failureMessage));
  }

  // Executor API: MESSAGE.
  {
    v1::executor::Call call;
    call.set_type(v1::executor::Call::MESSAGE);
    call.mutable_framework_id()->CopyFrom(frameworkId);
    call.mutable_executor_id()->CopyFrom(v1::DEFAULT_EXECUTOR_ID);
    call.mutable_message()->set_data("executor message");

    Future<http::Response> response = postExecutorCall(call);

    AWAIT_EXPECT_RESPONSE_STATUS_EQ(http::Forbidden().status, response);
    EXPECT_TRUE(strings::contains(response->body, failureMessage));
  }

  // Tolerate (but do not require) an executor shutdown callback.
  EXPECT_CALL(*executor, shutdown(_))
    .Times(AtMost(1));
}
// This test verifies that default executor subscription fails if the executor
// provides a properly-signed authentication token with invalid claims.
//
// A mock secret generator is handed to the agent and made to return a token
// whose `cid` claim is a random value. The executor's SUBSCRIBE call is then
// expected to surface an error event mentioning '403 Forbidden'.
TEST_F(ExecutorAuthorizationTest, FailedSubscribe)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  // Start an agent with permissive ACLs so that a task can be launched.
  ACLs acls;
  acls.set_permissive(true);

  Result<Authorizer*> authorizer = Authorizer::create(acls);
  ASSERT_SOME(authorizer);

  slave::Flags flags = CreateSlaveFlags();
  flags.acls = acls;

  Owned<MasterDetector> detector = master.get()->createDetector();

  auto executor = std::make_shared<v1::MockHTTPExecutor>();

  v1::Resources resources =
    v1::Resources::parse("cpus:0.1;mem:32;disk:32").get();

  v1::ExecutorInfo executorInfo;
  executorInfo.set_type(v1::ExecutorInfo::DEFAULT);
  executorInfo.mutable_executor_id()->CopyFrom(v1::DEFAULT_EXECUTOR_ID);
  executorInfo.mutable_resources()->CopyFrom(resources);

  Owned<TestContainerizer> containerizer(
      new TestContainerizer(devolve(executorInfo.executor_id()), executor));

  // This pointer is passed to the agent, which will perform the cleanup.
  // NOTE(review): the generator is also held by an `Owned` here; confirm that
  // the agent and this `Owned` do not both release it.
  Owned<MockSecretGenerator> mockSecretGenerator(new MockSecretGenerator());

  Try<Owned<cluster::Slave>> slave = StartSlave(
      detector.get(),
      containerizer.get(),
      mockSecretGenerator.get(),
      authorizer.get(),
      flags);

  ASSERT_SOME(slave);

  auto scheduler = std::make_shared<v1::MockHTTPScheduler>();

  Future<Nothing> connected;
  EXPECT_CALL(*scheduler, connected(_))
    .WillOnce(FutureSatisfy(&connected));

  v1::scheduler::TestMesos mesos(
      master.get()->pid,
      ContentType::PROTOBUF,
      scheduler);

  AWAIT_READY(connected);

  Future<v1::scheduler::Event::Subscribed> subscribed;
  EXPECT_CALL(*scheduler, subscribed(_, _))
    .WillOnce(FutureArg<1>(&subscribed));

  Future<v1::scheduler::Event::Offers> offers;
  EXPECT_CALL(*scheduler, offers(_, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  EXPECT_CALL(*scheduler, heartbeat(_))
    .WillRepeatedly(Return()); // Ignore heartbeats.

  mesos.send(v1::createCallSubscribe(v1::DEFAULT_FRAMEWORK_INFO));

  AWAIT_READY(subscribed);

  v1::FrameworkID frameworkId(subscribed->framework_id());

  executorInfo.mutable_framework_id()->CopyFrom(frameworkId);

  AWAIT_READY(offers);
  ASSERT_FALSE(offers->offers().empty());

  // Capture the executor library handle so the test can subscribe manually.
  Future<v1::executor::Mesos*> executorLib;
  EXPECT_CALL(*executor, connected(_))
    .WillOnce(FutureArg<0>(&executorLib));

  Owned<JWTSecretGenerator> jwtSecretGenerator(
      new JWTSecretGenerator(DEFAULT_JWT_SECRET_KEY));

  // Create a principal which contains an incorrect ContainerID.
  hashmap<string, string> claims;
  claims["fid"] = frameworkId.value();
  claims["eid"] = v1::DEFAULT_EXECUTOR_ID.value();
  claims["cid"] = id::UUID::random().toString();

  Principal principal(None(), claims);

  // Generate an authentication token which is signed using the correct key,
  // but contains an invalid set of claims.
  Future<Secret> authenticationToken =
    jwtSecretGenerator->generate(principal);

  AWAIT_READY(authenticationToken);

  // Make the agent's secret generator hand out the invalid token.
  EXPECT_CALL(*mockSecretGenerator, generate(_))
    .WillOnce(Return(authenticationToken.get()));

  const v1::Offer& offer = offers->offers(0);
  const v1::AgentID& agentId = offer.agent_id();

  // Launch a one-task task group under the executor described above.
  {
    v1::TaskInfo taskInfo =
      v1::createTask(agentId, resources, SLEEP_COMMAND(1000));

    v1::TaskGroupInfo taskGroup;
    taskGroup.add_tasks()->CopyFrom(taskInfo);

    v1::scheduler::Call call;
    call.mutable_framework_id()->CopyFrom(frameworkId);
    call.set_type(v1::scheduler::Call::ACCEPT);

    v1::scheduler::Call::Accept* accept = call.mutable_accept();
    accept->add_offer_ids()->CopyFrom(offer.id());

    v1::Offer::Operation* operation = accept->add_operations();
    operation->set_type(v1::Offer::Operation::LAUNCH_GROUP);

    v1::Offer::Operation::LaunchGroup* launchGroup =
      operation->mutable_launch_group();

    launchGroup->mutable_executor()->CopyFrom(executorInfo);
    launchGroup->mutable_task_group()->CopyFrom(taskGroup);

    mesos.send(call);
  }

  AWAIT_READY(executorLib);

  // Install the expectation BEFORE sending SUBSCRIBE: the error event is
  // delivered asynchronously and could otherwise arrive while no expectation
  // is registered, leaving `error` pending forever.
  Future<v1::executor::Event::Error> error;
  EXPECT_CALL(*executor, error(_, _))
    .WillOnce(FutureArg<1>(&error));

  {
    v1::executor::Call call;
    call.mutable_framework_id()->CopyFrom(frameworkId);
    call.mutable_executor_id()->CopyFrom(v1::DEFAULT_EXECUTOR_ID);

    call.set_type(v1::executor::Call::SUBSCRIBE);

    call.mutable_subscribe();

    executorLib.get()->send(call);
  }

  AWAIT_READY(error);

  // Expected value first, matching the other assertions in this file.
  EXPECT_EQ(
      "Received unexpected '403 Forbidden' () for SUBSCRIBE",
      error->message());
}
// Regression test for MESOS-8784.
//
// When an agent re-registers, the master reconciles operations that the
// agent does not report. Here the `ApplyOperationMessage` sent by the
// master is dropped, so the agent should answer with an OPERATION_DROPPED
// operation status update.
//
// The operation is submitted with an operation ID, and the test verifies
// that the framework receives the resulting OPERATION_DROPPED update.
TEST_F(
    MasterSlaveReconciliationTest,
    ForwardOperationDroppedAfterExplicitReconciliation)
{
  Clock::pause();

  mesos::internal::master::Flags masterFlags = CreateMasterFlags();
  Try<Owned<cluster::Master>> master = StartMaster(masterFlags);
  ASSERT_SOME(master);

  Future<UpdateSlaveMessage> agentUpdate =
    FUTURE_PROTOBUF(UpdateSlaveMessage(), _, _);

  auto masterDetector =
    std::make_shared<StandaloneMasterDetector>(master.get()->pid);

  mesos::internal::slave::Flags agentFlags = CreateSlaveFlags();
  Try<Owned<cluster::Slave>> agent =
    StartSlave(masterDetector.get(), agentFlags);
  ASSERT_SOME(agent);

  // The clock is paused, so registration only happens once we step past
  // the registration backoff.
  Clock::advance(agentFlags.registration_backoff_factor);

  // The first `UpdateSlaveMessage` marks the agent as registered.
  AWAIT_READY(agentUpdate);

  // Set up a mock resource provider exposing a single raw disk.
  v1::ResourceProviderInfo providerInfo;
  providerInfo.set_type("org.apache.mesos.rp.test");
  providerInfo.set_name("test");

  v1::Resource rawDisk = v1::createDiskResource(
      "200", "*", None(), None(), v1::createDiskSourceRaw());

  Owned<v1::MockResourceProvider> provider(
      new v1::MockResourceProvider(providerInfo, v1::Resources(rawDisk)));

  // On a reconciliation event, have the mock resource provider report
  // every listed operation as OPERATION_DROPPED.
  auto replyWithDropped =
    [&provider](
        const v1::resource_provider::Event::ReconcileOperations& event) {
      for (const v1::UUID& uuid : event.operation_uuids()) {
        v1::resource_provider::Call update;
        update.set_type(v1::resource_provider::Call::UPDATE_OPERATION_STATUS);
        update.mutable_resource_provider_id()->CopyFrom(provider->info.id());

        auto* payload = update.mutable_update_operation_status();
        payload->mutable_operation_uuid()->CopyFrom(uuid);
        payload->mutable_status()->set_state(v1::OPERATION_DROPPED);

        provider->send(update);
      }
    };

  EXPECT_CALL(*provider, reconcileOperations(_))
    .WillOnce(Invoke(replyWithDropped));

  Owned<EndpointDetector> agentEndpoint(
      mesos::internal::tests::resource_provider::createEndpointDetector(
          agent.get()->pid));

  const ContentType contentType = ContentType::PROTOBUF;

  agentUpdate = FUTURE_PROTOBUF(UpdateSlaveMessage(), _, _);

  // NOTE: The resource provider cannot fully register while the clock is
  // paused, so let it run for the duration of the registration.
  Clock::resume();

  provider->start(agentEndpoint, contentType);

  // The next `UpdateSlaveMessage` signals that the agent's resources now
  // include those of the resource provider.
  AWAIT_READY(agentUpdate);

  Clock::pause();

  // Bring up a v1 framework subscribed under the default test role.
  auto mockScheduler = std::make_shared<v1::MockHTTPScheduler>();

  v1::FrameworkInfo framework = v1::DEFAULT_FRAMEWORK_INFO;
  framework.set_roles(0, DEFAULT_TEST_ROLE);

  EXPECT_CALL(*mockScheduler, connected(_))
    .WillOnce(v1::scheduler::SendSubscribe(framework));

  Future<v1::scheduler::Event::Subscribed> subscribedEvent;
  EXPECT_CALL(*mockScheduler, subscribed(_, _))
    .WillOnce(FutureArg<1>(&subscribedEvent));

  // Heartbeats are irrelevant to this test.
  EXPECT_CALL(*mockScheduler, heartbeat(_))
    .WillRepeatedly(Return());

  // Capture the first batch of offers and decline every later one.
  Future<v1::scheduler::Event::Offers> firstOffers;
  EXPECT_CALL(*mockScheduler, offers(_, _))
    .WillOnce(FutureArg<1>(&firstOffers))
    .WillRepeatedly(v1::scheduler::DeclineOffers());

  v1::scheduler::TestMesos schedulerDriver(
      master.get()->pid, contentType, mockScheduler);

  AWAIT_READY(subscribedEvent);
  const v1::FrameworkID frameworkId = subscribedEvent->framework_id();

  AWAIT_READY(firstOffers);
  ASSERT_FALSE(firstOffers->offers().empty());

  const v1::Offer& offer = firstOffers->offers(0);

  // Intercept and drop the master's `ApplyOperationMessage` so that it
  // never reaches the agent.
  Future<ApplyOperationMessage> droppedApply =
    DROP_PROTOBUF(ApplyOperationMessage(), master.get()->pid, _);

  // Only resources that carry a provider ID are of interest here.
  const auto hasProviderId = [](const v1::Resource& candidate) {
    return candidate.has_provider_id();
  };

  v1::Resources providerResources =
    v1::Resources(offer.resources()).filter(hasProviderId);

  ASSERT_FALSE(providerResources.empty());

  // Reserve the first such resource for the framework's role.
  v1::Resource reservation = *providerResources.begin();
  reservation.add_reservations()->CopyFrom(
      v1::createDynamicReservationInfo(
          framework.roles(0), DEFAULT_CREDENTIAL.principal()));

  v1::OperationID opId;
  opId.set_value("operation");

  const auto acceptCall = v1::createCallAccept(
      frameworkId, offer, {v1::RESERVE(reservation, opId.value())});

  schedulerDriver.send(acceptCall);

  AWAIT_READY(droppedApply);

  Future<v1::scheduler::Event::UpdateOperationStatus> droppedStatus;
  EXPECT_CALL(*mockScheduler, updateOperationStatus(_, _))
    .WillOnce(FutureArg<1>(&droppedStatus));

  // Force the agent to re-register by simulating a spurious master change
  // event (e.g., due to ZooKeeper expiration).
  masterDetector->appoint(master.get()->pid);

  // Step past the registration backoff so the re-registration goes out.
  Clock::advance(agentFlags.registration_backoff_factor);

  // The framework should now be told that the operation was dropped.
  AWAIT_READY(droppedStatus);

  const auto& status = droppedStatus->status();
  EXPECT_EQ(opId, status.operation_id());
  EXPECT_EQ(v1::OPERATION_DROPPED, status.state());
}