virtual void resourceOffers(
    SchedulerDriver* driver,
    const vector<Offer>& offers)
{
  for (size_t i = 0; i < offers.size(); i++) {
    const Offer& offer = offers[i];
    Resources remaining = offer.resources();

    static Resources TASK_RESOURCES = Resources::parse(
        "cpus:" + stringify<float>(CPUS_PER_TASK) +
        ";mem:" + stringify<size_t>(MEM_PER_TASK)).get();

    size_t maxTasks = 0;
    while (remaining.flatten().contains(TASK_RESOURCES)) {
      maxTasks++;
      remaining -= TASK_RESOURCES;
    }

    // Launch tasks.
    vector<TaskInfo> tasks;
    for (size_t i = 0; i < maxTasks / 2 && crawlQueue.size() > 0; i++) {
      string url = crawlQueue.front();
      crawlQueue.pop();
      string urlId = "C" + stringify<size_t>(processed[url]);

      TaskInfo task;
      task.set_name("Crawler " + urlId);
      task.mutable_task_id()->set_value(urlId);
      task.mutable_slave_id()->MergeFrom(offer.slave_id());
      task.mutable_executor()->MergeFrom(crawler);
      task.mutable_resources()->MergeFrom(TASK_RESOURCES);
      task.set_data(url);

      tasks.push_back(task);
      tasksLaunched++;
      cout << "Crawler " << urlId << " " << url << endl;
    }

    for (size_t i = maxTasks / 2; i < maxTasks && renderQueue.size() > 0; i++) {
      string url = renderQueue.front();
      renderQueue.pop();
      string urlId = "R" + stringify<size_t>(processed[url]);

      TaskInfo task;
      task.set_name("Renderer " + urlId);
      task.mutable_task_id()->set_value(urlId);
      task.mutable_slave_id()->MergeFrom(offer.slave_id());
      task.mutable_executor()->MergeFrom(renderer);
      task.mutable_resources()->MergeFrom(TASK_RESOURCES);
      task.set_data(url);

      tasks.push_back(task);
      tasksLaunched++;
      cout << "Renderer " << urlId << " " << url << endl;
    }

    driver->launchTasks(offer.id(), tasks);
  }
}
// This test checks that a scheduler exit shuts down the executor.
TEST_F(FaultToleranceTest, SchedulerExit)
{
  Try<PID<Master> > master = StartMaster();
  ASSERT_SOME(master);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);

  Try<PID<Slave> > slave = StartSlave(&exec);
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(&sched, DEFAULT_FRAMEWORK_INFO, master.get());

  EXPECT_CALL(sched, registered(&driver, _, _));

  Future<vector<Offer> > offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  EXPECT_NE(0u, offers.get().size());

  TaskInfo task;
  task.set_name("");
  task.mutable_task_id()->set_value("1");
  task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id());
  task.mutable_resources()->MergeFrom(offers.get()[0].resources());
  task.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO);

  vector<TaskInfo> tasks;
  tasks.push_back(task);

  Future<TaskStatus> status;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&status));

  EXPECT_CALL(exec, registered(_, _, _, _));

  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING));

  driver.launchTasks(offers.get()[0].id(), tasks);

  AWAIT_READY(status);
  EXPECT_EQ(TASK_RUNNING, status.get().state());

  EXPECT_CALL(exec, shutdown(_))
    .Times(AtMost(1));

  driver.stop();
  driver.join();

  Shutdown();
}
virtual void resourceOffers(
    SchedulerDriver* driver,
    const vector<Offer>& offers)
{
  cout << "." << flush;
  for (size_t i = 0; i < offers.size(); i++) {
    const Offer& offer = offers[i];

    // Lookup resources we care about.
    // TODO(benh): It would be nice to ultimately have some helper
    // functions for looking up resources (a sketch of one such helper
    // follows this function).
    double cpus = 0;
    double mem = 0;

    for (int j = 0; j < offer.resources_size(); j++) {
      const Resource& resource = offer.resources(j);
      if (resource.name() == "cpus" &&
          resource.type() == Value::SCALAR) {
        cpus = resource.scalar().value();
      } else if (resource.name() == "mem" &&
                 resource.type() == Value::SCALAR) {
        mem = resource.scalar().value();
      }
    }

    // Launch tasks (only one per offer).
    vector<TaskInfo> tasks;
    if (cpus >= CPUS_PER_TASK && mem >= MEM_PER_TASK) {
      int taskId = tasksLaunched++;

      cout << "Starting task " << taskId << " on "
           << offer.hostname() << endl;

      TaskInfo task;
      task.set_name("Task " + lexical_cast<string>(taskId));
      task.mutable_task_id()->set_value(lexical_cast<string>(taskId));
      task.mutable_slave_id()->MergeFrom(offer.slave_id());
      task.mutable_executor()->MergeFrom(executor);

      Resource* resource;

      resource = task.add_resources();
      resource->set_name("cpus");
      resource->set_type(Value::SCALAR);
      resource->mutable_scalar()->set_value(CPUS_PER_TASK);

      resource = task.add_resources();
      resource->set_name("mem");
      resource->set_type(Value::SCALAR);
      resource->mutable_scalar()->set_value(MEM_PER_TASK);

      tasks.push_back(task);

      cpus -= CPUS_PER_TASK;
      mem -= MEM_PER_TASK;
    }

    driver->launchTasks(offer.id(), tasks);
  }
}
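// A minimal sketch of the helper the TODO above asks for (hypothetical;
// not part of the original sources): it returns the value of a named
// scalar resource in an offer, or 0 if the offer lacks it. This matches
// the getScalarResource() usage in the balloon scheduler further below.
double getScalarResource(const Offer& offer, const string& name)
{
  double value = 0;
  for (int i = 0; i < offer.resources_size(); i++) {
    const Resource& resource = offer.resources(i);
    if (resource.name() == name && resource.type() == Value::SCALAR) {
      value = resource.scalar().value();
    }
  }
  return value;
}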
TEST_F(ResourceOffersTest, TaskUsesMoreResourcesThanOffered)
{
  Try<PID<Master> > master = StartMaster();
  ASSERT_SOME(master);

  Try<PID<Slave> > slave = StartSlave();
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(&sched, DEFAULT_FRAMEWORK_INFO, master.get());

  EXPECT_CALL(sched, registered(&driver, _, _))
    .Times(1);

  Future<vector<Offer> > offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  EXPECT_NE(0u, offers.get().size());

  TaskInfo task;
  task.set_name("");
  task.mutable_task_id()->set_value("1");
  task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id());
  task.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO);

  Resource* cpus = task.add_resources();
  cpus->set_name("cpus");
  cpus->set_type(Value::SCALAR);
  cpus->mutable_scalar()->set_value(2.01);

  vector<TaskInfo> tasks;
  tasks.push_back(task);

  Future<TaskStatus> status;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&status));

  driver.launchTasks(offers.get()[0].id(), tasks);

  AWAIT_READY(status);
  EXPECT_EQ(task.task_id(), status.get().task_id());
  EXPECT_EQ(TASK_LOST, status.get().state());
  EXPECT_TRUE(status.get().has_message());
  EXPECT_EQ("Task uses more resources than offered", status.get().message());

  driver.stop();
  driver.join();

  Shutdown();
}
void resourceOffers(const vector<Offer>& offers)
{
  foreach (const Offer& offer, offers) {
    cout << "Received offer " << offer.id() << " with "
         << Resources(offer.resources()) << endl;

    static const Resources TASK_RESOURCES = Resources::parse(
        "cpus:" + stringify(CPUS_PER_TASK) +
        ";mem:" + stringify(MEM_PER_TASK)).get();

    Resources remaining = offer.resources();

    // Launch tasks.
    vector<TaskInfo> tasks;
    while (tasksLaunched < totalTasks &&
           remaining.flatten().contains(TASK_RESOURCES)) {
      int taskId = tasksLaunched++;

      cout << "Launching task " << taskId << " using offer "
           << offer.id() << endl;

      TaskInfo task;
      task.set_name("Task " + lexical_cast<string>(taskId));
      task.mutable_task_id()->set_value(lexical_cast<string>(taskId));
      task.mutable_agent_id()->MergeFrom(offer.agent_id());
      task.mutable_executor()->MergeFrom(executor);

      Option<Resources> resources =
        remaining.find(TASK_RESOURCES.flatten(framework.role()));

      CHECK_SOME(resources);

      task.mutable_resources()->CopyFrom(resources.get());
      remaining -= resources.get();

      tasks.push_back(task);
    }

    Call call;
    CHECK(framework.has_id());
    call.mutable_framework_id()->CopyFrom(framework.id());
    call.set_type(Call::ACCEPT);

    Call::Accept* accept = call.mutable_accept();
    accept->add_offer_ids()->CopyFrom(offer.id());

    Offer::Operation* operation = accept->add_operations();
    operation->set_type(Offer::Operation::LAUNCH);
    foreach (const TaskInfo& taskInfo, tasks) {
      operation->mutable_launch()->add_task_infos()->CopyFrom(taskInfo);
    }
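    // NOTE: The excerpt above ends before the assembled ACCEPT call is
    // sent. A minimal completion sketch, assuming the v1 scheduler
    // library where `mesos` is the scheduler's Mesos instance (an
    // assumption; it is not shown in the excerpt): hand the call to
    // the library, which forwards it to the master.
    mesos->send(call);
  }
}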
// TODO(benh): Move this into utils, make more generic, and use in
// other tests.
vector<TaskInfo> createTasks(const Offer& offer)
{
  TaskInfo task;
  task.set_name("test-task");
  task.mutable_task_id()->set_value("1");
  task.mutable_slave_id()->MergeFrom(offer.slave_id());
  task.mutable_resources()->MergeFrom(offer.resources());
  task.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO);

  vector<TaskInfo> tasks;
  tasks.push_back(task);

  return tasks;
}
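// A usage sketch for createTasks() above, mirroring the offer-handling
// pattern of the surrounding tests (hypothetical test body; `driver`
// and `offers` are as in those tests):
AWAIT_READY(offers);
EXPECT_NE(0u, offers.get().size());

driver.launchTasks(offers.get()[0].id(), createTasks(offers.get()[0]));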
virtual void resourceOffers(
    SchedulerDriver* driver,
    const std::vector<Offer>& offers)
{
  std::cout << "Resource offers received" << std::endl;

  for (size_t i = 0; i < offers.size(); i++) {
    const Offer& offer = offers[i];

    // We just launch one task.
    if (!taskLaunched) {
      double mem = getScalarResource(offer, "mem");
      assert(mem > EXECUTOR_MEMORY_MB);

      std::vector<TaskInfo> tasks;
      std::cout << "Starting the task" << std::endl;

      TaskInfo task;
      task.set_name("Balloon Task");
      task.mutable_task_id()->set_value("1");
      task.mutable_slave_id()->MergeFrom(offer.slave_id());
      task.mutable_executor()->MergeFrom(executor);
      task.set_data(stringify<size_t>(balloonLimit));

      // Use up all the memory from the offer.
      Resource* resource;
      resource = task.add_resources();
      resource->set_name("mem");
      resource->set_type(Value::SCALAR);
      resource->mutable_scalar()->set_value(mem - EXECUTOR_MEMORY_MB);

      // And all the CPU.
      double cpus = getScalarResource(offer, "cpus");
      resource = task.add_resources();
      resource->set_name("cpus");
      resource->set_type(Value::SCALAR);
      resource->mutable_scalar()->set_value(cpus);

      tasks.push_back(task);

      driver->launchTasks(offer.id(), tasks);
      taskLaunched = true;
    }
  }
}
virtual void resourceOffers(
    SchedulerDriver* driver,
    const vector<Offer>& offers)
{
  foreach (const Offer& offer, offers) {
    cout << "Received offer " << offer.id() << " with "
         << offer.resources() << endl;

    static const Resources TASK_RESOURCES = Resources::parse(
        "cpus:" + stringify(CPUS_PER_TASK) +
        ";mem:" + stringify(MEM_PER_TASK)).get();

    Resources remaining = offer.resources();

    // Launch tasks.
    vector<TaskInfo> tasks;
    while (tasksLaunched < totalTasks &&
           remaining.flatten().contains(TASK_RESOURCES)) {
      int taskId = tasksLaunched++;

      cout << "Launching task " << taskId << " using offer "
           << offer.id() << endl;

      TaskInfo task;
      task.set_name("Task " + lexical_cast<string>(taskId));
      task.mutable_task_id()->set_value(lexical_cast<string>(taskId));
      task.mutable_slave_id()->MergeFrom(offer.slave_id());
      task.mutable_executor()->MergeFrom(executor);

      Try<Resources> flattened = TASK_RESOURCES.flatten(role);
      CHECK_SOME(flattened);

      Option<Resources> resources = remaining.find(flattened.get());
      CHECK_SOME(resources);

      task.mutable_resources()->MergeFrom(resources.get());
      remaining -= resources.get();

      tasks.push_back(task);
    }

    driver->launchTasks(offer.id(), tasks);
  }
}
// For use with a MockScheduler, for example:
//   EXPECT_CALL(sched, resourceOffers(_, _))
//     .WillOnce(LaunchTasks(TASKS, CPUS, MEM, ROLE));
// Launches up to TASKS no-op tasks, if possible, each with CPUS cpus
// and MEM memory, using resources from the given ROLE.
ACTION_P4(LaunchTasks, tasks, cpus, mem, role)
{
  SchedulerDriver* driver = arg0;
  std::vector<Offer> offers = arg1;
  int numTasks = tasks;

  int launched = 0;

  for (size_t i = 0; i < offers.size(); i++) {
    const Offer& offer = offers[i];
    const Resources TASK_RESOURCES = Resources::parse(
        "cpus:" + stringify(cpus) +
        ";mem:" + stringify(mem)).get();

    int nextTaskId = 0;
    std::vector<TaskInfo> tasks;
    Resources remaining = offer.resources();

    while (TASK_RESOURCES <= remaining.flatten() && launched < numTasks) {
      TaskInfo task;
      task.set_name("TestTask");
      task.mutable_task_id()->set_value(stringify(nextTaskId++));
      task.mutable_slave_id()->MergeFrom(offer.slave_id());

      ExecutorInfo executor;
      executor.mutable_executor_id()->set_value("default");
      executor.mutable_command()->set_value(":");
      task.mutable_executor()->MergeFrom(executor);

      Option<Resources> resources = remaining.find(TASK_RESOURCES, role);
      CHECK_SOME(resources);

      task.mutable_resources()->MergeFrom(resources.get());
      remaining -= resources.get();

      tasks.push_back(task);
      launched++;
    }

    driver->launchTasks(offer.id(), tasks);
  }
}
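// A concrete usage sketch for the action above (hypothetical test
// body): launch up to five no-op tasks, each using one CPU and 64 MB
// of memory drawn from the default "*" role, when the first offers
// arrive.
EXPECT_CALL(sched, resourceOffers(&driver, _))
  .WillOnce(LaunchTasks(5, 1, 64, "*"));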
virtual void resourceOffers(
    SchedulerDriver* driver,
    const vector<Offer>& offers)
{
  cout << "." << flush;
  for (size_t i = 0; i < offers.size(); i++) {
    const Offer& offer = offers[i];

    static const Resources TASK_RESOURCES = Resources::parse(
        "cpus:" + stringify(CPUS_PER_TASK) +
        ";mem:" + stringify(MEM_PER_TASK)).get();

    Resources remaining = offer.resources();

    // Launch tasks.
    vector<TaskInfo> tasks;
    while (tasksLaunched < totalTasks &&
           TASK_RESOURCES <= remaining.flatten()) {
      int taskId = tasksLaunched++;

      cout << "Starting task " << taskId << " on "
           << offer.hostname() << endl;

      TaskInfo task;
      task.set_name("Task " + lexical_cast<string>(taskId));
      task.mutable_task_id()->set_value(lexical_cast<string>(taskId));
      task.mutable_slave_id()->MergeFrom(offer.slave_id());
      task.mutable_executor()->MergeFrom(executor);

      Option<Resources> resources = remaining.find(TASK_RESOURCES, role);
      CHECK_SOME(resources);

      task.mutable_resources()->MergeFrom(resources.get());
      remaining -= resources.get();

      tasks.push_back(task);
    }

    driver->launchTasks(offer.id(), tasks);
  }
}
inline TaskInfo createTask(
    const Offer& offer,
    const std::string& command,
    const Option<mesos::ExecutorID>& executorId = None(),
    const std::string& name = "test-task",
    const std::string& id = UUID::random().toString())
{
  TaskInfo task;
  task.set_name(name);
  task.mutable_task_id()->set_value(id);
  task.mutable_slave_id()->CopyFrom(offer.slave_id());
  task.mutable_resources()->CopyFrom(offer.resources());
  if (executorId.isSome()) {
    ExecutorInfo executor;
    executor.mutable_executor_id()->CopyFrom(executorId.get());
    executor.mutable_command()->set_value(command);
    task.mutable_executor()->CopyFrom(executor);
  } else {
    task.mutable_command()->set_value(command);
  }

  return task;
}
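// A usage sketch for createTask() above (hypothetical test body): with
// no ExecutorID the helper attaches the shell command directly, so the
// task runs under the command executor; passing an ExecutorID wraps
// the command in an ExecutorInfo instead.
TaskInfo task = createTask(offers.get()[0], "sleep 60");

driver.launchTasks(offers.get()[0].id(), {task});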
// This test verifies that when the slave reregisters, the master
// does not send a TASK_LOST update for a task that has reached a
// terminal state but is waiting for an acknowledgement.
TEST_F(MasterSlaveReconciliationTest, SlaveReregisterTerminalTask)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);
  TestContainerizer containerizer(&exec);

  StandaloneMasterDetector detector(master.get()->pid);

  Try<Owned<cluster::Slave>> slave = StartSlave(&detector, &containerizer);
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  ASSERT_FALSE(offers->empty());

  TaskInfo task;
  task.set_name("test task");
  task.mutable_task_id()->set_value("1");
  task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id());
  task.mutable_resources()->MergeFrom(offers.get()[0].resources());
  task.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO);

  EXPECT_CALL(exec, registered(_, _, _, _));

  // Send a terminal update right away.
  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_FINISHED));

  // Drop the status update from slave to the master, so that
  // the slave has a pending terminal update when it reregisters.
  DROP_PROTOBUF(StatusUpdateMessage(), _, master.get()->pid);

  Future<Nothing> _statusUpdate = FUTURE_DISPATCH(_, &Slave::_statusUpdate);

  Future<TaskStatus> status;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&status))
    .WillRepeatedly(Return()); // Ignore retried status updates.

  driver.launchTasks(offers.get()[0].id(), {task});

  AWAIT_READY(_statusUpdate);

  Future<SlaveReregisteredMessage> slaveReregisteredMessage =
    FUTURE_PROTOBUF(SlaveReregisteredMessage(), _, _);

  // Simulate a spurious master change event (e.g., due to ZooKeeper
  // expiration) at the slave to force re-registration.
  detector.appoint(master.get()->pid);

  AWAIT_READY(slaveReregisteredMessage);

  // The master should not send a TASK_LOST after the slave
  // reregisters. We check this by calling Clock::settle() so that
  // the only update the scheduler receives is the retried
  // TASK_FINISHED update.
  // NOTE: The task status update manager resends the status update
  // when it detects a new master.
  Clock::pause();
  Clock::settle();

  AWAIT_READY(status);
  ASSERT_EQ(TASK_FINISHED, status->state());

  EXPECT_CALL(exec, shutdown(_))
    .Times(AtMost(1));

  driver.stop();
  driver.join();
}
// This test checks that a failover scheduler gets the
// retried status update.
TEST_F(FaultToleranceTest, SchedulerFailoverStatusUpdate)
{
  Clock::pause();

  Try<PID<Master> > master = StartMaster();
  ASSERT_SOME(master);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);

  Try<PID<Slave> > slave = StartSlave(&exec);
  ASSERT_SOME(slave);

  // Launch the first (i.e., failing) scheduler.
  MockScheduler sched1;
  MesosSchedulerDriver driver1(&sched1, DEFAULT_FRAMEWORK_INFO, master.get());

  FrameworkID frameworkId;
  EXPECT_CALL(sched1, registered(&driver1, _, _))
    .WillOnce(SaveArg<1>(&frameworkId));

  Future<vector<Offer> > offers;
  EXPECT_CALL(sched1, resourceOffers(&driver1, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return());

  driver1.start();

  AWAIT_READY(offers);
  EXPECT_NE(0u, offers.get().size());

  // Launch a task.
  TaskInfo task;
  task.set_name("");
  task.mutable_task_id()->set_value("1");
  task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id());
  task.mutable_resources()->MergeFrom(offers.get()[0].resources());
  task.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO);

  vector<TaskInfo> tasks;
  tasks.push_back(task);

  EXPECT_CALL(exec, registered(_, _, _, _))
    .Times(1);

  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING));

  // Drop the first status update message
  // between master and the scheduler.
  Future<StatusUpdateMessage> statusUpdateMessage =
    DROP_PROTOBUF(StatusUpdateMessage(), _,
                  Not(AnyOf(Eq(master.get()), Eq(slave.get()))));

  driver1.launchTasks(offers.get()[0].id(), tasks);

  AWAIT_READY(statusUpdateMessage);

  // Now launch the second (i.e., failover) scheduler using the
  // framework id recorded from the first scheduler and wait until it
  // registers.
  MockScheduler sched2;

  FrameworkInfo framework2; // Bug in gcc 4.1.*, must assign on next line.
  framework2 = DEFAULT_FRAMEWORK_INFO;
  framework2.mutable_id()->MergeFrom(frameworkId);

  MesosSchedulerDriver driver2(&sched2, framework2, master.get());

  Future<Nothing> registered2;
  EXPECT_CALL(sched2, registered(&driver2, frameworkId, _))
    .WillOnce(FutureSatisfy(&registered2));

  // Scheduler1 should get an error due to failover.
  EXPECT_CALL(sched1, error(&driver1, "Framework failed over"));

  driver2.start();

  AWAIT_READY(registered2);

  // Now advance time enough for the reliable timeout to kick in,
  // so that another status update is sent.
  Future<Nothing> statusUpdate;
  EXPECT_CALL(sched2, statusUpdate(&driver2, _))
    .WillOnce(FutureSatisfy(&statusUpdate));

  Clock::advance(STATUS_UPDATE_RETRY_INTERVAL);

  AWAIT_READY(statusUpdate);

  EXPECT_CALL(exec, shutdown(_))
    .Times(AtMost(1));

  driver1.stop();
  driver2.stop();

  driver1.join();
  driver2.join();

  Shutdown();

  Clock::resume();
}
// The purpose of this test is to ensure that when slaves are removed
// from the master, and then attempt to send exited executor messages,
// we send a ShutdownMessage to the slave. Why? Because during a
// network partition, the master will remove a partitioned slave, thus
// sending its tasks to LOST. At this point, when the partition is
// removed, the slave may attempt to send exited executor messages if
// it was unaware that the master removed it. We've already
// notified frameworks that the tasks under the executors were LOST,
// so we have to have the slave shut down.
TEST_F(PartitionTest, PartitionedSlaveExitedExecutor)
{
  Try<PID<Master> > master = StartMaster();
  ASSERT_SOME(master);

  // Allow the master to PING the slave, but drop all PONG messages
  // from the slave. Note that we don't match on the master / slave
  // PIDs because it's actually the SlaveObserver Process that sends
  // the pings.
  Future<Message> ping = FUTURE_MESSAGE(Eq("PING"), _, _);
  DROP_MESSAGES(Eq("PONG"), _, _);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);
  TestContainerizer containerizer(&exec);

  Try<PID<Slave> > slave = StartSlave(&containerizer);
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL);

  Future<FrameworkID> frameworkId;
  EXPECT_CALL(sched, registered(&driver, _, _))
    .WillOnce(FutureArg<1>(&frameworkId));

  Future<vector<Offer> > offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return());

  driver.start();

  AWAIT_READY(frameworkId);
  AWAIT_READY(offers);
  ASSERT_NE(0u, offers.get().size());

  // Launch a task. This allows us to have the slave send an
  // ExitedExecutorMessage.
  TaskID taskId;
  taskId.set_value("1");

  TaskInfo task;
  task.set_name("");
  task.mutable_task_id()->MergeFrom(taskId);
  task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id());
  task.mutable_resources()->MergeFrom(offers.get()[0].resources());
  task.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO);
  task.mutable_executor()->mutable_command()->set_value("sleep 60");

  vector<TaskInfo> tasks;
  tasks.push_back(task);

  // Set up the expectations for launching the task.
  EXPECT_CALL(exec, registered(_, _, _, _));
  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING));

  // Drop all the status updates from the slave, so that we can
  // ensure the ExitedExecutorMessage is what triggers the slave
  // shutdown.
  DROP_PROTOBUFS(StatusUpdateMessage(), _, master.get());

  driver.launchTasks(offers.get()[0].id(), tasks);

  // Drop the first shutdown message from the master (simulated
  // partition) and allow the second shutdown message to pass when
  // triggered by the ExitedExecutorMessage.
  Future<ShutdownMessage> shutdownMessage =
    DROP_PROTOBUF(ShutdownMessage(), _, slave.get());

  Future<TaskStatus> lostStatus;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&lostStatus));

  Future<Nothing> slaveLost;
  EXPECT_CALL(sched, slaveLost(&driver, _))
    .WillOnce(FutureSatisfy(&slaveLost));

  Clock::pause();

  // Now, induce a partition of the slave by having the master
  // timeout the slave.
  uint32_t pings = 0;
  while (true) {
    AWAIT_READY(ping);
    pings++;
    if (pings == master::MAX_SLAVE_PING_TIMEOUTS) {
      break;
    }
    ping = FUTURE_MESSAGE(Eq("PING"), _, _);
    Clock::advance(master::SLAVE_PING_TIMEOUT);
    Clock::settle();
  }

  Clock::advance(master::SLAVE_PING_TIMEOUT);
  Clock::settle();

  // The master will have notified the framework of the lost task.
  AWAIT_READY(lostStatus);
  EXPECT_EQ(TASK_LOST, lostStatus.get().state());

  // Wait for the master to attempt to shut down the slave.
  AWAIT_READY(shutdownMessage);

  // The master will notify the framework that the slave was lost.
  AWAIT_READY(slaveLost);

  shutdownMessage = FUTURE_PROTOBUF(ShutdownMessage(), _, slave.get());

  // Induce an ExitedExecutorMessage from the slave.
  containerizer.destroy(
      frameworkId.get(), DEFAULT_EXECUTOR_INFO.executor_id());

  // Upon receiving the message, the master will shutdown the slave.
  AWAIT_READY(shutdownMessage);

  Clock::resume();

  driver.stop();
  driver.join();

  Shutdown();
}
// This test verifies that when the slave reregisters, we correctly
// send the information about actively running frameworks.
TEST_F(MasterSlaveReconciliationTest, SlaveReregisterFrameworks)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);
  TestContainerizer containerizer(&exec);

  StandaloneMasterDetector detector(master.get()->pid);

  Try<Owned<cluster::Slave>> slave = StartSlave(&detector, &containerizer);
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  ASSERT_FALSE(offers->empty());

  TaskInfo task;
  task.set_name("test task");
  task.mutable_task_id()->set_value("1");
  task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id());
  task.mutable_resources()->MergeFrom(offers.get()[0].resources());
  task.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO);

  EXPECT_CALL(exec, registered(_, _, _, _));

  // Send an update right away.
  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING));

  Future<TaskStatus> status;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&status))
    .WillRepeatedly(Return()); // Ignore retried status updates.

  driver.launchTasks(offers.get()[0].id(), {task});

  // Wait until TASK_RUNNING of the task is received.
  AWAIT_READY(status);
  EXPECT_EQ(TASK_RUNNING, status->state());

  Future<ReregisterSlaveMessage> reregisterSlave =
    FUTURE_PROTOBUF(ReregisterSlaveMessage(), _, _);

  // Simulate a spurious master change event (e.g., due to ZooKeeper
  // expiration) at the slave to force re-registration.
  detector.appoint(master.get()->pid);

  // Expect to receive the 'ReregisterSlaveMessage' containing the
  // active frameworks.
  AWAIT_READY(reregisterSlave);

  EXPECT_EQ(1, reregisterSlave->frameworks().size());

  EXPECT_CALL(exec, shutdown(_))
    .Times(AtMost(1));

  driver.stop();
  driver.join();
}
// This test verifies that an authorized task launch is successful.
TEST_F(MasterAuthorizationTest, AuthorizedTask)
{
  // Setup ACLs so that the framework can launch tasks as "foo".
  ACLs acls;
  mesos::ACL::RunTasks* acl = acls.add_run_tasks();
  acl->mutable_principals()->add_values(DEFAULT_FRAMEWORK_INFO.principal());
  acl->mutable_users()->add_values("foo");

  master::Flags flags = CreateMasterFlags();
  flags.acls = acls;

  Try<PID<Master> > master = StartMaster(flags);
  ASSERT_SOME(master);

  // Create an authorized executor.
  ExecutorInfo executor; // Bug in gcc 4.1.*, must assign on next line.
  executor = CREATE_EXECUTOR_INFO("test-executor", "exit 1");
  executor.mutable_command()->set_user("foo");

  MockExecutor exec(executor.executor_id());

  Try<PID<Slave> > slave = StartSlave(&exec);
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _))
    .Times(1);

  Future<vector<Offer> > offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  EXPECT_NE(0u, offers.get().size());

  // Create an authorized task.
  TaskInfo task;
  task.set_name("test");
  task.mutable_task_id()->set_value("1");
  task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id());
  task.mutable_resources()->MergeFrom(offers.get()[0].resources());
  task.mutable_executor()->MergeFrom(executor);

  vector<TaskInfo> tasks;
  tasks.push_back(task);

  EXPECT_CALL(exec, registered(_, _, _, _))
    .Times(1);

  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING));

  Future<TaskStatus> status;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&status));

  driver.launchTasks(offers.get()[0].id(), tasks);

  AWAIT_READY(status);
  EXPECT_EQ(TASK_RUNNING, status.get().state());

  EXPECT_CALL(exec, shutdown(_))
    .Times(AtMost(1));

  driver.stop();
  driver.join();

  Shutdown(); // Must shutdown before 'containerizer' gets deallocated.
}
// The purpose of this test is to ensure that when slaves are removed
// from the master, and then attempt to re-register, we deny the
// re-registration by sending a ShutdownMessage to the slave.
// Why? Because during a network partition, the master will remove a
// partitioned slave, thus sending its tasks to LOST. At this point,
// when the partition is removed, the slave will attempt to
// re-register with its running tasks. We've already notified
// frameworks that these tasks were LOST, so we have to have the
// slave shut down.
TEST_F(PartitionTest, PartitionedSlaveReregistration)
{
  Try<PID<Master> > master = StartMaster();
  ASSERT_SOME(master);

  // Allow the master to PING the slave, but drop all PONG messages
  // from the slave. Note that we don't match on the master / slave
  // PIDs because it's actually the SlaveObserver Process that sends
  // the pings.
  Future<Message> ping = FUTURE_MESSAGE(Eq("PING"), _, _);
  DROP_MESSAGES(Eq("PONG"), _, _);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);

  StandaloneMasterDetector detector(master.get());

  Try<PID<Slave> > slave = StartSlave(&exec, &detector);
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _));

  Future<vector<Offer> > offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return());

  driver.start();

  AWAIT_READY(offers);
  ASSERT_NE(0u, offers.get().size());

  // Launch a task. This is to ensure the task is killed by the slave,
  // during shutdown.
  TaskID taskId;
  taskId.set_value("1");

  TaskInfo task;
  task.set_name("");
  task.mutable_task_id()->MergeFrom(taskId);
  task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id());
  task.mutable_resources()->MergeFrom(offers.get()[0].resources());
  task.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO);
  task.mutable_executor()->mutable_command()->set_value("sleep 60");

  vector<TaskInfo> tasks;
  tasks.push_back(task);

  // Set up the expectations for launching the task.
  EXPECT_CALL(exec, registered(_, _, _, _));
  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING));

  Future<TaskStatus> runningStatus;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&runningStatus));

  Future<Nothing> statusUpdateAck = FUTURE_DISPATCH(
      slave.get(), &Slave::_statusUpdateAcknowledgement);

  driver.launchTasks(offers.get()[0].id(), tasks);

  AWAIT_READY(runningStatus);
  EXPECT_EQ(TASK_RUNNING, runningStatus.get().state());

  // Wait for the slave to have handled the acknowledgment prior
  // to pausing the clock.
  AWAIT_READY(statusUpdateAck);

  // Drop the first shutdown message from the master (simulated
  // partition), allow the second shutdown message to pass when
  // the slave re-registers.
  Future<ShutdownMessage> shutdownMessage =
    DROP_PROTOBUF(ShutdownMessage(), _, slave.get());

  Future<TaskStatus> lostStatus;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&lostStatus));

  Future<Nothing> slaveLost;
  EXPECT_CALL(sched, slaveLost(&driver, _))
    .WillOnce(FutureSatisfy(&slaveLost));

  Clock::pause();

  // Now, induce a partition of the slave by having the master
  // timeout the slave.
  uint32_t pings = 0;
  while (true) {
    AWAIT_READY(ping);
    pings++;
    if (pings == master::MAX_SLAVE_PING_TIMEOUTS) {
      break;
    }
    ping = FUTURE_MESSAGE(Eq("PING"), _, _);
    Clock::advance(master::SLAVE_PING_TIMEOUT);
    Clock::settle();
  }

  Clock::advance(master::SLAVE_PING_TIMEOUT);
  Clock::settle();

  // The master will have notified the framework of the lost task.
  AWAIT_READY(lostStatus);
  EXPECT_EQ(TASK_LOST, lostStatus.get().state());

  // Wait for the master to attempt to shut down the slave.
  AWAIT_READY(shutdownMessage);

  // The master will notify the framework that the slave was lost.
  AWAIT_READY(slaveLost);

  Clock::resume();

  // We now complete the partition on the slave side as well. This
  // is done by simulating a master loss event which would normally
  // occur during a network partition.
  detector.appoint(None());

  Future<Nothing> shutdown;
  EXPECT_CALL(exec, shutdown(_))
    .WillOnce(FutureSatisfy(&shutdown));

  shutdownMessage = FUTURE_PROTOBUF(ShutdownMessage(), _, slave.get());

  // Have the slave re-register with the master.
  detector.appoint(master.get());

  // Upon re-registration, the master will shutdown the slave.
  // The slave will then shut down the executor.
  AWAIT_READY(shutdownMessage);
  AWAIT_READY(shutdown);

  driver.stop();
  driver.join();

  Shutdown();
}
TEST_F(ResourceOffersTest, ResourcesGetReofferedAfterTaskInfoError)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  Owned<MasterDetector> detector = master.get()->createDetector();
  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get());
  ASSERT_SOME(slave);

  MockScheduler sched1;
  MesosSchedulerDriver driver1(
      &sched1, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched1, registered(&driver1, _, _));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched1, resourceOffers(&driver1, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver1.start();

  AWAIT_READY(offers);
  ASSERT_FALSE(offers->empty());

  TaskInfo task;
  task.set_name("");
  task.mutable_task_id()->set_value("1");
  task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id());
  task.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO);

  Resource* cpus = task.add_resources();
  cpus->set_name("cpus");
  cpus->set_type(Value::SCALAR);
  cpus->mutable_scalar()->set_value(-1);

  Resource* mem = task.add_resources();
  mem->set_name("mem");
  mem->set_type(Value::SCALAR);
  mem->mutable_scalar()->set_value(static_cast<double>(Gigabytes(1).bytes()));

  vector<TaskInfo> tasks;
  tasks.push_back(task);

  Future<TaskStatus> status;
  EXPECT_CALL(sched1, statusUpdate(&driver1, _))
    .WillOnce(FutureArg<1>(&status));

  driver1.launchTasks(offers.get()[0].id(), tasks);

  AWAIT_READY(status);
  EXPECT_EQ(task.task_id(), status->task_id());
  EXPECT_EQ(TASK_ERROR, status->state());
  EXPECT_EQ(TaskStatus::REASON_TASK_INVALID, status->reason());
  EXPECT_TRUE(status->has_message());
  EXPECT_TRUE(strings::contains(status->message(), "Invalid scalar resource"))
    << status->message();

  MockScheduler sched2;
  MesosSchedulerDriver driver2(
      &sched2, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched2, registered(&driver2, _, _));

  EXPECT_CALL(sched2, resourceOffers(&driver2, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver2.start();

  AWAIT_READY(offers);

  driver1.stop();
  driver1.join();

  driver2.stop();
  driver2.join();
}
TEST_F(FaultToleranceTest, TaskLost)
{
  Try<PID<Master> > master = StartMaster();
  ASSERT_SOME(master);

  Try<PID<Slave> > slave = StartSlave();
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(&sched, DEFAULT_FRAMEWORK_INFO, master.get());

  EXPECT_CALL(sched, registered(&driver, _, _));

  Future<vector<Offer> > offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  Future<process::Message> message =
    FUTURE_MESSAGE(Eq(FrameworkRegisteredMessage().GetTypeName()), _, _);

  driver.start();

  AWAIT_READY(offers);
  EXPECT_NE(0u, offers.get().size());

  AWAIT_READY(message);

  Future<Nothing> disconnected;
  EXPECT_CALL(sched, disconnected(&driver))
    .WillOnce(FutureSatisfy(&disconnected));

  // Simulate a spurious noMasterDetected event at the scheduler.
  process::post(message.get().to, NoMasterDetectedMessage());

  AWAIT_READY(disconnected);

  TaskInfo task;
  task.set_name("test task");
  task.mutable_task_id()->set_value("1");
  task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id());
  task.mutable_resources()->MergeFrom(offers.get()[0].resources());
  task.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO);

  vector<TaskInfo> tasks;
  tasks.push_back(task);

  Future<TaskStatus> status;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&status));

  driver.launchTasks(offers.get()[0].id(), tasks);

  AWAIT_READY(status);
  EXPECT_EQ(TASK_LOST, status.get().state());

  driver.stop();
  driver.join();

  Shutdown();
}
// This test verifies that the master reconciles tasks that are
// missing from a reregistering slave. In this case, we trigger
// a race between the slave re-registration message and the launch
// message. There should be no TASK_LOST / TASK_DROPPED.
// This was motivated by MESOS-1696.
TEST_F(MasterSlaveReconciliationTest, ReconcileRace)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);
  TestContainerizer containerizer(&exec);

  StandaloneMasterDetector detector(master.get()->pid);

  Future<SlaveRegisteredMessage> slaveRegisteredMessage =
    FUTURE_PROTOBUF(SlaveRegisteredMessage(), master.get()->pid, _);

  Try<Owned<cluster::Slave>> slave = StartSlave(&detector, &containerizer);
  ASSERT_SOME(slave);

  AWAIT_READY(slaveRegisteredMessage);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  // Since the agent may have retried registration, we want to
  // ensure that any duplicate registrations are flushed before
  // we appoint the master again. Otherwise, the agent may
  // receive a stale registration message.
  Clock::pause();
  Clock::settle();
  Clock::resume();

  // Trigger a re-registration of the slave and capture the message
  // so that we can spoof a race with a launch task message.
  DROP_PROTOBUFS(
      ReregisterSlaveMessage(), slave.get()->pid, master.get()->pid);

  Future<ReregisterSlaveMessage> reregisterSlaveMessage = DROP_PROTOBUF(
      ReregisterSlaveMessage(), slave.get()->pid, master.get()->pid);

  detector.appoint(master.get()->pid);

  AWAIT_READY(reregisterSlaveMessage);

  AWAIT_READY(offers);
  ASSERT_FALSE(offers->empty());

  TaskInfo task;
  task.set_name("test task");
  task.mutable_task_id()->set_value("1");
  task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id());
  task.mutable_resources()->MergeFrom(offers.get()[0].resources());
  task.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO);

  ExecutorDriver* executorDriver;
  EXPECT_CALL(exec, registered(_, _, _, _))
    .WillOnce(SaveArg<0>(&executorDriver));

  // Leave the task in TASK_STAGING.
  Future<Nothing> launchTask;
  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(FutureSatisfy(&launchTask));

  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .Times(0);

  driver.launchTasks(offers.get()[0].id(), {task});

  AWAIT_READY(launchTask);

  // Send the stale re-registration message, which does not contain
  // the task we just launched. This will trigger a reconciliation
  // by the master.
  Future<SlaveReregisteredMessage> slaveReregisteredMessage =
    FUTURE_PROTOBUF(SlaveReregisteredMessage(), _, _);

  // Prevent this from being dropped per the DROP_PROTOBUFS above.
  FUTURE_PROTOBUF(
      ReregisterSlaveMessage(), slave.get()->pid, master.get()->pid);

  process::post(
      slave.get()->pid, master.get()->pid, reregisterSlaveMessage.get());

  AWAIT_READY(slaveReregisteredMessage);

  // Neither the master nor the slave should send a TASK_LOST
  // as part of the reconciliation. We check this by calling
  // Clock::settle() to flush all pending events.
  Clock::pause();
  Clock::settle();
  Clock::resume();

  // Now send TASK_FINISHED and make sure it's the only message
  // received by the scheduler.
  Future<TaskStatus> status;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&status));

  TaskStatus taskStatus;
  taskStatus.mutable_task_id()->CopyFrom(task.task_id());
  taskStatus.set_state(TASK_FINISHED);

  executorDriver->sendStatusUpdate(taskStatus);

  AWAIT_READY(status);
  ASSERT_EQ(TASK_FINISHED, status->state());

  EXPECT_CALL(exec, shutdown(_))
    .Times(AtMost(1));

  driver.stop();
  driver.join();
}
TEST_F(FaultToleranceTest, ForwardStatusUpdateUnknownExecutor)
{
  Try<PID<Master> > master = StartMaster();
  ASSERT_SOME(master);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);

  Try<PID<Slave> > slave = StartSlave(&exec);
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(&sched, DEFAULT_FRAMEWORK_INFO, master.get());

  FrameworkID frameworkId;
  EXPECT_CALL(sched, registered(&driver, _, _))
    .WillOnce(SaveArg<1>(&frameworkId));

  Future<vector<Offer> > offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers));

  driver.start();

  AWAIT_READY(offers);
  EXPECT_NE(0u, offers.get().size());
  Offer offer = offers.get()[0];

  TaskInfo task;
  task.set_name("");
  task.mutable_task_id()->set_value("1");
  task.mutable_slave_id()->MergeFrom(offer.slave_id());
  task.mutable_resources()->MergeFrom(offer.resources());
  task.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO);

  vector<TaskInfo> tasks;
  tasks.push_back(task);

  Future<Nothing> statusUpdate;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureSatisfy(&statusUpdate)); // TASK_RUNNING of task1.

  EXPECT_CALL(exec, registered(_, _, _, _));

  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING));

  driver.launchTasks(offer.id(), tasks);

  // Wait until TASK_RUNNING of task1 is received.
  AWAIT_READY(statusUpdate);

  // Simulate the slave receiving status update from an unknown
  // (e.g. exited) executor of the given framework.
  Future<TaskStatus> status;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&status)); // TASK_RUNNING of task2.

  TaskID taskId;
  taskId.set_value("task2");

  StatusUpdate statusUpdate2 = createStatusUpdate(
      frameworkId, offer.slave_id(), taskId, TASK_RUNNING, "Dummy update");

  process::dispatch(slave.get(), &Slave::statusUpdate, statusUpdate2);

  // Ensure that the scheduler receives task2's update.
  AWAIT_READY(status);
  EXPECT_EQ(taskId, status.get().task_id());
  EXPECT_EQ(TASK_RUNNING, status.get().state());

  EXPECT_CALL(exec, shutdown(_))
    .Times(AtMost(1));

  driver.stop();
  driver.join();

  Shutdown();
}
// Test executor environment decorator hook and remove executor hook
// for slave. We expect the environment-decorator hook to create a
// temporary file and the remove-executor hook to delete that file.
TEST_F(HookTest, VerifySlaveLaunchExecutorHook)
{
  master::Flags masterFlags = CreateMasterFlags();

  Try<Owned<cluster::Master>> master = StartMaster(masterFlags);
  ASSERT_SOME(master);

  slave::Flags slaveFlags = CreateSlaveFlags();

  MockExecutor exec(DEFAULT_EXECUTOR_ID);
  TestContainerizer containerizer(&exec);

  Owned<MasterDetector> detector = master.get()->createDetector();

  Try<Owned<cluster::Slave>> slave =
    StartSlave(detector.get(), &containerizer);
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  EXPECT_NE(0u, offers.get().size());

  // Launch a task with the command executor.
  TaskInfo task;
  task.set_name("");
  task.mutable_task_id()->set_value("1");
  task.mutable_slave_id()->CopyFrom(offers.get()[0].slave_id());
  task.mutable_resources()->CopyFrom(offers.get()[0].resources());
  task.mutable_executor()->CopyFrom(DEFAULT_EXECUTOR_INFO);

  EXPECT_CALL(exec, registered(_, _, _, _));

  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING));

  // Executor shutdown would force the Slave to execute the
  // remove-executor hook.
  EXPECT_CALL(exec, shutdown(_));

  Future<TaskStatus> status;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&status))
    .WillRepeatedly(Return());

  // On successful completion of the "slaveLaunchExecutorHook", the
  // test hook will send a HookExecuted message to itself. We wait
  // until that message is intercepted by the testing infrastructure.
  Future<HookExecuted> hookFuture = FUTURE_PROTOBUF(HookExecuted(), _, _);

  driver.launchTasks(offers.get()[0].id(), {task});

  AWAIT_READY(status);

  driver.stop();
  driver.join();

  // The scheduler shutdown from above forces the executor to
  // shutdown. This in turn should force the Slave to execute
  // the remove-executor hook.
  // Here, we wait for the hook to finish execution.
  AWAIT_READY(hookFuture);
}
// This test ensures that a killTask() can happen between runTask()
// and _runTask() and then gets "handled properly". This means that
// the task never gets started, but also does not get lost. The end
// result is status TASK_KILLED. Essentially, killing the task is
// realized while preparing to start it. See MESOS-947.
// Temporarily disabled due to MESOS-1945.
TEST_F(SlaveTest, DISABLED_KillTaskBetweenRunTaskParts)
{
  Try<PID<Master> > master = StartMaster();
  ASSERT_SOME(master);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);
  TestContainerizer containerizer(&exec);

  StandaloneMasterDetector detector(master.get());

  MockSlave slave(CreateSlaveFlags(), &detector, &containerizer);
  process::spawn(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _))
    .Times(1);

  Future<vector<Offer> > offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  EXPECT_NE(0u, offers.get().size());

  TaskInfo task;
  task.set_name("");
  task.mutable_task_id()->set_value("1");
  task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id());
  task.mutable_resources()->MergeFrom(offers.get()[0].resources());
  task.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO);

  vector<TaskInfo> tasks;
  tasks.push_back(task);

  EXPECT_CALL(exec, registered(_, _, _, _))
    .Times(0);

  EXPECT_CALL(exec, launchTask(_, _))
    .Times(0);

  EXPECT_CALL(exec, shutdown(_))
    .Times(0);

  Future<TaskStatus> status;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillRepeatedly(FutureArg<1>(&status));

  EXPECT_CALL(slave, runTask(_, _, _, _, _))
    .WillOnce(Invoke(&slave, &MockSlave::unmocked_runTask));

  // Saved arguments from Slave::_runTask().
  Future<bool> future;
  FrameworkInfo frameworkInfo;
  FrameworkID frameworkId;

  // Skip what Slave::_runTask() normally does: save its arguments
  // for later, and tie reaching the critical moment (at which the
  // task is to be killed) to a future.
  Future<Nothing> _runTask;
  EXPECT_CALL(slave, _runTask(_, _, _, _, _))
    .WillOnce(DoAll(FutureSatisfy(&_runTask),
                    SaveArg<0>(&future),
                    SaveArg<1>(&frameworkInfo),
                    SaveArg<2>(&frameworkId)));

  driver.launchTasks(offers.get()[0].id(), tasks);

  AWAIT_READY(_runTask);

  Future<Nothing> killTask;
  EXPECT_CALL(slave, killTask(_, _, _))
    .WillOnce(DoAll(Invoke(&slave, &MockSlave::unmocked_killTask),
                    FutureSatisfy(&killTask)));

  driver.killTask(task.task_id());

  // Since this is the only task ever for this framework, the
  // framework should get removed in Slave::_runTask().
  // Thus we can observe that this happens before Shutdown().
  Future<Nothing> removeFramework;
  EXPECT_CALL(slave, removeFramework(_))
    .WillOnce(DoAll(Invoke(&slave, &MockSlave::unmocked_removeFramework),
                    FutureSatisfy(&removeFramework)));

  AWAIT_READY(killTask);

  slave.unmocked__runTask(
      future, frameworkInfo, frameworkId, master.get(), task);

  AWAIT_READY(removeFramework);

  AWAIT_READY(status);
  EXPECT_EQ(TASK_KILLED, status.get().state());

  driver.stop();
  driver.join();

  process::terminate(slave);
  process::wait(slave);

  Shutdown(); // Must shutdown before 'containerizer' gets deallocated.
}
Future<ExecutorInfo> ExternalContainerizerProcess::launch(
    const ContainerID& containerId,
    const TaskInfo& taskInfo,
    const FrameworkID& frameworkId,
    const std::string& directory,
    const Option<std::string>& user,
    const SlaveID& slaveId,
    const PID<Slave>& slavePid,
    bool checkpoint)
{
  LOG(INFO) << "Launching container '" << containerId << "'";

  // Get the executor from our task. If no executor is associated with
  // the given task, this function renders an ExecutorInfo using the
  // mesos-executor as its command.
  ExecutorInfo executor = containerExecutorInfo(flags, taskInfo, frameworkId);
  executor.mutable_resources()->MergeFrom(taskInfo.resources());

  if (containers.contains(containerId)) {
    return Failure("Cannot start already running container '" +
                   containerId.value() + "'");
  }

  sandboxes.put(containerId, Owned<Sandbox>(new Sandbox(directory, user)));

  map<string, string> environment = executorEnvironment(
      executor,
      directory,
      slaveId,
      slavePid,
      checkpoint,
      flags.recovery_timeout);

  if (!flags.hadoop_home.empty()) {
    environment["HADOOP_HOME"] = flags.hadoop_home;
  }

  TaskInfo task;
  task.CopyFrom(taskInfo);
  CommandInfo* command = task.has_executor()
    ? task.mutable_executor()->mutable_command()
    : task.mutable_command();

  // When the selected command has no container attached, use the
  // default from the slave startup flags, if available.
  if (!command->has_container()) {
    if (flags.default_container_image.isSome()) {
      command->mutable_container()->set_image(
          flags.default_container_image.get());
    } else {
      LOG(INFO) << "No container specified in task and no default given. "
                << "The external containerizer will have to fill in "
                << "defaults.";
    }
  }

  ExternalTask external;
  external.mutable_task()->CopyFrom(task);
  external.set_mesos_executor_path(
      path::join(flags.launcher_dir, "mesos-executor"));

  stringstream output;
  external.SerializeToOstream(&output);

  Try<Subprocess> invoked = invoke(
      "launch",
      containerId,
      output.str(),
      environment);

  if (invoked.isError()) {
    return Failure("Launch of container '" + containerId.value() +
                   "' failed (error: " + invoked.error() + ")");
  }

  // Record the process.
  containers.put(
      containerId,
      Owned<Container>(new Container(invoked.get().pid())));

  VLOG(2) << "Now awaiting data from pipe...";

  // Read from the result-pipe and invoke callbacks when reaching EOF.
  return await(read(invoked.get().out()), invoked.get().status())
    .then(defer(
        PID<ExternalContainerizerProcess>(this),
        &ExternalContainerizerProcess::_launch,
        containerId,
        frameworkId,
        executor,
        slaveId,
        checkpoint,
        lambda::_1));
}
// This test verifies that authorization-based endpoint filtering
// works correctly on the /state endpoint.
// Both default users are allowed to view high-level frameworks, but
// only one is allowed to view the tasks.
// After launching a single task per framework, one for role "superhero"
// and the other for role "muggle", this test verifies that each of the
// two default users can view resource allocations and resource
// reservations for the corresponding allowed roles only.
TYPED_TEST(SlaveAuthorizerTest, FilterStateEndpoint)
{
  ACLs acls;

  const string roleSuperhero = "superhero";
  const string roleMuggle = "muggle";

  {
    // Default principal can see all frameworks.
    mesos::ACL::ViewFramework* acl = acls.add_view_frameworks();
    acl->mutable_principals()->add_values(DEFAULT_CREDENTIAL.principal());
    acl->mutable_users()->set_type(ACL::Entity::ANY);
  }

  {
    // Second default principal can see all frameworks.
    mesos::ACL::ViewFramework* acl = acls.add_view_frameworks();
    acl->mutable_principals()->add_values(DEFAULT_CREDENTIAL_2.principal());
    acl->mutable_users()->set_type(ACL::Entity::ANY);
  }

  {
    // No other principal can see frameworks running under any user.
    ACL::ViewFramework* acl = acls.add_view_frameworks();
    acl->mutable_principals()->set_type(ACL::Entity::ANY);
    acl->mutable_users()->set_type(ACL::Entity::NONE);
  }

  {
    // Default principal can see all executors.
    mesos::ACL::ViewExecutor* acl = acls.add_view_executors();
    acl->mutable_principals()->add_values(DEFAULT_CREDENTIAL.principal());
    acl->mutable_users()->set_type(ACL::Entity::ANY);
  }

  {
    // No other principal can see executors running under any user.
    ACL::ViewExecutor* acl = acls.add_view_executors();
    acl->mutable_principals()->set_type(ACL::Entity::ANY);
    acl->mutable_users()->set_type(ACL::Entity::NONE);
  }

  {
    // Default principal can see all tasks.
    mesos::ACL::ViewTask* acl = acls.add_view_tasks();
    acl->mutable_principals()->add_values(DEFAULT_CREDENTIAL.principal());
    acl->mutable_users()->set_type(ACL::Entity::ANY);
  }

  {
    // No other principal can see tasks running under any user.
    ACL::ViewTask* acl = acls.add_view_tasks();
    acl->mutable_principals()->set_type(ACL::Entity::ANY);
    acl->mutable_users()->set_type(ACL::Entity::NONE);
  }

  {
    // Default principal can view "superhero" role only.
    ACL::ViewRole* acl = acls.add_view_roles();
    acl->mutable_principals()->add_values(DEFAULT_CREDENTIAL.principal());
    acl->mutable_roles()->add_values(roleSuperhero);

    acl = acls.add_view_roles();
    acl->mutable_principals()->add_values(DEFAULT_CREDENTIAL.principal());
    acl->mutable_roles()->set_type(mesos::ACL::Entity::NONE);
  }

  {
    // Second default principal can view "muggle" role only.
    ACL::ViewRole* acl = acls.add_view_roles();
    acl->mutable_principals()->add_values(DEFAULT_CREDENTIAL_2.principal());
    acl->mutable_roles()->add_values(roleMuggle);

    acl = acls.add_view_roles();
    acl->mutable_principals()->add_values(DEFAULT_CREDENTIAL_2.principal());
    acl->mutable_roles()->set_type(mesos::ACL::Entity::NONE);
  }

  // Create an `Authorizer` with the ACLs.
  Try<Authorizer*> create = TypeParam::create(parameterize(acls));
  ASSERT_SOME(create);
  Owned<Authorizer> authorizer(create.get());

  Try<Owned<cluster::Master>> master = this->StartMaster(authorizer.get());
  ASSERT_SOME(master);

  // Register framework with user "bar" and role "superhero".
  FrameworkInfo frameworkSuperhero = DEFAULT_FRAMEWORK_INFO;
  frameworkSuperhero.set_name("framework-" + roleSuperhero);
  frameworkSuperhero.set_roles(0, roleSuperhero);
  frameworkSuperhero.set_user("bar");

  // Create an executor with user "bar".
  ExecutorInfo executorSuperhero =
    createExecutorInfo("test-executor-" + roleSuperhero, "sleep 2");
  executorSuperhero.mutable_command()->set_user("bar");
  MockExecutor execSuperhero(executorSuperhero.executor_id());

  // Register framework with user "foo" and role "muggle".
  FrameworkInfo frameworkMuggle = DEFAULT_FRAMEWORK_INFO;
  frameworkMuggle.set_name("framework-" + roleMuggle);
  frameworkMuggle.set_principal(DEFAULT_CREDENTIAL_2.principal());
  frameworkMuggle.set_roles(0, roleMuggle);
  frameworkMuggle.set_user("foo");

  // Create an executor with user "foo".
  ExecutorInfo executorMuggle =
    createExecutorInfo("test-executor-" + roleMuggle, "sleep 2");
  executorMuggle.mutable_command()->set_user("foo");
  MockExecutor execMuggle(executorMuggle.executor_id());

  TestContainerizer containerizer(
      {{executorSuperhero.executor_id(), &execSuperhero},
       {executorMuggle.executor_id(), &execMuggle}});

  slave::Flags flags = this->CreateSlaveFlags();

  // Statically reserve resources for each role.
  flags.resources =
    "cpus(" + roleSuperhero + "):2;" +
    "cpus(" + roleMuggle + "):3;" +
    "mem(" + roleSuperhero + "):512;" +
    "mem(" + roleMuggle + "):1024;";

  Owned<MasterDetector> detector = master.get()->createDetector();

  Try<Owned<cluster::Slave>> slave = this->StartSlave(
      detector.get(), &containerizer, authorizer.get(), flags);
  ASSERT_SOME(slave);

  MockScheduler schedSuperhero;
  MesosSchedulerDriver driverSuperhero(
      &schedSuperhero,
      frameworkSuperhero,
      master.get()->pid,
      DEFAULT_CREDENTIAL);

  EXPECT_CALL(execSuperhero, registered(_, _, _, _))
    .Times(AtMost(1));

  Future<FrameworkID> frameworkIdSuperhero;
  EXPECT_CALL(schedSuperhero, registered(&driverSuperhero, _, _))
    .WillOnce(FutureArg<1>(&frameworkIdSuperhero));

  Future<vector<Offer>> offersSuperhero;
  EXPECT_CALL(schedSuperhero, resourceOffers(&driverSuperhero, _))
    .WillOnce(FutureArg<1>(&offersSuperhero))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driverSuperhero.start();

  AWAIT_READY(frameworkIdSuperhero);

  AWAIT_READY(offersSuperhero);
  ASSERT_FALSE(offersSuperhero->empty());

  // Define a task which will run on executorSuperhero of
  // frameworkSuperhero.
  TaskInfo taskSuperhero;
  taskSuperhero.set_name("test-" + roleSuperhero);
  taskSuperhero.mutable_task_id()->set_value("1");
  taskSuperhero.mutable_slave_id()->MergeFrom(
      offersSuperhero.get()[0].slave_id());
  taskSuperhero.mutable_resources()->MergeFrom(
      offersSuperhero.get()[0].resources());
  taskSuperhero.mutable_executor()->MergeFrom(executorSuperhero);

  EXPECT_CALL(execSuperhero, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING))
    .WillRepeatedly(Return());

  Future<TaskStatus> statusSuperhero;
  EXPECT_CALL(schedSuperhero, statusUpdate(&driverSuperhero, _))
    .WillOnce(FutureArg<1>(&statusSuperhero));

  driverSuperhero.launchTasks(offersSuperhero.get()[0].id(), {taskSuperhero});

  AWAIT_READY(statusSuperhero);
  EXPECT_EQ(TASK_RUNNING, statusSuperhero->state());

  MockScheduler schedMuggle;
  MesosSchedulerDriver driverMuggle(
      &schedMuggle,
      frameworkMuggle,
      master.get()->pid,
      DEFAULT_CREDENTIAL_2);

  EXPECT_CALL(execMuggle, registered(_, _, _, _))
    .Times(AtMost(1));

  Future<FrameworkID> frameworkIdMuggle;
  EXPECT_CALL(schedMuggle, registered(&driverMuggle, _, _))
    .WillOnce(FutureArg<1>(&frameworkIdMuggle));

  Future<vector<Offer>> offersMuggle;
  EXPECT_CALL(schedMuggle, resourceOffers(&driverMuggle, _))
    .WillOnce(FutureArg<1>(&offersMuggle))
    .WillRepeatedly(Return()); // Ignore subsequent offers.
  driverMuggle.start();

  AWAIT_READY(frameworkIdMuggle);

  AWAIT_READY(offersMuggle);
  ASSERT_FALSE(offersMuggle->empty());

  // Define a task which will run on executorMuggle of frameworkMuggle.
  TaskInfo taskMuggle;
  taskMuggle.set_name("test-" + roleMuggle);
  taskMuggle.mutable_task_id()->set_value("2");
  taskMuggle.mutable_slave_id()->MergeFrom(
      offersMuggle.get()[0].slave_id());
  taskMuggle.mutable_resources()->MergeFrom(
      offersMuggle.get()[0].resources());
  taskMuggle.mutable_executor()->MergeFrom(executorMuggle);

  EXPECT_CALL(execMuggle, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING))
    .WillRepeatedly(Return());

  Future<TaskStatus> statusMuggle;
  EXPECT_CALL(schedMuggle, statusUpdate(&driverMuggle, _))
    .WillOnce(FutureArg<1>(&statusMuggle));

  driverMuggle.launchTasks(offersMuggle.get()[0].id(), {taskMuggle});

  AWAIT_READY(statusMuggle);
  ASSERT_EQ(TASK_RUNNING, statusMuggle->state());

  // Retrieve endpoint with the user allowed to view the frameworks.
  // The default user is allowed to view role "superhero" only.
  {
    Future<Response> response = http::get(
        slave.get()->pid,
        "state",
        None(),
        createBasicAuthHeaders(DEFAULT_CREDENTIAL));

    AWAIT_EXPECT_RESPONSE_STATUS_EQ(OK().status, response);

    Try<JSON::Object> parse = JSON::parse<JSON::Object>(response->body);
    ASSERT_SOME(parse);

    JSON::Object state = parse.get();
    ASSERT_TRUE(state.values["frameworks"].is<JSON::Array>());

    JSON::Array frameworks = state.values["frameworks"].as<JSON::Array>();
    EXPECT_EQ(2u, frameworks.values.size());

    foreach (const JSON::Value& value, frameworks.values) {
      JSON::Object framework = value.as<JSON::Object>();
      EXPECT_FALSE(framework.values.empty());
      ASSERT_TRUE(framework.values["executors"].is<JSON::Array>());

      JSON::Array executors = framework.values["executors"].as<JSON::Array>();
      EXPECT_EQ(1u, executors.values.size());

      JSON::Object executor = executors.values.front().as<JSON::Object>();
      EXPECT_EQ(1u, executor.values["tasks"].as<JSON::Array>().values.size());
    }

    ASSERT_TRUE(state.values["reserved_resources"].is<JSON::Object>());

    JSON::Object reserved_resources =
      state.values["reserved_resources"].as<JSON::Object>();
    EXPECT_TRUE(reserved_resources.values[roleSuperhero].is<JSON::Object>());
    EXPECT_FALSE(reserved_resources.values[roleMuggle].is<JSON::Object>());

    ASSERT_TRUE(
        state.values["reserved_resources_allocated"].is<JSON::Object>());

    JSON::Object reserved_resources_allocated =
      state.values["reserved_resources_allocated"].as<JSON::Object>();
    EXPECT_TRUE(
        reserved_resources_allocated.values[roleSuperhero].is<JSON::Object>());
    EXPECT_FALSE(
        reserved_resources_allocated.values[roleMuggle].is<JSON::Object>());

    ASSERT_TRUE(state.values["reserved_resources_full"].is<JSON::Object>());

    JSON::Object reserved_resources_full =
      state.values["reserved_resources_full"].as<JSON::Object>();
    EXPECT_TRUE(
        reserved_resources_full.values[roleSuperhero].is<JSON::Array>());
    EXPECT_FALSE(
        reserved_resources_full.values[roleMuggle].is<JSON::Array>());
  }

  // Retrieve endpoint with the user allowed to view the frameworks,
  // but not the executors.
  // The second default user is allowed to view role "muggle" only.
{ Future<Response> response = http::get( slave.get()->pid, "state", None(), createBasicAuthHeaders(DEFAULT_CREDENTIAL_2)); AWAIT_EXPECT_RESPONSE_STATUS_EQ(OK().status, response); Try<JSON::Object> parse = JSON::parse<JSON::Object>(response->body); ASSERT_SOME(parse); JSON::Object state = parse.get(); ASSERT_TRUE(state.values["frameworks"].is<JSON::Array>()); JSON::Array frameworks = state.values["frameworks"].as<JSON::Array>(); EXPECT_EQ(2u, frameworks.values.size()); foreach (const JSON::Value& value, frameworks.values) { JSON::Object framework = value.as<JSON::Object>(); EXPECT_FALSE(framework.values.empty()); EXPECT_TRUE( framework.values["executors"].as<JSON::Array>().values.empty()); } ASSERT_TRUE(state.values["reserved_resources"].is<JSON::Object>()); JSON::Object reserved_resources = state.values["reserved_resources"].as<JSON::Object>(); EXPECT_TRUE(reserved_resources.values[roleMuggle].is<JSON::Object>()); EXPECT_FALSE(reserved_resources.values[roleSuperhero].is<JSON::Object>()); ASSERT_TRUE( state.values["reserved_resources_allocated"].is<JSON::Object>()); JSON::Object reserved_resources_allocated = state.values["reserved_resources_allocated"].as<JSON::Object>(); EXPECT_TRUE( reserved_resources_allocated.values[roleMuggle].is<JSON::Object>()); EXPECT_FALSE( reserved_resources_allocated.values[roleSuperhero].is<JSON::Object>()); ASSERT_TRUE(state.values["reserved_resources_full"].is<JSON::Object>()); JSON::Object reserved_resources_full = state.values["reserved_resources_full"].as<JSON::Object>(); EXPECT_TRUE( reserved_resources_full.values[roleMuggle].is<JSON::Array>()); EXPECT_FALSE( reserved_resources_full.values[roleSuperhero].is<JSON::Array>()); } EXPECT_CALL(execSuperhero, shutdown(_)) .Times(AtMost(1)); EXPECT_CALL(execMuggle, shutdown(_)) .Times(AtMost(1)); driverSuperhero.stop(); driverSuperhero.join(); driverMuggle.stop(); driverMuggle.join(); }
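// For manual verification outside the test harness, the same filtering can
// be observed by querying the agent's /state endpoint with each credential.
// A minimal sketch, assuming an agent on the default port 5051 with HTTP
// basic authentication enabled; the principals and secrets below are
// illustrative placeholders, not values defined by this test:
//
//   $ curl -u superhero-principal:secret  http://<agent-host>:5051/state
//   $ curl -u muggle-principal:secret2    http://<agent-host>:5051/state
//
// Each response should contain the "reserved_resources*" keys only for the
// role that the authenticated principal is authorized to view.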
// This test ensures that when explicit acknowledgements are enabled,
// acknowledgements for master-generated updates are dropped by the
// driver. We test this by creating an invalid task that uses no
// resources.
TEST_F(MesosSchedulerDriverTest, ExplicitAcknowledgementsMasterGeneratedUpdate)
{
  Try<PID<Master>> master = StartMaster();
  ASSERT_SOME(master);

  Try<PID<Slave>> slave = StartSlave();
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get(), false, DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  // Ensure no status update acknowledgements are sent to the master.
  EXPECT_NO_FUTURE_CALLS(
      mesos::scheduler::Call(),
      mesos::scheduler::Call::ACKNOWLEDGE,
      _,
      master.get());

  driver.start();

  AWAIT_READY(offers);
  EXPECT_NE(0u, offers.get().size());

  // Launch a task using no resources.
  TaskInfo task;
  task.set_name("");
  task.mutable_task_id()->set_value("1");
  task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id());
  task.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO);

  vector<TaskInfo> tasks;
  tasks.push_back(task);

  Future<TaskStatus> status;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&status));

  driver.launchTasks(offers.get()[0].id(), tasks);

  AWAIT_READY(status);
  ASSERT_EQ(TASK_ERROR, status.get().state());
  ASSERT_EQ(TaskStatus::SOURCE_MASTER, status.get().source());
  ASSERT_EQ(TaskStatus::REASON_TASK_INVALID, status.get().reason());

  // Now send the acknowledgement.
  driver.acknowledgeStatusUpdate(status.get());

  // Settle the clock to ensure the driver processes the acknowledgement,
  // which should get dropped due to having come from the master.
  Clock::pause();
  Clock::settle();

  driver.stop();
  driver.join();

  Shutdown();
}
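// For context: the fourth constructor argument above ('false') disables
// implicit acknowledgements. A minimal sketch of the callback a real
// scheduler would then implement; 'persist' is a hypothetical framework-side
// bookkeeping function, not a Mesos API:
virtual void statusUpdate(SchedulerDriver* driver, const TaskStatus& status)
{
  persist(status); // Hypothetical: durably record the update first.

  // Acknowledge only after the update is safely recorded, so that an
  // unacknowledged update is redelivered after a framework failover.
  driver->acknowledgeStatusUpdate(status);
}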
// This test verifies that the slave run task label decorator can add
// and remove labels from a task during the launch sequence. A task
// with two labels ("foo":"bar" and "bar":"baz") is launched and will
// get modified by the slave hook to strip the "foo":"bar" pair and
// add a new "baz":"qux" pair.
TEST_F(HookTest, VerifySlaveRunTaskHook)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);
  TestContainerizer containerizer(&exec);

  Owned<MasterDetector> detector = master.get()->createDetector();

  Try<Owned<cluster::Slave>> slave =
    StartSlave(detector.get(), &containerizer);
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  ASSERT_EQ(1u, offers.get().size());

  TaskInfo task;
  task.set_name("");
  task.mutable_task_id()->set_value("1");
  task.mutable_slave_id()->CopyFrom(offers.get()[0].slave_id());
  task.mutable_resources()->CopyFrom(offers.get()[0].resources());
  task.mutable_executor()->CopyFrom(DEFAULT_EXECUTOR_INFO);

  // Add two labels: (1) will be removed by the hook to ensure that
  // runTaskHook can remove labels; (2) will be preserved to ensure
  // that the framework can add labels to the task and have those be
  // available by the end of the launch task sequence when hooks are
  // used (to protect against hooks removing labels completely).
  Labels* labels = task.mutable_labels();
  labels->add_labels()->CopyFrom(createLabel("foo", "bar"));
  labels->add_labels()->CopyFrom(createLabel("bar", "baz"));

  EXPECT_CALL(exec, registered(_, _, _, _));

  Future<TaskInfo> taskInfo;
  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(DoAll(
        FutureArg<1>(&taskInfo),
        SendStatusUpdateFromTask(TASK_RUNNING)));

  driver.launchTasks(offers.get()[0].id(), {task});

  AWAIT_READY(taskInfo);

  // The master hook will hang an extra label off.
  const Labels& labels_ = taskInfo.get().labels();
  ASSERT_EQ(3, labels_.labels_size());

  // The slave run task hook will prepend a new "baz":"qux" label.
  EXPECT_EQ("baz", labels_.labels(0).key());
  EXPECT_EQ("qux", labels_.labels(0).value());

  // The master launch task hook will still hang off the test label.
  EXPECT_EQ(testLabelKey, labels_.labels(1).key());
  EXPECT_EQ(testLabelValue, labels_.labels(1).value());

  // And lastly, we only expect the "foo":"bar" pair to be stripped by
  // the module. The last pair should be the original "bar":"baz"
  // pair set by the test.
  EXPECT_EQ("bar", labels_.labels(2).key());
  EXPECT_EQ("baz", labels_.labels(2).value());

  EXPECT_CALL(exec, shutdown(_))
    .Times(AtMost(1));

  driver.stop();
  driver.join();
}
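// A minimal sketch of the label decorator exercised above, assuming the
// Hook::slaveRunTaskLabelDecorator interface from <mesos/hook.hpp> (the
// exact parameter list may differ across Mesos versions). It prepends the
// "baz":"qux" pair and strips "foo", matching the test's expectations:
Result<Labels> slaveRunTaskLabelDecorator(
    const TaskInfo& taskInfo,
    const ExecutorInfo& executorInfo,
    const FrameworkInfo& frameworkInfo,
    const SlaveInfo& slaveInfo)
{
  Labels labels;

  // Prepend the new pair so it ends up at index 0.
  Label* label = labels.add_labels();
  label->set_key("baz");
  label->set_value("qux");

  // Copy every incoming label except "foo".
  foreach (const Label& incoming, taskInfo.labels().labels()) {
    if (incoming.key() != "foo") {
      labels.add_labels()->CopyFrom(incoming);
    }
  }

  return labels;
}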
// This test verifies that the master reconciles tasks that are
// missing from a re-registering slave. In this case, we drop the
// RunTaskMessage so the slave should send TASK_LOST.
TEST_F(MasterSlaveReconciliationTest, ReconcileLostTask)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  StandaloneMasterDetector detector(master.get()->pid);

  Try<Owned<cluster::Slave>> slave = StartSlave(&detector);
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  EXPECT_NE(0u, offers.get().size());

  TaskInfo task;
  task.set_name("test task");
  task.mutable_task_id()->set_value("1");
  task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id());
  task.mutable_resources()->MergeFrom(offers.get()[0].resources());
  task.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO);

  // We now launch a task and drop the corresponding RunTaskMessage on
  // the slave, to ensure that only the master knows about this task.
  Future<RunTaskMessage> runTaskMessage =
    DROP_PROTOBUF(RunTaskMessage(), _, _);

  driver.launchTasks(offers.get()[0].id(), {task});

  AWAIT_READY(runTaskMessage);

  Future<SlaveReregisteredMessage> slaveReregisteredMessage =
    FUTURE_PROTOBUF(SlaveReregisteredMessage(), _, _);

  Future<StatusUpdateMessage> statusUpdateMessage =
    FUTURE_PROTOBUF(StatusUpdateMessage(), _, master.get()->pid);

  Future<TaskStatus> status;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&status));

  // Simulate a spurious master change event (e.g., due to ZooKeeper
  // expiration) at the slave to force re-registration.
  detector.appoint(master.get()->pid);

  AWAIT_READY(slaveReregisteredMessage);

  // Make sure the slave generated the TASK_LOST.
  AWAIT_READY(statusUpdateMessage);

  AWAIT_READY(status);

  ASSERT_EQ(task.task_id(), status.get().task_id());
  ASSERT_EQ(TASK_LOST, status.get().state());

  // Before we obtain the metrics, ensure that the master has finished
  // processing the status update so metrics have been updated.
  Clock::pause();
  Clock::settle();
  Clock::resume();

  // Check metrics.
  JSON::Object stats = Metrics();
  EXPECT_EQ(1u, stats.values.count("master/tasks_lost"));
  EXPECT_EQ(1u, stats.values["master/tasks_lost"]);
  EXPECT_EQ(
      1u,
      stats.values.count(
          "master/task_lost/source_slave/reason_reconciliation"));
  EXPECT_EQ(
      1u,
      stats.values["master/task_lost/source_slave/reason_reconciliation"]);

  driver.stop();
  driver.join();
}
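// The framework-facing counterpart to this master/slave reconciliation is
// explicit task reconciliation via the driver. A minimal fragment; passing
// an empty status list asks the master for the latest state of all of the
// framework's known tasks ("implicit" reconciliation):
vector<TaskStatus> statuses; // Empty means "reconcile everything".
driver.reconcileTasks(statuses);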
// This test verifies that when an executor terminates before
// registering with the slave, it is properly cleaned up.
TEST_F(SlaveTest, RemoveUnregisteredTerminatedExecutor)
{
  Try<PID<Master>> master = StartMaster();
  ASSERT_SOME(master);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);
  TestContainerizer containerizer(&exec);

  Try<PID<Slave>> slave = StartSlave(&containerizer);
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _))
    .Times(1);

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  EXPECT_NE(0u, offers.get().size());

  TaskInfo task;
  task.set_name("");
  task.mutable_task_id()->set_value("1");
  task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id());
  task.mutable_resources()->MergeFrom(offers.get()[0].resources());
  task.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO);

  vector<TaskInfo> tasks;
  tasks.push_back(task);

  // Drop the registration message from the executor to the slave.
  Future<process::Message> registerExecutorMessage =
    DROP_MESSAGE(Eq(RegisterExecutorMessage().GetTypeName()), _, _);

  driver.launchTasks(offers.get()[0].id(), tasks);

  AWAIT_READY(registerExecutorMessage);

  Future<TaskStatus> status;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&status));

  Future<Nothing> schedule =
    FUTURE_DISPATCH(_, &GarbageCollectorProcess::schedule);

  // Now kill the executor.
  containerizer.destroy(offers.get()[0].framework_id(), DEFAULT_EXECUTOR_ID);

  AWAIT_READY(status);
  EXPECT_EQ(TASK_LOST, status.get().state());

  // We use 'gc.schedule' as a signal for the executor being cleaned
  // up by the slave.
  AWAIT_READY(schedule);

  EXPECT_CALL(exec, shutdown(_))
    .Times(AtMost(1));

  driver.stop();
  driver.join();

  Shutdown(); // Must shutdown before 'containerizer' gets deallocated.
}
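// Note that the test only asserts that sandbox removal was *scheduled*; how
// soon the garbage collector actually removes it is governed by the agent's
// gc_delay flag. A minimal sketch of tightening it in a test, assuming the
// usual slave flags helper (the one-second value is arbitrary):
slave::Flags flags = CreateSlaveFlags();
flags.gc_delay = Seconds(1); // Collect shortly after scheduling.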
TEST_F(ResourceOffersTest, ResourcesGetReofferedAfterTaskInfoError)
{
  Try<PID<Master>> master = StartMaster();
  ASSERT_SOME(master);

  Try<PID<Slave>> slave = StartSlave();
  ASSERT_SOME(slave);

  MockScheduler sched1;
  MesosSchedulerDriver driver1(
      &sched1, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched1, registered(&driver1, _, _))
    .Times(1);

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched1, resourceOffers(&driver1, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver1.start();

  AWAIT_READY(offers);
  EXPECT_NE(0u, offers.get().size());

  TaskInfo task;
  task.set_name("");
  task.mutable_task_id()->set_value("1");
  task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id());
  task.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO);

  // Use an invalid (negative) cpus scalar to make task validation fail.
  Resource* cpus = task.add_resources();
  cpus->set_name("cpus");
  cpus->set_type(Value::SCALAR);
  cpus->mutable_scalar()->set_value(-1);

  Resource* mem = task.add_resources();
  mem->set_name("mem");
  mem->set_type(Value::SCALAR);
  mem->mutable_scalar()->set_value(Gigabytes(1).bytes());

  vector<TaskInfo> tasks;
  tasks.push_back(task);

  Future<TaskStatus> status;
  EXPECT_CALL(sched1, statusUpdate(&driver1, _))
    .WillOnce(FutureArg<1>(&status));

  driver1.launchTasks(offers.get()[0].id(), tasks);

  AWAIT_READY(status);

  EXPECT_EQ(task.task_id(), status.get().task_id());
  EXPECT_EQ(TASK_ERROR, status.get().state());
  EXPECT_EQ(TaskStatus::REASON_TASK_INVALID, status.get().reason());
  EXPECT_TRUE(status.get().has_message());
  EXPECT_TRUE(strings::startsWith(
      status.get().message(), "Task uses invalid resources"));

  MockScheduler sched2;
  MesosSchedulerDriver driver2(
      &sched2, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched2, registered(&driver2, _, _))
    .Times(1);

  // The resources from the failed launch should be reoffered, this
  // time to the second framework.
  EXPECT_CALL(sched2, resourceOffers(&driver2, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver2.start();

  AWAIT_READY(offers);

  driver1.stop();
  driver1.join();

  driver2.stop();
  driver2.join();

  Shutdown();
}
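// The TASK_ERROR above stems from hand-assembled Resource protos. A minimal
// sketch of the alternative: building task resources from a textual
// description via Resources::parse, which yields an Error for descriptions
// it cannot interpret (the "cpus:1;mem:128" string is illustrative):
Try<Resources> parsed = Resources::parse("cpus:1;mem:128");
if (parsed.isSome()) {
  task.mutable_resources()->CopyFrom(parsed.get());
}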