virtual void resourceOffers(SchedulerDriver* driver, const vector<Offer>& offers)
{
  for (size_t i = 0; i < offers.size(); i++) {
    const Offer& offer = offers[i];
    Resources remaining = offer.resources();

    static Resources TASK_RESOURCES = Resources::parse(
        "cpus:" + stringify<float>(CPUS_PER_TASK) +
        ";mem:" + stringify<size_t>(MEM_PER_TASK)).get();

    size_t maxTasks = 0;
    while (remaining.flatten().contains(TASK_RESOURCES)) {
      maxTasks++;
      remaining -= TASK_RESOURCES;
    }

    // Launch tasks: the first half of the offer's capacity goes to
    // crawl tasks, the second half to render tasks.
    vector<TaskInfo> tasks;
    for (size_t j = 0; j < maxTasks / 2 && crawlQueue.size() > 0; j++) {
      string url = crawlQueue.front();
      crawlQueue.pop();
      string urlId = "C" + stringify<size_t>(processed[url]);
      TaskInfo task;
      task.set_name("Crawler " + urlId);
      task.mutable_task_id()->set_value(urlId);
      task.mutable_slave_id()->MergeFrom(offer.slave_id());
      task.mutable_executor()->MergeFrom(crawler);
      task.mutable_resources()->MergeFrom(TASK_RESOURCES);
      task.set_data(url);
      tasks.push_back(task);
      tasksLaunched++;
      cout << "Crawler " << urlId << " " << url << endl;
    }

    for (size_t j = maxTasks / 2; j < maxTasks && renderQueue.size() > 0; j++) {
      string url = renderQueue.front();
      renderQueue.pop();
      string urlId = "R" + stringify<size_t>(processed[url]);
      TaskInfo task;
      task.set_name("Renderer " + urlId);
      task.mutable_task_id()->set_value(urlId);
      task.mutable_slave_id()->MergeFrom(offer.slave_id());
      task.mutable_executor()->MergeFrom(renderer);
      task.mutable_resources()->MergeFrom(TASK_RESOURCES);
      task.set_data(url);
      tasks.push_back(task);
      tasksLaunched++;
      cout << "Renderer " << urlId << " " << url << endl;
    }

    driver->launchTasks(offer.id(), tasks);
  }
}
// This test checks that a scheduler exit shuts down the executor.
TEST_F(FaultToleranceTest, SchedulerExit)
{
  Try<PID<Master> > master = StartMaster();
  ASSERT_SOME(master);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);

  Try<PID<Slave> > slave = StartSlave(&exec);
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(&sched, DEFAULT_FRAMEWORK_INFO, master.get());

  EXPECT_CALL(sched, registered(&driver, _, _));

  Future<vector<Offer> > offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  EXPECT_NE(0u, offers.get().size());

  TaskInfo task;
  task.set_name("");
  task.mutable_task_id()->set_value("1");
  task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id());
  task.mutable_resources()->MergeFrom(offers.get()[0].resources());
  task.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO);

  vector<TaskInfo> tasks;
  tasks.push_back(task);

  Future<TaskStatus> status;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&status));

  EXPECT_CALL(exec, registered(_, _, _, _));

  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING));

  driver.launchTasks(offers.get()[0].id(), tasks);

  AWAIT_READY(status);
  EXPECT_EQ(TASK_RUNNING, status.get().state());

  EXPECT_CALL(exec, shutdown(_))
    .Times(AtMost(1));

  driver.stop();
  driver.join();

  Shutdown();
}
virtual void resourceOffers(SchedulerDriver* driver, const vector<Offer>& offers) { cout << "." << flush; for (int i = 0; i < offers.size(); i++) { const Offer& offer = offers[i]; // Lookup resources we care about. // TODO(benh): It would be nice to ultimately have some helper // functions for looking up resources. double cpus = 0; double mem = 0; for (int i = 0; i < offer.resources_size(); i++) { const Resource& resource = offer.resources(i); if (resource.name() == "cpus" && resource.type() == Value::SCALAR) { cpus = resource.scalar().value(); } else if (resource.name() == "mem" && resource.type() == Value::SCALAR) { mem = resource.scalar().value(); } } // Launch tasks (only one per offer). vector<TaskInfo> tasks; if (cpus >= CPUS_PER_TASK && mem >= MEM_PER_TASK) { int taskId = tasksLaunched++; cout << "Starting task " << taskId << " on " << offer.hostname() << endl; TaskInfo task; task.set_name("Task " + lexical_cast<string>(taskId)); task.mutable_task_id()->set_value(lexical_cast<string>(taskId)); task.mutable_slave_id()->MergeFrom(offer.slave_id()); task.mutable_executor()->MergeFrom(executor); Resource* resource; resource = task.add_resources(); resource->set_name("cpus"); resource->set_type(Value::SCALAR); resource->mutable_scalar()->set_value(CPUS_PER_TASK); resource = task.add_resources(); resource->set_name("mem"); resource->set_type(Value::SCALAR); resource->mutable_scalar()->set_value(MEM_PER_TASK); tasks.push_back(task); cpus -= CPUS_PER_TASK; mem -= MEM_PER_TASK; } driver->launchTasks(offer.id(), tasks); } }
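The TODO above asks for resource-lookup helpers. A minimal sketch of one follows; this is not a Mesos library function, just the loop above factored out, and it matches the getScalarResource call used by the balloon scheduler later in this section:

double getScalarResource(const Offer& offer, const std::string& name)
{
  // Scan the offer for a scalar resource with the given name;
  // return 0 if the offer does not carry it.
  double value = 0.0;
  for (int i = 0; i < offer.resources_size(); i++) {
    const Resource& resource = offer.resources(i);
    if (resource.name() == name && resource.type() == Value::SCALAR) {
      value = resource.scalar().value();
    }
  }
  return value;
}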
TEST_F(ResourceOffersTest, TaskUsesMoreResourcesThanOffered) { Try<PID<Master> > master = StartMaster(); ASSERT_SOME(master); Try<PID<Slave> > slave = StartSlave(); ASSERT_SOME(slave); MockScheduler sched; MesosSchedulerDriver driver(&sched, DEFAULT_FRAMEWORK_INFO, master.get()); EXPECT_CALL(sched, registered(&driver, _, _)) .Times(1); Future<vector<Offer> > offers; EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); // Ignore subsequent offers. driver.start(); AWAIT_READY(offers); EXPECT_NE(0u, offers.get().size()); TaskInfo task; task.set_name(""); task.mutable_task_id()->set_value("1"); task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id()); task.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO); Resource* cpus = task.add_resources(); cpus->set_name("cpus"); cpus->set_type(Value::SCALAR); cpus->mutable_scalar()->set_value(2.01); vector<TaskInfo> tasks; tasks.push_back(task); Future<TaskStatus> status; EXPECT_CALL(sched, statusUpdate(&driver, _)) .WillOnce(FutureArg<1>(&status)); driver.launchTasks(offers.get()[0].id(), tasks); AWAIT_READY(status); EXPECT_EQ(task.task_id(), status.get().task_id()); EXPECT_EQ(TASK_LOST, status.get().state()); EXPECT_TRUE(status.get().has_message()); EXPECT_EQ("Task uses more resources than offered", status.get().message()); driver.stop(); driver.join(); Shutdown(); }
TaskInfo buildTask(string hostname, string id, const SlaveID& slave)
{
  hostProfile profile = hostList[hostname];

  // Define the Docker container. Since there is no "executor" to manage
  // the tasks, the container is built and attached directly to the task
  // below.
  ContainerInfo container;
  container.set_type(ContainerInfo::DOCKER);

  ContainerInfo::DockerInfo docker;
  docker.set_image(DOCKER_IMAGE);
  container.mutable_docker()->MergeFrom(docker);

  // Mount a local volume inside the container.
  Volume* volume = container.add_volumes();
  volume->set_container_path("/mnt");
  volume->set_host_path("/local/mesos");
  volume->set_mode(Volume::RW);

  // Define the task.
  TaskInfo task;
  task.set_name("K3-" + k3binary);
  task.mutable_task_id()->set_value(id);
  task.mutable_slave_id()->MergeFrom(slave);
  task.mutable_container()->MergeFrom(container);
  // task.set_data(stringify(localTasks));

  // Define include files for the command.
  CommandInfo command;
  CommandInfo_URI* k3_bin = command.add_uris();
  k3_bin->set_value(fileServer + "/" + k3binary);
  k3_bin->set_executable(true);
  k3_bin->set_extract(false);

  // CommandInfo_URI* k3_args = command.add_uris();
  // k3_args->set_value(runpath + "/k3input.yaml");

  // command.set_value("$MESOS_SANDBOX/" + k3binary + " -l INFO -p " +
  //                   "$MESOS_SANDBOX/k3input.yaml");
  task.mutable_command()->MergeFrom(command);

  // Option A for doing resource management (see the scheduler for option B).
  Resource* resource;

  resource = task.add_resources();
  resource->set_name("cpus");
  resource->set_type(Value::SCALAR);
  resource->mutable_scalar()->set_value(profile.cpu);

  resource = task.add_resources();
  resource->set_name("mem");
  resource->set_type(Value::SCALAR);
  resource->mutable_scalar()->set_value(profile.mem);

  return task;
}
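A sketch of how buildTask might be driven from a resourceOffers callback. The loop is hypothetical; hostList, k3binary, and the one-task-per-offer policy come from the surrounding K3 scheduler:

// Hypothetical call site: build one task per offer, keyed by hostname.
for (size_t i = 0; i < offers.size(); i++) {
  const Offer& offer = offers[i];

  TaskInfo task =
    buildTask(offer.hostname(), stringify<size_t>(i), offer.slave_id());

  vector<TaskInfo> tasks;
  tasks.push_back(task);
  driver->launchTasks(offer.id(), tasks);
}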
void resourceOffers(const vector<Offer>& offers)
{
  foreach (const Offer& offer, offers) {
    cout << "Received offer " << offer.id() << " with "
         << offer.resources() << endl;

    static const Resources TASK_RESOURCES = Resources::parse(
        "cpus:" + stringify(CPUS_PER_TASK) +
        ";mem:" + stringify(MEM_PER_TASK)).get();

    Resources remaining = offer.resources();

    // Launch tasks.
    vector<TaskInfo> tasks;
    while (tasksLaunched < totalTasks &&
           remaining.flatten().contains(TASK_RESOURCES)) {
      int taskId = tasksLaunched++;

      cout << "Launching task " << taskId << " using offer "
           << offer.id() << endl;

      TaskInfo task;
      task.set_name("Task " + lexical_cast<string>(taskId));
      task.mutable_task_id()->set_value(lexical_cast<string>(taskId));
      task.mutable_slave_id()->MergeFrom(offer.slave_id());
      task.mutable_executor()->MergeFrom(executor);

      Option<Resources> resources =
        remaining.find(TASK_RESOURCES.flatten(framework.role()));

      CHECK_SOME(resources);

      task.mutable_resources()->MergeFrom(resources.get());
      remaining -= resources.get();

      tasks.push_back(task);
    }

    Call call;
    call.mutable_framework_info()->CopyFrom(framework);
    call.set_type(Call::ACCEPT);

    Call::Accept* accept = call.mutable_accept();
    accept->add_offer_ids()->CopyFrom(offer.id());

    Offer::Operation* operation = accept->add_operations();
    operation->set_type(Offer::Operation::LAUNCH);

    foreach (const TaskInfo& taskInfo, tasks) {
      operation->mutable_launch()->add_task_infos()->CopyFrom(taskInfo);
    }

    // The original snippet is truncated here: the ACCEPT call still has
    // to be sent. Assuming a v1 scheduler library handle named 'mesos':
    mesos->send(call);
  }
}
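Giving an unused offer back to the allocator goes through the same Call mechanism. A sketch under the same assumptions (v1 API, a `mesos` handle, the `framework` info above, an `offer` in scope):

// Hypothetical DECLINE counterpart to the ACCEPT above.
Call call;
call.mutable_framework_info()->CopyFrom(framework);
call.set_type(Call::DECLINE);

// DECLINE carries the ids of the offers being returned.
call.mutable_decline()->add_offer_ids()->CopyFrom(offer.id());

mesos->send(call);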
// TODO(benh): Move this into utils, make more generic, and use in // other tests. vector<TaskInfo> createTasks(const Offer& offer) { TaskInfo task; task.set_name("test-task"); task.mutable_task_id()->set_value("1"); task.mutable_slave_id()->MergeFrom(offer.slave_id()); task.mutable_resources()->MergeFrom(offer.resources()); task.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO); vector<TaskInfo> tasks; tasks.push_back(task); return tasks; }
virtual void resourceOffers( SchedulerDriver* driver, const vector<Offer>& offers) { static const Try<Resources> TASK_RESOURCES = Resources::parse(resources); if (TASK_RESOURCES.isError()) { cerr << "Failed to parse resources '" << resources << "': " << TASK_RESOURCES.error() << endl; driver->abort(); return; } foreach (const Offer& offer, offers) { if (!launched && Resources(offer.resources()).contains(TASK_RESOURCES.get())) { TaskInfo task; task.set_name(name); task.mutable_task_id()->set_value(name); task.mutable_slave_id()->MergeFrom(offer.slave_id()); task.mutable_resources()->CopyFrom(TASK_RESOURCES.get()); task.mutable_command()->set_value(command); if (uri.isSome()) { task.mutable_command()->add_uris()->set_value(uri.get()); } if (dockerImage.isSome()) { ContainerInfo containerInfo; containerInfo.set_type(ContainerInfo::DOCKER); ContainerInfo::DockerInfo dockerInfo; dockerInfo.set_image(dockerImage.get()); containerInfo.mutable_docker()->CopyFrom(dockerInfo); task.mutable_container()->CopyFrom(containerInfo); } vector<TaskInfo> tasks; tasks.push_back(task); driver->launchTasks(offer.id(), tasks); cout << "task " << name << " submitted to slave " << offer.slave_id() << endl; launched = true; } else { driver->declineOffer(offer.id()); } } }
inline TaskInfo createTask( const Offer& offer, const std::string& command, const std::string& name = "test-task", const std::string& id = UUID::random().toString()) { TaskInfo task; task.set_name(name); task.mutable_task_id()->set_value(id); task.mutable_slave_id()->MergeFrom(offer.slave_id()); task.mutable_resources()->MergeFrom(offer.resources()); task.mutable_command()->set_value(command); return task; }
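A hypothetical call site for this helper, assuming the `offers` future and `driver` used throughout the tests in this section:

// Launch a single sleep task sized to the whole first offer.
TaskInfo task = createTask(offers.get()[0], "sleep 10");

vector<TaskInfo> tasks;
tasks.push_back(task);
driver.launchTasks(offers.get()[0].id(), tasks);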
virtual void resourceOffers(SchedulerDriver* driver, const std::vector<Offer>& offers) { std::cout << "Resource offers received" << std::endl; for (size_t i = 0; i < offers.size(); i++) { const Offer& offer = offers[i]; // We just launch one task. if (!taskLaunched) { double mem = getScalarResource(offer, "mem"); assert(mem > EXECUTOR_MEMORY_MB); std::vector<TaskInfo> tasks; std::cout << "Starting the task" << std::endl; TaskInfo task; task.set_name("Balloon Task"); task.mutable_task_id()->set_value("1"); task.mutable_slave_id()->MergeFrom(offer.slave_id()); task.mutable_executor()->MergeFrom(executor); task.set_data(stringify<size_t>(balloonLimit)); // Use up all the memory from the offer. Resource* resource; resource = task.add_resources(); resource->set_name("mem"); resource->set_type(Value::SCALAR); resource->mutable_scalar()->set_value(mem - EXECUTOR_MEMORY_MB); // And all the CPU. double cpus = getScalarResource(offer, "cpus"); resource = task.add_resources(); resource->set_name("cpus"); resource->set_type(Value::SCALAR); resource->mutable_scalar()->set_value(cpus); tasks.push_back(task); driver->launchTasks(offer.id(), tasks); taskLaunched = true; } } }
virtual void resourceOffers(SchedulerDriver* driver, const vector<Offer>& offers) { foreach (const Offer& offer, offers) { cout << "Received offer " << offer.id() << " with " << offer.resources() << endl; static const Resources TASK_RESOURCES = Resources::parse( "cpus:" + stringify(CPUS_PER_TASK) + ";mem:" + stringify(MEM_PER_TASK)).get(); Resources remaining = offer.resources(); // Launch tasks. vector<TaskInfo> tasks; while (tasksLaunched < totalTasks && remaining.flatten().contains(TASK_RESOURCES)) { int taskId = tasksLaunched++; cout << "Launching task " << taskId << " using offer " << offer.id() << endl; TaskInfo task; task.set_name("Task " + lexical_cast<string>(taskId)); task.mutable_task_id()->set_value(lexical_cast<string>(taskId)); task.mutable_slave_id()->MergeFrom(offer.slave_id()); task.mutable_executor()->MergeFrom(executor); Try<Resources> flattened = TASK_RESOURCES.flatten(role); CHECK_SOME(flattened); Option<Resources> resources = remaining.find(flattened.get()); CHECK_SOME(resources); task.mutable_resources()->MergeFrom(resources.get()); remaining -= resources.get(); tasks.push_back(task); } driver->launchTasks(offer.id(), tasks); }
// For use with a MockScheduler, for example:
// EXPECT_CALL(sched, resourceOffers(_, _))
//   .WillOnce(LaunchTasks(TASKS, CPUS, MEM, ROLE));
// Launches up to TASKS no-op tasks, if possible, each with
// CPUS cpus and MEM memory, using resources for ROLE.
ACTION_P4(LaunchTasks, tasks, cpus, mem, role)
{
  SchedulerDriver* driver = arg0;
  std::vector<Offer> offers = arg1;
  int numTasks = tasks;

  int launched = 0;

  for (size_t i = 0; i < offers.size(); i++) {
    const Offer& offer = offers[i];
    const Resources TASK_RESOURCES = Resources::parse(
        "cpus:" + stringify(cpus) + ";mem:" + stringify(mem)).get();

    int nextTaskId = 0;

    // NOTE: Named 'taskInfos' to avoid shadowing the action
    // parameter 'tasks'.
    std::vector<TaskInfo> taskInfos;
    Resources remaining = offer.resources();

    while (TASK_RESOURCES <= remaining.flatten() && launched < numTasks) {
      TaskInfo task;
      task.set_name("TestTask");
      task.mutable_task_id()->set_value(stringify(nextTaskId++));
      task.mutable_slave_id()->MergeFrom(offer.slave_id());

      ExecutorInfo executor;
      executor.mutable_executor_id()->set_value("default");
      executor.mutable_command()->set_value(":");
      task.mutable_executor()->MergeFrom(executor);

      Option<Resources> resources = remaining.find(TASK_RESOURCES, role);
      CHECK_SOME(resources);

      task.mutable_resources()->MergeFrom(resources.get());
      remaining -= resources.get();

      taskInfos.push_back(task);
      launched++;
    }

    driver->launchTasks(offer.id(), taskInfos);
  }
}
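With the corrected four-argument signature, an expectation would read as follows (the values are hypothetical; "*" is the default role, and ignoring leftover offers follows the idiom used by the tests in this section):

EXPECT_CALL(sched, resourceOffers(&driver, _))
  .WillOnce(LaunchTasks(5, 1, 64, "*"))
  .WillRepeatedly(Return()); // Ignore subsequent offers.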
virtual void resourceOffers(SchedulerDriver* driver, const vector<Offer>& offers) { cout << "." << flush; for (size_t i = 0; i < offers.size(); i++) { const Offer& offer = offers[i]; static const Resources TASK_RESOURCES = Resources::parse( "cpus:" + stringify(CPUS_PER_TASK) + ";mem:" + stringify(MEM_PER_TASK)).get(); Resources remaining = offer.resources(); // Launch tasks. vector<TaskInfo> tasks; while (tasksLaunched < totalTasks && TASK_RESOURCES <= remaining.flatten()) { int taskId = tasksLaunched++; cout << "Starting task " << taskId << " on " << offer.hostname() << endl; TaskInfo task; task.set_name("Task " + lexical_cast<string>(taskId)); task.mutable_task_id()->set_value(lexical_cast<string>(taskId)); task.mutable_slave_id()->MergeFrom(offer.slave_id()); task.mutable_executor()->MergeFrom(executor); Option<Resources> resources = remaining.find(TASK_RESOURCES, role); CHECK_SOME(resources); task.mutable_resources()->MergeFrom(resources.get()); remaining -= resources.get(); tasks.push_back(task); } driver->launchTasks(offer.id(), tasks); } }
inline TaskInfo createTask( const Offer& offer, const std::string& command, const Option<mesos::ExecutorID>& executorId = None(), const std::string& name = "test-task", const std::string& id = UUID::random().toString()) { TaskInfo task; task.set_name(name); task.mutable_task_id()->set_value(id); task.mutable_slave_id()->CopyFrom(offer.slave_id()); task.mutable_resources()->CopyFrom(offer.resources()); if (executorId.isSome()) { ExecutorInfo executor; executor.mutable_executor_id()->CopyFrom(executorId.get()); executor.mutable_command()->set_value(command); task.mutable_executor()->CopyFrom(executor); } else { task.mutable_command()->set_value(command); } return task; }
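The optional executorId argument selects between the two task shapes. A hypothetical contrast, given an `offer` in scope (note each task claims the whole offer's resources, so only one of these could be launched per offer):

// Command task: runs via the built-in command executor.
TaskInfo commandTask = createTask(offer, "echo hello");

// Executor task: the command becomes the custom executor's command.
TaskInfo executorTask = createTask(offer, ":", DEFAULT_EXECUTOR_ID);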
vector<TaskInfo> populateTasks(
    const string& cmd,
    CommandInfo healthCommand,
    const Offer& offer,
    int gracePeriodSeconds = 0,
    const Option<int>& consecutiveFailures = None(),
    const Option<map<string, string> >& env = None())
{
  TaskInfo task;
  task.set_name("");
  task.mutable_task_id()->set_value("1");
  task.mutable_slave_id()->CopyFrom(offer.slave_id());
  task.mutable_resources()->CopyFrom(offer.resources());

  CommandInfo command;
  command.set_value(cmd);

  Environment::Variable* variable =
    command.mutable_environment()->add_variables();

  // We need to set the correct directory to launch the health check
  // process instead of the default for tests.
  variable->set_name("MESOS_LAUNCHER_DIR");
  variable->set_value(path::join(tests::flags.build_dir, "src"));

  task.mutable_command()->CopyFrom(command);

  HealthCheck healthCheck;

  if (env.isSome()) {
    foreachpair (const string& name, const string value, env.get()) {
      Environment::Variable* variable =
        healthCommand.mutable_environment()->mutable_variables()->Add();
      variable->set_name(name);
      variable->set_value(value);
    }
  }

  // The original snippet is truncated here; the remainder below is a
  // plausible completion that wires the health check into the task.
  healthCheck.mutable_command()->CopyFrom(healthCommand);
  healthCheck.set_delay_seconds(0);
  healthCheck.set_interval_seconds(0);
  healthCheck.set_grace_period_seconds(gracePeriodSeconds);

  if (consecutiveFailures.isSome()) {
    healthCheck.set_consecutive_failures(consecutiveFailures.get());
  }

  task.mutable_health_check()->CopyFrom(healthCheck);

  vector<TaskInfo> tasks;
  tasks.push_back(task);

  return tasks;
}
// This test runs a command _with_ the command user field set. The
// command will verify the assumption that the command is run as the
// specified user. We use (and assume the presence of) the
// unprivileged 'nobody' user which should be available on both Linux
// and Mac OS X.
TEST_F(SlaveTest, DISABLED_ROOT_RunTaskWithCommandInfoWithUser)
{
  // TODO(nnielsen): Introduce STOUT abstraction for user verification
  // instead of flat getpwnam call.
  const string testUser = "******";
  if (::getpwnam(testUser.c_str()) == NULL) {
    LOG(WARNING) << "Cannot run ROOT_RunTaskWithCommandInfoWithUser test:"
                 << " user '" << testUser << "' is not present";
    return;
  }

  Try<PID<Master> > master = StartMaster();
  ASSERT_SOME(master);

  // Need flags for 'executor_registration_timeout'.
  slave::Flags flags = CreateSlaveFlags();
  flags.isolation = "posix/cpu,posix/mem";

  Try<MesosContainerizer*> containerizer =
    MesosContainerizer::create(flags, false);
  CHECK_SOME(containerizer);

  Try<PID<Slave> > slave = StartSlave(containerizer.get());
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _))
    .Times(1);

  Future<vector<Offer> > offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  EXPECT_NE(0u, offers.get().size());

  // Launch a task with the command executor.
  TaskInfo task;
  task.set_name("");
  task.mutable_task_id()->set_value("1");
  task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id());
  task.mutable_resources()->MergeFrom(offers.get()[0].resources());

  CommandInfo command;
  command.set_value("test `whoami` = " + testUser);
  command.set_user(testUser);

  task.mutable_command()->MergeFrom(command);

  vector<TaskInfo> tasks;
  tasks.push_back(task);

  Future<TaskStatus> statusRunning;
  Future<TaskStatus> statusFinished;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&statusRunning))
    .WillOnce(FutureArg<1>(&statusFinished));

  driver.launchTasks(offers.get()[0].id(), tasks);

  AWAIT_READY(statusRunning);
  EXPECT_EQ(TASK_RUNNING, statusRunning.get().state());

  AWAIT_READY(statusFinished);
  EXPECT_EQ(TASK_FINISHED, statusFinished.get().state());

  driver.stop();
  driver.join();

  Shutdown(); // Must shutdown before 'containerizer' gets deallocated.
}
// This test runs a command without the command user field set. The // command will verify the assumption that the command is run as the // slave user (in this case, root). TEST_F(SlaveTest, ROOT_RunTaskWithCommandInfoWithoutUser) { Try<PID<Master> > master = StartMaster(); ASSERT_SOME(master); // Need flags for 'executor_registration_timeout'. slave::Flags flags = CreateSlaveFlags(); flags.isolation = "posix/cpu,posix/mem"; Try<MesosContainerizer*> containerizer = MesosContainerizer::create(flags, false); CHECK_SOME(containerizer); Try<PID<Slave> > slave = StartSlave(containerizer.get()); ASSERT_SOME(slave); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL); EXPECT_CALL(sched, registered(&driver, _, _)) .Times(1); Future<vector<Offer> > offers; EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); // Ignore subsequent offers. driver.start(); AWAIT_READY(offers); EXPECT_NE(0u, offers.get().size()); // Launch a task with the command executor. TaskInfo task; task.set_name(""); task.mutable_task_id()->set_value("1"); task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id()); task.mutable_resources()->MergeFrom(offers.get()[0].resources()); Result<string> user = os::user(); CHECK_SOME(user) << "Failed to get current user name" << (user.isError() ? ": " + user.error() : ""); // Command executor will run as user running test. CommandInfo command; command.set_value("test `whoami` = " + user.get()); task.mutable_command()->MergeFrom(command); vector<TaskInfo> tasks; tasks.push_back(task); Future<TaskStatus> statusRunning; Future<TaskStatus> statusFinished; EXPECT_CALL(sched, statusUpdate(&driver, _)) .WillOnce(FutureArg<1>(&statusRunning)) .WillOnce(FutureArg<1>(&statusFinished)); driver.launchTasks(offers.get()[0].id(), tasks); AWAIT_READY(statusRunning); EXPECT_EQ(TASK_RUNNING, statusRunning.get().state()); AWAIT_READY(statusFinished); EXPECT_EQ(TASK_FINISHED, statusFinished.get().state()); driver.stop(); driver.join(); Shutdown(); // Must shutdown before 'containerizer' gets deallocated. }
// Test that we can run the mesos-executor and specify an "override"
// command to use via the --override argument.
TEST_F(SlaveTest, MesosExecutorWithOverride)
{
  Try<PID<Master> > master = StartMaster();
  ASSERT_SOME(master);

  TestContainerizer containerizer;

  Try<PID<Slave> > slave = StartSlave(&containerizer);
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _))
    .Times(1);

  Future<vector<Offer> > offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  EXPECT_NE(0u, offers.get().size());

  // Launch a task with the command executor.
  TaskInfo task;
  task.set_name("");
  task.mutable_task_id()->set_value("1");
  task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id());
  task.mutable_resources()->MergeFrom(offers.get()[0].resources());

  CommandInfo command;
  command.set_value("sleep 10");

  task.mutable_command()->MergeFrom(command);

  vector<TaskInfo> tasks;
  tasks.push_back(task);

  // Expect the launch and just assume it was successful since we'll be
  // launching the executor ourselves manually below.
  Future<Nothing> launch;
  EXPECT_CALL(containerizer, launch(_, _, _, _, _, _, _))
    .WillOnce(DoAll(FutureSatisfy(&launch),
                    Return(true)));

  // Expect wait after launch is called but don't return anything
  // until after we've finished everything below.
  Future<Nothing> wait;
  process::Promise<containerizer::Termination> promise;
  EXPECT_CALL(containerizer, wait(_))
    .WillOnce(DoAll(FutureSatisfy(&wait),
                    Return(promise.future())));

  driver.launchTasks(offers.get()[0].id(), tasks);

  // Once we get the launch, run the mesos-executor with --override.
  AWAIT_READY(launch);

  // Set up fake environment for executor.
  map<string, string> environment;
  environment["MESOS_SLAVE_PID"] = stringify(slave.get());
  environment["MESOS_SLAVE_ID"] = stringify(offers.get()[0].slave_id());
  environment["MESOS_FRAMEWORK_ID"] =
    stringify(offers.get()[0].framework_id());
  environment["MESOS_EXECUTOR_ID"] = stringify(task.task_id());
  environment["MESOS_DIRECTORY"] = "";

  // Create temporary file to store validation string. If the command is
  // successfully replaced, this file will end up containing the string
  // 'hello world\n'. Otherwise, the original task command, i.e.
  // 'sleep', will be called and the test will fail.
  Try<std::string> file = os::mktemp();
  ASSERT_SOME(file);

  string executorCommand =
    path::join(tests::flags.build_dir, "src", "mesos-executor") +
    " --override -- /bin/sh -c 'echo hello world >" + file.get() + "'";

  // Expect two status updates, one for once the mesos-executor says
  // the task is running and one for after our overridden command
  // above finishes.
  Future<TaskStatus> status1, status2;
  EXPECT_CALL(sched, statusUpdate(_, _))
    .WillOnce(FutureArg<1>(&status1))
    .WillOnce(FutureArg<1>(&status2));

  Try<process::Subprocess> executor =
    process::subprocess(
        executorCommand,
        process::Subprocess::PIPE(),
        process::Subprocess::PIPE(),
        process::Subprocess::PIPE(),
        environment);

  ASSERT_SOME(executor);

  // Scheduler should receive the TASK_RUNNING update.
AWAIT_READY(status1); ASSERT_EQ(TASK_RUNNING, status1.get().state()); AWAIT_READY(status2); ASSERT_EQ(TASK_FINISHED, status2.get().state()); AWAIT_READY(wait); containerizer::Termination termination; termination.set_killed(false); termination.set_message("Killed executor"); termination.set_status(0); promise.set(termination); driver.stop(); driver.join(); AWAIT_READY(executor.get().status()); // Verify file contents. Try<std::string> validate = os::read(file.get()); ASSERT_SOME(validate); EXPECT_EQ(validate.get(), "hello world\n"); os::rm(file.get()); Shutdown(); }
// This test verifies that when an executor terminates before // registering with slave, it is properly cleaned up. TEST_F(SlaveTest, RemoveUnregisteredTerminatedExecutor) { Try<PID<Master> > master = StartMaster(); ASSERT_SOME(master); MockExecutor exec(DEFAULT_EXECUTOR_ID); TestContainerizer containerizer(&exec); Try<PID<Slave> > slave = StartSlave(&containerizer); ASSERT_SOME(slave); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL); EXPECT_CALL(sched, registered(&driver, _, _)) .Times(1); Future<vector<Offer> > offers; EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); // Ignore subsequent offers. driver.start(); AWAIT_READY(offers); EXPECT_NE(0u, offers.get().size()); TaskInfo task; task.set_name(""); task.mutable_task_id()->set_value("1"); task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id()); task.mutable_resources()->MergeFrom(offers.get()[0].resources()); task.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO); vector<TaskInfo> tasks; tasks.push_back(task); // Drop the registration message from the executor to the slave. Future<process::Message> registerExecutorMessage = DROP_MESSAGE(Eq(RegisterExecutorMessage().GetTypeName()), _, _); driver.launchTasks(offers.get()[0].id(), tasks); AWAIT_READY(registerExecutorMessage); Future<TaskStatus> status; EXPECT_CALL(sched, statusUpdate(&driver, _)) .WillOnce(FutureArg<1>(&status)); Future<Nothing> schedule = FUTURE_DISPATCH(_, &GarbageCollectorProcess::schedule); // Now kill the executor. containerizer.destroy(offers.get()[0].framework_id(), DEFAULT_EXECUTOR_ID); AWAIT_READY(status); EXPECT_EQ(TASK_LOST, status.get().state()); // We use 'gc.schedule' as a signal for the executor being cleaned // up by the slave. AWAIT_READY(schedule); EXPECT_CALL(exec, shutdown(_)) .Times(AtMost(1)); driver.stop(); driver.join(); Shutdown(); // Must shutdown before 'containerizer' gets deallocated. }
// This test ensures that a killTask() can happen between runTask() // and _runTask() and then gets "handled properly". This means that // the task never gets started, but also does not get lost. The end // result is status TASK_KILLED. Essentially, killing the task is // realized while preparing to start it. See MESOS-947. // Temporarily disabled due to MESOS-1945. TEST_F(SlaveTest, DISABLED_KillTaskBetweenRunTaskParts) { Try<PID<Master> > master = StartMaster(); ASSERT_SOME(master); MockExecutor exec(DEFAULT_EXECUTOR_ID); TestContainerizer containerizer(&exec); StandaloneMasterDetector detector(master.get()); MockSlave slave(CreateSlaveFlags(), &detector, &containerizer); process::spawn(slave); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL); EXPECT_CALL(sched, registered(&driver, _, _)) .Times(1); Future<vector<Offer> > offers; EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); // Ignore subsequent offers. driver.start(); AWAIT_READY(offers); EXPECT_NE(0u, offers.get().size()); TaskInfo task; task.set_name(""); task.mutable_task_id()->set_value("1"); task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id()); task.mutable_resources()->MergeFrom(offers.get()[0].resources()); task.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO); vector<TaskInfo> tasks; tasks.push_back(task); EXPECT_CALL(exec, registered(_, _, _, _)) .Times(0); EXPECT_CALL(exec, launchTask(_, _)) .Times(0); EXPECT_CALL(exec, shutdown(_)) .Times(0); Future<TaskStatus> status; EXPECT_CALL(sched, statusUpdate(&driver, _)) .WillRepeatedly(FutureArg<1>(&status)); EXPECT_CALL(slave, runTask(_, _, _, _, _)) .WillOnce(Invoke(&slave, &MockSlave::unmocked_runTask)); // Saved arguments from Slave::_runTask(). Future<bool> future; FrameworkInfo frameworkInfo; FrameworkID frameworkId; // Skip what Slave::_runTask() normally does, save its arguments for // later, tie reaching the critical moment when to kill the task to // a future. Future<Nothing> _runTask; EXPECT_CALL(slave, _runTask(_, _, _, _, _)) .WillOnce(DoAll(FutureSatisfy(&_runTask), SaveArg<0>(&future), SaveArg<1>(&frameworkInfo), SaveArg<2>(&frameworkId))); driver.launchTasks(offers.get()[0].id(), tasks); AWAIT_READY(_runTask); Future<Nothing> killTask; EXPECT_CALL(slave, killTask(_, _, _)) .WillOnce(DoAll(Invoke(&slave, &MockSlave::unmocked_killTask), FutureSatisfy(&killTask))); driver.killTask(task.task_id()); // Since this is the only task ever for this framework, the // framework should get removed in Slave::_runTask(). // Thus we can observe that this happens before Shutdown(). Future<Nothing> removeFramework; EXPECT_CALL(slave, removeFramework(_)) .WillOnce(DoAll(Invoke(&slave, &MockSlave::unmocked_removeFramework), FutureSatisfy(&removeFramework))); AWAIT_READY(killTask); slave.unmocked__runTask( future, frameworkInfo, frameworkId, master.get(), task); AWAIT_READY(removeFramework); AWAIT_READY(status); EXPECT_EQ(TASK_KILLED, status.get().state()); driver.stop(); driver.join(); process::terminate(slave); process::wait(slave); Shutdown(); // Must shutdown before 'containerizer' gets deallocated. }
void ChapelScheduler::resourceOffers(
    SchedulerDriver* driver,
    const vector<Offer>& offers)
{
  // Offers only contain resources describing a single node -> for more
  // details read include/mesos/mesos.proto.
  //
  cout << "***\tProcessing Offers!" << endl;

  const int remainingCpusReq = cpusReq - launchedTsks.size();

  if (remainingCpusReq == 0) {
    for (size_t k = 0; k < offers.size(); k++) {
      const Offer& offer = offers[k];
      driver->declineOffer(offer.id());
    }

    cout << "\t\tChapelScheduler declined offer because resource "
         << "requirements satisfied" << endl;

    // NOTE: Return here so the same offers are not declined a second
    // time by the loop below (the original code fell through).
    return;
  }

  // Cycle through all the offers and resource a task.
  // Each offer corresponds to a single compute node.
  //
  const static Resources TASK_RESOURCES = Resources::parse(mesosReq).get();

  vector<TaskInfo> tsks;

  for (size_t i = 0; i < offers.size(); i++) {
    const Offer& offer = offers[i];

    if (tsks.size() == remainingCpusReq) {
      // Need to cycle through the remaining offers and decline them.
      driver->declineOffer(offer.id());
      continue;
    }

    Resources remaining = offer.resources();

    /* Attempting to exercise multi-tenancy capabilities in Mesos:
     * given an offer from a node, try to maximize the number of jobs
     * that can be allocated to that node given the job's resource
     * requirements.
     *
     * If the desired number of nodes and jobs are met, then launch
     * all the jobs on that node's offer.
     *
     * This means some nodes will get multiple tasks assigned for
     * execution.
     */
    vector<TaskInfo> tol;

    while (remaining.flatten().contains(TASK_RESOURCES) &&
           ((remainingCpusReq - tsks.size()) > 0)) {
      const string tid = stringify<size_t>(tsks.size());

      TaskInfo task;
      task.set_name("Chapel Remote Program Task\t" + tid);
      task.mutable_task_id()->set_value(tid);
      task.mutable_slave_id()->MergeFrom(offer.slave_id());
      task.mutable_command()->MergeFrom(chplCmdInfo);
      task.mutable_resources()->MergeFrom(TASK_RESOURCES);
      task.set_data(remoteCmd);

      tol.push_back(task);  // 'tol' means "to launch".
      tsks.push_back(task); // 'tsks' tracks launched tasks for framework
                            // termination purposes.
      remaining -= TASK_RESOURCES;
      tasksLaunched += 1;

      cout << "\t\t+++\tLaunching # of Tasks!\t" << tol.size()
           << " of " << tasksLaunched << endl;
    }

    // After all the tasks for this offer have been "resourced",
    // launch the tasks using this offer.id.
    //
    driver->launchTasks(offer.id(), tol);
  }

  const size_t pendingTsksSize = tsks.size();
  cout << endl
       << "\tAcquired # tasks " << pendingTsksSize
       << " required # of tasks " << cpusReq
       << " remaining required # tasks " << remainingCpusReq
       << endl << endl;

  if (pendingTsksSize > 0) {
    for (vector<TaskInfo>::iterator i = tsks.begin(); i != tsks.end(); i++) {
      launchedTsks.insert(make_pair(i->task_id().value(), *i));
    }
  }
}
virtual void resourceOffers(
    SchedulerDriver* driver,
    const vector<Offer>& offers)
{
  static const Try<Resources> TASK_RESOURCES = Resources::parse(resources);

  if (TASK_RESOURCES.isError()) {
    cerr << "Failed to parse resources '" << resources
         << "': " << TASK_RESOURCES.error() << endl;
    driver->abort();
    return;
  }

  foreach (const Offer& offer, offers) {
    if (!launched &&
        Resources(offer.resources()).contains(TASK_RESOURCES.get())) {
      TaskInfo task;
      task.set_name(name);
      task.mutable_task_id()->set_value(name);
      task.mutable_slave_id()->MergeFrom(offer.slave_id());
      task.mutable_resources()->CopyFrom(TASK_RESOURCES.get());

      CommandInfo* commandInfo = task.mutable_command();
      commandInfo->set_value(command);

      if (environment.isSome()) {
        Environment* environment_ = commandInfo->mutable_environment();
        foreachpair (
            const std::string& name,
            const std::string& value,
            environment.get()) {
          Environment_Variable* environmentVariable =
            environment_->add_variables();
          environmentVariable->set_name(name);
          environmentVariable->set_value(value);
        }
      }

      if (uri.isSome()) {
        task.mutable_command()->add_uris()->set_value(uri.get());
      }

      if (dockerImage.isSome()) {
        ContainerInfo containerInfo;

        if (containerizer == "mesos") {
          containerInfo.set_type(ContainerInfo::MESOS);

          ContainerInfo::MesosInfo mesosInfo;

          Image mesosImage;
          mesosImage.set_type(Image::DOCKER);
          mesosImage.mutable_docker()->set_name(dockerImage.get());
          mesosInfo.mutable_image()->CopyFrom(mesosImage);

          containerInfo.mutable_mesos()->CopyFrom(mesosInfo);
        } else if (containerizer == "docker") {
          containerInfo.set_type(ContainerInfo::DOCKER);

          ContainerInfo::DockerInfo dockerInfo;
          dockerInfo.set_image(dockerImage.get());

          containerInfo.mutable_docker()->CopyFrom(dockerInfo);
        } else {
          cerr << "Unsupported containerizer: " << containerizer << endl;
          driver->abort();
          return;
        }

        task.mutable_container()->CopyFrom(containerInfo);
      }

      vector<TaskInfo> tasks;
      tasks.push_back(task);

      driver->launchTasks(offer.id(), tasks);
      cout << "task " << name << " submitted to slave "
           << offer.slave_id() << endl;

      launched = true;
    } else {
      // The original snippet is truncated here; declining the offer (as
      // the similar scheduler earlier in this section does) completes
      // the branch.
      driver->declineOffer(offer.id());
    }
  }
}
// Test that the pre-launch docker hook executes before launching a
// docker container. The test hook creates a file "foo" in the sandbox
// directory. When the docker container is launched, the sandbox
// directory is mounted into the docker container. We validate the hook
// by verifying that the "foo" file exists inside the docker container.
TEST_F(HookTest, ROOT_DOCKER_VerifySlavePreLaunchDockerHook)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  MockDocker* mockDocker =
    new MockDocker(tests::flags.docker, tests::flags.docker_socket);

  Shared<Docker> docker(mockDocker);

  slave::Flags flags = CreateSlaveFlags();

  Fetcher fetcher;

  Try<ContainerLogger*> logger =
    ContainerLogger::create(flags.container_logger);

  ASSERT_SOME(logger);

  MockDockerContainerizer containerizer(
      flags,
      &fetcher,
      Owned<ContainerLogger>(logger.get()),
      docker);

  Owned<MasterDetector> detector = master.get()->createDetector();

  Try<Owned<cluster::Slave>> slave =
    StartSlave(detector.get(), &containerizer, flags);
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL);

  Future<FrameworkID> frameworkId;
  EXPECT_CALL(sched, registered(&driver, _, _))
    .WillOnce(FutureArg<1>(&frameworkId));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(frameworkId);

  AWAIT_READY(offers);
  ASSERT_NE(0u, offers.get().size());

  const Offer& offer = offers.get()[0];

  SlaveID slaveId = offer.slave_id();

  TaskInfo task;
  task.set_name("");
  task.mutable_task_id()->set_value("1");
  task.mutable_slave_id()->CopyFrom(offer.slave_id());
  task.mutable_resources()->CopyFrom(offer.resources());

  CommandInfo command;
  command.set_value("test -f " + path::join(flags.sandbox_directory, "foo"));

  ContainerInfo containerInfo;
  containerInfo.set_type(ContainerInfo::DOCKER);

  // TODO(tnachen): Use local image to test if possible.
  ContainerInfo::DockerInfo dockerInfo;
  dockerInfo.set_image("alpine");
  containerInfo.mutable_docker()->CopyFrom(dockerInfo);

  task.mutable_command()->CopyFrom(command);
  task.mutable_container()->CopyFrom(containerInfo);

  vector<TaskInfo> tasks;
  tasks.push_back(task);

  Future<ContainerID> containerId;
  EXPECT_CALL(containerizer, launch(_, _, _, _, _, _, _, _))
    .WillOnce(DoAll(FutureArg<0>(&containerId),
                    Invoke(&containerizer,
                           &MockDockerContainerizer::_launch)));

  Future<TaskStatus> statusRunning;
  Future<TaskStatus> statusFinished;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&statusRunning))
    .WillOnce(FutureArg<1>(&statusFinished))
    .WillRepeatedly(DoDefault());

  driver.launchTasks(offers.get()[0].id(), tasks);

  AWAIT_READY_FOR(containerId, Seconds(60));

  AWAIT_READY_FOR(statusRunning, Seconds(60));
  EXPECT_EQ(TASK_RUNNING, statusRunning.get().state());

  AWAIT_READY_FOR(statusFinished, Seconds(60));
  EXPECT_EQ(TASK_FINISHED, statusFinished.get().state());

  Future<containerizer::Termination> termination =
    containerizer.wait(containerId.get());

  driver.stop();
  driver.join();

  AWAIT_READY(termination);

  Future<list<Docker::Container>> containers =
    docker.get()->ps(true, slave::DOCKER_NAME_PREFIX);

  AWAIT_READY(containers);

  // Cleanup all mesos launched containers.
  foreach (const Docker::Container& container, containers.get()) {
    AWAIT_READY_FOR(docker.get()->rm(container.id, true), Seconds(30));
  }
}
// This test verifies that the slave run task label decorator can add // and remove labels from a task during the launch sequence. A task // with two labels ("foo":"bar" and "bar":"baz") is launched and will // get modified by the slave hook to strip the "foo":"bar" pair and // add a new "baz":"qux" pair. TEST_F(HookTest, VerifySlaveRunTaskHook) { Try<Owned<cluster::Master>> master = StartMaster(); ASSERT_SOME(master); MockExecutor exec(DEFAULT_EXECUTOR_ID); TestContainerizer containerizer(&exec); Owned<MasterDetector> detector = master.get()->createDetector(); Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), &containerizer); ASSERT_SOME(slave); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL); EXPECT_CALL(sched, registered(&driver, _, _)); Future<vector<Offer>> offers; EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); // Ignore subsequent offers. driver.start(); AWAIT_READY(offers); ASSERT_EQ(1u, offers.get().size()); TaskInfo task; task.set_name(""); task.mutable_task_id()->set_value("1"); task.mutable_slave_id()->CopyFrom(offers.get()[0].slave_id()); task.mutable_resources()->CopyFrom(offers.get()[0].resources()); task.mutable_executor()->CopyFrom(DEFAULT_EXECUTOR_INFO); // Add two labels: (1) will be removed by the hook to ensure that // runTaskHook can remove labels (2) will be preserved to ensure // that the framework can add labels to the task and have those be // available by the end of the launch task sequence when hooks are // used (to protect against hooks removing labels completely). Labels* labels = task.mutable_labels(); labels->add_labels()->CopyFrom(createLabel("foo", "bar")); labels->add_labels()->CopyFrom(createLabel("bar", "baz")); EXPECT_CALL(exec, registered(_, _, _, _)); Future<TaskInfo> taskInfo; EXPECT_CALL(exec, launchTask(_, _)) .WillOnce(DoAll( FutureArg<1>(&taskInfo), SendStatusUpdateFromTask(TASK_RUNNING))); driver.launchTasks(offers.get()[0].id(), {task}); AWAIT_READY(taskInfo); // The master hook will hang an extra label off. const Labels& labels_ = taskInfo.get().labels(); ASSERT_EQ(3, labels_.labels_size()); // The slave run task hook will prepend a new "baz":"qux" label. EXPECT_EQ("baz", labels_.labels(0).key()); EXPECT_EQ("qux", labels_.labels(0).value()); // Master launch task hook will still hang off test label. EXPECT_EQ(testLabelKey, labels_.labels(1).key()); EXPECT_EQ(testLabelValue, labels_.labels(1).value()); // And lastly, we only expect the "foo":"bar" pair to be stripped by // the module. The last pair should be the original "bar":"baz" // pair set by the test. EXPECT_EQ("bar", labels_.labels(2).key()); EXPECT_EQ("baz", labels_.labels(2).value()); EXPECT_CALL(exec, shutdown(_)) .Times(AtMost(1)); driver.stop(); driver.join(); }
// Test executor environment decorator hook and remove executor hook // for slave. We expect the environment-decorator hook to create a // temporary file and the remove-executor hook to delete that file. TEST_F(HookTest, VerifySlaveLaunchExecutorHook) { master::Flags masterFlags = CreateMasterFlags(); Try<Owned<cluster::Master>> master = StartMaster(masterFlags); ASSERT_SOME(master); slave::Flags slaveFlags = CreateSlaveFlags(); MockExecutor exec(DEFAULT_EXECUTOR_ID); TestContainerizer containerizer(&exec); Owned<MasterDetector> detector = master.get()->createDetector(); Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), &containerizer); ASSERT_SOME(slave); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL); EXPECT_CALL(sched, registered(&driver, _, _)); Future<vector<Offer>> offers; EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); // Ignore subsequent offers. driver.start(); AWAIT_READY(offers); EXPECT_NE(0u, offers.get().size()); // Launch a task with the command executor. TaskInfo task; task.set_name(""); task.mutable_task_id()->set_value("1"); task.mutable_slave_id()->CopyFrom(offers.get()[0].slave_id()); task.mutable_resources()->CopyFrom(offers.get()[0].resources()); task.mutable_executor()->CopyFrom(DEFAULT_EXECUTOR_INFO); EXPECT_CALL(exec, registered(_, _, _, _)); EXPECT_CALL(exec, launchTask(_, _)) .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING)); // Executor shutdown would force the Slave to execute the // remove-executor hook. EXPECT_CALL(exec, shutdown(_)); Future<TaskStatus> status; EXPECT_CALL(sched, statusUpdate(&driver, _)) .WillOnce(FutureArg<1>(&status)) .WillRepeatedly(Return()); // On successful completion of the "slaveLaunchExecutorHook", the // test hook will send a HookExecuted message to itself. We wait // until that message is intercepted by the testing infrastructure. Future<HookExecuted> hookFuture = FUTURE_PROTOBUF(HookExecuted(), _, _); driver.launchTasks(offers.get()[0].id(), {task}); AWAIT_READY(status); driver.stop(); driver.join(); // The scheduler shutdown from above forces the executor to // shutdown. This in turn should force the Slave to execute // the remove-executor hook. // Here, we wait for the hook to finish execution. AWAIT_READY(hookFuture); }
// The purpose of this test is to ensure that when slaves are removed
// from the master, and then attempt to re-register, we deny the
// re-registration by sending a ShutdownMessage to the slave.
// Why? Because during a network partition, the master will remove a
// partitioned slave, thus sending its tasks to LOST. At this point,
// when the partition is removed, the slave will attempt to
// re-register with its running tasks. We've already notified
// frameworks that these tasks were LOST, so we have to have the
// slave shut down.
TEST_F(PartitionTest, PartitionedSlaveReregistration)
{
  Try<PID<Master> > master = StartMaster();
  ASSERT_SOME(master);

  // Allow the master to PING the slave, but drop all PONG messages
  // from the slave. Note that we don't match on the master / slave
  // PIDs because it's actually the SlaveObserver Process that sends
  // the pings.
  Future<Message> ping = FUTURE_MESSAGE(Eq("PING"), _, _);
  DROP_MESSAGES(Eq("PONG"), _, _);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);

  StandaloneMasterDetector detector(master.get());

  Try<PID<Slave> > slave = StartSlave(&exec, &detector);
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _));

  Future<vector<Offer> > offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return());

  driver.start();

  AWAIT_READY(offers);
  ASSERT_NE(0u, offers.get().size());

  // Launch a task. This is to ensure the task is killed by the slave,
  // during shutdown.
  TaskID taskId;
  taskId.set_value("1");

  TaskInfo task;
  task.set_name("");
  task.mutable_task_id()->MergeFrom(taskId);
  task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id());
  task.mutable_resources()->MergeFrom(offers.get()[0].resources());
  task.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO);
  task.mutable_executor()->mutable_command()->set_value("sleep 60");

  vector<TaskInfo> tasks;
  tasks.push_back(task);

  // Set up the expectations for launching the task.
  EXPECT_CALL(exec, registered(_, _, _, _));
  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING));

  Future<TaskStatus> runningStatus;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&runningStatus));

  Future<Nothing> statusUpdateAck = FUTURE_DISPATCH(
      slave.get(), &Slave::_statusUpdateAcknowledgement);

  driver.launchTasks(offers.get()[0].id(), tasks);

  AWAIT_READY(runningStatus);
  EXPECT_EQ(TASK_RUNNING, runningStatus.get().state());

  // Wait for the slave to have handled the acknowledgment prior
  // to pausing the clock.
  AWAIT_READY(statusUpdateAck);

  // Drop the first shutdown message from the master (simulated
  // partition), allow the second shutdown message to pass when
  // the slave re-registers.
  Future<ShutdownMessage> shutdownMessage =
    DROP_PROTOBUF(ShutdownMessage(), _, slave.get());

  Future<TaskStatus> lostStatus;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&lostStatus));

  Future<Nothing> slaveLost;
  EXPECT_CALL(sched, slaveLost(&driver, _))
    .WillOnce(FutureSatisfy(&slaveLost));

  Clock::pause();

  // Now, induce a partition of the slave by having the master
  // timeout the slave.
uint32_t pings = 0; while (true) { AWAIT_READY(ping); pings++; if (pings == master::MAX_SLAVE_PING_TIMEOUTS) { break; } ping = FUTURE_MESSAGE(Eq("PING"), _, _); Clock::advance(master::SLAVE_PING_TIMEOUT); Clock::settle(); } Clock::advance(master::SLAVE_PING_TIMEOUT); Clock::settle(); // The master will have notified the framework of the lost task. AWAIT_READY(lostStatus); EXPECT_EQ(TASK_LOST, lostStatus.get().state()); // Wait for the master to attempt to shut down the slave. AWAIT_READY(shutdownMessage); // The master will notify the framework that the slave was lost. AWAIT_READY(slaveLost); Clock::resume(); // We now complete the partition on the slave side as well. This // is done by simulating a master loss event which would normally // occur during a network partition. detector.appoint(None()); Future<Nothing> shutdown; EXPECT_CALL(exec, shutdown(_)) .WillOnce(FutureSatisfy(&shutdown)); shutdownMessage = FUTURE_PROTOBUF(ShutdownMessage(), _, slave.get()); // Have the slave re-register with the master. detector.appoint(master.get()); // Upon re-registration, the master will shutdown the slave. // The slave will then shut down the executor. AWAIT_READY(shutdownMessage); AWAIT_READY(shutdown); driver.stop(); driver.join(); Shutdown(); }
TEST_F(SlaveTest, ShutdownUnregisteredExecutor)
{
  Try<PID<Master> > master = StartMaster();
  ASSERT_SOME(master);

  // Need flags for 'executor_registration_timeout'.
  slave::Flags flags = CreateSlaveFlags();
  // Set the isolation flag so we know a MesosContainerizer will be created.
  flags.isolation = "posix/cpu,posix/mem";

  Try<MesosContainerizer*> containerizer =
    MesosContainerizer::create(flags, false);
  CHECK_SOME(containerizer);

  Try<PID<Slave> > slave = StartSlave(containerizer.get());
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _))
    .Times(1);

  Future<vector<Offer> > offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  EXPECT_NE(0u, offers.get().size());

  // Launch a task with the command executor.
  TaskInfo task;
  task.set_name("");
  task.mutable_task_id()->set_value("1");
  task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id());
  task.mutable_resources()->MergeFrom(offers.get()[0].resources());

  CommandInfo command;
  command.set_value("sleep 10");

  task.mutable_command()->MergeFrom(command);

  vector<TaskInfo> tasks;
  tasks.push_back(task);

  // Drop the registration message from the executor to the slave.
  Future<process::Message> registerExecutor =
    DROP_MESSAGE(Eq(RegisterExecutorMessage().GetTypeName()), _, _);

  driver.launchTasks(offers.get()[0].id(), tasks);

  AWAIT_READY(registerExecutor);

  Clock::pause();

  Future<TaskStatus> status;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&status));

  // Ensure that the slave times out and kills the executor.
  Future<Nothing> destroyExecutor =
    FUTURE_DISPATCH(_, &MesosContainerizerProcess::destroy);

  Clock::advance(flags.executor_registration_timeout);

  AWAIT_READY(destroyExecutor);

  Clock::settle(); // Wait for Containerizer::destroy to complete.

  // Now advance time until the reaper reaps the executor.
  while (status.isPending()) {
    Clock::advance(Seconds(1));
    Clock::settle();
  }

  AWAIT_READY(status);
  ASSERT_EQ(TASK_FAILED, status.get().state());

  Clock::resume();

  driver.stop();
  driver.join();

  Shutdown(); // Must shutdown before 'containerizer' gets deallocated.
}
// This test has been temporarily disabled due to MESOS-1257. TEST_F(ExternalContainerizerTest, DISABLED_Launch) { Try<PID<Master> > master = this->StartMaster(); ASSERT_SOME(master); Flags testFlags; slave::Flags flags = this->CreateSlaveFlags(); flags.isolation = "external"; flags.containerizer_path = testFlags.build_dir + "/src/examples/python/test-containerizer"; MockExternalContainerizer containerizer(flags); Try<PID<Slave> > slave = this->StartSlave(&containerizer, flags); ASSERT_SOME(slave); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL); Future<FrameworkID> frameworkId; EXPECT_CALL(sched, registered(&driver, _, _)) .WillOnce(FutureArg<1>(&frameworkId)); Future<vector<Offer> > offers; EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); // Ignore subsequent offers. driver.start(); AWAIT_READY(frameworkId); AWAIT_READY(offers); EXPECT_NE(0u, offers.get().size()); TaskInfo task; task.set_name("isolator_test"); task.mutable_task_id()->set_value("1"); task.mutable_slave_id()->CopyFrom(offers.get()[0].slave_id()); task.mutable_resources()->CopyFrom(offers.get()[0].resources()); Resources resources(offers.get()[0].resources()); Option<Bytes> mem = resources.mem(); ASSERT_SOME(mem); Option<double> cpus = resources.cpus(); ASSERT_SOME(cpus); const std::string& file = path::join(flags.work_dir, "ready"); // This task induces user/system load in a child process by // running top in a child process for ten seconds. task.mutable_command()->set_value( #ifdef __APPLE__ // Use logging mode with 30,000 samples with no interval. "top -l 30000 -s 0 2>&1 > /dev/null & " #else // Batch mode, with 30,000 samples with no interval. "top -b -d 0 -n 30000 2>&1 > /dev/null & " #endif "touch " + file + "; " // Signals that the top command is running. "sleep 60"); Future<TaskStatus> status; EXPECT_CALL(sched, statusUpdate(&driver, _)) .WillOnce(FutureArg<1>(&status)) .WillRepeatedly(Return()); // Ignore rest for now. Future<ContainerID> containerId; EXPECT_CALL(containerizer, launch(_, _, _, _, _, _, _, _)) .WillOnce(DoAll(FutureArg<0>(&containerId), Invoke(&containerizer, &MockExternalContainerizer::_launch))); driver.launchTasks(offers.get()[0].id(), {task}); AWAIT_READY(containerId); AWAIT_READY(status); EXPECT_EQ(TASK_RUNNING, status.get().state()); // Wait for the task to begin inducing cpu time. while (!os::exists(file)); ExecutorID executorId; executorId.set_value(task.task_id().value()); // We'll wait up to 10 seconds for the child process to induce // 1/8 of a second of user and system cpu time in total. // TODO(bmahler): Also induce rss memory consumption, by re-using // the balloon framework. ResourceStatistics statistics; Duration waited = Duration::zero(); do { Future<ResourceStatistics> usage = containerizer.usage(containerId.get()); AWAIT_READY(usage); statistics = usage.get(); // If we meet our usage expectations, we're done! // NOTE: We are currently getting dummy-data from the test- // containerizer python script matching these expectations. // TODO(tillt): Consider working with real data. 
if (statistics.cpus_user_time_secs() >= 0.120 && statistics.cpus_system_time_secs() >= 0.05 && statistics.mem_rss_bytes() >= 1024u) { break; } os::sleep(Milliseconds(100)); waited += Milliseconds(100); } while (waited < Seconds(10)); EXPECT_GE(statistics.cpus_user_time_secs(), 0.120); EXPECT_GE(statistics.cpus_system_time_secs(), 0.05); EXPECT_EQ(statistics.cpus_limit(), cpus.get()); EXPECT_GE(statistics.mem_rss_bytes(), 1024u); EXPECT_EQ(statistics.mem_limit_bytes(), mem.get().bytes()); EXPECT_CALL(sched, statusUpdate(&driver, _)) .WillOnce(FutureArg<1>(&status)); driver.killTask(task.task_id()); AWAIT_READY(status); EXPECT_EQ(TASK_KILLED, status.get().state()); driver.stop(); driver.join(); this->Shutdown(); }
// This test ensures we don't break the API when it comes to JSON // representation of tasks. Also, we want to ensure that tasks are // modeled the same way when using 'Task' vs. 'TaskInfo'. TEST(HTTP, ModelTask) { TaskID taskId; taskId.set_value("t"); SlaveID slaveId; slaveId.set_value("s"); ExecutorID executorId; executorId.set_value("t"); FrameworkID frameworkId; frameworkId.set_value("f"); TaskState state = TASK_RUNNING; vector<TaskStatus> statuses; TaskStatus status; status.mutable_task_id()->CopyFrom(taskId); status.set_state(state); status.mutable_slave_id()->CopyFrom(slaveId); status.mutable_executor_id()->CopyFrom(executorId); status.set_timestamp(0.0); statuses.push_back(status); TaskInfo task; task.set_name("task"); task.mutable_task_id()->CopyFrom(taskId); task.mutable_slave_id()->CopyFrom(slaveId); task.mutable_command()->set_value("echo hello"); Task task_ = protobuf::createTask(task, state, frameworkId); task_.add_statuses()->CopyFrom(statuses[0]); JSON::Value object = model(task, frameworkId, state, statuses); JSON::Value object_ = model(task_); Try<JSON::Value> expected = JSON::parse( "{" " \"executor_id\":\"\"," " \"framework_id\":\"f\"," " \"id\":\"t\"," " \"name\":\"task\"," " \"resources\":" " {" " \"cpus\":0," " \"disk\":0," " \"mem\":0" " }," " \"slave_id\":\"s\"," " \"state\":\"TASK_RUNNING\"," " \"statuses\":" " [" " {" " \"state\":\"TASK_RUNNING\"," " \"timestamp\":0" " }" " ]" "}"); ASSERT_SOME(expected); EXPECT_EQ(expected.get(), object); EXPECT_EQ(expected.get(), object_); // Ensure both are modeled the same. EXPECT_EQ(object, object_); }
// The purpose of this test is to ensure that when slaves are removed
// from the master, and then attempt to send exited executor messages,
// we send a ShutdownMessage to the slave. Why? Because during a
// network partition, the master will remove a partitioned slave, thus
// sending its tasks to LOST. At this point, when the partition is
// removed, the slave may attempt to send exited executor messages if
// it was unaware that the master removed it. We've already
// notified frameworks that the tasks under the executors were LOST,
// so we have to have the slave shut down.
TEST_F(PartitionTest, PartitionedSlaveExitedExecutor)
{
  Try<PID<Master> > master = StartMaster();
  ASSERT_SOME(master);

  // Allow the master to PING the slave, but drop all PONG messages
  // from the slave. Note that we don't match on the master / slave
  // PIDs because it's actually the SlaveObserver Process that sends
  // the pings.
  Future<Message> ping = FUTURE_MESSAGE(Eq("PING"), _, _);
  DROP_MESSAGES(Eq("PONG"), _, _);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);
  TestContainerizer containerizer(&exec);

  Try<PID<Slave> > slave = StartSlave(&containerizer);
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL);

  Future<FrameworkID> frameworkId;
  EXPECT_CALL(sched, registered(&driver, _, _))
    .WillOnce(FutureArg<1>(&frameworkId));

  Future<vector<Offer> > offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return());

  driver.start();

  AWAIT_READY(frameworkId);

  AWAIT_READY(offers);
  ASSERT_NE(0u, offers.get().size());

  // Launch a task. This allows us to have the slave send an
  // ExitedExecutorMessage.
  TaskID taskId;
  taskId.set_value("1");

  TaskInfo task;
  task.set_name("");
  task.mutable_task_id()->MergeFrom(taskId);
  task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id());
  task.mutable_resources()->MergeFrom(offers.get()[0].resources());
  task.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO);
  task.mutable_executor()->mutable_command()->set_value("sleep 60");

  vector<TaskInfo> tasks;
  tasks.push_back(task);

  // Set up the expectations for launching the task.
  EXPECT_CALL(exec, registered(_, _, _, _));
  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING));

  // Drop all the status updates from the slave, so that we can
  // ensure the ExitedExecutorMessage is what triggers the slave
  // shutdown.
  DROP_PROTOBUFS(StatusUpdateMessage(), _, master.get());

  driver.launchTasks(offers.get()[0].id(), tasks);

  // Drop the first shutdown message from the master (simulated
  // partition) and allow the second shutdown message to pass when
  // triggered by the ExitedExecutorMessage.
  Future<ShutdownMessage> shutdownMessage =
    DROP_PROTOBUF(ShutdownMessage(), _, slave.get());

  Future<TaskStatus> lostStatus;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&lostStatus));

  Future<Nothing> slaveLost;
  EXPECT_CALL(sched, slaveLost(&driver, _))
    .WillOnce(FutureSatisfy(&slaveLost));

  Clock::pause();

  // Now, induce a partition of the slave by having the master
  // timeout the slave.
  uint32_t pings = 0;
  while (true) {
    AWAIT_READY(ping);
    pings++;
    if (pings == master::MAX_SLAVE_PING_TIMEOUTS) {
      break;
    }
    ping = FUTURE_MESSAGE(Eq("PING"), _, _);
    Clock::advance(master::SLAVE_PING_TIMEOUT);
    Clock::settle();
  }

  Clock::advance(master::SLAVE_PING_TIMEOUT);
  Clock::settle();

  // The master will have notified the framework of the lost task.
AWAIT_READY(lostStatus); EXPECT_EQ(TASK_LOST, lostStatus.get().state()); // Wait for the master to attempt to shut down the slave. AWAIT_READY(shutdownMessage); // The master will notify the framework that the slave was lost. AWAIT_READY(slaveLost); shutdownMessage = FUTURE_PROTOBUF(ShutdownMessage(), _, slave.get()); // Induce an ExitedExecutorMessage from the slave. containerizer.destroy( frameworkId.get(), DEFAULT_EXECUTOR_INFO.executor_id()); // Upon receiving the message, the master will shutdown the slave. AWAIT_READY(shutdownMessage); Clock::resume(); driver.stop(); driver.join(); Shutdown(); }