// TODO(bmahler): Expose the executor name / source.
JSON::Object model(
    const TaskInfo& task,
    const FrameworkID& frameworkId,
    const TaskState& state,
    const vector<TaskStatus>& statuses)
{
  JSON::Object object;
  object.values["id"] = task.task_id().value();
  object.values["name"] = task.name();
  object.values["framework_id"] = frameworkId.value();

  if (task.has_executor()) {
    object.values["executor_id"] = task.executor().executor_id().value();
  } else {
    object.values["executor_id"] = "";
  }

  object.values["slave_id"] = task.slave_id().value();
  object.values["state"] = TaskState_Name(state);
  object.values["resources"] = model(task.resources());

  JSON::Array array;
  foreach (const TaskStatus& status, statuses) {
    array.values.push_back(model(status));
  }
  object.values["statuses"] = std::move(array);

  return object;
}
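// A minimal usage sketch (assumed context, not from the source above):
// this model() overload would typically be invoked while rendering an
// HTTP endpoint response; `statuses` is the task's recorded status
// history, and stout's stringify() serializes the JSON::Object.
std::string renderTask(
    const TaskInfo& task,
    const FrameworkID& frameworkId,
    const TaskState& state,
    const vector<TaskStatus>& statuses)
{
  JSON::Object object = model(task, frameworkId, state, statuses);
  return stringify(object); // Serialize to a JSON string.
}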
Task createTask(
    const TaskInfo& task,
    const TaskState& state,
    const FrameworkID& frameworkId)
{
  Task t;
  t.mutable_framework_id()->MergeFrom(frameworkId);
  t.set_state(state);
  t.set_name(task.name());
  t.mutable_task_id()->MergeFrom(task.task_id());
  t.mutable_slave_id()->MergeFrom(task.slave_id());
  t.mutable_resources()->MergeFrom(task.resources());

  if (task.has_executor()) {
    t.mutable_executor_id()->CopyFrom(task.executor().executor_id());
  }

  t.mutable_labels()->MergeFrom(task.labels());

  if (task.has_discovery()) {
    t.mutable_discovery()->MergeFrom(task.discovery());
  }

  return t;
}
void launch(const TaskInfo& task)
{
  cout << "Starting task " << task.task_id().value() << endl;

  tasks[task.task_id()] = task;

  std::thread thread([=]() {
    os::sleep(Seconds(random() % 10));

    process::dispatch(
        self(), &Self::update, task, TaskState::TASK_FINISHED);
  });

  thread.detach();

  update(task, TaskState::TASK_RUNNING);
}
void update(const TaskInfo& task, const TaskState& state)
{
  UUID uuid = UUID::random();

  TaskStatus status;
  status.mutable_task_id()->CopyFrom(task.task_id());
  status.mutable_executor_id()->CopyFrom(executorId);
  status.set_state(state);
  status.set_source(TaskStatus::SOURCE_EXECUTOR);
  status.set_timestamp(process::Clock::now().secs());
  status.set_uuid(uuid.toBytes());

  Call call;
  call.mutable_framework_id()->CopyFrom(frameworkId);
  call.mutable_executor_id()->CopyFrom(executorId);
  call.set_type(Call::UPDATE);
  call.mutable_update()->mutable_status()->CopyFrom(status);

  // Capture the status update.
  updates[uuid] = call.update();

  mesos->send(call);
}
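// A hedged sketch (assumed, not shown in the source above) of the
// matching acknowledgement path: updates are captured so they can be
// resent until acknowledged; once the agent acknowledges an update by
// UUID, the captured copy can be dropped.
void acknowledged(const UUID& uuid)
{
  updates.erase(uuid);
}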
// This test ensures that the command executor does not send
// TASK_KILLING to frameworks that do not support the capability.
TEST_P_TEMP_DISABLED_ON_WINDOWS(CommandExecutorTest, NoTaskKillingCapability)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  Owned<MasterDetector> detector = master.get()->createDetector();

  slave::Flags flags = CreateSlaveFlags();
  flags.http_command_executor = GetParam();

  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), flags);
  ASSERT_SOME(slave);

  // Start the framework without the task killing capability.
  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  EXPECT_EQ(1u, offers->size());

  // Launch a task with the command executor.
  TaskInfo task = createTask(
      offers->front().slave_id(),
      offers->front().resources(),
      "sleep 1000");

  Future<TaskStatus> statusRunning;
  EXPECT_CALL(sched, statusUpdate(_, _))
    .WillOnce(FutureArg<1>(&statusRunning));

  driver.launchTasks(offers->front().id(), {task});

  AWAIT_READY(statusRunning);
  EXPECT_EQ(TASK_RUNNING, statusRunning->state());

  // There should only be a TASK_KILLED update.
  Future<TaskStatus> statusKilled;
  EXPECT_CALL(sched, statusUpdate(_, _))
    .WillOnce(FutureArg<1>(&statusKilled));

  driver.killTask(task.task_id());

  AWAIT_READY(statusKilled);
  EXPECT_EQ(TASK_KILLED, statusKilled->state());

  driver.stop();
  driver.join();
}
TEST_F(ResourceOffersTest, TaskUsesMoreResourcesThanOffered)
{
  Try<PID<Master> > master = StartMaster();
  ASSERT_SOME(master);

  Try<PID<Slave> > slave = StartSlave();
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(&sched, DEFAULT_FRAMEWORK_INFO, master.get());

  EXPECT_CALL(sched, registered(&driver, _, _))
    .Times(1);

  Future<vector<Offer> > offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  EXPECT_NE(0u, offers.get().size());

  TaskInfo task;
  task.set_name("");
  task.mutable_task_id()->set_value("1");
  task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id());
  task.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO);

  Resource* cpus = task.add_resources();
  cpus->set_name("cpus");
  cpus->set_type(Value::SCALAR);
  cpus->mutable_scalar()->set_value(2.01);

  vector<TaskInfo> tasks;
  tasks.push_back(task);

  Future<TaskStatus> status;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&status));

  driver.launchTasks(offers.get()[0].id(), tasks);

  AWAIT_READY(status);
  EXPECT_EQ(task.task_id(), status.get().task_id());
  EXPECT_EQ(TASK_LOST, status.get().state());
  EXPECT_TRUE(status.get().has_message());
  EXPECT_EQ("Task uses more resources than offered", status.get().message());

  driver.stop();
  driver.join();

  Shutdown();
}
virtual void launchTask(ExecutorDriver* driver, const TaskInfo& task)
{
  cout << "Starting task " << task.task_id().value() << endl;

  TaskStatus status;
  status.mutable_task_id()->MergeFrom(task.task_id());
  status.set_state(TASK_RUNNING);

  driver->sendStatusUpdate(status);

  sleep(1);

  cout << "Finishing task " << task.task_id().value() << endl;

  status.mutable_task_id()->MergeFrom(task.task_id());
  status.set_state(TASK_FINISHED);

  driver->sendStatusUpdate(status);
}
virtual void launchTask(ExecutorDriver* driver, const TaskInfo& task)
{
  cout << "Starting task " << task.task_id().value() << endl;

  TaskStatus status;
  status.mutable_task_id()->MergeFrom(task.task_id());
  status.set_state(TASK_RUNNING);

  driver->sendStatusUpdate(status);

  // This is where one would perform the requested task.

  cout << "Finishing task " << task.task_id().value() << endl;

  status.mutable_task_id()->MergeFrom(task.task_id());
  status.set_state(TASK_FINISHED);

  driver->sendStatusUpdate(status);
}
void run(ExecutorDriver* driver, const TaskInfo& task)
{
  os::sleep(Seconds(random() % 10));

  TaskStatus status;
  status.mutable_task_id()->MergeFrom(task.task_id());
  status.set_state(TASK_FINISHED);

  driver->sendStatusUpdate(status);
}
void operator()()
{
  TaskStatus status;
  status.mutable_task_id()->MergeFrom(task.task_id());

  // Currently, just call the K3 executable with the generated
  // command line from task.data().
  try {
    FILE* pipe = popen(k3_cmd.c_str(), "r");
    if (!pipe) {
      status.set_state(TASK_FAILED);
      driver->sendStatusUpdate(status);
      cout << "Failed to open subprocess" << endl;
      return;
    }

    // Read the subprocess output line by line. Note that checking
    // fgets() directly avoids the classic feof()-before-read bug.
    char buffer[256];
    while (fgets(buffer, sizeof(buffer), pipe) != NULL) {
      std::string s = std::string(buffer);
      if (this->isMaster) {
        driver->sendFrameworkMessage(s);
      }
      cout << s << endl;
    }

    int k3 = pclose(pipe);
    if (k3 == 0) {
      status.set_state(TASK_FINISHED);
      cout << "Task " << task.task_id().value() << " Completed!" << endl;
      driver->sendStatusUpdate(status);
    } else {
      status.set_state(TASK_FAILED);
      cout << "K3 Task " << task.task_id().value()
           << " returned error code: " << k3 << endl;
      driver->sendStatusUpdate(status);
    }
  } catch (...) {
    status.set_state(TASK_FAILED);
    driver->sendStatusUpdate(status);
  }
  //------------- END OF TASK -------------------
}
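// Caveat worth noting (a sketch, not from the source above): pclose()
// returns the raw wait status from waitpid(), not the child's exit code,
// so printing the value as an "error code" can be misleading. A raw
// status of 0 does mean a clean exit, but non-zero values should be
// decoded first:
#include <cstdio>
#include <sys/wait.h>

int exitCodeOf(FILE* pipe)
{
  int rawStatus = pclose(pipe);
  // WEXITSTATUS() is only meaningful if the child exited normally.
  return WIFEXITED(rawStatus) ? WEXITSTATUS(rawStatus) : -1;
}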
void launchTask(ExecutorDriver* driver, const TaskInfo& task) override
{
  cout << "Starting task " << task.task_id().value() << endl;

  TaskStatus status;
  status.mutable_task_id()->MergeFrom(task.task_id());

  StartInfo* info = new StartInfo(driver, task);

  pthread_t pthread;
  int res = pthread_create(&pthread, NULL, &RunProcess, info);
  if (res != 0) {
    status.set_state(TASK_FAILED);
    delete info;
  } else {
    pthread_detach(pthread);
    status.set_state(TASK_RUNNING);
  }

  driver->sendStatusUpdate(status);
}
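// Hypothetical sketch of the RunProcess entry point referenced above (its
// real definition is not shown here). pthread_create() requires the
// signature void* (*)(void*); the thread takes ownership of the
// heap-allocated StartInfo, runs the task, reports a terminal status, and
// cleans up. The StartInfo members `driver` and `task` are assumptions.
static void* RunProcess(void* arg)
{
  StartInfo* info = static_cast<StartInfo*>(arg);

  // ... run the process described by info->task ...

  TaskStatus status;
  status.mutable_task_id()->MergeFrom(info->task.task_id());
  status.set_state(TASK_FINISHED); // Or TASK_FAILED on error.
  info->driver->sendStatusUpdate(status);

  delete info;
  return NULL;
}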
virtual void launchTask(ExecutorDriver* driver, const TaskInfo& task)
{
  cout << "Starting task " << task.task_id().value() << endl;

  lambda::function<void(void)>* thunk =
    new lambda::function<void(void)>(lambda::bind(&run, driver, task));

  pthread_t pthread;
  if (pthread_create(&pthread, NULL, &start, thunk) != 0) {
    TaskStatus status;
    status.mutable_task_id()->MergeFrom(task.task_id());
    status.set_state(TASK_FAILED);

    driver->sendStatusUpdate(status);
  } else {
    pthread_detach(pthread);

    TaskStatus status;
    status.mutable_task_id()->MergeFrom(task.task_id());
    status.set_state(TASK_RUNNING);

    driver->sendStatusUpdate(status);
  }
}
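// Sketch (assumed, definition not shown above) of the `start` trampoline
// passed to pthread_create(): it invokes the heap-allocated thunk built
// in launchTask() and then releases it.
static void* start(void* arg)
{
  lambda::function<void(void)>* thunk =
    reinterpret_cast<lambda::function<void(void)>*>(arg);

  (*thunk)();

  delete thunk;
  return NULL;
}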
inline Task createTask(
    const TaskInfo& task,
    const TaskState& state,
    const ExecutorID& executorId,
    const FrameworkID& frameworkId)
{
  Task t;
  t.mutable_framework_id()->MergeFrom(frameworkId);
  t.set_state(state);
  t.set_name(task.name());
  t.mutable_task_id()->MergeFrom(task.task_id());
  t.mutable_slave_id()->MergeFrom(task.slave_id());
  t.mutable_resources()->MergeFrom(task.resources());

  if (!task.has_command()) {
    t.mutable_executor_id()->MergeFrom(executorId);
  }

  return t;
}
Task createTask(
    const TaskInfo& task,
    const TaskState& state,
    const FrameworkID& frameworkId)
{
  Task t;
  t.mutable_framework_id()->CopyFrom(frameworkId);
  t.set_state(state);
  t.set_name(task.name());
  t.mutable_task_id()->CopyFrom(task.task_id());
  t.mutable_slave_id()->CopyFrom(task.slave_id());
  t.mutable_resources()->CopyFrom(task.resources());

  if (task.has_executor()) {
    t.mutable_executor_id()->CopyFrom(task.executor().executor_id());
  }

  if (task.has_labels()) {
    t.mutable_labels()->CopyFrom(task.labels());
  }

  if (task.has_discovery()) {
    t.mutable_discovery()->CopyFrom(task.discovery());
  }

  if (task.has_container()) {
    t.mutable_container()->CopyFrom(task.container());
  }

  // Copy `user` if set.
  if (task.has_command() && task.command().has_user()) {
    t.set_user(task.command().user());
  } else if (task.has_executor() && task.executor().command().has_user()) {
    t.set_user(task.executor().command().user());
  }

  return t;
}
// This test ensures that a killTask() can happen between runTask()
// and _runTask(), and then gets handled properly. This means that
// the task never gets started, but also does not get lost. The end
// result is status TASK_KILLED. Essentially, killing the task is
// realized while preparing to start it. See MESOS-947.
// Temporarily disabled due to MESOS-1945.
TEST_F(SlaveTest, DISABLED_KillTaskBetweenRunTaskParts)
{
  Try<PID<Master> > master = StartMaster();
  ASSERT_SOME(master);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);
  TestContainerizer containerizer(&exec);

  StandaloneMasterDetector detector(master.get());

  MockSlave slave(CreateSlaveFlags(), &detector, &containerizer);
  process::spawn(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _))
    .Times(1);

  Future<vector<Offer> > offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  EXPECT_NE(0u, offers.get().size());

  TaskInfo task;
  task.set_name("");
  task.mutable_task_id()->set_value("1");
  task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id());
  task.mutable_resources()->MergeFrom(offers.get()[0].resources());
  task.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO);

  vector<TaskInfo> tasks;
  tasks.push_back(task);

  EXPECT_CALL(exec, registered(_, _, _, _))
    .Times(0);

  EXPECT_CALL(exec, launchTask(_, _))
    .Times(0);

  EXPECT_CALL(exec, shutdown(_))
    .Times(0);

  Future<TaskStatus> status;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillRepeatedly(FutureArg<1>(&status));

  EXPECT_CALL(slave, runTask(_, _, _, _, _))
    .WillOnce(Invoke(&slave, &MockSlave::unmocked_runTask));

  // Saved arguments from Slave::_runTask().
  Future<bool> future;
  FrameworkInfo frameworkInfo;
  FrameworkID frameworkId;

  // Skip what Slave::_runTask() normally does and save its arguments
  // for later; tie reaching the critical moment, when the task can be
  // killed, to a future.
  Future<Nothing> _runTask;
  EXPECT_CALL(slave, _runTask(_, _, _, _, _))
    .WillOnce(DoAll(FutureSatisfy(&_runTask),
                    SaveArg<0>(&future),
                    SaveArg<1>(&frameworkInfo),
                    SaveArg<2>(&frameworkId)));

  driver.launchTasks(offers.get()[0].id(), tasks);

  AWAIT_READY(_runTask);

  Future<Nothing> killTask;
  EXPECT_CALL(slave, killTask(_, _, _))
    .WillOnce(DoAll(Invoke(&slave, &MockSlave::unmocked_killTask),
                    FutureSatisfy(&killTask)));

  driver.killTask(task.task_id());

  // Since this is the only task ever for this framework, the
  // framework should get removed in Slave::_runTask().
  // Thus we can observe that this happens before Shutdown().
  Future<Nothing> removeFramework;
  EXPECT_CALL(slave, removeFramework(_))
    .WillOnce(DoAll(Invoke(&slave, &MockSlave::unmocked_removeFramework),
                    FutureSatisfy(&removeFramework)));

  AWAIT_READY(killTask);

  slave.unmocked__runTask(
      future, frameworkInfo, frameworkId, master.get(), task);

  AWAIT_READY(removeFramework);

  AWAIT_READY(status);
  EXPECT_EQ(TASK_KILLED, status.get().state());

  driver.stop();
  driver.join();

  process::terminate(slave);
  process::wait(slave);

  Shutdown(); // Must shutdown before 'containerizer' gets deallocated.
}
// This test confirms that if a task exceeds configured resource
// limits it is forcibly terminated.
TEST_F(PosixRLimitsIsolatorTest, TaskExceedingLimit)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  slave::Flags flags = CreateSlaveFlags();
  flags.isolation = "posix/rlimits";

  Owned<MasterDetector> detector = master.get()->createDetector();

  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), flags);
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(_, _, _));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(_, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  ASSERT_FALSE(offers->empty());

  // The task attempts to use an infinite amount of CPU time.
  TaskInfo task = createTask(
      offers.get()[0].slave_id(),
      offers.get()[0].resources(),
      "while true; do true; done");

  ContainerInfo* container = task.mutable_container();
  container->set_type(ContainerInfo::MESOS);

  // Limit the process to at most 1 second of CPU time.
  RLimitInfo rlimitInfo;
  RLimitInfo::RLimit* cpuLimit = rlimitInfo.add_rlimits();
  cpuLimit->set_type(RLimitInfo::RLimit::RLMT_CPU);
  cpuLimit->set_soft(1);
  cpuLimit->set_hard(1);

  container->mutable_rlimit_info()->CopyFrom(rlimitInfo);

  Future<TaskStatus> statusRunning;
  Future<TaskStatus> statusFailed;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&statusRunning))
    .WillOnce(FutureArg<1>(&statusFailed));

  driver.launchTasks(offers.get()[0].id(), {task});

  AWAIT_READY(statusRunning);
  EXPECT_EQ(task.task_id(), statusRunning->task_id());
  EXPECT_EQ(TASK_RUNNING, statusRunning->state());

  AWAIT_READY(statusFailed);
  EXPECT_EQ(task.task_id(), statusFailed->task_id());
  EXPECT_EQ(TASK_FAILED, statusFailed->state());

  driver.stop();
  driver.join();
}
// This test has been temporarily disabled due to MESOS-1257.
TEST_F(ExternalContainerizerTest, DISABLED_Launch)
{
  Try<PID<Master> > master = this->StartMaster();
  ASSERT_SOME(master);

  Flags testFlags;

  slave::Flags flags = this->CreateSlaveFlags();
  flags.isolation = "external";
  flags.containerizer_path =
    testFlags.build_dir + "/src/examples/python/test-containerizer";

  MockExternalContainerizer containerizer(flags);

  Try<PID<Slave> > slave = this->StartSlave(&containerizer, flags);
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL);

  Future<FrameworkID> frameworkId;
  EXPECT_CALL(sched, registered(&driver, _, _))
    .WillOnce(FutureArg<1>(&frameworkId));

  Future<vector<Offer> > offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(frameworkId);

  AWAIT_READY(offers);
  EXPECT_NE(0u, offers.get().size());

  TaskInfo task;
  task.set_name("isolator_test");
  task.mutable_task_id()->set_value("1");
  task.mutable_slave_id()->CopyFrom(offers.get()[0].slave_id());
  task.mutable_resources()->CopyFrom(offers.get()[0].resources());

  Resources resources(offers.get()[0].resources());
  Option<Bytes> mem = resources.mem();
  ASSERT_SOME(mem);
  Option<double> cpus = resources.cpus();
  ASSERT_SOME(cpus);

  const std::string& file = path::join(flags.work_dir, "ready");

  // This task induces user/system load in a child process by
  // running top in a child process for ten seconds.
  task.mutable_command()->set_value(
#ifdef __APPLE__
      // Use logging mode with 30,000 samples with no interval.
      "top -l 30000 -s 0 2>&1 > /dev/null & "
#else
      // Batch mode, with 30,000 samples with no interval.
      "top -b -d 0 -n 30000 2>&1 > /dev/null & "
#endif
      "touch " + file + "; " // Signals that the top command is running.
      "sleep 60");

  Future<TaskStatus> status;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&status))
    .WillRepeatedly(Return()); // Ignore rest for now.

  Future<ContainerID> containerId;
  EXPECT_CALL(containerizer, launch(_, _, _, _, _, _, _, _))
    .WillOnce(DoAll(FutureArg<0>(&containerId),
                    Invoke(&containerizer,
                           &MockExternalContainerizer::_launch)));

  driver.launchTasks(offers.get()[0].id(), {task});

  AWAIT_READY(containerId);

  AWAIT_READY(status);
  EXPECT_EQ(TASK_RUNNING, status.get().state());

  // Wait for the task to begin inducing cpu time.
  while (!os::exists(file));

  ExecutorID executorId;
  executorId.set_value(task.task_id().value());

  // We'll wait up to 10 seconds for the child process to induce
  // 1/8 of a second of user and system cpu time in total.
  // TODO(bmahler): Also induce rss memory consumption, by re-using
  // the balloon framework.
  ResourceStatistics statistics;
  Duration waited = Duration::zero();
  do {
    Future<ResourceStatistics> usage = containerizer.usage(containerId.get());
    AWAIT_READY(usage);

    statistics = usage.get();

    // If we meet our usage expectations, we're done!
    // NOTE: We are currently getting dummy data from the
    // test-containerizer python script matching these expectations.
    // TODO(tillt): Consider working with real data.
    if (statistics.cpus_user_time_secs() >= 0.120 &&
        statistics.cpus_system_time_secs() >= 0.05 &&
        statistics.mem_rss_bytes() >= 1024u) {
      break;
    }

    os::sleep(Milliseconds(100));
    waited += Milliseconds(100);
  } while (waited < Seconds(10));

  EXPECT_GE(statistics.cpus_user_time_secs(), 0.120);
  EXPECT_GE(statistics.cpus_system_time_secs(), 0.05);
  EXPECT_EQ(statistics.cpus_limit(), cpus.get());
  EXPECT_GE(statistics.mem_rss_bytes(), 1024u);
  EXPECT_EQ(statistics.mem_limit_bytes(), mem.get().bytes());

  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&status));

  driver.killTask(task.task_id());

  AWAIT_READY(status);
  EXPECT_EQ(TASK_KILLED, status.get().state());

  driver.stop();
  driver.join();

  this->Shutdown();
}
// This test launches a container which has an image and joins the host
// network, and then verifies that the container can access the Internet.
TEST_F(CniIsolatorTest, ROOT_INTERNET_CURL_LaunchContainerInHostNetwork)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  slave::Flags flags = CreateSlaveFlags();
  flags.isolation = "docker/runtime,filesystem/linux";
  flags.image_providers = "docker";
  flags.docker_store_dir = path::join(sandbox.get(), "store");

  Owned<MasterDetector> detector = master.get()->createDetector();

  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), flags);
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  ASSERT_EQ(1u, offers->size());

  const Offer& offer = offers.get()[0];

  // NOTE: We use a non-shell command here because 'sh' might not be
  // in the PATH. 'alpine' does not specify env PATH in the image.
  CommandInfo command;
  command.set_shell(false);
  command.set_value("/bin/ping");
  command.add_arguments("/bin/ping");
  command.add_arguments("-c1");
  command.add_arguments("google.com");

  TaskInfo task = createTask(
      offer.slave_id(),
      Resources::parse("cpus:1;mem:128").get(),
      command);

  Image image;
  image.set_type(Image::DOCKER);
  image.mutable_docker()->set_name("alpine");

  ContainerInfo* container = task.mutable_container();
  container->set_type(ContainerInfo::MESOS);
  container->mutable_mesos()->mutable_image()->CopyFrom(image);

  Future<TaskStatus> statusRunning;
  Future<TaskStatus> statusFinished;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&statusRunning))
    .WillOnce(FutureArg<1>(&statusFinished));

  driver.launchTasks(offer.id(), {task});

  AWAIT_READY_FOR(statusRunning, Seconds(60));
  EXPECT_EQ(task.task_id(), statusRunning->task_id());
  EXPECT_EQ(TASK_RUNNING, statusRunning->state());

  AWAIT_READY(statusFinished);
  EXPECT_EQ(task.task_id(), statusFinished->task_id());
  EXPECT_EQ(TASK_FINISHED, statusFinished->state());

  driver.stop();
  driver.join();
}
// This test ensures that a task will transition straight from `TASK_KILLING`
// to `TASK_KILLED`, even if the health check begins to fail during the kill
// policy grace period.
//
// TODO(gkleiman): this test takes about 7 seconds to run, consider using mock
// tasks and health checkers to speed it up.
TEST_P(CommandExecutorTest, NoTransitionFromKillingToRunning)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  Owned<MasterDetector> detector = master.get()->createDetector();

  slave::Flags flags = CreateSlaveFlags();
  flags.http_command_executor = GetParam();

  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), flags);
  ASSERT_SOME(slave);

  // Start the framework with the task killing capability.
  FrameworkInfo::Capability capability;
  capability.set_type(FrameworkInfo::Capability::TASK_KILLING_STATE);

  FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;
  frameworkInfo.add_capabilities()->CopyFrom(capability);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, frameworkInfo, master.get()->pid, DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  EXPECT_EQ(1u, offers->size());

  const string command = strings::format(
      "%s %s --sleep_duration=15",
      getTestHelperPath("test-helper"),
      KillPolicyTestHelper::NAME).get();

  TaskInfo task = createTask(offers->front(), command);

  // Create a health check that succeeds until a temporary file is removed.
  Try<string> temporaryPath = os::mktemp(path::join(os::getcwd(), "XXXXXX"));
  ASSERT_SOME(temporaryPath);
  const string tmpPath = temporaryPath.get();

  HealthCheck healthCheck;
  healthCheck.set_type(HealthCheck::COMMAND);
  healthCheck.mutable_command()->set_value("ls " + tmpPath + " >/dev/null");
  healthCheck.set_delay_seconds(0);
  healthCheck.set_grace_period_seconds(0);
  healthCheck.set_interval_seconds(0);

  task.mutable_health_check()->CopyFrom(healthCheck);

  // Set the kill policy grace period to 5 seconds.
  KillPolicy killPolicy;
  killPolicy.mutable_grace_period()->set_nanoseconds(Seconds(5).ns());

  task.mutable_kill_policy()->CopyFrom(killPolicy);

  vector<TaskInfo> tasks;
  tasks.push_back(task);

  Future<TaskStatus> statusRunning;
  Future<TaskStatus> statusHealthy;
  Future<TaskStatus> statusKilling;
  Future<TaskStatus> statusKilled;

  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&statusRunning))
    .WillOnce(FutureArg<1>(&statusHealthy))
    .WillOnce(FutureArg<1>(&statusKilling))
    .WillOnce(FutureArg<1>(&statusKilled));

  driver.launchTasks(offers->front().id(), tasks);

  AWAIT_READY(statusRunning);
  EXPECT_EQ(TASK_RUNNING, statusRunning.get().state());

  AWAIT_READY(statusHealthy);
  EXPECT_EQ(TASK_RUNNING, statusHealthy.get().state());
  EXPECT_TRUE(statusHealthy.get().has_healthy());
  EXPECT_TRUE(statusHealthy.get().healthy());

  driver.killTask(task.task_id());

  AWAIT_READY(statusKilling);
  EXPECT_EQ(TASK_KILLING, statusKilling->state());
  EXPECT_FALSE(statusKilling.get().has_healthy());

  // Remove the temporary file, so that the health check fails.
  os::rm(tmpPath);

  AWAIT_READY(statusKilled);
  EXPECT_EQ(TASK_KILLED, statusKilled->state());
  EXPECT_FALSE(statusKilled.get().has_healthy());

  driver.stop();
  driver.join();
}
// This test verifies that a reconciliation request that comes before
// '_launchTasks()' is ignored.
TEST_F(MasterAuthorizationTest, ReconcileTask)
{
  MockAuthorizer authorizer;
  Try<PID<Master> > master = StartMaster(&authorizer);
  ASSERT_SOME(master);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);

  Try<PID<Slave> > slave = StartSlave(&exec);
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _))
    .Times(1);

  Future<vector<Offer> > offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  EXPECT_NE(0u, offers.get().size());

  TaskInfo task = createTask(offers.get()[0], "", DEFAULT_EXECUTOR_ID);

  vector<TaskInfo> tasks;
  tasks.push_back(task);

  // Return a pending future from authorizer.
  Future<Nothing> future;
  Promise<bool> promise;
  EXPECT_CALL(authorizer, authorize(An<const mesos::ACL::RunTasks&>()))
    .WillOnce(DoAll(FutureSatisfy(&future),
                    Return(promise.future())));

  driver.launchTasks(offers.get()[0].id(), tasks);

  // Wait until authorization is in progress.
  AWAIT_READY(future);

  // Scheduler shouldn't get an update from reconciliation.
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .Times(0);

  Future<ReconcileTasksMessage> reconcileTasksMessage =
    FUTURE_PROTOBUF(ReconcileTasksMessage(), _, _);

  vector<TaskStatus> statuses;

  TaskStatus status;
  status.mutable_task_id()->CopyFrom(task.task_id());
  status.mutable_slave_id()->CopyFrom(offers.get()[0].slave_id());
  status.set_state(TASK_STAGING);

  statuses.push_back(status);

  driver.reconcileTasks(statuses);

  AWAIT_READY(reconcileTasksMessage);

  // Make sure the framework doesn't receive any update.
  Clock::pause();
  Clock::settle();

  // Now stop the framework.
  driver.stop();
  driver.join();

  Shutdown(); // Must shutdown before 'containerizer' gets deallocated.
}
TEST_F(MemoryPressureMesosTest, CGROUPS_ROOT_Statistics)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  slave::Flags flags = CreateSlaveFlags();

  // We only care about the memory cgroup for this test.
  flags.isolation = "cgroups/mem";
  flags.agent_subsystems = None();

  Fetcher fetcher;

  Try<MesosContainerizer*> _containerizer =
    MesosContainerizer::create(flags, true, &fetcher);

  ASSERT_SOME(_containerizer);
  Owned<MesosContainerizer> containerizer(_containerizer.get());

  Owned<MasterDetector> detector = master.get()->createDetector();

  Try<Owned<cluster::Slave>> slave =
    StartSlave(detector.get(), containerizer.get(), flags);
  ASSERT_SOME(slave);

  MockScheduler sched;

  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(_, _, _));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(_, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  EXPECT_NE(0u, offers.get().size());

  Offer offer = offers.get()[0];

  // Run a task that triggers a memory pressure event. We request 1G
  // disk because we are going to write a 512 MB file repeatedly.
  TaskInfo task = createTask(
      offer.slave_id(),
      Resources::parse("cpus:1;mem:256;disk:1024").get(),
      "while true; do dd count=512 bs=1M if=/dev/zero of=./temp; done");

  Future<TaskStatus> running;
  Future<TaskStatus> killed;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&running))
    .WillOnce(FutureArg<1>(&killed))
    .WillRepeatedly(Return()); // Ignore subsequent updates.

  driver.launchTasks(offer.id(), {task});

  AWAIT_READY(running);
  EXPECT_EQ(task.task_id(), running.get().task_id());
  EXPECT_EQ(TASK_RUNNING, running.get().state());

  Future<hashset<ContainerID>> containers = containerizer->containers();
  AWAIT_READY(containers);
  ASSERT_EQ(1u, containers.get().size());

  ContainerID containerId = *(containers.get().begin());

  // Wait a while for some memory pressure events to occur.
  Duration waited = Duration::zero();
  do {
    Future<ResourceStatistics> usage = containerizer->usage(containerId);
    AWAIT_READY(usage);

    if (usage.get().mem_low_pressure_counter() > 0) {
      // We will check the correctness of the memory pressure counters
      // later, because the memory-hammering task is still active
      // and potentially incrementing these counters.
      break;
    }

    os::sleep(Milliseconds(100));
    waited += Milliseconds(100);
  } while (waited < Seconds(5));

  EXPECT_LE(waited, Seconds(5));

  // Pause the clock to ensure that the reaper doesn't reap the exited
  // command executor and inform the containerizer/slave.
  Clock::pause();
  Clock::settle();

  // Stop the memory-hammering task.
  driver.killTask(task.task_id());

  AWAIT_READY_FOR(killed, Seconds(120));
  EXPECT_EQ(task.task_id(), killed->task_id());
  EXPECT_EQ(TASK_KILLED, killed->state());

  // Now check the correctness of the memory pressure counters.
  Future<ResourceStatistics> usage = containerizer->usage(containerId);
  AWAIT_READY(usage);

  EXPECT_GE(usage.get().mem_low_pressure_counter(),
            usage.get().mem_medium_pressure_counter());
  EXPECT_GE(usage.get().mem_medium_pressure_counter(),
            usage.get().mem_critical_pressure_counter());

  Clock::resume();

  driver.stop();
  driver.join();
}
// This test ensures that tasks that are still pending (e.g., awaiting
// authorization) are exposed in reconciliation.
TEST_F(ReconciliationTest, PendingTask)
{
  MockAuthorizer authorizer;
  Try<PID<Master> > master = StartMaster(&authorizer);
  ASSERT_SOME(master);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);

  Future<SlaveRegisteredMessage> slaveRegisteredMessage =
    FUTURE_PROTOBUF(SlaveRegisteredMessage(), _, _);

  Try<PID<Slave> > slave = StartSlave();
  ASSERT_SOME(slave);

  // Wait for the slave to register and get the slave id.
  AWAIT_READY(slaveRegisteredMessage);
  const SlaveID slaveId = slaveRegisteredMessage.get().slave_id();

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _))
    .Times(1);

  Future<vector<Offer> > offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  EXPECT_NE(0u, offers.get().size());

  // Return a pending future from authorizer.
  Future<Nothing> authorize;
  Promise<bool> promise;
  EXPECT_CALL(authorizer, authorize(An<const mesos::ACL::RunTask&>()))
    .WillOnce(DoAll(FutureSatisfy(&authorize),
                    Return(promise.future())));

  TaskInfo task = createTask(offers.get()[0], "", DEFAULT_EXECUTOR_ID);

  vector<TaskInfo> tasks;
  tasks.push_back(task);

  driver.launchTasks(offers.get()[0].id(), tasks);

  // Wait until authorization is in progress.
  AWAIT_READY(authorize);

  // First send an implicit reconciliation request for this task.
  Future<TaskStatus> update;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&update));

  vector<TaskStatus> statuses;
  driver.reconcileTasks(statuses);

  AWAIT_READY(update);
  EXPECT_EQ(TASK_STAGING, update.get().state());
  EXPECT_TRUE(update.get().has_slave_id());

  // Now send an explicit reconciliation request for this task.
  Future<TaskStatus> update2;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&update2));

  TaskStatus status;
  status.mutable_task_id()->CopyFrom(task.task_id());
  status.mutable_slave_id()->CopyFrom(slaveId);
  status.set_state(TASK_STAGING);

  statuses.push_back(status);

  driver.reconcileTasks(statuses);

  AWAIT_READY(update2);
  EXPECT_EQ(TASK_STAGING, update2.get().state());
  EXPECT_TRUE(update2.get().has_slave_id());

  driver.stop();
  driver.join();

  Shutdown(); // Must shutdown before 'containerizer' gets deallocated.
}
// This test verifies that a 'killTask()' that comes before
// '_launchTasks()' is called results in TASK_KILLED.
TEST_F(MasterAuthorizationTest, KillTask)
{
  MockAuthorizer authorizer;
  Try<PID<Master> > master = StartMaster(&authorizer);
  ASSERT_SOME(master);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);

  Try<PID<Slave> > slave = StartSlave(&exec);
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _))
    .Times(1);

  Future<vector<Offer> > offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  EXPECT_NE(0u, offers.get().size());

  TaskInfo task = createTask(offers.get()[0], "", DEFAULT_EXECUTOR_ID);

  vector<TaskInfo> tasks;
  tasks.push_back(task);

  // Return a pending future from authorizer.
  Future<Nothing> future;
  Promise<bool> promise;
  EXPECT_CALL(authorizer, authorize(An<const mesos::ACL::RunTasks&>()))
    .WillOnce(DoAll(FutureSatisfy(&future),
                    Return(promise.future())));

  driver.launchTasks(offers.get()[0].id(), tasks);

  // Wait until authorization is in progress.
  AWAIT_READY(future);

  Future<TaskStatus> status;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&status));

  // Now kill the task.
  driver.killTask(task.task_id());

  // Framework should get a TASK_KILLED right away.
  AWAIT_READY(status);
  EXPECT_EQ(TASK_KILLED, status.get().state());

  Future<Nothing> resourcesUnused =
    FUTURE_DISPATCH(_, &AllocatorProcess::resourcesUnused);

  // Now complete authorization.
  promise.set(true);

  // No task launch should happen, resulting in all resources being
  // returned to the allocator.
  AWAIT_READY(resourcesUnused);

  driver.stop();
  driver.join();

  Shutdown(); // Must shutdown before 'containerizer' gets deallocated.
}
void launch(const TaskInfo& _task)
{
  CHECK_EQ(SUBSCRIBED, state);

  if (launched) {
    update(
        _task.task_id(),
        TASK_FAILED,
        None(),
        "Attempted to run multiple tasks using a \"command\" executor");
    return;
  }

  // Capture the task.
  task = _task;

  // Capture the TaskID.
  taskId = task->task_id();

  // Capture the kill policy.
  if (task->has_kill_policy()) {
    killPolicy = task->kill_policy();
  }

  // Determine the command to launch the task.
  CommandInfo command;

  if (taskCommand.isSome()) {
    // Get CommandInfo from a JSON string.
    Try<JSON::Object> object = JSON::parse<JSON::Object>(taskCommand.get());
    if (object.isError()) {
      ABORT("Failed to parse JSON: " + object.error());
    }

    Try<CommandInfo> parse = protobuf::parse<CommandInfo>(object.get());
    if (parse.isError()) {
      ABORT("Failed to parse protobuf: " + parse.error());
    }

    command = parse.get();
  } else if (task->has_command()) {
    command = task->command();
  } else {
    LOG(FATAL) << "Expecting task '" << task->task_id() << "' "
               << "to have a command";
  }

  // TODO(jieyu): For now, we just fail the executor if the task's
  // CommandInfo is not valid. The framework will receive
  // TASK_FAILED for the task, and will most likely find out the
  // cause with some debugging. This is a temporary solution. A more
  // correct solution is to perform this validation at master side.
  if (command.shell()) {
    CHECK(command.has_value())
      << "Shell command of task '" << task->task_id()
      << "' is not specified!";
  } else {
    CHECK(command.has_value())
      << "Executable of task '" << task->task_id()
      << "' is not specified!";
  }

  cout << "Starting task " << task->task_id() << endl;

  // Prepare the argv before fork as it's not async signal safe.
  char** argv = new char*[command.arguments().size() + 1];
  for (int i = 0; i < command.arguments().size(); i++) {
    argv[i] = (char*) command.arguments(i).c_str();
  }
  argv[command.arguments().size()] = nullptr;

#ifndef __WINDOWS__
  pid = launchTaskPosix(
      task.get(),
      command,
      user,
      argv,
      rootfs,
      sandboxDirectory,
      workingDirectory);
#else
  // A Windows process is started using the `CREATE_SUSPENDED` flag
  // and is part of a job object. While the process handle is kept
  // open the reap function will work.
  PROCESS_INFORMATION processInformation = launchTaskWindows(
      task.get(),
      command,
      argv,
      rootfs);

  pid = processInformation.dwProcessId;
  ::ResumeThread(processInformation.hThread);
  CloseHandle(processInformation.hThread);
  processHandle = processInformation.hProcess;
#endif

  delete[] argv;

  cout << "Forked command at " << pid << endl;

  if (task->has_health_check()) {
    launchHealthCheck(task.get());
  }

  // Monitor this process.
  process::reap(pid)
    .onAny(defer(self(), &Self::reaped, pid, lambda::_1));

  update(task->task_id(), TASK_RUNNING);

  launched = true;
}
// Before starting the task, we should check task.data() to determine
// what to do: whether to copy the config, and whether to start the
// fileserver.
//
// The task.data() format here is:
// <isInitialMonNode>.<TaskType>
void CephExecutor::launchTask(ExecutorDriver* driver, const TaskInfo& task)
{
  // Set class member localSharedConfDirRoot.
  string cmd = "echo ~";
  string r = runShellCommand(cmd);
  localSharedConfigDirRoot = r == " " ? r : "/root";
  LOG(INFO) << "localSharedConfigDirRoot is " << localSharedConfigDirRoot;

  bool needCopyConfig = true;
  bool needStartFileServer = false;
  int taskType;
  if (task.has_data()) {
    LOG(INFO) << "Got TaskInfo data: " << task.data();
    vector<string> tokens = StringUtil::explode(task.data(), '.');
    // Split by '.': the first part is isInitialMonNode,
    // the second part is the task type.
    if (tokens[0] == "1") {
      needCopyConfig = false;
    }
    taskType = lexical_cast<int>(tokens[1]);
  }

  string localMountDir = localSharedConfigDirRoot + "/" + localConfigDirName;

  TaskStatus status;
  status.mutable_task_id()->MergeFrom(task.task_id());

  // Make the local shared dir; all task types need this.
  // TODO: check if a valid directory tree already exists.
  if (!createLocalSharedConfigDir(localConfigDirName)) {
    LOG(INFO) << "Creating local shared directory failed!";
    status.set_state(TASK_FAILED);
    driver->sendStatusUpdate(status);
    return;
  }
  LOG(INFO) << "Create directory tree done.";

  // Mount the shared local dir.
  if (needCopyConfig) {
    string abPath = localMountDir + "/" + "/etc/ceph/";
    if (!copySharedConfigDir(abPath)) {
      LOG(INFO) << "Copy shared config file failed!";
      status.set_state(TASK_FAILED);
      driver->sendStatusUpdate(status);
      return;
    }
    LOG(INFO) << "Copy config files done.";
  }

  // Run the docker command for MON and RADOSGW.
  string cName = getContainerName(task.task_id().value());

  // Set class members containerName and myTaskId.
  // TODO: see if setting these in registered() is more proper.
  containerName = cName;
  myTaskId = task.task_id();

  // TODO: kill the existing container in case of conflict.
  runShellCommand("docker rm -f " + containerName);

  string dockerCommand;
  switch (taskType) {
    case static_cast<int>(TaskType::MON):
      needStartFileServer = true;
      dockerCommand = constructMonCommand(localMountDir, cName);
      downloadDockerImage("ceph/mon");
      break;
    case static_cast<int>(TaskType::OSD):
      downloadDockerImage("ceph/osd");
      // Will get osdId in FrameworkMessage.
      dockerCommand = "";
      status.set_state(TASK_STARTING);
      driver->sendStatusUpdate(status);
      return;
    case static_cast<int>(TaskType::RADOSGW):
      downloadDockerImage("ceph/radosgw");
      dockerCommand = constructRADOSGWCommand(localMountDir, cName);
      break;
  }

  if (needStartFileServer) {
    thread fileServerThread(
        fileServer,
        7777,
        localSharedConfigDirRoot + "/" + localConfigDirName + "/etc/ceph/");
    fileServerThread.detach();
    LOG(INFO) << "Mon fileserver started";
  }

  LOG(INFO) << "Starting container with command: ";
  LOG(INFO) << dockerCommand;

  // Fork a process so docker can keep running long-term.
  // TODO: <thread> here seems not to work; figure out a better way.
  // Need to check why the line below would hang the executor:
  // thread(&CephExecutor::startLongRunning, *this, "docker", dockerCommand).detach();
  myPID = fork();
  if (0 == myPID) {
    // Child: the long-running docker process.
    startLongRunning("docker", dockerCommand);
  } else {
    // Parent: check whether the container started normally.
    bool started = block_until_started(cName, "30");
    if (started) {
      LOG(INFO) << "Starting task " << task.task_id().value();
      status.set_state(TASK_RUNNING);
    } else {
      LOG(INFO) << "Failed to start task " << task.task_id().value();
      status.set_state(TASK_FAILED);
    }
    driver->sendStatusUpdate(status);
  }
}
void launch(const TaskInfo& _task)
{
  CHECK_EQ(SUBSCRIBED, state);

  if (launched) {
    update(
        _task.task_id(),
        TASK_FAILED,
        None(),
        "Attempted to run multiple tasks using a \"command\" executor");
    return;
  }

  // Capture the task.
  task = _task;

  // Capture the TaskID.
  taskId = task->task_id();

  // Capture the kill policy.
  if (task->has_kill_policy()) {
    killPolicy = task->kill_policy();
  }

  // Determine the command to launch the task.
  CommandInfo command;

  if (taskCommand.isSome()) {
    // Get CommandInfo from a JSON string.
    Try<JSON::Object> object = JSON::parse<JSON::Object>(taskCommand.get());
    if (object.isError()) {
      cerr << "Failed to parse JSON: " << object.error() << endl;
      abort();
    }

    Try<CommandInfo> parse = protobuf::parse<CommandInfo>(object.get());
    if (parse.isError()) {
      cerr << "Failed to parse protobuf: " << parse.error() << endl;
      abort();
    }

    command = parse.get();
  } else if (task->has_command()) {
    command = task->command();
  } else {
    CHECK_SOME(override)
      << "Expecting task '" << task->task_id() << "' to have a command!";
  }

  if (override.isNone()) {
    // TODO(jieyu): For now, we just fail the executor if the task's
    // CommandInfo is not valid. The framework will receive
    // TASK_FAILED for the task, and will most likely find out the
    // cause with some debugging. This is a temporary solution. A more
    // correct solution is to perform this validation at master side.
    if (command.shell()) {
      CHECK(command.has_value())
        << "Shell command of task '" << task->task_id()
        << "' is not specified!";
    } else {
      CHECK(command.has_value())
        << "Executable of task '" << task->task_id()
        << "' is not specified!";
    }
  }

  cout << "Starting task " << task->task_id() << endl;

  // TODO(benh): Clean this up with the new 'Fork' abstraction.
  // Use pipes to determine which child has successfully changed
  // session. This is needed as the setsid call can fail from other
  // processes having the same group id.
  int pipes[2];
  if (pipe(pipes) < 0) {
    perror("Failed to create a pipe");
    abort();
  }

  // Set the FD_CLOEXEC flags on these pipes.
  Try<Nothing> cloexec = os::cloexec(pipes[0]);
  if (cloexec.isError()) {
    cerr << "Failed to cloexec(pipe[0]): " << cloexec.error() << endl;
    abort();
  }

  cloexec = os::cloexec(pipes[1]);
  if (cloexec.isError()) {
    cerr << "Failed to cloexec(pipe[1]): " << cloexec.error() << endl;
    abort();
  }

  if (rootfs.isSome()) {
    // The command executor is responsible for chrooting into the
    // root filesystem and changing the user before exec-ing the
    // user process.
#ifdef __linux__
    Result<string> user = os::user();
    if (user.isError()) {
      cerr << "Failed to get current user: " << user.error() << endl;
      abort();
    } else if (user.isNone()) {
      cerr << "Current username is not found" << endl;
      abort();
    } else if (user.get() != "root") {
      cerr << "The command executor requires root with rootfs" << endl;
      abort();
    }
#else
    cerr << "Not expecting root volume with non-linux platform." << endl;
    abort();
#endif // __linux__
  }

  // Prepare the argv before fork as it's not async signal safe.
  char** argv = new char*[command.arguments().size() + 1];
  for (int i = 0; i < command.arguments().size(); i++) {
    argv[i] = (char*) command.arguments(i).c_str();
  }
  argv[command.arguments().size()] = NULL;

  // Prepare the command log message.
  string commandString;
  if (override.isSome()) {
    char** argv = override.get();
    // argv is guaranteed to be NULL terminated and we rely on
    // that fact to print the command to be executed.
    for (int i = 0; argv[i] != NULL; i++) {
      commandString += string(argv[i]) + " ";
    }
  } else if (command.shell()) {
// Test that we can run the mesos-executor and specify an "override"
// command to use via the --override argument.
TEST_F(SlaveTest, MesosExecutorWithOverride)
{
  Try<PID<Master> > master = StartMaster();
  ASSERT_SOME(master);

  TestContainerizer containerizer;

  Try<PID<Slave> > slave = StartSlave(&containerizer);
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _))
    .Times(1);

  Future<vector<Offer> > offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  EXPECT_NE(0u, offers.get().size());

  // Launch a task with the command executor.
  TaskInfo task;
  task.set_name("");
  task.mutable_task_id()->set_value("1");
  task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id());
  task.mutable_resources()->MergeFrom(offers.get()[0].resources());

  CommandInfo command;
  command.set_value("sleep 10");

  task.mutable_command()->MergeFrom(command);

  vector<TaskInfo> tasks;
  tasks.push_back(task);

  // Expect the launch and just assume it was successful since we'll be
  // launching the executor ourselves manually below.
  Future<Nothing> launch;
  EXPECT_CALL(containerizer, launch(_, _, _, _, _, _, _))
    .WillOnce(DoAll(FutureSatisfy(&launch),
                    Return(true)));

  // Expect wait after launch is called but don't return anything
  // until after we've finished everything below.
  Future<Nothing> wait;
  process::Promise<containerizer::Termination> promise;
  EXPECT_CALL(containerizer, wait(_))
    .WillOnce(DoAll(FutureSatisfy(&wait),
                    Return(promise.future())));

  driver.launchTasks(offers.get()[0].id(), tasks);

  // Once we get the launch, start the mesos-executor with --override.
  AWAIT_READY(launch);

  // Set up a fake environment for the executor.
  map<string, string> environment;
  environment["MESOS_SLAVE_PID"] = stringify(slave.get());
  environment["MESOS_SLAVE_ID"] = stringify(offers.get()[0].slave_id());
  environment["MESOS_FRAMEWORK_ID"] = stringify(offers.get()[0].framework_id());
  environment["MESOS_EXECUTOR_ID"] = stringify(task.task_id());
  environment["MESOS_DIRECTORY"] = "";

  // Create a temporary file to store the validation string. If the
  // command is successfully replaced, this file will end up containing
  // the string 'hello world\n'. Otherwise, the original task command,
  // i.e., 'sleep', will be called and the test will fail.
  Try<std::string> file = os::mktemp();
  ASSERT_SOME(file);

  string executorCommand =
    path::join(tests::flags.build_dir, "src", "mesos-executor") +
    " --override -- /bin/sh -c 'echo hello world >" + file.get() + "'";

  // Expect two status updates, one for once the mesos-executor says
  // the task is running and one for after our overridden command
  // above finishes.
  Future<TaskStatus> status1, status2;
  EXPECT_CALL(sched, statusUpdate(_, _))
    .WillOnce(FutureArg<1>(&status1))
    .WillOnce(FutureArg<1>(&status2));

  Try<process::Subprocess> executor = process::subprocess(
      executorCommand,
      process::Subprocess::PIPE(),
      process::Subprocess::PIPE(),
      process::Subprocess::PIPE(),
      environment);

  ASSERT_SOME(executor);

  // The scheduler should receive the TASK_RUNNING update.
  AWAIT_READY(status1);
  ASSERT_EQ(TASK_RUNNING, status1.get().state());

  AWAIT_READY(status2);
  ASSERT_EQ(TASK_FINISHED, status2.get().state());

  AWAIT_READY(wait);

  containerizer::Termination termination;
  termination.set_killed(false);
  termination.set_message("Killed executor");
  termination.set_status(0);

  promise.set(termination);

  driver.stop();
  driver.join();

  AWAIT_READY(executor.get().status());

  // Verify the file contents.
  Try<std::string> validate = os::read(file.get());
  ASSERT_SOME(validate);

  EXPECT_EQ(validate.get(), "hello world\n");

  os::rm(file.get());

  Shutdown();
}
// This is an end-to-end test that verifies that the slave returns the
// correct ResourceUsage based on the currently running executors, and
// that the values obtained from the statistics endpoint are as expected.
TEST_F(MonitorIntegrationTest, RunningExecutor)
{
  Try<PID<Master>> master = StartMaster();
  ASSERT_SOME(master);

  Try<PID<Slave>> slave = StartSlave();
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  EXPECT_FALSE(offers.get().empty());

  const Offer& offer = offers.get()[0];

  // Launch a task and wait until it is in RUNNING status.
  TaskInfo task = createTask(
      offer.slave_id(),
      Resources::parse("cpus:1;mem:32").get(),
      "sleep 1000");

  Future<TaskStatus> status;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&status));

  driver.launchTasks(offer.id(), {task});

  AWAIT_READY(status);
  EXPECT_EQ(task.task_id(), status.get().task_id());
  EXPECT_EQ(TASK_RUNNING, status.get().state());

  // Hit the statistics endpoint and expect the response to contain the
  // resource statistics for the running container.
  UPID upid("monitor", process::address());

  Future<http::Response> response = http::get(upid, "statistics");
  AWAIT_READY(response);

  AWAIT_EXPECT_RESPONSE_STATUS_EQ(http::OK().status, response);
  AWAIT_EXPECT_RESPONSE_HEADER_EQ(
      "application/json", "Content-Type", response);

  // Verify that the statistics in the response contain the proper
  // resource limits for the container.
  Try<JSON::Value> value = JSON::parse(response.get().body);
  ASSERT_SOME(value);

  Try<JSON::Value> expected = JSON::parse(strings::format(
      "[{"
          "\"statistics\":{"
              "\"cpus_limit\":%g,"
              "\"mem_limit_bytes\":%lu"
          "}"
      "}]",
      1 + slave::DEFAULT_EXECUTOR_CPUS,
      (Megabytes(32) + slave::DEFAULT_EXECUTOR_MEM).bytes()).get());

  ASSERT_SOME(expected);
  EXPECT_TRUE(value.get().contains(expected.get()));

  driver.stop();
  driver.join();

  Shutdown();
}
void launchTask(ExecutorDriver* driver, const TaskInfo& task)
{
  CHECK_EQ(REGISTERED, state);

  if (launched) {
    TaskStatus status;
    status.mutable_task_id()->MergeFrom(task.task_id());
    status.set_state(TASK_FAILED);
    status.set_message(
        "Attempted to run multiple tasks using a \"command\" executor");

    driver->sendStatusUpdate(status);
    return;
  }

  // Capture the TaskID.
  taskId = task.task_id();

  // Determine the command to launch the task.
  CommandInfo command;

  if (taskCommand.isSome()) {
    // Get CommandInfo from a JSON string.
    Try<JSON::Object> object = JSON::parse<JSON::Object>(taskCommand.get());
    if (object.isError()) {
      cerr << "Failed to parse JSON: " << object.error() << endl;
      abort();
    }

    Try<CommandInfo> parse = protobuf::parse<CommandInfo>(object.get());
    if (parse.isError()) {
      cerr << "Failed to parse protobuf: " << parse.error() << endl;
      abort();
    }

    command = parse.get();
  } else if (task.has_command()) {
    command = task.command();
  } else {
    CHECK_SOME(override)
      << "Expecting task '" << task.task_id() << "' to have a command!";
  }

  if (override.isNone()) {
    // TODO(jieyu): For now, we just fail the executor if the task's
    // CommandInfo is not valid. The framework will receive
    // TASK_FAILED for the task, and will most likely find out the
    // cause with some debugging. This is a temporary solution. A more
    // correct solution is to perform this validation at master side.
    if (command.shell()) {
      CHECK(command.has_value())
        << "Shell command of task '" << task.task_id()
        << "' is not specified!";
    } else {
      CHECK(command.has_value())
        << "Executable of task '" << task.task_id()
        << "' is not specified!";
    }
  }

  cout << "Starting task " << task.task_id() << endl;

  // TODO(benh): Clean this up with the new 'Fork' abstraction.
  // Use pipes to determine which child has successfully changed
  // session. This is needed as the setsid call can fail from other
  // processes having the same group id.
  int pipes[2];
  if (pipe(pipes) < 0) {
    perror("Failed to create a pipe");
    abort();
  }

  // Set the FD_CLOEXEC flags on these pipes.
  Try<Nothing> cloexec = os::cloexec(pipes[0]);
  if (cloexec.isError()) {
    cerr << "Failed to cloexec(pipe[0]): " << cloexec.error() << endl;
    abort();
  }

  cloexec = os::cloexec(pipes[1]);
  if (cloexec.isError()) {
    cerr << "Failed to cloexec(pipe[1]): " << cloexec.error() << endl;
    abort();
  }

  Option<string> rootfs;
  if (sandboxDirectory.isSome()) {
    // If 'sandbox_directory' is specified, that means the user
    // task specifies a root filesystem, and that root filesystem has
    // already been prepared at COMMAND_EXECUTOR_ROOTFS_CONTAINER_PATH.
    // The command executor is responsible for mounting the sandbox
    // into the root filesystem, chrooting into it and changing the
    // user before exec-ing the user process.
    //
    // TODO(gilbert): Consider a better way to detect if a root
    // filesystem is specified for the command task.
#ifdef __linux__
    Result<string> user = os::user();
    if (user.isError()) {
      cerr << "Failed to get current user: " << user.error() << endl;
      abort();
    } else if (user.isNone()) {
      cerr << "Current username is not found" << endl;
      abort();
    } else if (user.get() != "root") {
      cerr << "The command executor requires root with rootfs" << endl;
      abort();
    }

    rootfs = path::join(
        os::getcwd(), COMMAND_EXECUTOR_ROOTFS_CONTAINER_PATH);

    string sandbox = path::join(rootfs.get(), sandboxDirectory.get());
    if (!os::exists(sandbox)) {
      Try<Nothing> mkdir = os::mkdir(sandbox);
      if (mkdir.isError()) {
        cerr << "Failed to create sandbox mount point at '"
             << sandbox << "': " << mkdir.error() << endl;
        abort();
      }
    }

    // Mount the sandbox into the container rootfs.
    // We need to perform a recursive mount because we want all the
    // volume mounts in the sandbox to be also mounted in the container
    // root filesystem. However, since the container root filesystem
    // is also mounted in the sandbox, after the recursive mount we
    // also need to unmount the root filesystem in the mounted sandbox.
    Try<Nothing> mount = fs::mount(
        os::getcwd(),
        sandbox,
        None(),
        MS_BIND | MS_REC,
        NULL);

    if (mount.isError()) {
      cerr << "Unable to mount the work directory into container "
           << "rootfs: " << mount.error() << endl;
      abort();
    }

    // Umount the root filesystem path in the mounted sandbox after
    // the recursive mount.
    Try<Nothing> unmountAll = fs::unmountAll(path::join(
        sandbox, COMMAND_EXECUTOR_ROOTFS_CONTAINER_PATH));

    if (unmountAll.isError()) {
      cerr << "Unable to unmount rootfs under mounted sandbox: "
           << unmountAll.error() << endl;
      abort();
    }
#else
    cerr << "Not expecting root volume with non-linux platform." << endl;
    abort();
#endif // __linux__
  }

  // Prepare the argv before fork as it's not async signal safe.
  char** argv = new char*[command.arguments().size() + 1];
  for (int i = 0; i < command.arguments().size(); i++) {
    argv[i] = (char*) command.arguments(i).c_str();
  }
  argv[command.arguments().size()] = NULL;

  // Prepare the command log message.
  string commandString;
  if (override.isSome()) {
    char** argv = override.get();
    // argv is guaranteed to be NULL terminated and we rely on
    // that fact to print the command to be executed.
    for (int i = 0; argv[i] != NULL; i++) {
      commandString += string(argv[i]) + " ";
    }
  } else if (command.shell()) {
// This test ensures that the command executor sends TASK_KILLING
// to frameworks that support the capability.
TEST_F(CommandExecutorTest, TaskKillingCapability)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  Owned<MasterDetector> detector = master.get()->createDetector();

  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get());
  ASSERT_SOME(slave);

  // Start the framework with the task killing capability.
  FrameworkInfo::Capability capability;
  capability.set_type(FrameworkInfo::Capability::TASK_KILLING_STATE);

  FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;
  frameworkInfo.add_capabilities()->CopyFrom(capability);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, frameworkInfo, master.get()->pid, DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  EXPECT_EQ(1u, offers->size());

  // Launch a task with the command executor.
  TaskInfo task = createTask(
      offers->front().slave_id(),
      offers->front().resources(),
      "sleep 1000");

  Future<TaskStatus> statusRunning;
  EXPECT_CALL(sched, statusUpdate(_, _))
    .WillOnce(FutureArg<1>(&statusRunning));

  driver.launchTasks(offers->front().id(), {task});

  AWAIT_READY(statusRunning);
  EXPECT_EQ(TASK_RUNNING, statusRunning->state());

  Future<TaskStatus> statusKilling, statusKilled;
  EXPECT_CALL(sched, statusUpdate(_, _))
    .WillOnce(FutureArg<1>(&statusKilling))
    .WillOnce(FutureArg<1>(&statusKilled));

  driver.killTask(task.task_id());

  AWAIT_READY(statusKilling);
  EXPECT_EQ(TASK_KILLING, statusKilling->state());

  AWAIT_READY(statusKilled);
  EXPECT_EQ(TASK_KILLED, statusKilled->state());

  driver.stop();
  driver.join();
}