// Factory for `HealthChecker`.
//
// Validates that `check` requests exactly one of the 'http' or
// 'command' health check kinds and, if so, builds a
// `HealthCheckerProcess` from the arguments and wraps it in a new
// `HealthChecker`. Returns an `Error` describing the problem when the
// protobuf requests both kinds or neither.
Try<Owned<HealthChecker>> HealthChecker::create(
    const HealthCheck& check,
    const UPID& executor,
    const TaskID& taskID)
{
  const bool wantsHttp = check.has_http();
  const bool wantsCommand = check.has_command();

  // The two kinds are mutually exclusive.
  if (wantsHttp && wantsCommand) {
    return Error("Both 'http' and 'command' health check requested");
  }

  // At least one kind has to be requested.
  if (!(wantsHttp || wantsCommand)) {
    return Error("Expecting one of 'http' or 'command' health check");
  }

  Owned<HealthCheckerProcess> checkerProcess(
      new HealthCheckerProcess(check, executor, taskID));

  return Owned<HealthChecker>(new HealthChecker(checkerProcess));
}
// This test ensures that a task will transition straight from `TASK_KILLING` to
// `TASK_KILLED`, even if the health check begins to fail during the kill policy
// grace period.
//
// The task's command health check lists a temporary file: it succeeds while
// the file exists and fails once the file is removed. The file is removed
// after the `TASK_KILLING` update has been received, and the test then
// expects the next (and last) status update to be `TASK_KILLED` without any
// health information attached.
//
// TODO(gkleiman): this test takes about 7 seconds to run, consider using mock
// tasks and health checkers to speed it up.
TEST_P(CommandExecutorTest, NoTransitionFromKillingToRunning)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  Owned<MasterDetector> detector = master.get()->createDetector();

  slave::Flags flags = CreateSlaveFlags();
  flags.http_command_executor = GetParam();

  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), flags);
  ASSERT_SOME(slave);

  // Start the framework with the task killing capability.
  FrameworkInfo::Capability capability;
  capability.set_type(FrameworkInfo::Capability::TASK_KILLING_STATE);

  FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;
  frameworkInfo.add_capabilities()->CopyFrom(capability);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, frameworkInfo, master.get()->pid, DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);

  // This has to be fatal: `offers->front()` is dereferenced below, which
  // would be undefined behavior on an empty vector.
  ASSERT_EQ(1u, offers->size());

  const string command = strings::format(
      "%s %s --sleep_duration=15",
      getTestHelperPath("test-helper"),
      KillPolicyTestHelper::NAME).get();

  TaskInfo task = createTask(offers->front(), command);

  // Create a health check that succeeds until a temporary file is removed.
  Try<string> temporaryPath = os::mktemp(path::join(os::getcwd(), "XXXXXX"));
  ASSERT_SOME(temporaryPath);
  const string tmpPath = temporaryPath.get();

  HealthCheck healthCheck;
  healthCheck.set_type(HealthCheck::COMMAND);
  healthCheck.mutable_command()->set_value("ls " + tmpPath + " >/dev/null");
  healthCheck.set_delay_seconds(0);
  healthCheck.set_grace_period_seconds(0);
  healthCheck.set_interval_seconds(0);

  task.mutable_health_check()->CopyFrom(healthCheck);

  // Set the kill policy grace period to 5 seconds.
  KillPolicy killPolicy;
  killPolicy.mutable_grace_period()->set_nanoseconds(Seconds(5).ns());

  task.mutable_kill_policy()->CopyFrom(killPolicy);

  vector<TaskInfo> tasks;
  tasks.push_back(task);

  Future<TaskStatus> statusRunning;
  Future<TaskStatus> statusHealthy;
  Future<TaskStatus> statusKilling;
  Future<TaskStatus> statusKilled;

  // Exactly four updates are expected, in this order. Any additional update
  // (e.g., one reporting the task as unhealthy while it is being killed)
  // would violate this expectation.
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&statusRunning))
    .WillOnce(FutureArg<1>(&statusHealthy))
    .WillOnce(FutureArg<1>(&statusKilling))
    .WillOnce(FutureArg<1>(&statusKilled));

  driver.launchTasks(offers->front().id(), tasks);

  AWAIT_READY(statusRunning);
  EXPECT_EQ(TASK_RUNNING, statusRunning->state());

  AWAIT_READY(statusHealthy);
  EXPECT_EQ(TASK_RUNNING, statusHealthy->state());
  EXPECT_TRUE(statusHealthy->has_healthy());
  EXPECT_TRUE(statusHealthy->healthy());

  driver.killTask(task.task_id());

  AWAIT_READY(statusKilling);
  EXPECT_EQ(TASK_KILLING, statusKilling->state());
  EXPECT_FALSE(statusKilling->has_healthy());

  // Remove the temporary file, so that the health check fails. If the
  // removal itself failed, the health check would keep succeeding and the
  // test would pass without exercising the scenario, so treat it as fatal.
  ASSERT_SOME(os::rm(tmpPath));

  AWAIT_READY(statusKilled);
  EXPECT_EQ(TASK_KILLED, statusKilled->state());
  EXPECT_FALSE(statusKilled->has_healthy());

  driver.stop();
  driver.join();
}
Option<Error> healthCheck(const HealthCheck& check) { if (!check.has_type()) { return Error("HealthCheck must specify 'type'"); } switch (check.type()) { case HealthCheck::COMMAND: { if (!check.has_command()) { return Error("Expecting 'command' to be set for command health check"); } const CommandInfo& command = check.command(); if (!command.has_value()) { string commandType = (command.shell() ? "'shell command'" : "'executable path'"); return Error("Command health check must contain " + commandType); } break; } case HealthCheck::HTTP: { if (!check.has_http()) { return Error("Expecting 'http' to be set for HTTP health check"); } const HealthCheck::HTTPCheckInfo& http = check.http(); if (http.has_scheme() && http.scheme() != "http" && http.scheme() != "https") { return Error( "Unsupported HTTP health check scheme: '" + http.scheme() + "'"); } if (http.has_path() && !strings::startsWith(http.path(), '/')) { return Error( "The path '" + http.path() + "' of HTTP health check must start with '/'"); } break; } case HealthCheck::TCP: { if (!check.has_tcp()) { return Error("Expecting 'tcp' to be set for TCP health check"); } break; } case HealthCheck::UNKNOWN: { return Error( "'" + HealthCheck::Type_Name(check.type()) + "'" " is not a valid health check type"); } } return None(); }