// This test ensures that a task will transition straight from `TASK_KILLING` to
// `TASK_KILLED`, even if the health check begins to fail during the kill policy
// grace period.
//
// The task is launched with a command health check that lists a temporary
// file. Once the task has reported `TASK_KILLING`, the file is removed so that
// the health check command starts failing; the test then expects the very next
// status update to be `TASK_KILLED`.
//
// TODO(gkleiman): this test takes about 7 seconds to run, consider using mock
// tasks and health checkers to speed it up.
TEST_P(CommandExecutorTest, NoTransitionFromKillingToRunning)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  Owned<MasterDetector> detector = master.get()->createDetector();

  slave::Flags flags = CreateSlaveFlags();
  flags.http_command_executor = GetParam();

  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), flags);
  ASSERT_SOME(slave);

  // Start the framework with the task killing capability.
  FrameworkInfo::Capability capability;
  capability.set_type(FrameworkInfo::Capability::TASK_KILLING_STATE);

  FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;
  frameworkInfo.add_capabilities()->CopyFrom(capability);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, frameworkInfo, master.get()->pid, DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);

  // This must be a fatal assertion: `offers->front()` is dereferenced below,
  // which is only valid when at least one offer was received.
  ASSERT_EQ(1u, offers->size());

  const string command = strings::format(
      "%s %s --sleep_duration=15",
      getTestHelperPath("test-helper"),
      KillPolicyTestHelper::NAME).get();

  TaskInfo task = createTask(offers->front(), command);

  // Create a health check that succeeds until a temporary file is removed.
  Try<string> temporaryPath = os::mktemp(path::join(os::getcwd(), "XXXXXX"));
  ASSERT_SOME(temporaryPath);

  const string tmpPath = temporaryPath.get();

  // The health check command lists the temporary file. Delay, grace period
  // and interval are all set to zero.
  HealthCheck healthCheck;
  healthCheck.set_type(HealthCheck::COMMAND);
  healthCheck.mutable_command()->set_value("ls " + tmpPath + " >/dev/null");
  healthCheck.set_delay_seconds(0);
  healthCheck.set_grace_period_seconds(0);
  healthCheck.set_interval_seconds(0);

  task.mutable_health_check()->CopyFrom(healthCheck);

  // Set the kill policy grace period to 5 seconds.
  KillPolicy killPolicy;
  killPolicy.mutable_grace_period()->set_nanoseconds(Seconds(5).ns());

  task.mutable_kill_policy()->CopyFrom(killPolicy);

  // Exactly four status updates are expected, in this order. Any additional
  // update in between would violate the expectation.
  Future<TaskStatus> statusRunning;
  Future<TaskStatus> statusHealthy;
  Future<TaskStatus> statusKilling;
  Future<TaskStatus> statusKilled;

  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&statusRunning))
    .WillOnce(FutureArg<1>(&statusHealthy))
    .WillOnce(FutureArg<1>(&statusKilling))
    .WillOnce(FutureArg<1>(&statusKilled));

  driver.launchTasks(offers->front().id(), {task});

  AWAIT_READY(statusRunning);
  EXPECT_EQ(TASK_RUNNING, statusRunning->state());

  AWAIT_READY(statusHealthy);
  EXPECT_EQ(TASK_RUNNING, statusHealthy->state());
  EXPECT_TRUE(statusHealthy->has_healthy());
  EXPECT_TRUE(statusHealthy->healthy());

  driver.killTask(task.task_id());

  AWAIT_READY(statusKilling);
  EXPECT_EQ(TASK_KILLING, statusKilling->state());
  EXPECT_FALSE(statusKilling->has_healthy());

  // Remove the temporary file, so that the health check fails. If the removal
  // itself fails the health check command would keep succeeding and the
  // scenario under test would not be exercised, so treat that as fatal.
  ASSERT_SOME(os::rm(tmpPath));

  AWAIT_READY(statusKilled);
  EXPECT_EQ(TASK_KILLED, statusKilled->state());
  EXPECT_FALSE(statusKilled->has_healthy());

  driver.stop();
  driver.join();
}
// This test ensures that the command executor sends TASK_KILLING
// to frameworks that support the capability.
//
// A framework advertising the `TASK_KILLING_STATE` capability launches a
// long-running `sleep` task, kills it once it is running, and expects to
// receive `TASK_KILLING` followed by `TASK_KILLED`.
//
// TODO(hausdorff): Enable test. The executor tests use the replicated log
// by default. This is not currently supported on Windows, so they will all
// fail until that changes.
TEST_P_TEMP_DISABLED_ON_WINDOWS(CommandExecutorTest, TaskKillingCapability)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  Owned<MasterDetector> detector = master.get()->createDetector();

  slave::Flags flags = CreateSlaveFlags();
  flags.http_command_executor = GetParam();

  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), flags);
  ASSERT_SOME(slave);

  // Start the framework with the task killing capability.
  FrameworkInfo::Capability capability;
  capability.set_type(FrameworkInfo::Capability::TASK_KILLING_STATE);

  FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;
  frameworkInfo.add_capabilities()->CopyFrom(capability);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, frameworkInfo, master.get()->pid, DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);

  // This must be a fatal assertion: `offers->front()` is dereferenced below,
  // which is only valid when at least one offer was received.
  ASSERT_EQ(1u, offers->size());

  // Launch a task with the command executor.
  TaskInfo task = createTask(
      offers->front().slave_id(),
      offers->front().resources(),
      "sleep 1000");

  Future<TaskStatus> statusRunning;
  EXPECT_CALL(sched, statusUpdate(_, _))
    .WillOnce(FutureArg<1>(&statusRunning));

  driver.launchTasks(offers->front().id(), {task});

  AWAIT_READY(statusRunning);
  EXPECT_EQ(TASK_RUNNING, statusRunning->state());

  // The expectations for the kill are only installed once the task is
  // running, so the two updates below are the ones that follow the kill.
  Future<TaskStatus> statusKilling, statusKilled;
  EXPECT_CALL(sched, statusUpdate(_, _))
    .WillOnce(FutureArg<1>(&statusKilling))
    .WillOnce(FutureArg<1>(&statusKilled));

  driver.killTask(task.task_id());

  AWAIT_READY(statusKilling);
  EXPECT_EQ(TASK_KILLING, statusKilling->state());

  AWAIT_READY(statusKilled);
  EXPECT_EQ(TASK_KILLED, statusKilled->state());

  driver.stop();
  driver.join();
}
// Tests that the task fails when it attempts to write to a persistent volume // mounted as read-only. Note that although we use a shared persistent volume, // the behavior is the same for non-shared persistent volumes. TEST_F(LinuxFilesystemIsolatorMesosTest, ROOT_WriteAccessSharedPersistentVolumeReadOnlyMode) { Try<Owned<cluster::Master>> master = StartMaster(); ASSERT_SOME(master); string registry = path::join(sandbox.get(), "registry"); AWAIT_READY(DockerArchive::create(registry, "test_image")); slave::Flags flags = CreateSlaveFlags(); flags.resources = "cpus:2;mem:128;disk(role1):128"; flags.isolation = "filesystem/linux,docker/runtime"; flags.docker_registry = registry; flags.docker_store_dir = path::join(sandbox.get(), "store"); flags.image_providers = "docker"; Owned<MasterDetector> detector = master.get()->createDetector(); Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), flags); ASSERT_SOME(slave); MockScheduler sched; FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO; frameworkInfo.set_roles(0, "role1"); frameworkInfo.add_capabilities()->set_type( FrameworkInfo::Capability::SHARED_RESOURCES); MesosSchedulerDriver driver( &sched, frameworkInfo, master.get()->pid, DEFAULT_CREDENTIAL); EXPECT_CALL(sched, registered(&driver, _, _)); Future<vector<Offer>> offers; EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); // Ignore subsequent offers. driver.start(); AWAIT_READY(offers); ASSERT_FALSE(offers->empty()); // We create a shared volume which shall be used by the task to // write to that volume. Resource volume = createPersistentVolume( Megabytes(4), "role1", "id1", "volume_path", None(), None(), frameworkInfo.principal(), true); // Shared volume. // The task uses the shared volume as read-only. 
Resource roVolume = volume; roVolume.mutable_disk()->mutable_volume()->set_mode(Volume::RO); Resources taskResources = Resources::parse("cpus:1;mem:64;disk(role1):1").get() + roVolume; TaskInfo task = createTask( offers.get()[0].slave_id(), taskResources, "echo hello > volume_path/file"); // The task fails to write to the volume since the task's resources // intends to use the volume as read-only. Future<TaskStatus> statusStarting; Future<TaskStatus> statusRunning; Future<TaskStatus> statusFailed; EXPECT_CALL(sched, statusUpdate(&driver, _)) .WillOnce(FutureArg<1>(&statusStarting)) .WillOnce(FutureArg<1>(&statusRunning)) .WillOnce(FutureArg<1>(&statusFailed)); driver.acceptOffers( {offers.get()[0].id()}, {CREATE(volume), LAUNCH({task})}); AWAIT_READY(statusStarting); EXPECT_EQ(task.task_id(), statusStarting->task_id()); EXPECT_EQ(TASK_STARTING, statusStarting->state()); AWAIT_READY(statusRunning); EXPECT_EQ(task.task_id(), statusRunning->task_id()); EXPECT_EQ(TASK_RUNNING, statusRunning->state()); AWAIT_READY(statusFailed); EXPECT_EQ(task.task_id(), statusFailed->task_id()); EXPECT_EQ(TASK_FAILED, statusFailed->state()); driver.stop(); driver.join(); }
int main(int argc, char** argv) { Flags flags; Try<flags::Warnings> load = flags.load("MESOS_EXAMPLE_", argc, argv); if (load.isError()) { std::cerr << flags.usage(load.error()) << std::endl; return EXIT_FAILURE; } if (flags.help) { std::cout << flags.usage() << std::endl; return EXIT_SUCCESS; } mesos::internal::logging::initialize(argv[0], false); // Log any flag warnings (after logging is initialized). foreach (const flags::Warning& warning, load->warnings) { LOG(WARNING) << warning.message; } if (flags.qps <= 0.0) { EXIT(EXIT_FAILURE) << "Flag '--qps' needs to be greater than zero"; } LoadGeneratorScheduler scheduler(flags.qps, flags.duration); FrameworkInfo framework; framework.set_user(""); // Have Mesos fill in the current user. framework.set_principal(flags.principal); framework.set_name(FRAMEWORK_NAME); framework.set_checkpoint(flags.checkpoint); framework.add_roles(flags.role); framework.add_capabilities()->set_type( FrameworkInfo::Capability::RESERVATION_REFINEMENT); framework.set_checkpoint(flags.checkpoint); if (flags.master == "local") { // Configure master. os::setenv("MESOS_ROLES", flags.role); os::setenv("MESOS_AUTHENTICATE_FRAMEWORKS", stringify(flags.authenticate)); ACLs acls; ACL::RegisterFramework* acl = acls.add_register_frameworks(); acl->mutable_principals()->set_type(ACL::Entity::ANY); acl->mutable_roles()->add_values("*"); os::setenv("MESOS_ACLS", stringify(JSON::protobuf(acls))); } MesosSchedulerDriver* driver; if (flags.authenticate) { LOG(INFO) << "Enabling authentication for the framework"; Credential credential; credential.set_principal(flags.principal); if (flags.secret.isSome()) { credential.set_secret(flags.secret.get()); } driver = new MesosSchedulerDriver( &scheduler, framework, flags.master, credential); } else { driver = new MesosSchedulerDriver( &scheduler, framework, flags.master); } int status = driver->run() == DRIVER_STOPPED ? EXIT_SUCCESS : EXIT_FAILURE; // Ensure that the driver process terminates. 
driver->stop(); delete driver; return status; }
int main(int argc, char** argv) { Flags flags; Try<flags::Warnings> load = flags.load("MESOS_", argc, argv); if (load.isError()) { cerr << flags.usage(load.error()) << endl; return EXIT_FAILURE; } if (flags.help) { cout << flags.usage() << endl; return EXIT_SUCCESS; } if (flags.master.isNone()) { cerr << flags.usage("Missing required option --master") << endl; return EXIT_FAILURE; } if (flags.qps.isNone()) { cerr << flags.usage("Missing required option --qps") << endl; return EXIT_FAILURE; } if (flags.qps.get() <= 0) { cerr << flags.usage("--qps needs to be greater than zero") << endl; return EXIT_FAILURE; } // We want the logger to catch failure signals. mesos::internal::logging::initialize(argv[0], flags, true); // Log any flag warnings (after logging is initialized). foreach (const flags::Warning& warning, load->warnings) { LOG(WARNING) << warning.message; } LoadGeneratorScheduler scheduler(flags.qps.get(), flags.duration); FrameworkInfo framework; framework.set_user(""); // Have Mesos fill in the current user. 
framework.set_name(FRAMEWORK_NAME); framework.add_capabilities()->set_type( FrameworkInfo::Capability::RESERVATION_REFINEMENT); const Option<string> checkpoint = os::getenv("MESOS_CHECKPOINT"); if (checkpoint.isSome()) { framework.set_checkpoint( numify<bool>(checkpoint.get()).get()); } MesosSchedulerDriver* driver; if (flags.authenticate) { cout << "Enabling authentication for the framework" << endl; if (flags.secret.isNone()) { cerr << "Expecting --secret when --authenticate is set" << endl; return EXIT_FAILURE; } string secret = flags.secret.get(); Credential credential; credential.set_principal(flags.principal); credential.set_secret(strings::trim(secret)); framework.set_principal(flags.principal); driver = new MesosSchedulerDriver( &scheduler, framework, flags.master.get(), credential); } else { framework.set_principal(flags.principal); driver = new MesosSchedulerDriver( &scheduler, framework, flags.master.get()); } int status = driver->run() == DRIVER_STOPPED ? EXIT_SUCCESS : EXIT_FAILURE; // Ensure that the driver process terminates. driver->stop(); delete driver; return status; }