int main(int argc, char** argv) { if (argc != 2) { cerr << "Usage: " << argv[0] << " <master>" << endl; return -1; } DockerNoExecutorScheduler scheduler; FrameworkInfo framework; framework.set_user(""); // Have Mesos fill in the current user. framework.set_name("Docker No Executor Framework (C++)"); framework.set_checkpoint(true); MesosSchedulerDriver* driver; if (os::getenv("MESOS_AUTHENTICATE_FRAMEWORKS").isSome()) { cout << "Enabling authentication for the framework" << endl; Option<string> value = os::getenv("DEFAULT_PRINCIPAL"); if (value.isNone()) { EXIT(EXIT_FAILURE) << "Expecting authentication principal in the environment"; } Credential credential; credential.set_principal(value.get()); framework.set_principal(value.get()); value = os::getenv("DEFAULT_SECRET"); if (value.isNone()) { EXIT(EXIT_FAILURE) << "Expecting authentication secret in the environment"; } credential.set_secret(value.get()); driver = new MesosSchedulerDriver( &scheduler, framework, argv[1], credential); } else { framework.set_principal("no-executor-framework-cpp"); driver = new MesosSchedulerDriver( &scheduler, framework, argv[1]); } int status = driver->run() == DRIVER_STOPPED ? 0 : 1; // Ensure that the driver process terminates. driver->stop(); delete driver; return status; }
int main(int argc, char** argv) { if (argc != 2) { cerr << "Usage: " << argv[0] << " <master>" << endl; return -1; } NoExecutorScheduler scheduler; FrameworkInfo framework; framework.set_user(""); // Have Mesos fill in the current user. framework.set_name("No Executor Framework (C++)"); // TODO(vinod): Make checkpointing the default when it is default // on the slave. if (os::hasenv("MESOS_CHECKPOINT")) { cout << "Enabling checkpoint for the framework" << endl; framework.set_checkpoint(true); } MesosSchedulerDriver* driver; if (os::hasenv("MESOS_AUTHENTICATE")) { cout << "Enabling authentication for the framework" << endl; if (!os::hasenv("DEFAULT_PRINCIPAL")) { EXIT(1) << "Expecting authentication principal in the environment"; } if (!os::hasenv("DEFAULT_SECRET")) { EXIT(1) << "Expecting authentication secret in the environment"; } Credential credential; credential.set_principal(getenv("DEFAULT_PRINCIPAL")); credential.set_secret(getenv("DEFAULT_SECRET")); driver = new MesosSchedulerDriver( &scheduler, framework, argv[1], credential); } else { driver = new MesosSchedulerDriver( &scheduler, framework, argv[1]); } int status = driver->run() == DRIVER_STOPPED ? 0 : 1; // Ensure that the driver process terminates. driver->stop(); delete driver; return status; }
// This test verifies that status update manager ignores
// unexpected ACK for an earlier update when it is waiting
// for an ACK for another update. We do this by dropping ACKs
// for the original update and sending a random ACK to the slave.
TEST_F(StatusUpdateManagerTest, IgnoreUnexpectedStatusUpdateAck)
{
  Try<PID<Master> > master = StartMaster();
  ASSERT_SOME(master);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);

  // Checkpointing must be enabled on the slave as well as the
  // framework for the status update manager to checkpoint updates.
  slave::Flags flags = CreateSlaveFlags();
  flags.checkpoint = true;

  Try<PID<Slave> > slave = StartSlave(&exec, flags);
  ASSERT_SOME(slave);

  FrameworkInfo frameworkInfo; // Bug in gcc 4.1.*, must assign on next line.
  frameworkInfo = DEFAULT_FRAMEWORK_INFO;
  frameworkInfo.set_checkpoint(true); // Enable checkpointing.

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, frameworkInfo, master.get(), DEFAULT_CREDENTIAL);

  // Capture the framework id so we can address the slave's
  // acknowledgement endpoint directly later.
  FrameworkID frameworkId;
  EXPECT_CALL(sched, registered(_, _, _))
    .WillOnce(SaveArg<1>(&frameworkId));

  Future<vector<Offer> > offers;
  EXPECT_CALL(sched, resourceOffers(_, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  Future<TaskStatus> status;
  EXPECT_CALL(sched, statusUpdate(_, _))
    .WillOnce(FutureArg<1>(&status));

  driver.start();

  AWAIT_READY(offers);
  EXPECT_NE(0u, offers.get().size());

  // Capture the executor driver so the executor's registration is
  // observed before tasks are launched.
  ExecutorDriver* execDriver;
  EXPECT_CALL(exec, registered(_, _, _, _))
    .WillOnce(SaveArg<0>(&execDriver));

  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING));

  // Intercept the update forwarded to the master so we can extract
  // its task/slave ids below.
  Future<StatusUpdateMessage> statusUpdateMessage =
    FUTURE_PROTOBUF(StatusUpdateMessage(), master.get(), _);

  // Drop the ACKs, so that status update manager
  // retries the update.
  DROP_PROTOBUFS(StatusUpdateAcknowledgementMessage(), _, _);

  driver.launchTasks(offers.get()[0].id(), createTasks(offers.get()[0]));

  AWAIT_READY(statusUpdateMessage);
  StatusUpdate update = statusUpdateMessage.get().update();

  AWAIT_READY(status);
  EXPECT_EQ(TASK_RUNNING, status.get().state());

  // Wait for the slave to process the (bogus) acknowledgement we are
  // about to inject.
  Future<Nothing> unexpectedAck =
    FUTURE_DISPATCH(_, &Slave::_statusUpdateAcknowledgement);

  // Now send an ACK with a random UUID.
  process::dispatch(
      slave.get(),
      &Slave::statusUpdateAcknowledgement,
      update.slave_id(),
      frameworkId,
      update.status().task_id(),
      UUID::random().toBytes());

  AWAIT_READY(unexpectedAck);

  EXPECT_CALL(exec, shutdown(_))
    .Times(AtMost(1));

  driver.stop();
  driver.join();

  Shutdown();
}
// This test verifies that status update manager ignores
// duplicate ACK for an earlier update when it is waiting
// for an ACK for a later update. This could happen when the
// duplicate ACK is for a retried update.
TEST_F(StatusUpdateManagerTest, IgnoreDuplicateStatusUpdateAck)
{
  Try<PID<Master> > master = StartMaster();
  ASSERT_SOME(master);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);

  // Checkpointing must be enabled on the slave as well as the
  // framework for the status update manager to checkpoint updates.
  slave::Flags flags = CreateSlaveFlags();
  flags.checkpoint = true;

  Try<PID<Slave> > slave = StartSlave(&exec, flags);
  ASSERT_SOME(slave);

  FrameworkInfo frameworkInfo; // Bug in gcc 4.1.*, must assign on next line.
  frameworkInfo = DEFAULT_FRAMEWORK_INFO;
  frameworkInfo.set_checkpoint(true); // Enable checkpointing.

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, frameworkInfo, master.get(), DEFAULT_CREDENTIAL);

  // Capture the framework id so we can address the slave's
  // acknowledgement endpoint directly later.
  FrameworkID frameworkId;
  EXPECT_CALL(sched, registered(_, _, _))
    .WillOnce(SaveArg<1>(&frameworkId));

  Future<vector<Offer> > offers;
  EXPECT_CALL(sched, resourceOffers(_, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  EXPECT_NE(0u, offers.get().size());

  // Capture the executor driver so we can inject a second status
  // update from the executor later in the test.
  ExecutorDriver* execDriver;
  EXPECT_CALL(exec, registered(_, _, _, _))
    .WillOnce(SaveArg<0>(&execDriver));

  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING));

  // Drop the first update, so that status update manager
  // resends the update.
  Future<StatusUpdateMessage> statusUpdateMessage =
    DROP_PROTOBUF(StatusUpdateMessage(), master.get(), _);

  // Pause the clock so the retry only fires when we advance it below.
  Clock::pause();

  driver.launchTasks(offers.get()[0].id(), createTasks(offers.get()[0]));

  AWAIT_READY(statusUpdateMessage);
  StatusUpdate update = statusUpdateMessage.get().update();

  Future<TaskStatus> status;
  EXPECT_CALL(sched, statusUpdate(_, _))
    .WillOnce(FutureArg<1>(&status));

  // This is the ACK for the retried update.
  Future<Nothing> ack =
    FUTURE_DISPATCH(_, &Slave::_statusUpdateAcknowledgement);

  // Trigger the retry of the dropped TASK_RUNNING update.
  Clock::advance(slave::STATUS_UPDATE_RETRY_INTERVAL);

  AWAIT_READY(status);
  EXPECT_EQ(TASK_RUNNING, status.get().state());

  AWAIT_READY(ack);

  // Now send TASK_FINISHED update so that the status update manager
  // is waiting for its ACK, which it never gets because we drop the
  // update.
  DROP_PROTOBUFS(StatusUpdateMessage(), master.get(), _);

  Future<Nothing> update2 = FUTURE_DISPATCH(_, &Slave::_statusUpdate);

  TaskStatus status2 = status.get();
  status2.set_state(TASK_FINISHED);

  execDriver->sendStatusUpdate(status2);

  AWAIT_READY(update2);

  // This is to catch the duplicate ack for TASK_RUNNING.
  Future<Nothing> duplicateAck =
    FUTURE_DISPATCH(_, &Slave::_statusUpdateAcknowledgement);

  // Now send a duplicate ACK for the TASK_RUNNING update.
  process::dispatch(
      slave.get(),
      &Slave::statusUpdateAcknowledgement,
      update.slave_id(),
      frameworkId,
      update.status().task_id(),
      update.uuid());

  AWAIT_READY(duplicateAck);

  Clock::resume();

  EXPECT_CALL(exec, shutdown(_))
    .Times(AtMost(1));

  driver.stop();
  driver.join();

  Shutdown();
}
// This test verifies that the status update manager retries a status
// update when the initial update is dropped: the first TASK_RUNNING
// message to the master is dropped, the clock is advanced by the
// retry interval, and the scheduler must still receive TASK_RUNNING.
TEST_F(StatusUpdateManagerTest, RetryStatusUpdate)
{
  Try<PID<Master> > master = StartMaster();
  ASSERT_SOME(master);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);

  // Checkpointing must be enabled on the slave as well as the
  // framework for the status update manager to checkpoint updates.
  slave::Flags flags = CreateSlaveFlags();
  flags.checkpoint = true;

  Try<PID<Slave> > slave = StartSlave(&exec, flags);
  ASSERT_SOME(slave);

  FrameworkInfo frameworkInfo; // Bug in gcc 4.1.*, must assign on next line.
  frameworkInfo = DEFAULT_FRAMEWORK_INFO;
  frameworkInfo.set_checkpoint(true); // Enable checkpointing.

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, frameworkInfo, master.get(), DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(_, _, _))
    .Times(1);

  Future<vector<Offer> > offers;
  EXPECT_CALL(sched, resourceOffers(_, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  EXPECT_NE(0u, offers.get().size());

  EXPECT_CALL(exec, registered(_, _, _, _))
    .Times(1);

  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING));

  // Drop the first TASK_RUNNING update on its way to the master.
  Future<StatusUpdateMessage> statusUpdateMessage =
    DROP_PROTOBUF(StatusUpdateMessage(), master.get(), _);

  // Pause the clock so the retry only fires when we advance it below.
  Clock::pause();

  driver.launchTasks(offers.get()[0].id(), createTasks(offers.get()[0]));

  AWAIT_READY(statusUpdateMessage);

  Future<TaskStatus> status;
  EXPECT_CALL(sched, statusUpdate(_, _))
    .WillOnce(FutureArg<1>(&status));

  // Trigger the retry of the dropped update.
  Clock::advance(slave::STATUS_UPDATE_RETRY_INTERVAL);

  AWAIT_READY(status);
  EXPECT_EQ(TASK_RUNNING, status.get().state());

  Clock::resume();

  EXPECT_CALL(exec, shutdown(_))
    .Times(AtMost(1));

  driver.stop();
  driver.join();

  Shutdown();
}
// This test verifies that if master receives a status update
// for an already terminated task it forwards it without
// changing the state of the task.
TEST_F(StatusUpdateManagerTest, DuplicatedTerminalStatusUpdate)
{
  Try<PID<Master> > master = StartMaster();
  ASSERT_SOME(master);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);

  Try<PID<Slave>> slave = StartSlave(&exec);
  ASSERT_SOME(slave);

  FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;
  frameworkInfo.set_checkpoint(true); // Enable checkpointing.

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, frameworkInfo, master.get(), DEFAULT_CREDENTIAL);

  FrameworkID frameworkId;
  EXPECT_CALL(sched, registered(_, _, _))
    .WillOnce(SaveArg<1>(&frameworkId));

  Future<vector<Offer> > offers;
  EXPECT_CALL(sched, resourceOffers(_, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  EXPECT_NE(0u, offers.get().size());

  // Capture the executor driver so we can inject a second (duplicate
  // terminal) status update from the executor later in the test.
  ExecutorDriver* execDriver;
  EXPECT_CALL(exec, registered(_, _, _, _))
    .WillOnce(SaveArg<0>(&execDriver));

  // Send a terminal update right away.
  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_FINISHED));

  Future<TaskStatus> status;
  EXPECT_CALL(sched, statusUpdate(_, _))
    .WillOnce(FutureArg<1>(&status));

  Future<Nothing> _statusUpdateAcknowledgement =
    FUTURE_DISPATCH(slave.get(), &Slave::_statusUpdateAcknowledgement);

  driver.launchTasks(offers.get()[0].id(), createTasks(offers.get()[0]));

  AWAIT_READY(status);
  EXPECT_EQ(TASK_FINISHED, status.get().state());

  AWAIT_READY(_statusUpdateAcknowledgement);

  // Expect a second update delivery for the duplicate terminal update.
  Future<TaskStatus> update;
  EXPECT_CALL(sched, statusUpdate(_, _))
    .WillOnce(FutureArg<1>(&update));

  Future<Nothing> _statusUpdateAcknowledgement2 =
    FUTURE_DISPATCH(slave.get(), &Slave::_statusUpdateAcknowledgement);

  Clock::pause();

  // Now send a TASK_KILLED update for the same task.
  TaskStatus status2 = status.get();
  status2.set_state(TASK_KILLED);
  execDriver->sendStatusUpdate(status2);

  // Ensure the scheduler receives TASK_KILLED.
  AWAIT_READY(update);
  EXPECT_EQ(TASK_KILLED, update.get().state());

  // Ensure the slave properly handles the ACK.
  // Clock::settle() ensures that the slave successfully
  // executes Slave::_statusUpdateAcknowledgement().
  AWAIT_READY(_statusUpdateAcknowledgement2);

  // Verify the latest task status via the master's "tasks" endpoint:
  // the task must still be reported as TASK_FINISHED, i.e. the
  // duplicate terminal update did not change the master's task state.
  Future<process::http::Response> tasks =
    process::http::get(master.get(), "tasks");

  AWAIT_EXPECT_RESPONSE_STATUS_EQ(process::http::OK().status, tasks);
  AWAIT_EXPECT_RESPONSE_HEADER_EQ(APPLICATION_JSON, "Content-Type", tasks);

  Try<JSON::Object> parse = JSON::parse<JSON::Object>(tasks.get().body);
  ASSERT_SOME(parse);

  Result<JSON::String> state =
    parse.get().find<JSON::String>("tasks[0].state");

  ASSERT_SOME_EQ(JSON::String("TASK_FINISHED"), state);

  Clock::resume();

  EXPECT_CALL(exec, shutdown(_))
    .Times(AtMost(1));

  driver.stop();
  driver.join();

  Shutdown();
}
// Test that memory pressure listening is restarted after recovery.
TEST_F(MemoryPressureMesosTest, CGROUPS_ROOT_SlaveRecovery)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  slave::Flags flags = CreateSlaveFlags();

  // We only care about memory cgroup for this test.
  flags.isolation = "cgroups/mem";
  flags.agent_subsystems = None();

  Fetcher fetcher;

  Try<MesosContainerizer*> _containerizer =
    MesosContainerizer::create(flags, true, &fetcher);
  ASSERT_SOME(_containerizer);

  Owned<MesosContainerizer> containerizer(_containerizer.get());

  Owned<MasterDetector> detector = master.get()->createDetector();

  Try<Owned<cluster::Slave>> slave =
    StartSlave(detector.get(), containerizer.get(), flags);
  ASSERT_SOME(slave);

  MockScheduler sched;

  // Enable checkpointing for the framework.
  FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;
  frameworkInfo.set_checkpoint(true);

  MesosSchedulerDriver driver(
      &sched, frameworkInfo, master.get()->pid, DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(_, _, _));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(_, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  EXPECT_NE(0u, offers.get().size());

  Offer offer = offers.get()[0];

  // Run a task that triggers memory pressure event. We request 1G
  // disk because we are going to write a 512 MB file repeatedly.
  TaskInfo task = createTask(
      offer.slave_id(),
      Resources::parse("cpus:1;mem:256;disk:1024").get(),
      "while true; do dd count=512 bs=1M if=/dev/zero of=./temp; done");

  Future<TaskStatus> running;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&running));

  Future<Nothing> _statusUpdateAcknowledgement =
    FUTURE_DISPATCH(_, &Slave::_statusUpdateAcknowledgement);

  driver.launchTasks(offers.get()[0].id(), {task});

  AWAIT_READY(running);
  EXPECT_EQ(task.task_id(), running.get().task_id());
  EXPECT_EQ(TASK_RUNNING, running.get().state());

  // Wait for the ACK to be checkpointed.
  AWAIT_READY_FOR(_statusUpdateAcknowledgement, Seconds(120));

  // We restart the slave to let it recover.
  slave.get()->terminate();

  // Set up so we can wait until the new slave updates the container's
  // resources (this occurs after the executor has re-registered).
  Future<Nothing> update =
    FUTURE_DISPATCH(_, &MesosContainerizerProcess::update);

  // Use the same flags.
  _containerizer = MesosContainerizer::create(flags, true, &fetcher);
  ASSERT_SOME(_containerizer);

  containerizer.reset(_containerizer.get());

  Future<SlaveReregisteredMessage> reregistered =
    FUTURE_PROTOBUF(SlaveReregisteredMessage(), master.get()->pid, _);

  slave = StartSlave(detector.get(), containerizer.get(), flags);
  ASSERT_SOME(slave);

  AWAIT_READY(reregistered);

  // Wait until the containerizer is updated.
  AWAIT_READY(update);

  // Exactly one container (the recovered task) should be running.
  Future<hashset<ContainerID>> containers = containerizer->containers();
  AWAIT_READY(containers);
  ASSERT_EQ(1u, containers.get().size());

  ContainerID containerId = *(containers.get().begin());

  // Wait a while for some memory pressure events to occur.
  Duration waited = Duration::zero();
  do {
    Future<ResourceStatistics> usage = containerizer->usage(containerId);
    AWAIT_READY(usage);

    if (usage.get().mem_low_pressure_counter() > 0) {
      // We will check the correctness of the memory pressure counters
      // later, because the memory-hammering task is still active
      // and potentially incrementing these counters.
      break;
    }

    os::sleep(Milliseconds(100));
    waited += Milliseconds(100);
  } while (waited < Seconds(5));

  EXPECT_LE(waited, Seconds(5));

  // Pause the clock to ensure that the reaper doesn't reap the exited
  // command executor and inform the containerizer/slave.
  Clock::pause();
  Clock::settle();

  Future<TaskStatus> killed;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&killed));

  // Stop the memory-hammering task.
  driver.killTask(task.task_id());

  AWAIT_READY_FOR(killed, Seconds(120));
  EXPECT_EQ(task.task_id(), killed->task_id());
  EXPECT_EQ(TASK_KILLED, killed->state());

  // Now check the correctness of the memory pressure counters:
  // low-pressure events must be at least as frequent as medium,
  // and medium at least as frequent as critical.
  Future<ResourceStatistics> usage = containerizer->usage(containerId);
  AWAIT_READY(usage);

  EXPECT_GE(usage.get().mem_low_pressure_counter(),
            usage.get().mem_medium_pressure_counter());
  EXPECT_GE(usage.get().mem_medium_pressure_counter(),
            usage.get().mem_critical_pressure_counter());

  Clock::resume();

  driver.stop();
  driver.join();
}
int main(int argc, char** argv) { if (argc != 2) { cerr << "Usage: " << argv[0] << " <master>" << endl; return -1; } // Find this executable's directory to locate executor. string path = os::realpath(dirname(argv[0])).get(); string uri = path + "/long-lived-executor"; if (getenv("MESOS_BUILD_DIR")) { uri = string(getenv("MESOS_BUILD_DIR")) + "/src/long-lived-executor"; } ExecutorInfo executor; executor.mutable_executor_id()->set_value("default"); executor.mutable_command()->set_value(uri); executor.set_name("Long Lived Executor (C++)"); executor.set_source("cpp_long_lived_framework"); LongLivedScheduler scheduler(executor); FrameworkInfo framework; framework.set_user(""); // Have Mesos fill in the current user. framework.set_name("Long Lived Framework (C++)"); // TODO(vinod): Make checkpointing the default when it is default // on the slave. if (os::hasenv("MESOS_CHECKPOINT")) { cout << "Enabling checkpoint for the framework" << endl; framework.set_checkpoint(true); } MesosSchedulerDriver* driver; if (os::hasenv("MESOS_AUTHENTICATE")) { cout << "Enabling authentication for the framework" << endl; if (!os::hasenv("DEFAULT_PRINCIPAL")) { EXIT(1) << "Expecting authentication principal in the environment"; } if (!os::hasenv("DEFAULT_SECRET")) { EXIT(1) << "Expecting authentication secret in the environment"; } Credential credential; credential.set_principal(getenv("DEFAULT_PRINCIPAL")); credential.set_secret(getenv("DEFAULT_SECRET")); framework.set_principal(getenv("DEFAULT_PRINCIPAL")); driver = new MesosSchedulerDriver( &scheduler, framework, argv[1], credential); } else { framework.set_principal("long-lived-framework-cpp"); driver = new MesosSchedulerDriver( &scheduler, framework, argv[1]); } int status = driver->run() == DRIVER_STOPPED ? 0 : 1; // Ensure that the driver process terminates. driver->stop(); delete driver; return status; }
// This test verifies that the status update manager checkpoints both
// a status update and its acknowledgement: after a TASK_RUNNING
// update is ACKed, the task updates file must contain exactly one
// UPDATE record and one matching ACK record.
TEST_F(StatusUpdateManagerTest, CheckpointStatusUpdate)
{
  Try<PID<Master> > master = StartMaster();
  ASSERT_SOME(master);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);

  // Checkpointing must be enabled on the slave as well as the
  // framework for the status update manager to checkpoint updates.
  slave::Flags flags = CreateSlaveFlags();
  flags.checkpoint = true;

  Try<PID<Slave> > slave = StartSlave(&exec, flags);
  ASSERT_SOME(slave);

  FrameworkInfo frameworkInfo; // Bug in gcc 4.1.*, must assign on next line.
  frameworkInfo = DEFAULT_FRAMEWORK_INFO;
  frameworkInfo.set_checkpoint(true); // Enable checkpointing.

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, frameworkInfo, master.get(), DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(_, _, _))
    .Times(1);

  Future<vector<Offer> > offers;
  EXPECT_CALL(sched, resourceOffers(_, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  EXPECT_NE(0u, offers.get().size());

  EXPECT_CALL(exec, registered(_, _, _, _))
    .Times(1);

  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING));

  Future<TaskStatus> status;
  EXPECT_CALL(sched, statusUpdate(_, _))
    .WillOnce(FutureArg<1>(&status));

  Future<Nothing> _statusUpdateAcknowledgement =
    FUTURE_DISPATCH(slave.get(), &Slave::_statusUpdateAcknowledgement);

  driver.launchTasks(offers.get()[0].id(), createTasks(offers.get()[0]));

  AWAIT_READY(status);
  EXPECT_EQ(TASK_RUNNING, status.get().state());

  // Wait until the slave has processed the ACK so the record is
  // on disk before we read the file.
  AWAIT_READY(_statusUpdateAcknowledgement);

  // Ensure that both the status update and its acknowledgement are
  // correctly checkpointed.
  Try<list<string> > found = os::find(flags.work_dir, TASK_UPDATES_FILE);
  ASSERT_SOME(found);
  ASSERT_EQ(1u, found.get().size());

  Try<int> fd = os::open(found.get().front(), O_RDONLY);
  ASSERT_SOME(fd);

  int updates = 0;
  int acks = 0;
  string uuid;
  Result<StatusUpdateRecord> record = None();
  while (true) {
    record = ::protobuf::read<StatusUpdateRecord>(fd.get());
    ASSERT_FALSE(record.isError());
    if (record.isNone()) { // Reached EOF.
      break;
    }

    if (record.get().type() == StatusUpdateRecord::UPDATE) {
      EXPECT_EQ(TASK_RUNNING, record.get().update().status().state());
      uuid = record.get().update().uuid();
      updates++;
    } else {
      // ACK records must carry the UUID of the update read above.
      EXPECT_EQ(uuid, record.get().uuid());
      acks++;
    }
  }

  // Exactly one update and one acknowledgement must be on disk.
  ASSERT_EQ(1, updates);
  ASSERT_EQ(1, acks);

  close(fd.get());

  EXPECT_CALL(exec, shutdown(_))
    .Times(AtMost(1));

  driver.stop();
  driver.join();

  Shutdown();
}
int main(int argc, char** argv) { Flags flags; Try<flags::Warnings> load = flags.load("MESOS_EXAMPLE_", argc, argv); if (load.isError()) { std::cerr << flags.usage(load.error()) << std::endl; return EXIT_FAILURE; } if (flags.help) { std::cout << flags.usage() << std::endl; return EXIT_SUCCESS; } mesos::internal::logging::initialize(argv[0], false); // Log any flag warnings (after logging is initialized). foreach (const flags::Warning& warning, load->warnings) { LOG(WARNING) << warning.message; } if (flags.qps <= 0.0) { EXIT(EXIT_FAILURE) << "Flag '--qps' needs to be greater than zero"; } LoadGeneratorScheduler scheduler(flags.qps, flags.duration); FrameworkInfo framework; framework.set_user(""); // Have Mesos fill in the current user. framework.set_principal(flags.principal); framework.set_name(FRAMEWORK_NAME); framework.set_checkpoint(flags.checkpoint); framework.add_roles(flags.role); framework.add_capabilities()->set_type( FrameworkInfo::Capability::RESERVATION_REFINEMENT); framework.set_checkpoint(flags.checkpoint); if (flags.master == "local") { // Configure master. os::setenv("MESOS_ROLES", flags.role); os::setenv("MESOS_AUTHENTICATE_FRAMEWORKS", stringify(flags.authenticate)); ACLs acls; ACL::RegisterFramework* acl = acls.add_register_frameworks(); acl->mutable_principals()->set_type(ACL::Entity::ANY); acl->mutable_roles()->add_values("*"); os::setenv("MESOS_ACLS", stringify(JSON::protobuf(acls))); } MesosSchedulerDriver* driver; if (flags.authenticate) { LOG(INFO) << "Enabling authentication for the framework"; Credential credential; credential.set_principal(flags.principal); if (flags.secret.isSome()) { credential.set_secret(flags.secret.get()); } driver = new MesosSchedulerDriver( &scheduler, framework, flags.master, credential); } else { driver = new MesosSchedulerDriver( &scheduler, framework, flags.master); } int status = driver->run() == DRIVER_STOPPED ? EXIT_SUCCESS : EXIT_FAILURE; // Ensure that the driver process terminates. 
driver->stop(); delete driver; return status; }
int main(int argc, char** argv) { if (argc != 2) { cerr << "Usage: " << argv[0] << " <master>" << endl; return -1; } // Find this executable's directory to locate executor. string uri; Option<string> value = os::getenv("MESOS_BUILD_DIR"); if (value.isSome()) { uri = path::join(value.get(), "src", "long-lived-executor"); } else { uri = path::join( os::realpath(Path(argv[0]).dirname()).get(), "long-lived-executor"); } ExecutorInfo executor; executor.mutable_executor_id()->set_value("default"); executor.mutable_command()->set_value(uri); executor.set_name("Long Lived Executor (C++)"); executor.set_source("cpp_long_lived_framework"); LongLivedScheduler scheduler(executor); FrameworkInfo framework; framework.set_user(""); // Have Mesos fill in the current user. framework.set_name("Long Lived Framework (C++)"); value = os::getenv("MESOS_CHECKPOINT"); if (value.isSome()) { framework.set_checkpoint( numify<bool>(value.get()).get()); } MesosSchedulerDriver* driver; if (os::getenv("MESOS_AUTHENTICATE").isSome()) { cout << "Enabling authentication for the framework" << endl; value = os::getenv("DEFAULT_PRINCIPAL"); if (value.isNone()) { EXIT(1) << "Expecting authentication principal in the environment"; } Credential credential; credential.set_principal(value.get()); framework.set_principal(value.get()); value = os::getenv("DEFAULT_SECRET"); if (value.isNone()) { EXIT(1) << "Expecting authentication secret in the environment"; } credential.set_secret(value.get()); driver = new MesosSchedulerDriver( &scheduler, framework, argv[1], credential); } else { framework.set_principal("long-lived-framework-cpp"); driver = new MesosSchedulerDriver( &scheduler, framework, argv[1]); } int status = driver->run() == DRIVER_STOPPED ? 0 : 1; // Ensure that the driver process terminates. driver->stop(); delete driver; return status; }
// This test verifies that persistent volumes are unmounted properly
// after a checkpointed framework disappears and the slave restarts.
//
// TODO(jieyu): Even though the command task specifies a new
// filesystem root, the executor (command executor) itself does not
// change filesystem root (uses the host filesystem). We need to add a
// test to test the scenario that the executor itself changes rootfs.
TEST_F(LinuxFilesystemIsolatorMesosTest,
       ROOT_RecoverOrphanedPersistentVolume)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  // Build a local Docker registry holding the test image.
  string registry = path::join(sandbox.get(), "registry");
  AWAIT_READY(DockerArchive::create(registry, "test_image"));

  slave::Flags flags = CreateSlaveFlags();
  flags.resources = "cpus:2;mem:1024;disk(role1):1024";
  flags.isolation = "filesystem/linux,docker/runtime";
  flags.docker_registry = registry;
  flags.docker_store_dir = path::join(sandbox.get(), "store");
  flags.image_providers = "docker";

  Fetcher fetcher(flags);

  Try<MesosContainerizer*> create =
    MesosContainerizer::create(flags, true, &fetcher);
  ASSERT_SOME(create);

  Owned<Containerizer> containerizer(create.get());

  Owned<MasterDetector> detector = master.get()->createDetector();

  Try<Owned<cluster::Slave>> slave = StartSlave(
      detector.get(),
      containerizer.get(),
      flags);
  ASSERT_SOME(slave);

  MockScheduler sched;

  // The framework must be checkpointed for the orphan scenario below.
  FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;
  frameworkInfo.set_roles(0, "role1");
  frameworkInfo.set_checkpoint(true);

  MesosSchedulerDriver driver(
      &sched, frameworkInfo, master.get()->pid, DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  ASSERT_FALSE(offers->empty());

  Offer offer = offers.get()[0];

  // Host directory to be mounted into the container as /tmp.
  string dir1 = path::join(sandbox.get(), "dir1");
  ASSERT_SOME(os::mkdir(dir1));

  Resource persistentVolume = createPersistentVolume(
      Megabytes(64),
      "role1",
      "id1",
      "path1",
      None(),
      None(),
      frameworkInfo.principal());

  // Create a task that does nothing for a long time.
  TaskInfo task = createTask(
      offer.slave_id(),
      Resources::parse("cpus:1;mem:512").get() + persistentVolume,
      "sleep 1000");

  task.mutable_container()->CopyFrom(createContainerInfo(
      "test_image",
      {createVolumeHostPath("/tmp", dir1, Volume::RW)}));

  Future<TaskStatus> statusStarting;
  Future<TaskStatus> statusRunning;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&statusStarting))
    .WillOnce(FutureArg<1>(&statusRunning))
    .WillRepeatedly(DoDefault());

  Future<Nothing> ack =
    FUTURE_DISPATCH(_, &Slave::_statusUpdateAcknowledgement);

  // Create the persistent volumes and launch task via `acceptOffers`.
  driver.acceptOffers(
      {offer.id()},
      {CREATE(persistentVolume), LAUNCH({task})});

  AWAIT_READY(statusStarting);
  EXPECT_EQ(TASK_STARTING, statusStarting->state());

  AWAIT_READY(statusRunning);
  EXPECT_EQ(TASK_RUNNING, statusRunning->state());

  // Wait for the ACK to be checkpointed.
  AWAIT_READY(ack);

  // Remember the container so we can wait for its cleanup below.
  Future<hashset<ContainerID>> containers = containerizer->containers();

  AWAIT_READY(containers);
  ASSERT_EQ(1u, containers->size());

  ContainerID containerId = *containers->begin();

  // Restart the slave.
  slave.get()->terminate();

  // Wipe the slave meta directory so that the slave will treat the
  // above running task as an orphan.
  ASSERT_SOME(os::rmdir(slave::paths::getMetaRootDir(flags.work_dir)));

  Future<Nothing> _recover = FUTURE_DISPATCH(_, &Slave::_recover);

  // Recreate the containerizer using the same helper as above.
  containerizer.reset();

  create = MesosContainerizer::create(flags, true, &fetcher);
  ASSERT_SOME(create);

  containerizer.reset(create.get());

  slave = StartSlave(detector.get(), containerizer.get(), flags);
  ASSERT_SOME(slave);

  // Wait until slave recovery is complete.
  AWAIT_READY(_recover);

  // Wait until the orphan containers are cleaned up.
  AWAIT_READY(containerizer->wait(containerId));

  Try<fs::MountInfoTable> table = fs::MountInfoTable::read();
  ASSERT_SOME(table);

  // All mount targets should be under this directory.
  string directory = slave::paths::getSandboxRootDir(flags.work_dir);

  // Verify that the orphaned container's persistent volume and
  // the rootfs are unmounted.
  foreach (const fs::MountInfoTable::Entry& entry, table->entries) {
    EXPECT_FALSE(strings::contains(entry.target, directory))
      << "Target was not unmounted: " << entry.target;
  }

  driver.stop();
  driver.join();
}
int main(int argc, char** argv) { Flags flags; Try<Nothing> load = flags.load("MESOS_", argc, argv); if (load.isError()) { cerr << flags.usage(load.error()) << endl; return EXIT_FAILURE; } if (flags.help) { cout << flags.usage() << endl; return EXIT_SUCCESS; } if (flags.master.isNone()) { cerr << flags.usage( "Missing required option --master") << endl; return EXIT_FAILURE; } if (flags.qps.isNone()) { cerr << flags.usage("Missing required option --qps") << endl; return EXIT_FAILURE; } if (flags.qps.get() <= 0) { cerr << flags.usage("--qps needs to be greater than zero") << endl; return EXIT_FAILURE; } // We want the logger to catch failure signals. mesos::internal::logging::initialize(argv[0], flags, true); LoadGeneratorScheduler scheduler(flags.qps.get(), flags.duration); FrameworkInfo framework; framework.set_user(""); // Have Mesos fill in the current user. framework.set_name("Load Generator Framework (C++)"); const Option<string> checkpoint = os::getenv("MESOS_CHECKPOINT"); if (checkpoint.isSome()) { framework.set_checkpoint( numify<bool>(checkpoint.get()).get()); } MesosSchedulerDriver* driver; if (flags.authenticate) { cout << "Enabling authentication for the framework" << endl; if (flags.secret.isNone()) { cerr << "Expecting --secret when --authenticate is set" << endl; return EXIT_FAILURE; } string secret = flags.secret.get(); Credential credential; credential.set_principal(flags.principal); credential.set_secret(strings::trim(secret)); framework.set_principal(flags.principal); driver = new MesosSchedulerDriver( &scheduler, framework, flags.master.get(), credential); } else { framework.set_principal(flags.principal); driver = new MesosSchedulerDriver( &scheduler, framework, flags.master.get()); } int status = driver->run() == DRIVER_STOPPED ? EXIT_SUCCESS : EXIT_SUCCESS; // Ensure that the driver process terminates. driver->stop(); delete driver; return status; }
int main(int argc, char** argv) { // Find this executable's directory to locate executor. string path = os::realpath(dirname(argv[0])).get(); string uri = path + "/test-executor"; if (getenv("MESOS_BUILD_DIR")) { uri = string(getenv("MESOS_BUILD_DIR")) + "/src/test-executor"; } mesos::internal::logging::Flags flags; string role; flags.add(&role, "role", "Role to use when registering", "*"); Option<string> master; flags.add(&master, "master", "ip:port of master to connect"); Try<Nothing> load = flags.load(None(), argc, argv); if (load.isError()) { cerr << load.error() << endl; usage(argv[0], flags); exit(1); } else if (master.isNone()) { cerr << "Missing --master" << endl; usage(argv[0], flags); exit(1); } ExecutorInfo executor; executor.mutable_executor_id()->set_value("default"); executor.mutable_command()->set_value(uri); executor.set_name("Test Executor (C++)"); executor.set_source("cpp_test"); TestScheduler scheduler(executor, role); FrameworkInfo framework; framework.set_user(""); // Have Mesos fill in the current user. framework.set_name("Test Framework (C++)"); framework.set_role(role); // TODO(vinod): Make checkpointing the default when it is default // on the slave. 
if (os::hasenv("MESOS_CHECKPOINT")) { cout << "Enabling checkpoint for the framework" << endl; framework.set_checkpoint(true); } MesosSchedulerDriver* driver; if (os::hasenv("MESOS_AUTHENTICATE")) { cout << "Enabling authentication for the framework" << endl; if (!os::hasenv("DEFAULT_PRINCIPAL")) { EXIT(1) << "Expecting authentication principal in the environment"; } if (!os::hasenv("DEFAULT_SECRET")) { EXIT(1) << "Expecting authentication secret in the environment"; } Credential credential; credential.set_principal(getenv("DEFAULT_PRINCIPAL")); credential.set_secret(getenv("DEFAULT_SECRET")); driver = new MesosSchedulerDriver( &scheduler, framework, master.get(), credential); } else { driver = new MesosSchedulerDriver( &scheduler, framework, master.get()); } int status = driver->run() == DRIVER_STOPPED ? 0 : 1; delete driver; return status; }
// This test verifies that a status update and its acknowledgement are
// checkpointed to the agent's meta directory: it launches a task with a
// checkpointing framework, waits for TASK_RUNNING and its ACK, then
// recovers the on-disk state and asserts that exactly one update and
// one ack were persisted for the task.
TEST_F(StatusUpdateManagerTest, CheckpointStatusUpdate)
{
  Try<PID<Master> > master = StartMaster();
  ASSERT_SOME(master);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);

  // Require flags to retrieve work_dir when recovering
  // the checkpointed data.
  slave::Flags flags = CreateSlaveFlags();

  Try<PID<Slave> > slave = StartSlave(&exec, flags);
  ASSERT_SOME(slave);

  FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;
  frameworkInfo.set_checkpoint(true); // Enable checkpointing.

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, frameworkInfo, master.get(), DEFAULT_CREDENTIAL);

  // Capture the framework id; it is needed below to look up the
  // framework in the recovered on-disk state.
  Future<FrameworkID> frameworkId;
  EXPECT_CALL(sched, registered(_, _, _))
    .WillOnce(FutureArg<1>(&frameworkId));

  Future<vector<Offer> > offers;
  EXPECT_CALL(sched, resourceOffers(_, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(frameworkId);
  AWAIT_READY(offers);
  EXPECT_NE(0u, offers.get().size());

  EXPECT_CALL(exec, registered(_, _, _, _))
    .Times(1);

  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING));

  Future<TaskStatus> status;
  EXPECT_CALL(sched, statusUpdate(_, _))
    .WillOnce(FutureArg<1>(&status));

  // Intercept the dispatch BEFORE launching the task so the ACK
  // cannot race past the expectation.
  Future<Nothing> _statusUpdateAcknowledgement =
    FUTURE_DISPATCH(slave.get(), &Slave::_statusUpdateAcknowledgement);

  driver.launchTasks(offers.get()[0].id(), createTasks(offers.get()[0]));

  AWAIT_READY(status);
  EXPECT_EQ(TASK_RUNNING, status.get().state());

  AWAIT_READY(_statusUpdateAcknowledgement);

  // Ensure that both the status update and its acknowledgement are
  // correctly checkpointed.
  Result<slave::state::State> state =
    slave::state::recover(slave::paths::getMetaRootDir(flags.work_dir), true);

  ASSERT_SOME(state);
  ASSERT_SOME(state.get().slave);
  ASSERT_TRUE(state.get().slave.get().frameworks.contains(frameworkId.get()));

  // Walk down the recovered hierarchy: framework -> executor -> run ->
  // task, asserting a single entry at each level.
  slave::state::FrameworkState frameworkState =
    state.get().slave.get().frameworks.get(frameworkId.get()).get();

  ASSERT_EQ(1u, frameworkState.executors.size());

  slave::state::ExecutorState executorState =
    frameworkState.executors.begin()->second;

  ASSERT_EQ(1u, executorState.runs.size());

  slave::state::RunState runState = executorState.runs.begin()->second;

  ASSERT_EQ(1u, runState.tasks.size());

  slave::state::TaskState taskState = runState.tasks.begin()->second;

  // Exactly one update (TASK_RUNNING) and one ack were persisted.
  EXPECT_EQ(1u, taskState.updates.size());
  EXPECT_EQ(1u, taskState.acks.size());

  EXPECT_CALL(exec, shutdown(_))
    .Times(AtMost(1));

  driver.stop();
  driver.join();

  Shutdown();
}
// This test verifies that the slave and status update manager
// properly handle duplicate terminal status updates, when the
// second update is received after the ACK for the first update.
// The proper behavior here is for the status update manager to
// forward the duplicate update to the scheduler.
TEST_F(StatusUpdateManagerTest, DuplicateTerminalUpdateAfterAck)
{
  Try<PID<Master> > master = StartMaster();
  ASSERT_SOME(master);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);

  slave::Flags flags = CreateSlaveFlags();
  flags.checkpoint = true;

  Try<PID<Slave> > slave = StartSlave(&exec, flags);
  ASSERT_SOME(slave);

  FrameworkInfo frameworkInfo; // Bug in gcc 4.1.*, must assign on next line.
  frameworkInfo = DEFAULT_FRAMEWORK_INFO;
  frameworkInfo.set_checkpoint(true); // Enable checkpointing.

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, frameworkInfo, master.get(), DEFAULT_CREDENTIAL);

  FrameworkID frameworkId;
  EXPECT_CALL(sched, registered(_, _, _))
    .WillOnce(SaveArg<1>(&frameworkId));

  Future<vector<Offer> > offers;
  EXPECT_CALL(sched, resourceOffers(_, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  EXPECT_NE(0u, offers.get().size());

  // Capture the executor driver so we can inject a duplicate update
  // from the executor side later.
  ExecutorDriver* execDriver;
  EXPECT_CALL(exec, registered(_, _, _, _))
    .WillOnce(SaveArg<0>(&execDriver));

  // Send a terminal update right away.
  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_FINISHED));

  Future<TaskStatus> status;
  EXPECT_CALL(sched, statusUpdate(_, _))
    .WillOnce(FutureArg<1>(&status));

  // Intercept the ACK dispatch before launching so it cannot be missed.
  Future<Nothing> _statusUpdateAcknowledgement =
    FUTURE_DISPATCH(slave.get(), &Slave::_statusUpdateAcknowledgement);

  driver.launchTasks(offers.get()[0].id(), createTasks(offers.get()[0]));

  AWAIT_READY(status);
  EXPECT_EQ(TASK_FINISHED, status.get().state());

  // The first (terminal) update has now been acknowledged.
  AWAIT_READY(_statusUpdateAcknowledgement);

  Future<TaskStatus> update;
  EXPECT_CALL(sched, statusUpdate(_, _))
    .WillOnce(FutureArg<1>(&update));

  Future<Nothing> _statusUpdateAcknowledgement2 =
    FUTURE_DISPATCH(slave.get(), &Slave::_statusUpdateAcknowledgement);

  Clock::pause();

  // Now send a TASK_KILLED update for the same task.
  TaskStatus status2 = status.get();
  status2.set_state(TASK_KILLED);
  execDriver->sendStatusUpdate(status2);

  // Ensure the scheduler receives TASK_KILLED.
  AWAIT_READY(update);
  EXPECT_EQ(TASK_KILLED, update.get().state());

  // Ensure the slave properly handles the ACK.
  // Clock::settle() ensures that the slave successfully
  // executes Slave::_statusUpdateAcknowledgement().
  AWAIT_READY(_statusUpdateAcknowledgement2);
  Clock::settle();

  Clock::resume();

  EXPECT_CALL(exec, shutdown(_))
    .Times(AtMost(1));

  driver.stop();
  driver.join();

  Shutdown();
}
// This test verifies that the slave and status update manager
// properly handle duplicate status updates, when the second
// update with the same UUID is received before the ACK for the
// first update. The proper behavior here is for the status update
// manager to drop the duplicate update.
TEST_F(StatusUpdateManagerTest, DuplicateUpdateBeforeAck)
{
  Try<PID<Master> > master = StartMaster();
  ASSERT_SOME(master);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);

  slave::Flags flags = CreateSlaveFlags();
  flags.checkpoint = true;

  Try<PID<Slave> > slave = StartSlave(&exec, flags);
  ASSERT_SOME(slave);

  FrameworkInfo frameworkInfo; // Bug in gcc 4.1.*, must assign on next line.
  frameworkInfo = DEFAULT_FRAMEWORK_INFO;
  frameworkInfo.set_checkpoint(true); // Enable checkpointing.

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, frameworkInfo, master.get(), DEFAULT_CREDENTIAL);

  FrameworkID frameworkId;
  EXPECT_CALL(sched, registered(_, _, _))
    .WillOnce(SaveArg<1>(&frameworkId));

  Future<vector<Offer> > offers;
  EXPECT_CALL(sched, resourceOffers(_, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  EXPECT_NE(0u, offers.get().size());

  ExecutorDriver* execDriver;
  EXPECT_CALL(exec, registered(_, _, _, _))
    .WillOnce(SaveArg<0>(&execDriver));

  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING));

  // Capture the first status update message (so it can be re-posted
  // verbatim below, with the same UUID).
  Future<StatusUpdateMessage> statusUpdateMessage =
    FUTURE_PROTOBUF(StatusUpdateMessage(), _, _);

  Future<TaskStatus> status;
  EXPECT_CALL(sched, statusUpdate(_, _))
    .WillOnce(FutureArg<1>(&status));

  // Drop the first ACK from the scheduler to the slave.
  Future<StatusUpdateAcknowledgementMessage> statusUpdateAcknowledgementMessage =
    DROP_PROTOBUF(StatusUpdateAcknowledgementMessage(), _, slave.get());

  Clock::pause();

  driver.launchTasks(offers.get()[0].id(), createTasks(offers.get()[0]));

  AWAIT_READY(statusUpdateMessage);

  AWAIT_READY(status);
  EXPECT_EQ(TASK_RUNNING, status.get().state());

  // The ACK was dropped, so the update is still pending in the
  // status update manager.
  AWAIT_READY(statusUpdateAcknowledgementMessage);

  Future<Nothing> _statusUpdate =
    FUTURE_DISPATCH(slave.get(), &Slave::_statusUpdate);

  // Now resend the TASK_RUNNING update.
  process::post(slave.get(), statusUpdateMessage.get());

  // At this point the status update manager has handled
  // the duplicate status update.
  AWAIT_READY(_statusUpdate);

  // After we advance the clock, the status update manager should
  // retry the TASK_RUNNING update and the scheduler should receive
  // and acknowledge it.
  Future<TaskStatus> update;
  EXPECT_CALL(sched, statusUpdate(_, _))
    .WillOnce(FutureArg<1>(&update));

  Clock::advance(slave::STATUS_UPDATE_RETRY_INTERVAL);
  Clock::settle();

  // Ensure the scheduler receives the retried TASK_RUNNING update
  // (i.e. the duplicate was dropped, not forwarded).
  AWAIT_READY(update);
  EXPECT_EQ(TASK_RUNNING, update.get().state());

  EXPECT_CALL(exec, shutdown(_))
    .Times(AtMost(1));

  Clock::resume();

  driver.stop();
  driver.join();

  Shutdown();
}
// In this test, the framework is checkpointed so we expect the executor to
// persist across the slave restart and to have the same resource usage before
// and after.
TEST_F(ROOT_XFS_QuotaTest, CheckpointRecovery)
{
  slave::Flags flags = CreateSlaveFlags();

  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  Owned<MasterDetector> detector = master.get()->createDetector();

  // NOTE(review): the agent is started with a fresh `CreateSlaveFlags()`
  // call but restarted below with `flags`; checkpoint recovery requires
  // both runs to share the same work_dir — confirm the two flag sets
  // agree for this fixture.
  Try<Owned<cluster::Slave>> slave =
    StartSlave(detector.get(), CreateSlaveFlags());
  ASSERT_SOME(slave);

  FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;
  frameworkInfo.set_checkpoint(true);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, frameworkInfo, master.get()->pid, DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(_, _, _));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(_, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  ASSERT_FALSE(offers->empty());

  Offer offer = offers.get()[0];

  // Write 1MB then stay alive so the executor survives the restart.
  TaskInfo task = createTask(
      offer.slave_id(),
      Resources::parse("cpus:1;mem:128;disk:1").get(),
      "dd if=/dev/zero of=file bs=1048576 count=1; sleep 1000");

  Future<TaskStatus> startingStatus;
  Future<TaskStatus> runningStatus;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&startingStatus))
    .WillOnce(FutureArg<1>(&runningStatus));

  driver.launchTasks(offer.id(), {task});

  AWAIT_READY(startingStatus);
  EXPECT_EQ(task.task_id(), startingStatus->task_id());
  EXPECT_EQ(TASK_STARTING, startingStatus->state());

  // BUG FIX: the original awaited `startingStatus` a second time here,
  // so `runningStatus` could be dereferenced before it was ready.
  AWAIT_READY(runningStatus);
  EXPECT_EQ(task.task_id(), runningStatus->task_id());
  EXPECT_EQ(TASK_RUNNING, runningStatus->state());

  Future<ResourceUsage> usage1 =
    process::dispatch(slave.get()->pid, &Slave::usage);
  AWAIT_READY(usage1);

  // We should have 1 executor using resources.
  ASSERT_EQ(1, usage1->executors().size());

  // Restart the slave.
  slave.get()->terminate();

  Future<SlaveReregisteredMessage> slaveReregisteredMessage =
    FUTURE_PROTOBUF(SlaveReregisteredMessage(), _, _);

  slave = StartSlave(detector.get(), flags);
  ASSERT_SOME(slave);

  // Wait for the slave to re-register.
  AWAIT_READY(slaveReregisteredMessage);

  Future<ResourceUsage> usage2 =
    process::dispatch(slave.get()->pid, &Slave::usage);
  AWAIT_READY(usage2);

  // We should still have 1 executor using resources.
  // BUG FIX: the original re-checked `usage1` here, so the post-restart
  // snapshot (the whole point of the test) was never verified.
  ASSERT_EQ(1, usage2->executors().size());

  Try<std::list<string>> sandboxes = os::glob(path::join(
      slave::paths::getSandboxRootDir(mountPoint.get()),
      "*",
      "frameworks",
      "*",
      "executors",
      "*",
      "runs",
      "*"));

  ASSERT_SOME(sandboxes);

  // One sandbox and one symlink.
  ASSERT_EQ(2u, sandboxes->size());

  // Scan the remaining sandboxes. We ought to still have project IDs
  // assigned to them all.
  foreach (const string& sandbox, sandboxes.get()) {
    // Skip the "latest" symlink.
    if (os::stat::islink(sandbox)) {
      continue;
    }

    EXPECT_SOME(xfs::getProjectId(sandbox));
  }

  driver.stop();
  driver.join();
}
// This test launches a command task which has checkpoint enabled, and
// agent is terminated when the task is running, after agent is restarted,
// kill the task and then verify we can receive TASK_KILLED for the task.
TEST_F(CniIsolatorTest, ROOT_SlaveRecovery)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  slave::Flags flags = CreateSlaveFlags();
  flags.isolation = "network/cni";
  flags.network_cni_plugins_dir = cniPluginDir;
  flags.network_cni_config_dir = cniConfigDir;

  Owned<MasterDetector> detector = master.get()->createDetector();

  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), flags);
  ASSERT_SOME(slave);

  MockScheduler sched;

  // Enable checkpointing for the framework.
  FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;
  frameworkInfo.set_checkpoint(true);

  MesosSchedulerDriver driver(
      &sched, frameworkInfo, master.get()->pid, DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(_, _, _));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  ASSERT_EQ(1u, offers->size());

  const Offer& offer = offers.get()[0];

  // Long-running task so it is still alive across the agent restart.
  CommandInfo command;
  command.set_value("sleep 1000");

  TaskInfo task = createTask(
      offer.slave_id(),
      Resources::parse("cpus:1;mem:128").get(),
      command);

  ContainerInfo* container = task.mutable_container();
  container->set_type(ContainerInfo::MESOS);

  // Make sure the container join the mock CNI network.
  container->add_network_infos()->set_name("__MESOS_TEST__");

  // Expect exactly two updates: TASK_RUNNING now, TASK_KILLED after
  // the restart.
  Future<TaskStatus> statusRunning;
  Future<TaskStatus> statusKilled;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&statusRunning))
    .WillOnce(FutureArg<1>(&statusKilled));

  EXPECT_CALL(sched, offerRescinded(&driver, _))
    .Times(AtMost(1));

  // Intercept the ACK dispatch before launching so we can be sure it
  // is checkpointed before terminating the agent.
  Future<Nothing> ack =
    FUTURE_DISPATCH(_, &Slave::_statusUpdateAcknowledgement);

  driver.launchTasks(offer.id(), {task});

  AWAIT_READY(statusRunning);
  EXPECT_EQ(task.task_id(), statusRunning->task_id());
  EXPECT_EQ(TASK_RUNNING, statusRunning->state());

  // Wait for the ACK to be checkpointed.
  AWAIT_READY(ack);

  // Stop the slave after TASK_RUNNING is received.
  slave.get()->terminate();

  // Restart the slave.
  slave = StartSlave(detector.get(), flags);
  ASSERT_SOME(slave);

  // Kill the task.
  driver.killTask(task.task_id());

  AWAIT_READY(statusKilled);
  EXPECT_EQ(task.task_id(), statusKilled->task_id());
  EXPECT_EQ(TASK_KILLED, statusKilled->state());

  driver.stop();
  driver.join();
}
// In this test, the agent initially doesn't enable disk isolation
// but then restarts with XFS disk isolation enabled. We verify that
// the old container launched before the agent restart is
// successfully recovered.
TEST_F(ROOT_XFS_QuotaTest, RecoverOldContainers)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  Owned<MasterDetector> detector = master.get()->createDetector();

  slave::Flags flags = CreateSlaveFlags();

  // `CreateSlaveFlags()` enables `disk/xfs` so here we reset
  // `isolation` to empty.
  flags.isolation.clear();

  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), flags);
  ASSERT_SOME(slave);

  // Checkpointing is required for the executor to survive the restart.
  FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;
  frameworkInfo.set_checkpoint(true);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, frameworkInfo, master.get()->pid, DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(_, _, _));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(_, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  ASSERT_FALSE(offers->empty());

  Offer offer = offers.get()[0];

  // Write 1KB then stay alive so the container outlives the restart.
  TaskInfo task = createTask(
      offer.slave_id(),
      Resources::parse("cpus:1;mem:128;disk:1").get(),
      "dd if=/dev/zero of=file bs=1024 count=1; sleep 1000");

  Future<TaskStatus> startingStatus;
  Future<TaskStatus> runningstatus;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&startingStatus))
    .WillOnce(FutureArg<1>(&runningstatus));

  driver.launchTasks(offer.id(), {task});

  AWAIT_READY(startingStatus);
  EXPECT_EQ(task.task_id(), startingStatus->task_id());
  EXPECT_EQ(TASK_STARTING, startingStatus->state());

  AWAIT_READY(runningstatus);
  EXPECT_EQ(task.task_id(), runningstatus->task_id());
  EXPECT_EQ(TASK_RUNNING, runningstatus->state());

  {
    Future<ResourceUsage> usage =
      process::dispatch(slave.get()->pid, &Slave::usage);
    AWAIT_READY(usage);

    // We should have 1 executor using resources but it doesn't have
    // disk limit enabled.
    ASSERT_EQ(1, usage->executors().size());
    const ResourceUsage_Executor& executor = usage->executors().Get(0);
    ASSERT_TRUE(executor.has_statistics());
    ASSERT_FALSE(executor.statistics().has_disk_limit_bytes());
  }

  // Restart the slave.
  slave.get()->terminate();

  Future<SlaveReregisteredMessage> slaveReregisteredMessage =
    FUTURE_PROTOBUF(SlaveReregisteredMessage(), _, _);

  // This time use the agent flags that include XFS disk isolation.
  slave = StartSlave(detector.get(), CreateSlaveFlags());
  ASSERT_SOME(slave);

  // Wait for the slave to re-register.
  AWAIT_READY(slaveReregisteredMessage);

  {
    Future<ResourceUsage> usage =
      process::dispatch(slave.get()->pid, &Slave::usage);
    AWAIT_READY(usage);

    // We should still have 1 executor using resources but it doesn't
    // have disk limit enabled (the container predates the isolator).
    ASSERT_EQ(1, usage->executors().size());
    const ResourceUsage_Executor& executor = usage->executors().Get(0);
    ASSERT_TRUE(executor.has_statistics());
    ASSERT_FALSE(executor.statistics().has_disk_limit_bytes());
  }

  driver.stop();
  driver.join();
}
int main(int argc, char** argv) { string master = MASTER; string k3binary; string paramfile; int total_peers; YAML::Node k3vars; // Parse Command Line options string path = os::realpath(dirname(argv[0])).get(); namespace po = boost::program_options; vector <string> optionalVars; po::options_description desc("K3 Run options"); desc.add_options() ("program", po::value<string>(&k3binary)->required(), "K3 executable program filename") ("numpeers", po::value<int>(&total_peers)->required(), "# of K3 Peers to launch") ("help,h", "Print help message") ("param,p", po::value<string>(¶mfile)->required(), "YAML Formatted input file"); po::positional_options_description positionalOptions; positionalOptions.add("program", 1); positionalOptions.add("numpeers", 1); po::variables_map vm; try { po::store(po::command_line_parser(argc, argv).options(desc) .positional(positionalOptions).run(), vm); if (vm.count("help") || vm.empty()) { cout << "K3 Distributed program framework backed by Mesos cluser" << endl; cout << desc << endl; return 0; } po::notify(vm); k3vars = YAML::LoadFile(paramfile); } catch (boost::program_options::required_option& e) { cerr << " ERROR: " << e.what() << endl << endl; cout << desc << endl; return 1; } catch (boost::program_options::error& e) { cerr << " ERROR: " << e.what() << endl << endl; cout << desc << endl; return 1; } KDScheduler scheduler(k3binary, total_peers, k3vars, path); FrameworkInfo framework; framework.set_user(""); // Have Mesos fill in the current user. 
framework.set_name(k3binary + "-" + stringify(total_peers)); framework.mutable_id()->set_value(k3binary); if (os::hasenv("MESOS_CHECKPOINT")) { cout << "Enabling checkpoint for the framework" << endl; framework.set_checkpoint(true); } MesosSchedulerDriver* driver; if (os::hasenv("MESOS_AUTHENTICATE")) { cout << "Enabling authentication for the framework" << endl; if (!os::hasenv("DEFAULT_PRINCIPAL")) { EXIT(1) << "Expecting authentication principal in the environment"; } if (!os::hasenv("DEFAULT_SECRET")) { EXIT(1) << "Expecting authentication secret in the environment"; } Credential credential; credential.set_principal(getenv("DEFAULT_PRINCIPAL")); credential.set_secret(getenv("DEFAULT_SECRET")); framework.set_principal(getenv("DEFAULT_PRINCIPAL")); driver = new MesosSchedulerDriver( &scheduler, framework, master, credential); } else { framework.set_principal("k3-docker-no-executor-framework-cpp"); driver = new MesosSchedulerDriver(&scheduler, framework, master); } int status = driver->run() == DRIVER_STOPPED ? 0 : 1; // Ensure that the driver process terminates. driver->stop(); delete driver; return status; }
// This test verifies that disk quota isolator recovers properly after
// the slave restarts.
TEST_F(DiskQuotaTest, SlaveRecovery)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  slave::Flags flags = CreateSlaveFlags();
  flags.isolation = "posix/cpu,posix/mem,disk/du";

  // Poll disk usage very frequently so the test observes it quickly.
  flags.container_disk_watch_interval = Milliseconds(1);

  Fetcher fetcher(flags);

  Try<MesosContainerizer*> _containerizer =
    MesosContainerizer::create(flags, true, &fetcher);

  ASSERT_SOME(_containerizer);

  Owned<MesosContainerizer> containerizer(_containerizer.get());

  Owned<MasterDetector> detector = master.get()->createDetector();

  Try<Owned<cluster::Slave>> slave =
    StartSlave(detector.get(), containerizer.get(), flags);
  ASSERT_SOME(slave);

  MockScheduler sched;

  // Enable checkpointing for the framework.
  FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;
  frameworkInfo.set_checkpoint(true);

  MesosSchedulerDriver driver(
      &sched, frameworkInfo, master.get()->pid, DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(_, _, _));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(_, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  ASSERT_FALSE(offers->empty());

  const Offer& offer = offers.get()[0];

  // Create a task that uses 2MB disk (within its 3MB allocation).
  TaskInfo task = createTask(
      offer.slave_id(),
      Resources::parse("cpus:1;mem:128;disk:3").get(),
      "dd if=/dev/zero of=file bs=1048576 count=2 && sleep 1000");

  Future<TaskStatus> status;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&status))
    .WillRepeatedly(Return()); // Ignore subsequent updates.

  driver.launchTasks(offer.id(), {task});

  AWAIT_READY(status);
  EXPECT_EQ(task.task_id(), status->task_id());
  EXPECT_EQ(TASK_RUNNING, status->state());

  // Remember the container id so usage can be queried after recovery.
  Future<hashset<ContainerID>> containers = containerizer->containers();

  AWAIT_READY(containers);
  ASSERT_EQ(1u, containers->size());

  const ContainerID& containerId = *(containers->begin());

  // Stop the slave.
  slave.get()->terminate();

  Future<ReregisterExecutorMessage> reregisterExecutorMessage =
    FUTURE_PROTOBUF(ReregisterExecutorMessage(), _, _);

  Future<Nothing> _recover = FUTURE_DISPATCH(_, &Slave::_recover);

  // Recreate the containerizer so it has to recover the running
  // container from checkpointed state.
  _containerizer = MesosContainerizer::create(flags, true, &fetcher);
  ASSERT_SOME(_containerizer);
  containerizer.reset(_containerizer.get());

  detector = master.get()->createDetector();

  slave = StartSlave(detector.get(), containerizer.get(), flags);
  ASSERT_SOME(slave);

  Clock::pause();

  AWAIT_READY(_recover);

  // Wait for slave to schedule reregister timeout.
  Clock::settle();

  // Ensure the executor re-registers before completing recovery.
  AWAIT_READY(reregisterExecutorMessage);

  // Ensure the slave considers itself recovered.
  Clock::advance(flags.executor_reregistration_timeout);

  // NOTE: We resume the clock because we need the reaper to reap the
  // 'du' subprocess.
  Clock::resume();

  // Wait until disk usage can be retrieved.
  Duration elapsed = Duration::zero();
  while (true) {
    Future<ResourceStatistics> usage = containerizer->usage(containerId);
    AWAIT_READY(usage);

    // The recovered container must still report its 3MB disk limit.
    ASSERT_TRUE(usage->has_disk_limit_bytes());
    EXPECT_EQ(Megabytes(3), Bytes(usage->disk_limit_bytes()));

    if (usage->has_disk_used_bytes()) {
      EXPECT_LE(usage->disk_used_bytes(), usage->disk_limit_bytes());

      // NOTE: This is to capture the regression in MESOS-2452. The data
      // stored in the executor meta directory should be less than 64K.
      if (usage->disk_used_bytes() > Kilobytes(64).bytes()) {
        break;
      }
    }

    // Fail rather than loop forever if usage never shows up.
    ASSERT_LT(elapsed, Seconds(15));

    os::sleep(Milliseconds(1));
    elapsed += Milliseconds(1);
  }

  driver.stop();
  driver.join();
}
int main(int argc, char** argv) { if (argc != 3) { std::cerr << "Usage: " << argv[0] << " <master> <balloon limit in MB>" << std::endl; return -1; } // Verify the balloon limit. Try<size_t> limit = numify<size_t>(argv[2]); if (limit.isError()) { std::cerr << "Balloon limit is not a valid number" << std::endl; return -1; } if (limit.get() < EXECUTOR_MEMORY_MB) { std::cerr << "Please use a balloon limit bigger than " << EXECUTOR_MEMORY_MB << " MB" << std::endl; } // Find this executable's directory to locate executor. std::string path = os::realpath(::dirname(argv[0])).get(); std::string uri = path + "/balloon-executor"; if (getenv("MESOS_BUILD_DIR")) { uri = std::string(::getenv("MESOS_BUILD_DIR")) + "/src/balloon-executor"; } ExecutorInfo executor; executor.mutable_executor_id()->set_value("default"); executor.mutable_command()->set_value(uri); executor.set_name("Balloon Executor"); executor.set_source("balloon_test"); Resource* mem = executor.add_resources(); mem->set_name("mem"); mem->set_type(Value::SCALAR); mem->mutable_scalar()->set_value(EXECUTOR_MEMORY_MB); BalloonScheduler scheduler(executor, limit.get()); FrameworkInfo framework; framework.set_user(""); // Have Mesos fill in the current user. framework.set_name("Balloon Framework (C++)"); // TODO(vinod): Make checkpointing the default when it is default // on the slave. 
if (os::hasenv("MESOS_CHECKPOINT")) { cout << "Enabling checkpoint for the framework" << endl; framework.set_checkpoint(true); } MesosSchedulerDriver* driver; if (os::hasenv("MESOS_AUTHENTICATE")) { cout << "Enabling authentication for the framework" << endl; if (!os::hasenv("DEFAULT_PRINCIPAL")) { EXIT(1) << "Expecting authentication principal in the environment"; } if (!os::hasenv("DEFAULT_SECRET")) { EXIT(1) << "Expecting authentication secret in the environment"; } Credential credential; credential.set_principal(getenv("DEFAULT_PRINCIPAL")); credential.set_secret(getenv("DEFAULT_SECRET")); framework.set_principal(getenv("DEFAULT_PRINCIPAL")); driver = new MesosSchedulerDriver( &scheduler, framework, argv[1], credential); } else { framework.set_principal("balloon-framework-cpp"); driver = new MesosSchedulerDriver( &scheduler, framework, argv[1]); } int status = driver->run() == DRIVER_STOPPED ? 0 : 1; // Ensure that the driver process terminates. driver->stop(); delete driver; return status; }
// This test verifies that the slave and status update manager
// properly handle duplicate terminal status updates, when the
// second update is received before the ACK for the first update.
// The proper behavior here is for the status update manager to
// drop the duplicate update.
TEST_F(StatusUpdateManagerTest, DuplicateTerminalUpdateBeforeAck)
{
  Try<PID<Master> > master = StartMaster();
  ASSERT_SOME(master);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);

  Try<PID<Slave> > slave = StartSlave(&exec);
  ASSERT_SOME(slave);

  FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;
  frameworkInfo.set_checkpoint(true); // Enable checkpointing.

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, frameworkInfo, master.get(), DEFAULT_CREDENTIAL);

  FrameworkID frameworkId;
  EXPECT_CALL(sched, registered(_, _, _))
    .WillOnce(SaveArg<1>(&frameworkId));

  Future<vector<Offer> > offers;
  EXPECT_CALL(sched, resourceOffers(_, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  EXPECT_NE(0u, offers.get().size());

  // Capture the executor driver so a duplicate terminal update can be
  // injected from the executor side below.
  ExecutorDriver* execDriver;
  EXPECT_CALL(exec, registered(_, _, _, _))
    .WillOnce(SaveArg<0>(&execDriver));

  // Send a terminal update right away.
  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_FINISHED));

  Future<TaskStatus> status;
  EXPECT_CALL(sched, statusUpdate(_, _))
    .WillOnce(FutureArg<1>(&status));

  // Drop the first ACK from the scheduler to the slave.
  Future<StatusUpdateAcknowledgementMessage> statusUpdateAckMessage =
    DROP_PROTOBUF(StatusUpdateAcknowledgementMessage(), _, slave.get());

  // The DROP/FUTURE interceptors above must be installed before
  // launching the task that triggers the messages.
  Future<Nothing> __statusUpdate =
    FUTURE_DISPATCH(slave.get(), &Slave::__statusUpdate);

  Clock::pause();

  driver.launchTasks(offers.get()[0].id(), createTasks(offers.get()[0]));

  AWAIT_READY(status);
  EXPECT_EQ(TASK_FINISHED, status.get().state());

  AWAIT_READY(statusUpdateAckMessage);

  // At this point the status update manager has enqueued
  // TASK_FINISHED update.
  AWAIT_READY(__statusUpdate);

  Future<Nothing> __statusUpdate2 =
    FUTURE_DISPATCH(slave.get(), &Slave::__statusUpdate);

  // Now send a TASK_KILLED update for the same task.
  TaskStatus status2 = status.get();
  status2.set_state(TASK_KILLED);
  execDriver->sendStatusUpdate(status2);

  // At this point the status update manager has enqueued
  // TASK_FINISHED and TASK_KILLED updates.
  AWAIT_READY(__statusUpdate2);

  // After we advance the clock, the scheduler should receive
  // the retried TASK_FINISHED update and acknowledge it. The
  // TASK_KILLED update should be dropped by the status update
  // manager, as the stream is already terminated.
  Future<TaskStatus> update;
  EXPECT_CALL(sched, statusUpdate(_, _))
    .WillOnce(FutureArg<1>(&update));

  Clock::advance(slave::STATUS_UPDATE_RETRY_INTERVAL_MIN);
  Clock::settle();

  // Ensure the scheduler receives TASK_FINISHED (not TASK_KILLED).
  AWAIT_READY(update);
  EXPECT_EQ(TASK_FINISHED, update.get().state());

  EXPECT_CALL(exec, shutdown(_))
    .Times(AtMost(1));

  Clock::resume();

  driver.stop();
  driver.join();

  Shutdown();
}