virtual void resourceOffers(SchedulerDriver* driver, const vector<Offer>& offers) { for (size_t i = 0; i < offers.size(); i++) { const Offer& offer = offers[i]; Resources remaining = offer.resources(); static Resources TASK_RESOURCES = Resources::parse( "cpus:" + stringify<float>(CPUS_PER_TASK) + ";mem:" + stringify<size_t>(MEM_PER_TASK)).get(); size_t maxTasks = 0; while (remaining.flatten().contains(TASK_RESOURCES)) { maxTasks++; remaining -= TASK_RESOURCES; } // Launch tasks. vector<TaskInfo> tasks; for (size_t i = 0; i < maxTasks / 2 && crawlQueue.size() > 0; i++) { string url = crawlQueue.front(); crawlQueue.pop(); string urlId = "C" + stringify<size_t>(processed[url]); TaskInfo task; task.set_name("Crawler " + urlId); task.mutable_task_id()->set_value(urlId); task.mutable_slave_id()->MergeFrom(offer.slave_id()); task.mutable_executor()->MergeFrom(crawler); task.mutable_resources()->MergeFrom(TASK_RESOURCES); task.set_data(url); tasks.push_back(task); tasksLaunched++; cout << "Crawler " << urlId << " " << url << endl; } for (size_t i = maxTasks/2; i < maxTasks && renderQueue.size() > 0; i++) { string url = renderQueue.front(); renderQueue.pop(); string urlId = "R" + stringify<size_t>(processed[url]); TaskInfo task; task.set_name("Renderer " + urlId); task.mutable_task_id()->set_value(urlId); task.mutable_slave_id()->MergeFrom(offer.slave_id()); task.mutable_executor()->MergeFrom(renderer); task.mutable_resources()->MergeFrom(TASK_RESOURCES); task.set_data(url); tasks.push_back(task); tasksLaunched++; cout << "Renderer " << urlId << " " << url << endl; } driver->launchTasks(offer.id(), tasks); } }
// This test checks that a scheduler exit shuts down the executor. TEST_F(FaultToleranceTest, SchedulerExit) { Try<PID<Master> > master = StartMaster(); ASSERT_SOME(master); MockExecutor exec(DEFAULT_EXECUTOR_ID); Try<PID<Slave> > slave = StartSlave(&exec); ASSERT_SOME(slave); MockScheduler sched; MesosSchedulerDriver driver(&sched, DEFAULT_FRAMEWORK_INFO, master.get()); EXPECT_CALL(sched, registered(&driver, _, _)); Future<vector<Offer> > offers; EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); // Ignore subsequent offers. driver.start(); AWAIT_READY(offers); EXPECT_NE(0u, offers.get().size()); AWAIT_READY(offers); TaskInfo task; task.set_name(""); task.mutable_task_id()->set_value("1"); task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id()); task.mutable_resources()->MergeFrom(offers.get()[0].resources()); task.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO); vector<TaskInfo> tasks; tasks.push_back(task); Future<TaskStatus> status; EXPECT_CALL(sched, statusUpdate(&driver, _)) .WillOnce(FutureArg<1>(&status)); EXPECT_CALL(exec, registered(_, _, _, _)); EXPECT_CALL(exec, launchTask(_, _)) .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING)); driver.launchTasks(offers.get()[0].id(), tasks); AWAIT_READY(status); EXPECT_EQ(TASK_RUNNING, status.get().state()); EXPECT_CALL(exec, shutdown(_)) .Times(AtMost(1)); driver.stop(); driver.join(); Shutdown(); }
void resourceOffers(const vector<Offer>& offers) { foreach (const Offer& offer, offers) { cout << "Received offer " << offer.id() << " with " << Resources(offer.resources()) << endl; static const Resources TASK_RESOURCES = Resources::parse( "cpus:" + stringify(CPUS_PER_TASK) + ";mem:" + stringify(MEM_PER_TASK)).get(); Resources remaining = offer.resources(); // Launch tasks. vector<TaskInfo> tasks; while (tasksLaunched < totalTasks && remaining.flatten().contains(TASK_RESOURCES)) { int taskId = tasksLaunched++; cout << "Launching task " << taskId << " using offer " << offer.id() << endl; TaskInfo task; task.set_name("Task " + lexical_cast<string>(taskId)); task.mutable_task_id()->set_value( lexical_cast<string>(taskId)); task.mutable_agent_id()->MergeFrom(offer.agent_id()); task.mutable_executor()->MergeFrom(executor); Option<Resources> resources = remaining.find(TASK_RESOURCES.flatten(framework.role())); CHECK_SOME(resources); task.mutable_resources()->CopyFrom(resources.get()); remaining -= resources.get(); tasks.push_back(task); } Call call; CHECK(framework.has_id()); call.mutable_framework_id()->CopyFrom(framework.id()); call.set_type(Call::ACCEPT); Call::Accept* accept = call.mutable_accept(); accept->add_offer_ids()->CopyFrom(offer.id()); Offer::Operation* operation = accept->add_operations(); operation->set_type(Offer::Operation::LAUNCH); foreach (const TaskInfo& taskInfo, tasks) { operation->mutable_launch()->add_task_infos()->CopyFrom(taskInfo); }
void offers(const vector<Offer>& offers) { CHECK_EQ(SUBSCRIBED, state); static const Try<Resources> TASK_RESOURCES = Resources::parse(resources); if (TASK_RESOURCES.isError()) { EXIT(EXIT_FAILURE) << "Failed to parse resources '" << resources << "': " << TASK_RESOURCES.error(); } foreach (const Offer& offer, offers) { Resources offered = offer.resources(); if (!launched && offered.flatten().contains(TASK_RESOURCES.get())) { TaskInfo task; task.set_name(name); task.mutable_task_id()->set_value(name); task.mutable_agent_id()->MergeFrom(offer.agent_id()); // Takes resources first from the specified role, then from '*'. Option<Resources> resources = offered.find(TASK_RESOURCES.get().flatten(frameworkInfo.role())); CHECK_SOME(resources); task.mutable_resources()->CopyFrom(resources.get()); CommandInfo* commandInfo = task.mutable_command(); if (shell) { CHECK_SOME(command); commandInfo->set_shell(true); commandInfo->set_value(command.get()); } else { // TODO(gilbert): Treat 'command' as executable value and arguments. commandInfo->set_shell(false); } if (environment.isSome()) { Environment* environment_ = commandInfo->mutable_environment(); foreachpair ( const string& name, const string& value, environment.get()) { Environment::Variable* environmentVariable = environment_->add_variables(); environmentVariable->set_name(name); environmentVariable->set_value(value); } }
// TODO(benh): Move this into utils, make more generic, and use in // other tests. vector<TaskInfo> createTasks(const Offer& offer) { TaskInfo task; task.set_name("test-task"); task.mutable_task_id()->set_value("1"); task.mutable_slave_id()->MergeFrom(offer.slave_id()); task.mutable_resources()->MergeFrom(offer.resources()); task.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO); vector<TaskInfo> tasks; tasks.push_back(task); return tasks; }
virtual void resourceOffers( SchedulerDriver* driver, const vector<Offer>& offers) { static const Try<Resources> TASK_RESOURCES = Resources::parse(resources); if (TASK_RESOURCES.isError()) { cerr << "Failed to parse resources '" << resources << "': " << TASK_RESOURCES.error() << endl; driver->abort(); return; } foreach (const Offer& offer, offers) { if (!launched && Resources(offer.resources()).contains(TASK_RESOURCES.get())) { TaskInfo task; task.set_name(name); task.mutable_task_id()->set_value(name); task.mutable_slave_id()->MergeFrom(offer.slave_id()); task.mutable_resources()->CopyFrom(TASK_RESOURCES.get()); task.mutable_command()->set_value(command); if (uri.isSome()) { task.mutable_command()->add_uris()->set_value(uri.get()); } if (dockerImage.isSome()) { ContainerInfo containerInfo; containerInfo.set_type(ContainerInfo::DOCKER); ContainerInfo::DockerInfo dockerInfo; dockerInfo.set_image(dockerImage.get()); containerInfo.mutable_docker()->CopyFrom(dockerInfo); task.mutable_container()->CopyFrom(containerInfo); } vector<TaskInfo> tasks; tasks.push_back(task); driver->launchTasks(offer.id(), tasks); cout << "task " << name << " submitted to slave " << offer.slave_id() << endl; launched = true; } else { driver->declineOffer(offer.id()); } } }
inline TaskInfo createTask( const Offer& offer, const std::string& command, const std::string& name = "test-task", const std::string& id = UUID::random().toString()) { TaskInfo task; task.set_name(name); task.mutable_task_id()->set_value(id); task.mutable_slave_id()->MergeFrom(offer.slave_id()); task.mutable_resources()->MergeFrom(offer.resources()); task.mutable_command()->set_value(command); return task; }
virtual void resourceOffers(SchedulerDriver* driver, const vector<Offer>& offers) { foreach (const Offer& offer, offers) { cout << "Received offer " << offer.id() << " with " << offer.resources() << endl; static const Resources TASK_RESOURCES = Resources::parse( "cpus:" + stringify(CPUS_PER_TASK) + ";mem:" + stringify(MEM_PER_TASK)).get(); Resources remaining = offer.resources(); // Launch tasks. vector<TaskInfo> tasks; while (tasksLaunched < totalTasks && remaining.flatten().contains(TASK_RESOURCES)) { int taskId = tasksLaunched++; cout << "Launching task " << taskId << " using offer " << offer.id() << endl; TaskInfo task; task.set_name("Task " + lexical_cast<string>(taskId)); task.mutable_task_id()->set_value(lexical_cast<string>(taskId)); task.mutable_slave_id()->MergeFrom(offer.slave_id()); task.mutable_executor()->MergeFrom(executor); Try<Resources> flattened = TASK_RESOURCES.flatten(role); CHECK_SOME(flattened); Option<Resources> resources = remaining.find(flattened.get()); CHECK_SOME(resources); task.mutable_resources()->MergeFrom(resources.get()); remaining -= resources.get(); tasks.push_back(task); } driver->launchTasks(offer.id(), tasks); }
// For use with a MockScheduler, for example: // EXPECT_CALL(sched, resourceOffers(_, _)) // .WillOnce(LaunchTasks(TASKS, CPUS, MEM)); // Launches up to TASKS no-op tasks, if possible, // each with CPUS cpus and MEM memory. ACTION_P4(LaunchTasks, tasks, cpus, mem, role) { SchedulerDriver* driver = arg0; std::vector<Offer> offers = arg1; int numTasks = tasks; int launched = 0; for (size_t i = 0; i < offers.size(); i++) { const Offer& offer = offers[i]; const Resources TASK_RESOURCES = Resources::parse( "cpus:" + stringify(cpus) + ";mem:" + stringify(mem)).get(); int nextTaskId = 0; std::vector<TaskInfo> tasks; Resources remaining = offer.resources(); while (TASK_RESOURCES <= remaining.flatten() && launched < numTasks) { TaskInfo task; task.set_name("TestTask"); task.mutable_task_id()->set_value(stringify(nextTaskId++)); task.mutable_slave_id()->MergeFrom(offer.slave_id()); ExecutorInfo executor; executor.mutable_executor_id()->set_value("default"); executor.mutable_command()->set_value(":"); task.mutable_executor()->MergeFrom(executor); Option<Resources> resources = remaining.find(TASK_RESOURCES, role); CHECK_SOME(resources); task.mutable_resources()->MergeFrom(resources.get()); remaining -= resources.get(); tasks.push_back(task); launched++; } driver->launchTasks(offer.id(), tasks); } }
virtual void resourceOffers(SchedulerDriver* driver, const vector<Offer>& offers) { cout << "." << flush; for (size_t i = 0; i < offers.size(); i++) { const Offer& offer = offers[i]; static const Resources TASK_RESOURCES = Resources::parse( "cpus:" + stringify(CPUS_PER_TASK) + ";mem:" + stringify(MEM_PER_TASK)).get(); Resources remaining = offer.resources(); // Launch tasks. vector<TaskInfo> tasks; while (tasksLaunched < totalTasks && TASK_RESOURCES <= remaining.flatten()) { int taskId = tasksLaunched++; cout << "Starting task " << taskId << " on " << offer.hostname() << endl; TaskInfo task; task.set_name("Task " + lexical_cast<string>(taskId)); task.mutable_task_id()->set_value(lexical_cast<string>(taskId)); task.mutable_slave_id()->MergeFrom(offer.slave_id()); task.mutable_executor()->MergeFrom(executor); Option<Resources> resources = remaining.find(TASK_RESOURCES, role); CHECK_SOME(resources); task.mutable_resources()->MergeFrom(resources.get()); remaining -= resources.get(); tasks.push_back(task); } driver->launchTasks(offer.id(), tasks); } }
inline TaskInfo createTask( const Offer& offer, const std::string& command, const Option<mesos::ExecutorID>& executorId = None(), const std::string& name = "test-task", const std::string& id = UUID::random().toString()) { TaskInfo task; task.set_name(name); task.mutable_task_id()->set_value(id); task.mutable_slave_id()->CopyFrom(offer.slave_id()); task.mutable_resources()->CopyFrom(offer.resources()); if (executorId.isSome()) { ExecutorInfo executor; executor.mutable_executor_id()->CopyFrom(executorId.get()); executor.mutable_command()->set_value(command); task.mutable_executor()->CopyFrom(executor); } else { task.mutable_command()->set_value(command); } return task; }
vector<TaskInfo> populateTasks( const string& cmd, CommandInfo healthCommand, const Offer& offer, int gracePeriodSeconds = 0, const Option<int>& consecutiveFailures = None(), const Option<map<string, string> >& env = None()) { TaskInfo task; task.set_name(""); task.mutable_task_id()->set_value("1"); task.mutable_slave_id()->CopyFrom(offer.slave_id()); task.mutable_resources()->CopyFrom(offer.resources()); CommandInfo command; command.set_value(cmd); Environment::Variable* variable = command.mutable_environment()->add_variables(); // We need to set the correct directory to launch health check process // instead of the default for tests. variable->set_name("MESOS_LAUNCHER_DIR"); variable->set_value(path::join(tests::flags.build_dir, "src")); task.mutable_command()->CopyFrom(command); HealthCheck healthCheck; if (env.isSome()) { foreachpair (const string& name, const string value, env.get()) { Environment::Variable* variable = healthCommand.mutable_environment()->mutable_variables()->Add(); variable->set_name(name); variable->set_value(value); } }
// This test runs a command without the command user field set. The // command will verify the assumption that the command is run as the // slave user (in this case, root). TEST_F(SlaveTest, ROOT_RunTaskWithCommandInfoWithoutUser) { Try<PID<Master> > master = StartMaster(); ASSERT_SOME(master); // Need flags for 'executor_registration_timeout'. slave::Flags flags = CreateSlaveFlags(); flags.isolation = "posix/cpu,posix/mem"; Try<MesosContainerizer*> containerizer = MesosContainerizer::create(flags, false); CHECK_SOME(containerizer); Try<PID<Slave> > slave = StartSlave(containerizer.get()); ASSERT_SOME(slave); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL); EXPECT_CALL(sched, registered(&driver, _, _)) .Times(1); Future<vector<Offer> > offers; EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); // Ignore subsequent offers. driver.start(); AWAIT_READY(offers); EXPECT_NE(0u, offers.get().size()); // Launch a task with the command executor. TaskInfo task; task.set_name(""); task.mutable_task_id()->set_value("1"); task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id()); task.mutable_resources()->MergeFrom(offers.get()[0].resources()); Result<string> user = os::user(); CHECK_SOME(user) << "Failed to get current user name" << (user.isError() ? ": " + user.error() : ""); // Command executor will run as user running test. CommandInfo command; command.set_value("test `whoami` = " + user.get()); task.mutable_command()->MergeFrom(command); vector<TaskInfo> tasks; tasks.push_back(task); Future<TaskStatus> statusRunning; Future<TaskStatus> statusFinished; EXPECT_CALL(sched, statusUpdate(&driver, _)) .WillOnce(FutureArg<1>(&statusRunning)) .WillOnce(FutureArg<1>(&statusFinished)); driver.launchTasks(offers.get()[0].id(), tasks); AWAIT_READY(statusRunning); EXPECT_EQ(TASK_RUNNING, statusRunning.get().state()); AWAIT_READY(statusFinished); EXPECT_EQ(TASK_FINISHED, statusFinished.get().state()); driver.stop(); driver.join(); Shutdown(); // Must shutdown before 'containerizer' gets deallocated. }
// This test verifies that when an executor terminates before // registering with slave, it is properly cleaned up. TEST_F(SlaveTest, RemoveUnregisteredTerminatedExecutor) { Try<PID<Master> > master = StartMaster(); ASSERT_SOME(master); MockExecutor exec(DEFAULT_EXECUTOR_ID); TestContainerizer containerizer(&exec); Try<PID<Slave> > slave = StartSlave(&containerizer); ASSERT_SOME(slave); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL); EXPECT_CALL(sched, registered(&driver, _, _)) .Times(1); Future<vector<Offer> > offers; EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); // Ignore subsequent offers. driver.start(); AWAIT_READY(offers); EXPECT_NE(0u, offers.get().size()); TaskInfo task; task.set_name(""); task.mutable_task_id()->set_value("1"); task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id()); task.mutable_resources()->MergeFrom(offers.get()[0].resources()); task.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO); vector<TaskInfo> tasks; tasks.push_back(task); // Drop the registration message from the executor to the slave. Future<process::Message> registerExecutorMessage = DROP_MESSAGE(Eq(RegisterExecutorMessage().GetTypeName()), _, _); driver.launchTasks(offers.get()[0].id(), tasks); AWAIT_READY(registerExecutorMessage); Future<TaskStatus> status; EXPECT_CALL(sched, statusUpdate(&driver, _)) .WillOnce(FutureArg<1>(&status)); Future<Nothing> schedule = FUTURE_DISPATCH(_, &GarbageCollectorProcess::schedule); // Now kill the executor. containerizer.destroy(offers.get()[0].framework_id(), DEFAULT_EXECUTOR_ID); AWAIT_READY(status); EXPECT_EQ(TASK_LOST, status.get().state()); // We use 'gc.schedule' as a signal for the executor being cleaned // up by the slave. AWAIT_READY(schedule); EXPECT_CALL(exec, shutdown(_)) .Times(AtMost(1)); driver.stop(); driver.join(); Shutdown(); // Must shutdown before 'containerizer' gets deallocated. }
// Test that we can run the mesos-executor and specify an "override" // command to use via the --override argument. TEST_F(SlaveTest, MesosExecutorWithOverride) { Try<PID<Master> > master = StartMaster(); ASSERT_SOME(master); TestContainerizer containerizer; Try<PID<Slave> > slave = StartSlave(&containerizer); ASSERT_SOME(slave); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL); EXPECT_CALL(sched, registered(&driver, _, _)) .Times(1); Future<vector<Offer> > offers; EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); // Ignore subsequent offers. driver.start(); AWAIT_READY(offers); EXPECT_NE(0u, offers.get().size()); // Launch a task with the command executor. TaskInfo task; task.set_name(""); task.mutable_task_id()->set_value("1"); task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id()); task.mutable_resources()->MergeFrom(offers.get()[0].resources()); CommandInfo command; command.set_value("sleep 10"); task.mutable_command()->MergeFrom(command); vector<TaskInfo> tasks; tasks.push_back(task); // Expect the launch and just assume it was sucessful since we'll be // launching the executor ourselves manually below. Future<Nothing> launch; EXPECT_CALL(containerizer, launch(_, _, _, _, _, _, _)) .WillOnce(DoAll(FutureSatisfy(&launch), Return(true))); // Expect wait after launch is called but don't return anything // until after we've finished everything below. Future<Nothing> wait; process::Promise<containerizer::Termination> promise; EXPECT_CALL(containerizer, wait(_)) .WillOnce(DoAll(FutureSatisfy(&wait), Return(promise.future()))); driver.launchTasks(offers.get()[0].id(), tasks); // Once we get the launch the mesos-executor with --override. AWAIT_READY(launch); // Set up fake environment for executor. map<string, string> environment; environment["MESOS_SLAVE_PID"] = stringify(slave.get()); environment["MESOS_SLAVE_ID"] = stringify(offers.get()[0].slave_id()); environment["MESOS_FRAMEWORK_ID"] = stringify(offers.get()[0].framework_id()); environment["MESOS_EXECUTOR_ID"] = stringify(task.task_id()); environment["MESOS_DIRECTORY"] = ""; // Create temporary file to store validation string. If command is // succesfully replaced, this file will end up containing the string // 'Hello World\n'. Otherwise, the original task command i.e. // 'sleep' will be called and the test will fail. Try<std::string> file = os::mktemp(); ASSERT_SOME(file); string executorCommand = path::join(tests::flags.build_dir, "src", "mesos-executor") + " --override -- /bin/sh -c 'echo hello world >" + file.get() + "'"; // Expect two status updates, one for once the mesos-executor says // the task is running and one for after our overridden command // above finishes. Future<TaskStatus> status1, status2; EXPECT_CALL(sched, statusUpdate(_, _)) .WillOnce(FutureArg<1>(&status1)) .WillOnce(FutureArg<1>(&status2)); Try<process::Subprocess> executor = process::subprocess( executorCommand, process::Subprocess::PIPE(), process::Subprocess::PIPE(), process::Subprocess::PIPE(), environment); ASSERT_SOME(executor); // Scheduler should receive the TASK_RUNNING update. AWAIT_READY(status1); ASSERT_EQ(TASK_RUNNING, status1.get().state()); AWAIT_READY(status2); ASSERT_EQ(TASK_FINISHED, status2.get().state()); AWAIT_READY(wait); containerizer::Termination termination; termination.set_killed(false); termination.set_message("Killed executor"); termination.set_status(0); promise.set(termination); driver.stop(); driver.join(); AWAIT_READY(executor.get().status()); // Verify file contents. Try<std::string> validate = os::read(file.get()); ASSERT_SOME(validate); EXPECT_EQ(validate.get(), "hello world\n"); os::rm(file.get()); Shutdown(); }
void ChapelScheduler::resourceOffers(SchedulerDriver* driver, const vector<Offer>& offers) { // offers only contain resources describing a single node -> for more details read include/mesos/mesos.proto // cout << "***\tProcessing Offers!" << endl; const int remainingCpusReq = cpusReq - launchedTsks.size(); if(remainingCpusReq == 0) { for(size_t k = 0; k < offers.size(); k++) { const Offer& offer = offers[k]; driver->declineOffer(offer.id()); } cout << "\t\tChapelScheduler declined offer because resource requirements satisfied" << endl; } // cycle through all the offers and resource a task // each offer corresponds to a single compute node // const static Resources TASK_RESOURCES = Resources::parse(mesosReq).get(); vector<TaskInfo> tsks; for(size_t i = 0; i < offers.size(); i++) { const Offer& offer = offers[i]; if(tsks.size() == remainingCpusReq) { driver->declineOffer(offer.id()); continue; // need to cycle through the remaining offers and decline them } Resources remaining = offer.resources(); /* attempting to exercise multi-tenancy capabilities in mesos * given an offer from a node, try to maximize the number of jobs * that can be allocated to that node given the job's resource * requirements * * if the desired number of nodes and jobs are met, then launch * all the jobs on that node's offer * * this means some nodes will get multiple tasks assigned for * execution */ vector<TaskInfo> tol; while(remaining.flatten().contains(TASK_RESOUCES) && ((remainingCpusReq-tsks.size()) > 0)) { const string tid = stringify<size_t>(tsks.size()); TaskInfo task; task.set_name("Chapel Remote Program Task\t" + tid); task.mutable_task_id()->set_value(tid); task.mutable_slave_id()->MergeFrom(offer.slave_id()); task.mutable_command()->MergeFrom(chplCmdInfo); task.mutable_resources()->MergeFrom(TASK_RESOURCES); task.set_data(remoteCmd); tol.push_back(task); // tol means "to launch" tsks.push_back(task); // tsks tracks tasks launched for framework termination purposes remaining-=TASK_RESOURCES; tasksLaunched+=1; cout << "\t\t+++\tLaunching # of Tasks!\t" << tol.size() << " of " << tasksLaunched << endl; } // after all the tasks for this offer have been "resourced" // launch the tasks using this offer.id // driver->launchTasks(offer.id(), tol); } const size_t pendingTsksSize = tsks.size(); cout << endl << "\tAcquired # tasks " << pendingTsksSize << " required # of tasks " << cpusReq << " remaining required # tasks " << remainingCpusReq << endl << endl; if(pendingTsksSize > 0) { for(vector<TaskInfo>::iterator i = tsks.begin(); i != tsks.end(); i++) { launchedTsks.insert(make_pair(i->task_id().value(), *i)); } } }
// This test ensures that a killTask() can happen between runTask() // and _runTask() and then gets "handled properly". This means that // the task never gets started, but also does not get lost. The end // result is status TASK_KILLED. Essentially, killing the task is // realized while preparing to start it. See MESOS-947. // Temporarily disabled due to MESOS-1945. TEST_F(SlaveTest, DISABLED_KillTaskBetweenRunTaskParts) { Try<PID<Master> > master = StartMaster(); ASSERT_SOME(master); MockExecutor exec(DEFAULT_EXECUTOR_ID); TestContainerizer containerizer(&exec); StandaloneMasterDetector detector(master.get()); MockSlave slave(CreateSlaveFlags(), &detector, &containerizer); process::spawn(slave); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL); EXPECT_CALL(sched, registered(&driver, _, _)) .Times(1); Future<vector<Offer> > offers; EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); // Ignore subsequent offers. driver.start(); AWAIT_READY(offers); EXPECT_NE(0u, offers.get().size()); TaskInfo task; task.set_name(""); task.mutable_task_id()->set_value("1"); task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id()); task.mutable_resources()->MergeFrom(offers.get()[0].resources()); task.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO); vector<TaskInfo> tasks; tasks.push_back(task); EXPECT_CALL(exec, registered(_, _, _, _)) .Times(0); EXPECT_CALL(exec, launchTask(_, _)) .Times(0); EXPECT_CALL(exec, shutdown(_)) .Times(0); Future<TaskStatus> status; EXPECT_CALL(sched, statusUpdate(&driver, _)) .WillRepeatedly(FutureArg<1>(&status)); EXPECT_CALL(slave, runTask(_, _, _, _, _)) .WillOnce(Invoke(&slave, &MockSlave::unmocked_runTask)); // Saved arguments from Slave::_runTask(). Future<bool> future; FrameworkInfo frameworkInfo; FrameworkID frameworkId; // Skip what Slave::_runTask() normally does, save its arguments for // later, tie reaching the critical moment when to kill the task to // a future. Future<Nothing> _runTask; EXPECT_CALL(slave, _runTask(_, _, _, _, _)) .WillOnce(DoAll(FutureSatisfy(&_runTask), SaveArg<0>(&future), SaveArg<1>(&frameworkInfo), SaveArg<2>(&frameworkId))); driver.launchTasks(offers.get()[0].id(), tasks); AWAIT_READY(_runTask); Future<Nothing> killTask; EXPECT_CALL(slave, killTask(_, _, _)) .WillOnce(DoAll(Invoke(&slave, &MockSlave::unmocked_killTask), FutureSatisfy(&killTask))); driver.killTask(task.task_id()); // Since this is the only task ever for this framework, the // framework should get removed in Slave::_runTask(). // Thus we can observe that this happens before Shutdown(). Future<Nothing> removeFramework; EXPECT_CALL(slave, removeFramework(_)) .WillOnce(DoAll(Invoke(&slave, &MockSlave::unmocked_removeFramework), FutureSatisfy(&removeFramework))); AWAIT_READY(killTask); slave.unmocked__runTask( future, frameworkInfo, frameworkId, master.get(), task); AWAIT_READY(removeFramework); AWAIT_READY(status); EXPECT_EQ(TASK_KILLED, status.get().state()); driver.stop(); driver.join(); process::terminate(slave); process::wait(slave); Shutdown(); // Must shutdown before 'containerizer' gets deallocated. }
// The purpose of this test is to ensure that when slaves are removed // from the master, and then attempt to send exited executor messages, // we send a ShutdownMessage to the slave. Why? Because during a // network partition, the master will remove a partitioned slave, thus // sending its tasks to LOST. At this point, when the partition is // removed, the slave may attempt to send exited executor messages if // it was unaware that the master removed it. We've already // notified frameworks that the tasks under the executors were LOST, // so we have to have the slave shut down. TEST_F(PartitionTest, PartitionedSlaveExitedExecutor) { Try<PID<Master> > master = StartMaster(); ASSERT_SOME(master); // Allow the master to PING the slave, but drop all PONG messages // from the slave. Note that we don't match on the master / slave // PIDs because it's actually the SlaveObserver Process that sends // the pings. Future<Message> ping = FUTURE_MESSAGE(Eq("PING"), _, _); DROP_MESSAGES(Eq("PONG"), _, _); MockExecutor exec(DEFAULT_EXECUTOR_ID); TestContainerizer containerizer(&exec); Try<PID<Slave> > slave = StartSlave(&containerizer); ASSERT_SOME(slave); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL); Future<FrameworkID> frameworkId; EXPECT_CALL(sched, registered(&driver, _, _)) .WillOnce(FutureArg<1>(&frameworkId));\ Future<vector<Offer> > offers; EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); driver.start(); AWAIT_READY(frameworkId); AWAIT_READY(offers); ASSERT_NE(0u, offers.get().size()); // Launch a task. This allows us to have the slave send an // ExitedExecutorMessage. TaskID taskId; taskId.set_value("1"); TaskInfo task; task.set_name(""); task.mutable_task_id()->MergeFrom(taskId); task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id()); task.mutable_resources()->MergeFrom(offers.get()[0].resources()); task.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO); task.mutable_executor()->mutable_command()->set_value("sleep 60"); vector<TaskInfo> tasks; tasks.push_back(task); // Set up the expectations for launching the task. EXPECT_CALL(exec, registered(_, _, _, _)); EXPECT_CALL(exec, launchTask(_, _)) .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING)); // Drop all the status updates from the slave, so that we can // ensure the ExitedExecutorMessage is what triggers the slave // shutdown. DROP_PROTOBUFS(StatusUpdateMessage(), _, master.get()); driver.launchTasks(offers.get()[0].id(), tasks); // Drop the first shutdown message from the master (simulated // partition) and allow the second shutdown message to pass when // triggered by the ExitedExecutorMessage. Future<ShutdownMessage> shutdownMessage = DROP_PROTOBUF(ShutdownMessage(), _, slave.get()); Future<TaskStatus> lostStatus; EXPECT_CALL(sched, statusUpdate(&driver, _)) .WillOnce(FutureArg<1>(&lostStatus)); Future<Nothing> slaveLost; EXPECT_CALL(sched, slaveLost(&driver, _)) .WillOnce(FutureSatisfy(&slaveLost)); Clock::pause(); // Now, induce a partition of the slave by having the master // timeout the slave. uint32_t pings = 0; while (true) { AWAIT_READY(ping); pings++; if (pings == master::MAX_SLAVE_PING_TIMEOUTS) { break; } ping = FUTURE_MESSAGE(Eq("PING"), _, _); Clock::advance(master::SLAVE_PING_TIMEOUT); Clock::settle(); } Clock::advance(master::SLAVE_PING_TIMEOUT); Clock::settle(); // The master will have notified the framework of the lost task. AWAIT_READY(lostStatus); EXPECT_EQ(TASK_LOST, lostStatus.get().state()); // Wait for the master to attempt to shut down the slave. AWAIT_READY(shutdownMessage); // The master will notify the framework that the slave was lost. AWAIT_READY(slaveLost); shutdownMessage = FUTURE_PROTOBUF(ShutdownMessage(), _, slave.get()); // Induce an ExitedExecutorMessage from the slave. containerizer.destroy( frameworkId.get(), DEFAULT_EXECUTOR_INFO.executor_id()); // Upon receiving the message, the master will shutdown the slave. AWAIT_READY(shutdownMessage); Clock::resume(); driver.stop(); driver.join(); Shutdown(); }
// This test has been temporarily disabled due to MESOS-1257. TEST_F(ExternalContainerizerTest, DISABLED_Launch) { Try<PID<Master> > master = this->StartMaster(); ASSERT_SOME(master); Flags testFlags; slave::Flags flags = this->CreateSlaveFlags(); flags.isolation = "external"; flags.containerizer_path = testFlags.build_dir + "/src/examples/python/test-containerizer"; MockExternalContainerizer containerizer(flags); Try<PID<Slave> > slave = this->StartSlave(&containerizer, flags); ASSERT_SOME(slave); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL); Future<FrameworkID> frameworkId; EXPECT_CALL(sched, registered(&driver, _, _)) .WillOnce(FutureArg<1>(&frameworkId)); Future<vector<Offer> > offers; EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); // Ignore subsequent offers. driver.start(); AWAIT_READY(frameworkId); AWAIT_READY(offers); EXPECT_NE(0u, offers.get().size()); TaskInfo task; task.set_name("isolator_test"); task.mutable_task_id()->set_value("1"); task.mutable_slave_id()->CopyFrom(offers.get()[0].slave_id()); task.mutable_resources()->CopyFrom(offers.get()[0].resources()); Resources resources(offers.get()[0].resources()); Option<Bytes> mem = resources.mem(); ASSERT_SOME(mem); Option<double> cpus = resources.cpus(); ASSERT_SOME(cpus); const std::string& file = path::join(flags.work_dir, "ready"); // This task induces user/system load in a child process by // running top in a child process for ten seconds. task.mutable_command()->set_value( #ifdef __APPLE__ // Use logging mode with 30,000 samples with no interval. "top -l 30000 -s 0 2>&1 > /dev/null & " #else // Batch mode, with 30,000 samples with no interval. "top -b -d 0 -n 30000 2>&1 > /dev/null & " #endif "touch " + file + "; " // Signals that the top command is running. "sleep 60"); Future<TaskStatus> status; EXPECT_CALL(sched, statusUpdate(&driver, _)) .WillOnce(FutureArg<1>(&status)) .WillRepeatedly(Return()); // Ignore rest for now. Future<ContainerID> containerId; EXPECT_CALL(containerizer, launch(_, _, _, _, _, _, _, _)) .WillOnce(DoAll(FutureArg<0>(&containerId), Invoke(&containerizer, &MockExternalContainerizer::_launch))); driver.launchTasks(offers.get()[0].id(), {task}); AWAIT_READY(containerId); AWAIT_READY(status); EXPECT_EQ(TASK_RUNNING, status.get().state()); // Wait for the task to begin inducing cpu time. while (!os::exists(file)); ExecutorID executorId; executorId.set_value(task.task_id().value()); // We'll wait up to 10 seconds for the child process to induce // 1/8 of a second of user and system cpu time in total. // TODO(bmahler): Also induce rss memory consumption, by re-using // the balloon framework. ResourceStatistics statistics; Duration waited = Duration::zero(); do { Future<ResourceStatistics> usage = containerizer.usage(containerId.get()); AWAIT_READY(usage); statistics = usage.get(); // If we meet our usage expectations, we're done! // NOTE: We are currently getting dummy-data from the test- // containerizer python script matching these expectations. // TODO(tillt): Consider working with real data. if (statistics.cpus_user_time_secs() >= 0.120 && statistics.cpus_system_time_secs() >= 0.05 && statistics.mem_rss_bytes() >= 1024u) { break; } os::sleep(Milliseconds(100)); waited += Milliseconds(100); } while (waited < Seconds(10)); EXPECT_GE(statistics.cpus_user_time_secs(), 0.120); EXPECT_GE(statistics.cpus_system_time_secs(), 0.05); EXPECT_EQ(statistics.cpus_limit(), cpus.get()); EXPECT_GE(statistics.mem_rss_bytes(), 1024u); EXPECT_EQ(statistics.mem_limit_bytes(), mem.get().bytes()); EXPECT_CALL(sched, statusUpdate(&driver, _)) .WillOnce(FutureArg<1>(&status)); driver.killTask(task.task_id()); AWAIT_READY(status); EXPECT_EQ(TASK_KILLED, status.get().state()); driver.stop(); driver.join(); this->Shutdown(); }
// This test verifies that the slave run task label decorator can add // and remove labels from a task during the launch sequence. A task // with two labels ("foo":"bar" and "bar":"baz") is launched and will // get modified by the slave hook to strip the "foo":"bar" pair and // add a new "baz":"qux" pair. TEST_F(HookTest, VerifySlaveRunTaskHook) { Try<Owned<cluster::Master>> master = StartMaster(); ASSERT_SOME(master); MockExecutor exec(DEFAULT_EXECUTOR_ID); TestContainerizer containerizer(&exec); Owned<MasterDetector> detector = master.get()->createDetector(); Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), &containerizer); ASSERT_SOME(slave); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL); EXPECT_CALL(sched, registered(&driver, _, _)); Future<vector<Offer>> offers; EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); // Ignore subsequent offers. driver.start(); AWAIT_READY(offers); ASSERT_EQ(1u, offers.get().size()); TaskInfo task; task.set_name(""); task.mutable_task_id()->set_value("1"); task.mutable_slave_id()->CopyFrom(offers.get()[0].slave_id()); task.mutable_resources()->CopyFrom(offers.get()[0].resources()); task.mutable_executor()->CopyFrom(DEFAULT_EXECUTOR_INFO); // Add two labels: (1) will be removed by the hook to ensure that // runTaskHook can remove labels (2) will be preserved to ensure // that the framework can add labels to the task and have those be // available by the end of the launch task sequence when hooks are // used (to protect against hooks removing labels completely). Labels* labels = task.mutable_labels(); labels->add_labels()->CopyFrom(createLabel("foo", "bar")); labels->add_labels()->CopyFrom(createLabel("bar", "baz")); EXPECT_CALL(exec, registered(_, _, _, _)); Future<TaskInfo> taskInfo; EXPECT_CALL(exec, launchTask(_, _)) .WillOnce(DoAll( FutureArg<1>(&taskInfo), SendStatusUpdateFromTask(TASK_RUNNING))); driver.launchTasks(offers.get()[0].id(), {task}); AWAIT_READY(taskInfo); // The master hook will hang an extra label off. const Labels& labels_ = taskInfo.get().labels(); ASSERT_EQ(3, labels_.labels_size()); // The slave run task hook will prepend a new "baz":"qux" label. EXPECT_EQ("baz", labels_.labels(0).key()); EXPECT_EQ("qux", labels_.labels(0).value()); // Master launch task hook will still hang off test label. EXPECT_EQ(testLabelKey, labels_.labels(1).key()); EXPECT_EQ(testLabelValue, labels_.labels(1).value()); // And lastly, we only expect the "foo":"bar" pair to be stripped by // the module. The last pair should be the original "bar":"baz" // pair set by the test. EXPECT_EQ("bar", labels_.labels(2).key()); EXPECT_EQ("baz", labels_.labels(2).value()); EXPECT_CALL(exec, shutdown(_)) .Times(AtMost(1)); driver.stop(); driver.join(); }
// Test that the prepare launch docker hook execute before launch // a docker container. Test hook create a file "foo" in the sandbox // directory. When the docker container launched, the sandbox directory // is mounted to the docker container. We validate the hook by verifying // the "foo" file exists in the docker container or not. TEST_F(HookTest, ROOT_DOCKER_VerifySlavePreLaunchDockerHook) { Try<Owned<cluster::Master>> master = StartMaster(); ASSERT_SOME(master); MockDocker* mockDocker = new MockDocker(tests::flags.docker, tests::flags.docker_socket); Shared<Docker> docker(mockDocker); slave::Flags flags = CreateSlaveFlags(); Fetcher fetcher; Try<ContainerLogger*> logger = ContainerLogger::create(flags.container_logger); ASSERT_SOME(logger); MockDockerContainerizer containerizer( flags, &fetcher, Owned<ContainerLogger>(logger.get()), docker); Owned<MasterDetector> detector = master.get()->createDetector(); Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), &containerizer, flags); ASSERT_SOME(slave); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL); Future<FrameworkID> frameworkId; EXPECT_CALL(sched, registered(&driver, _, _)) .WillOnce(FutureArg<1>(&frameworkId)); Future<vector<Offer>> offers; EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); // Ignore subsequent offers. driver.start(); AWAIT_READY(frameworkId); AWAIT_READY(offers); ASSERT_NE(0u, offers.get().size()); const Offer& offer = offers.get()[0]; SlaveID slaveId = offer.slave_id(); TaskInfo task; task.set_name(""); task.mutable_task_id()->set_value("1"); task.mutable_slave_id()->CopyFrom(offer.slave_id()); task.mutable_resources()->CopyFrom(offer.resources()); CommandInfo command; command.set_value("test -f " + path::join(flags.sandbox_directory, "foo")); ContainerInfo containerInfo; containerInfo.set_type(ContainerInfo::DOCKER); // TODO(tnachen): Use local image to test if possible. ContainerInfo::DockerInfo dockerInfo; dockerInfo.set_image("alpine"); containerInfo.mutable_docker()->CopyFrom(dockerInfo); task.mutable_command()->CopyFrom(command); task.mutable_container()->CopyFrom(containerInfo); vector<TaskInfo> tasks; tasks.push_back(task); Future<ContainerID> containerId; EXPECT_CALL(containerizer, launch(_, _, _, _, _, _, _, _)) .WillOnce(DoAll(FutureArg<0>(&containerId), Invoke(&containerizer, &MockDockerContainerizer::_launch))); Future<TaskStatus> statusRunning; Future<TaskStatus> statusFinished; EXPECT_CALL(sched, statusUpdate(&driver, _)) .WillOnce(FutureArg<1>(&statusRunning)) .WillOnce(FutureArg<1>(&statusFinished)) .WillRepeatedly(DoDefault()); driver.launchTasks(offers.get()[0].id(), tasks); AWAIT_READY_FOR(containerId, Seconds(60)); AWAIT_READY_FOR(statusRunning, Seconds(60)); EXPECT_EQ(TASK_RUNNING, statusRunning.get().state()); AWAIT_READY_FOR(statusFinished, Seconds(60)); EXPECT_EQ(TASK_FINISHED, statusFinished.get().state()); Future<containerizer::Termination> termination = containerizer.wait(containerId.get()); driver.stop(); driver.join(); AWAIT_READY(termination); Future<list<Docker::Container>> containers = docker.get()->ps(true, slave::DOCKER_NAME_PREFIX); AWAIT_READY(containers); // Cleanup all mesos launched containers. foreach (const Docker::Container& container, containers.get()) { AWAIT_READY_FOR(docker.get()->rm(container.id, true), Seconds(30)); } }
// This test verifies that docker image default entrypoint is executed // correctly using registry puller. This corresponds to the case in runtime // isolator logic table: sh=0, value=0, argv=1, entrypoint=1, cmd=0. TEST_F(DockerRuntimeIsolatorTest, ROOT_CURL_INTERNET_DockerDefaultEntryptRegistryPuller) { Try<Owned<cluster::Master>> master = StartMaster(); ASSERT_SOME(master); slave::Flags flags = CreateSlaveFlags(); flags.isolation = "docker/runtime,filesystem/linux"; flags.image_providers = "docker"; flags.docker_store_dir = path::join(os::getcwd(), "store"); Owned<MasterDetector> detector = master.get()->createDetector(); Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), flags); ASSERT_SOME(slave); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL); EXPECT_CALL(sched, registered(&driver, _, _)); Future<vector<Offer>> offers; EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); // Ignore subsequent offers. driver.start(); AWAIT_READY(offers); ASSERT_EQ(1u, offers->size()); const Offer& offer = offers.get()[0]; TaskInfo task; task.set_name("test-task"); task.mutable_task_id()->set_value(UUID::random().toString()); task.mutable_slave_id()->CopyFrom(offer.slave_id()); task.mutable_resources()->CopyFrom(Resources::parse("cpus:1;mem:128").get()); task.mutable_command()->set_shell(false); task.mutable_command()->add_arguments("hello world"); Image image; image.set_type(Image::DOCKER); // 'mesosphere/inky' image is used in docker containerizer test, which // contains entrypoint as 'echo' and cmd as null. image.mutable_docker()->set_name("mesosphere/inky"); ContainerInfo* container = task.mutable_container(); container->set_type(ContainerInfo::MESOS); container->mutable_mesos()->mutable_image()->CopyFrom(image); Future<TaskStatus> statusRunning; Future<TaskStatus> statusFinished; EXPECT_CALL(sched, statusUpdate(&driver, _)) .WillOnce(FutureArg<1>(&statusRunning)) .WillOnce(FutureArg<1>(&statusFinished)); driver.launchTasks(offer.id(), {task}); AWAIT_READY_FOR(statusRunning, Seconds(60)); EXPECT_EQ(task.task_id(), statusRunning->task_id()); EXPECT_EQ(TASK_RUNNING, statusRunning->state()); AWAIT_READY(statusFinished); EXPECT_EQ(task.task_id(), statusFinished->task_id()); EXPECT_EQ(TASK_FINISHED, statusFinished->state()); driver.stop(); driver.join(); }
// Test executor environment decorator hook and remove executor hook // for slave. We expect the environment-decorator hook to create a // temporary file and the remove-executor hook to delete that file. TEST_F(HookTest, VerifySlaveLaunchExecutorHook) { master::Flags masterFlags = CreateMasterFlags(); Try<Owned<cluster::Master>> master = StartMaster(masterFlags); ASSERT_SOME(master); slave::Flags slaveFlags = CreateSlaveFlags(); MockExecutor exec(DEFAULT_EXECUTOR_ID); TestContainerizer containerizer(&exec); Owned<MasterDetector> detector = master.get()->createDetector(); Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), &containerizer); ASSERT_SOME(slave); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL); EXPECT_CALL(sched, registered(&driver, _, _)); Future<vector<Offer>> offers; EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); // Ignore subsequent offers. driver.start(); AWAIT_READY(offers); EXPECT_NE(0u, offers.get().size()); // Launch a task with the command executor. TaskInfo task; task.set_name(""); task.mutable_task_id()->set_value("1"); task.mutable_slave_id()->CopyFrom(offers.get()[0].slave_id()); task.mutable_resources()->CopyFrom(offers.get()[0].resources()); task.mutable_executor()->CopyFrom(DEFAULT_EXECUTOR_INFO); EXPECT_CALL(exec, registered(_, _, _, _)); EXPECT_CALL(exec, launchTask(_, _)) .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING)); // Executor shutdown would force the Slave to execute the // remove-executor hook. EXPECT_CALL(exec, shutdown(_)); Future<TaskStatus> status; EXPECT_CALL(sched, statusUpdate(&driver, _)) .WillOnce(FutureArg<1>(&status)) .WillRepeatedly(Return()); // On successful completion of the "slaveLaunchExecutorHook", the // test hook will send a HookExecuted message to itself. We wait // until that message is intercepted by the testing infrastructure. Future<HookExecuted> hookFuture = FUTURE_PROTOBUF(HookExecuted(), _, _); driver.launchTasks(offers.get()[0].id(), {task}); AWAIT_READY(status); driver.stop(); driver.join(); // The scheduler shutdown from above forces the executor to // shutdown. This in turn should force the Slave to execute // the remove-executor hook. // Here, we wait for the hook to finish execution. AWAIT_READY(hookFuture); }
// This test verifies that docker image default cmd is executed correctly. // This corresponds to the case in runtime isolator logic table: sh=0, // value=0, argv=1, entrypoint=0, cmd=1. TEST_F(DockerRuntimeIsolatorTest, ROOT_DockerDefaultCmdLocalPuller) { Try<Owned<cluster::Master>> master = StartMaster(); ASSERT_SOME(master); const string directory = path::join(os::getcwd(), "archives"); Future<Nothing> testImage = DockerArchive::create(directory, "alpine", "null", "[\"sh\"]"); AWAIT_READY(testImage); ASSERT_TRUE(os::exists(path::join(directory, "alpine.tar"))); slave::Flags flags = CreateSlaveFlags(); flags.isolation = "docker/runtime,filesystem/linux"; flags.image_providers = "docker"; flags.docker_registry = directory; // Make docker store directory as a temparary directory. Because the // manifest of the test image is changeable, the image cached on // previous tests should never be used. flags.docker_store_dir = path::join(os::getcwd(), "store"); Owned<MasterDetector> detector = master.get()->createDetector(); Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), flags); ASSERT_SOME(slave); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL); EXPECT_CALL(sched, registered(&driver, _, _)); Future<vector<Offer>> offers; EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); // Ignore subsequent offers. driver.start(); AWAIT_READY(offers); ASSERT_EQ(1u, offers->size()); const Offer& offer = offers.get()[0]; TaskInfo task; task.set_name("test-task"); task.mutable_task_id()->set_value(UUID::random().toString()); task.mutable_slave_id()->CopyFrom(offer.slave_id()); task.mutable_resources()->CopyFrom(Resources::parse("cpus:1;mem:128").get()); task.mutable_command()->set_shell(false); task.mutable_command()->add_arguments("-c"); task.mutable_command()->add_arguments("echo 'hello world'"); Image image; image.set_type(Image::DOCKER); image.mutable_docker()->set_name("alpine"); ContainerInfo* container = task.mutable_container(); container->set_type(ContainerInfo::MESOS); container->mutable_mesos()->mutable_image()->CopyFrom(image); Future<TaskStatus> statusRunning; Future<TaskStatus> statusFinished; EXPECT_CALL(sched, statusUpdate(&driver, _)) .WillOnce(FutureArg<1>(&statusRunning)) .WillOnce(FutureArg<1>(&statusFinished)); driver.launchTasks(offer.id(), {task}); AWAIT_READY_FOR(statusRunning, Seconds(60)); EXPECT_EQ(task.task_id(), statusRunning->task_id()); EXPECT_EQ(TASK_RUNNING, statusRunning->state()); AWAIT_READY(statusFinished); EXPECT_EQ(task.task_id(), statusFinished->task_id()); EXPECT_EQ(TASK_FINISHED, statusFinished->state()); driver.stop(); driver.join(); }
// This test verifies that an authorized task launch is successful. TEST_F(MasterAuthorizationTest, AuthorizedTask) { // Setup ACLs so that the framework can launch tasks as "foo". ACLs acls; mesos::ACL::RunTasks* acl = acls.add_run_tasks(); acl->mutable_principals()->add_values(DEFAULT_FRAMEWORK_INFO.principal()); acl->mutable_users()->add_values("foo"); master::Flags flags = CreateMasterFlags(); flags.acls = acls; Try<PID<Master> > master = StartMaster(flags); ASSERT_SOME(master); // Create an authorized executor. ExecutorInfo executor; // Bug in gcc 4.1.*, must assign on next line. executor = CREATE_EXECUTOR_INFO("test-executor", "exit 1"); executor.mutable_command()->set_user("foo"); MockExecutor exec(executor.executor_id()); Try<PID<Slave> > slave = StartSlave(&exec); ASSERT_SOME(slave); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL); EXPECT_CALL(sched, registered(&driver, _, _)) .Times(1); Future<vector<Offer> > offers; EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); // Ignore subsequent offers. driver.start(); AWAIT_READY(offers); EXPECT_NE(0u, offers.get().size()); // Create an authorized task. TaskInfo task; task.set_name("test"); task.mutable_task_id()->set_value("1"); task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id()); task.mutable_resources()->MergeFrom(offers.get()[0].resources()); task.mutable_executor()->MergeFrom(executor); vector<TaskInfo> tasks; tasks.push_back(task); EXPECT_CALL(exec, registered(_, _, _, _)) .Times(1); EXPECT_CALL(exec, launchTask(_, _)) .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING)); Future<TaskStatus> status; EXPECT_CALL(sched, statusUpdate(&driver, _)) .WillOnce(FutureArg<1>(&status)); driver.launchTasks(offers.get()[0].id(), tasks); AWAIT_READY(status); EXPECT_EQ(TASK_RUNNING, status.get().state()); EXPECT_CALL(exec, shutdown(_)) .Times(AtMost(1)); driver.stop(); driver.join(); Shutdown(); // Must shutdown before 'containerizer' gets deallocated. }
virtual void resourceOffers( SchedulerDriver* driver, const vector<Offer>& offers) { static const Try<Resources> TASK_RESOURCES = Resources::parse(resources); if (TASK_RESOURCES.isError()) { cerr << "Failed to parse resources '" << resources << "': " << TASK_RESOURCES.error() << endl; driver->abort(); return; } foreach (const Offer& offer, offers) { if (!launched && Resources(offer.resources()).contains(TASK_RESOURCES.get())) { TaskInfo task; task.set_name(name); task.mutable_task_id()->set_value(name); task.mutable_slave_id()->MergeFrom(offer.slave_id()); task.mutable_resources()->CopyFrom(TASK_RESOURCES.get()); CommandInfo* commandInfo = task.mutable_command(); commandInfo->set_value(command); if (environment.isSome()) { Environment* environment_ = commandInfo->mutable_environment(); foreachpair (const std::string& name, const std::string& value, environment.get()) { Environment_Variable* environmentVariable = environment_->add_variables(); environmentVariable->set_name(name); environmentVariable->set_value(value); } } if (uri.isSome()) { task.mutable_command()->add_uris()->set_value(uri.get()); } if (dockerImage.isSome()) { ContainerInfo containerInfo; if (containerizer == "mesos") { containerInfo.set_type(ContainerInfo::MESOS); ContainerInfo::MesosInfo mesosInfo; Image mesosImage; mesosImage.set_type(Image::DOCKER); mesosImage.mutable_docker()->set_name(dockerImage.get()); mesosInfo.mutable_image()->CopyFrom(mesosImage); containerInfo.mutable_mesos()->CopyFrom(mesosInfo); } else if (containerizer == "docker") { containerInfo.set_type(ContainerInfo::DOCKER); ContainerInfo::DockerInfo dockerInfo; dockerInfo.set_image(dockerImage.get()); containerInfo.mutable_docker()->CopyFrom(dockerInfo); } else { cerr << "Unsupported containerizer: " << containerizer << endl;; driver->abort(); return; } task.mutable_container()->CopyFrom(containerInfo); } vector<TaskInfo> tasks; tasks.push_back(task); driver->launchTasks(offer.id(), tasks); cout << "task " << name << " submitted to slave " << offer.slave_id() << endl; launched = true; } else {
// This test runs a command _with_ the command user field set. The // command will verify the assumption that the command is run as the // specified user. We use (and assume the precense) of the // unprivileged 'nobody' user which should be available on both Linux // and Mac OS X. TEST_F(SlaveTest, DISABLED_ROOT_RunTaskWithCommandInfoWithUser) { // TODO(nnielsen): Introduce STOUT abstraction for user verification // instead of flat getpwnam call. const string testUser = "******"; if (::getpwnam(testUser.c_str()) == NULL) { LOG(WARNING) << "Cannot run ROOT_RunTaskWithCommandInfoWithUser test:" << " user '" << testUser << "' is not present"; return; } Try<PID<Master> > master = StartMaster(); ASSERT_SOME(master); // Need flags for 'executor_registration_timeout'. slave::Flags flags = CreateSlaveFlags(); flags.isolation = "posix/cpu,posix/mem"; Try<MesosContainerizer*> containerizer = MesosContainerizer::create(flags, false); CHECK_SOME(containerizer); Try<PID<Slave> > slave = StartSlave(containerizer.get()); ASSERT_SOME(slave); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL); EXPECT_CALL(sched, registered(&driver, _, _)) .Times(1); Future<vector<Offer> > offers; EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); // Ignore subsequent offers. driver.start(); AWAIT_READY(offers); EXPECT_NE(0u, offers.get().size()); // Launch a task with the command executor. TaskInfo task; task.set_name(""); task.mutable_task_id()->set_value("1"); task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id()); task.mutable_resources()->MergeFrom(offers.get()[0].resources()); CommandInfo command; command.set_value("test `whoami` = " + testUser); command.set_user(testUser); task.mutable_command()->MergeFrom(command); vector<TaskInfo> tasks; tasks.push_back(task); Future<TaskStatus> statusRunning; Future<TaskStatus> statusFinished; EXPECT_CALL(sched, statusUpdate(&driver, _)) .WillOnce(FutureArg<1>(&statusRunning)) .WillOnce(FutureArg<1>(&statusFinished)); driver.launchTasks(offers.get()[0].id(), tasks); AWAIT_READY(statusRunning); EXPECT_EQ(TASK_RUNNING, statusRunning.get().state()); AWAIT_READY(statusFinished); EXPECT_EQ(TASK_FINISHED, statusFinished.get().state()); driver.stop(); driver.join(); Shutdown(); // Must shutdown before 'containerizer' gets deallocated. }
TYPED_TEST(IsolatorTest, Usage) { Try<PID<Master> > master = this->StartMaster(); ASSERT_SOME(master); TypeParam isolator; slave::Flags flags = this->CreateSlaveFlags(); Try<PID<Slave> > slave = this->StartSlave(&isolator, flags); ASSERT_SOME(slave); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL); Future<FrameworkID> frameworkId; EXPECT_CALL(sched, registered(&driver, _, _)) .WillOnce(FutureArg<1>(&frameworkId)); Future<vector<Offer> > offers; EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); // Ignore subsequent offers. driver.start(); AWAIT_READY(frameworkId); AWAIT_READY(offers); EXPECT_NE(0u, offers.get().size()); TaskInfo task; task.set_name("isolator_test"); task.mutable_task_id()->set_value("1"); task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id()); task.mutable_resources()->MergeFrom(offers.get()[0].resources()); Resources resources(offers.get()[0].resources()); Option<Bytes> mem = resources.mem(); ASSERT_SOME(mem); Option<double> cpus = resources.cpus(); ASSERT_SOME(cpus); const std::string& file = path::join(flags.work_dir, "ready"); // This task induces user/system load in a child process by // running top in a child process for ten seconds. task.mutable_command()->set_value( #ifdef __APPLE__ // Use logging mode with 30,000 samples with no interval. "top -l 30000 -s 0 2>&1 > /dev/null & " #else // Batch mode, with 30,000 samples with no interval. "top -b -d 0 -n 30000 2>&1 > /dev/null & " #endif "touch " + file + "; " // Signals that the top command is running. "sleep 60"); vector<TaskInfo> tasks; tasks.push_back(task); Future<TaskStatus> status; EXPECT_CALL(sched, statusUpdate(&driver, _)) .WillOnce(FutureArg<1>(&status)); driver.launchTasks(offers.get()[0].id(), tasks); AWAIT_READY(status); EXPECT_EQ(TASK_RUNNING, status.get().state()); // Wait for the task to begin inducing cpu time. while (!os::exists(file)); ExecutorID executorId; executorId.set_value(task.task_id().value()); // We'll wait up to 10 seconds for the child process to induce // 1/8 of a second of user and system cpu time in total. // TODO(bmahler): Also induce rss memory consumption, by re-using // the balloon framework. ResourceStatistics statistics; Duration waited = Duration::zero(); do { Future<ResourceStatistics> usage = process::dispatch( (Isolator*) &isolator, // TODO(benh): Fix after reaper changes. &Isolator::usage, frameworkId.get(), executorId); AWAIT_READY(usage); statistics = usage.get(); // If we meet our usage expectations, we're done! if (statistics.cpus_user_time_secs() >= 0.125 && statistics.cpus_system_time_secs() >= 0.125 && statistics.mem_rss_bytes() >= 1024u) { break; } os::sleep(Milliseconds(100)); waited += Milliseconds(100); } while (waited < Seconds(10)); EXPECT_GE(statistics.cpus_user_time_secs(), 0.125); EXPECT_GE(statistics.cpus_system_time_secs(), 0.125); EXPECT_EQ(statistics.cpus_limit(), cpus.get()); EXPECT_GE(statistics.mem_rss_bytes(), 1024u); EXPECT_EQ(statistics.mem_limit_bytes(), mem.get().bytes()); EXPECT_CALL(sched, statusUpdate(&driver, _)) .WillOnce(FutureArg<1>(&status)); driver.killTask(task.task_id()); AWAIT_READY(status); EXPECT_EQ(TASK_KILLED, status.get().state()); driver.stop(); driver.join(); this->Shutdown(); // Must shutdown before 'isolator' gets deallocated. }
TEST_F(SlaveTest, ShutdownUnregisteredExecutor) { Try<PID<Master> > master = StartMaster(); ASSERT_SOME(master); // Need flags for 'executor_registration_timeout'. slave::Flags flags = CreateSlaveFlags(); // Set the isolation flag so we know a MesoContainerizer will be created. flags.isolation = "posix/cpu,posix/mem"; Try<MesosContainerizer*> containerizer = MesosContainerizer::create(flags, false); CHECK_SOME(containerizer); Try<PID<Slave> > slave = StartSlave(containerizer.get()); ASSERT_SOME(slave); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL); EXPECT_CALL(sched, registered(&driver, _, _)) .Times(1); Future<vector<Offer> > offers; EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); // Ignore subsequent offers. driver.start(); AWAIT_READY(offers); EXPECT_NE(0u, offers.get().size()); // Launch a task with the command executor. TaskInfo task; task.set_name(""); task.mutable_task_id()->set_value("1"); task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id()); task.mutable_resources()->MergeFrom(offers.get()[0].resources()); CommandInfo command; command.set_value("sleep 10"); task.mutable_command()->MergeFrom(command); vector<TaskInfo> tasks; tasks.push_back(task); // Drop the registration message from the executor to the slave. Future<process::Message> registerExecutor = DROP_MESSAGE(Eq(RegisterExecutorMessage().GetTypeName()), _, _); driver.launchTasks(offers.get()[0].id(), tasks); AWAIT_READY(registerExecutor); Clock::pause(); Future<TaskStatus> status; EXPECT_CALL(sched, statusUpdate(&driver, _)) .WillOnce(FutureArg<1>(&status)); // Ensure that the slave times out and kills the executor. Future<Nothing> destroyExecutor = FUTURE_DISPATCH(_, &MesosContainerizerProcess::destroy); Clock::advance(flags.executor_registration_timeout); AWAIT_READY(destroyExecutor); Clock::settle(); // Wait for Containerizer::destroy to complete. // Now advance time until the reaper reaps the executor. while (status.isPending()) { Clock::advance(Seconds(1)); Clock::settle(); } AWAIT_READY(status); ASSERT_EQ(TASK_FAILED, status.get().state()); Clock::resume(); driver.stop(); driver.join(); Shutdown(); // Must shutdown before 'containerizer' gets deallocated. }
// The purpose of this test is to ensure that when slaves are removed // from the master, and then attempt to re-register, we deny the // re-registration by sending a ShutdownMessage to the slave. // Why? Because during a network partition, the master will remove a // partitioned slave, thus sending its tasks to LOST. At this point, // when the partition is removed, the slave will attempt to // re-register with its running tasks. We've already notified // frameworks that these tasks were LOST, so we have to have the slave // slave shut down. TEST_F(PartitionTest, PartitionedSlaveReregistration) { Try<PID<Master> > master = StartMaster(); ASSERT_SOME(master); // Allow the master to PING the slave, but drop all PONG messages // from the slave. Note that we don't match on the master / slave // PIDs because it's actually the SlaveObserver Process that sends // the pings. Future<Message> ping = FUTURE_MESSAGE(Eq("PING"), _, _); DROP_MESSAGES(Eq("PONG"), _, _); MockExecutor exec(DEFAULT_EXECUTOR_ID); StandaloneMasterDetector detector(master.get()); Try<PID<Slave> > slave = StartSlave(&exec, &detector); ASSERT_SOME(slave); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL); EXPECT_CALL(sched, registered(&driver, _, _)); Future<vector<Offer> > offers; EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); driver.start(); AWAIT_READY(offers); ASSERT_NE(0u, offers.get().size()); // Launch a task. This is to ensure the task is killed by the slave, // during shutdown. TaskID taskId; taskId.set_value("1"); TaskInfo task; task.set_name(""); task.mutable_task_id()->MergeFrom(taskId); task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id()); task.mutable_resources()->MergeFrom(offers.get()[0].resources()); task.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO); task.mutable_executor()->mutable_command()->set_value("sleep 60"); vector<TaskInfo> tasks; tasks.push_back(task); // Set up the expectations for launching the task. EXPECT_CALL(exec, registered(_, _, _, _)); EXPECT_CALL(exec, launchTask(_, _)) .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING)); Future<TaskStatus> runningStatus; EXPECT_CALL(sched, statusUpdate(&driver, _)) .WillOnce(FutureArg<1>(&runningStatus)); Future<Nothing> statusUpdateAck = FUTURE_DISPATCH( slave.get(), &Slave::_statusUpdateAcknowledgement); driver.launchTasks(offers.get()[0].id(), tasks); AWAIT_READY(runningStatus); EXPECT_EQ(TASK_RUNNING, runningStatus.get().state()); // Wait for the slave to have handled the acknowledgment prior // to pausing the clock. AWAIT_READY(statusUpdateAck); // Drop the first shutdown message from the master (simulated // partition), allow the second shutdown message to pass when // the slave re-registers. Future<ShutdownMessage> shutdownMessage = DROP_PROTOBUF(ShutdownMessage(), _, slave.get()); Future<TaskStatus> lostStatus; EXPECT_CALL(sched, statusUpdate(&driver, _)) .WillOnce(FutureArg<1>(&lostStatus)); Future<Nothing> slaveLost; EXPECT_CALL(sched, slaveLost(&driver, _)) .WillOnce(FutureSatisfy(&slaveLost)); Clock::pause(); // Now, induce a partition of the slave by having the master // timeout the slave. uint32_t pings = 0; while (true) { AWAIT_READY(ping); pings++; if (pings == master::MAX_SLAVE_PING_TIMEOUTS) { break; } ping = FUTURE_MESSAGE(Eq("PING"), _, _); Clock::advance(master::SLAVE_PING_TIMEOUT); Clock::settle(); } Clock::advance(master::SLAVE_PING_TIMEOUT); Clock::settle(); // The master will have notified the framework of the lost task. AWAIT_READY(lostStatus); EXPECT_EQ(TASK_LOST, lostStatus.get().state()); // Wait for the master to attempt to shut down the slave. AWAIT_READY(shutdownMessage); // The master will notify the framework that the slave was lost. AWAIT_READY(slaveLost); Clock::resume(); // We now complete the partition on the slave side as well. This // is done by simulating a master loss event which would normally // occur during a network partition. detector.appoint(None()); Future<Nothing> shutdown; EXPECT_CALL(exec, shutdown(_)) .WillOnce(FutureSatisfy(&shutdown)); shutdownMessage = FUTURE_PROTOBUF(ShutdownMessage(), _, slave.get()); // Have the slave re-register with the master. detector.appoint(master.get()); // Upon re-registration, the master will shutdown the slave. // The slave will then shut down the executor. AWAIT_READY(shutdownMessage); AWAIT_READY(shutdown); driver.stop(); driver.join(); Shutdown(); }