Exemplo n.º 1
0
// Builds a JSON object describing a task from its `TaskInfo`, the id of the
// framework that owns it, its current `state`, and its status history.
//
// The keys populated in the visible part of this function are "id", "name",
// "framework_id", "executor_id", "slave_id", "state" and "resources".
// NOTE(review): this snippet is cut off before the function ends, so what
// happens to the `array` of modelled statuses is not visible here.
//
// TODO(bmahler): Expose the executor name / source.
JSON::Object model(
    const TaskInfo& task,
    const FrameworkID& frameworkId,
    const TaskState& state,
    const vector<TaskStatus>& statuses)
{
  JSON::Object object;
  object.values["id"] = task.task_id().value();
  object.values["name"] = task.name();
  object.values["framework_id"] = frameworkId.value();

  // "executor_id" is always emitted; it is the empty string when the task
  // does not carry an executor.
  if (task.has_executor()) {
    object.values["executor_id"] = task.executor().executor_id().value();
  } else {
    object.values["executor_id"] = "";
  }

  object.values["slave_id"] = task.slave_id().value();
  object.values["state"] = TaskState_Name(state);
  object.values["resources"] = model(task.resources());

  // Convert every status update into its JSON representation, in order.
  JSON::Array array;
  foreach (const TaskStatus& status, statuses) {
    array.values.push_back(model(status));
  }
Exemplo n.º 2
0
// Builds a `Task` from the `TaskInfo` that was submitted for launch.
//
// The framework id, state, name, task id, slave id and resources are always
// copied. The optional sub-messages (executor id, labels, discovery) are
// only populated when they are present on `task`, so the resulting `Task`
// does not report fields as set that the `TaskInfo` never carried.
//
// @param task        The task description to convert.
// @param state       The state to record on the resulting `Task`.
// @param frameworkId The framework that owns the task.
// @return The populated `Task`.
Task createTask(
    const TaskInfo& task,
    const TaskState& state,
    const FrameworkID& frameworkId)
{
  Task t;
  t.mutable_framework_id()->MergeFrom(frameworkId);
  t.set_state(state);
  t.set_name(task.name());
  t.mutable_task_id()->MergeFrom(task.task_id());
  t.mutable_slave_id()->MergeFrom(task.slave_id());
  t.mutable_resources()->MergeFrom(task.resources());

  if (task.has_executor()) {
    t.mutable_executor_id()->CopyFrom(task.executor().executor_id());
  }

  // Calling `mutable_labels()` marks the field as present, so guard it the
  // same way as the other optional sub-messages; otherwise every `Task`
  // would carry an (empty) labels message.
  if (task.has_labels()) {
    t.mutable_labels()->MergeFrom(task.labels());
  }

  if (task.has_discovery()) {
    t.mutable_discovery()->MergeFrom(task.discovery());
  }

  return t;
}
Exemplo n.º 3
0
  void launch(const TaskInfo& task)
  {
    cout << "Starting task " << task.task_id().value() << endl;

    tasks[task.task_id()] = task;

    std::thread thread([=]() {
      os::sleep(Seconds(random() % 10));

      process::dispatch(self(), &Self::update, task, TaskState::TASK_FINISHED);
    });

    thread.detach();

    update(task, TaskState::TASK_RUNNING);
  }
Exemplo n.º 4
0
  // Sends an UPDATE call carrying a status for `task` in the given `state`
  // and remembers the update in `updates`, keyed by its freshly generated
  // UUID.
  void update(const TaskInfo& task, const TaskState& state)
  {
    UUID uuid = UUID::random();

    Call call;
    call.set_type(Call::UPDATE);
    call.mutable_framework_id()->CopyFrom(frameworkId);
    call.mutable_executor_id()->CopyFrom(executorId);

    // Fill in the status directly inside the call rather than building a
    // separate message and copying it over.
    TaskStatus* status = call.mutable_update()->mutable_status();
    status->mutable_task_id()->CopyFrom(task.task_id());
    status->mutable_executor_id()->CopyFrom(executorId);
    status->set_state(state);
    status->set_source(TaskStatus::SOURCE_EXECUTOR);
    status->set_timestamp(process::Clock::now().secs());
    status->set_uuid(uuid.toBytes());

    // Capture the status update before it goes out.
    updates[uuid] = call.update();

    mesos->send(call);
  }
Exemplo n.º 5
0
// This test ensures that the command executor does not send
// TASK_KILLING to frameworks that do not support the capability.
//
// Flow: start a master and an agent (the `http_command_executor` flag is
// taken from the test parameter), register a framework built from
// DEFAULT_FRAMEWORK_INFO, launch a long-running `sleep` task, wait for
// TASK_RUNNING, kill the task and verify that the very next status update
// is TASK_KILLED.
TEST_P_TEMP_DISABLED_ON_WINDOWS(CommandExecutorTest, NoTaskKillingCapability)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  Owned<MasterDetector> detector = master.get()->createDetector();

  slave::Flags flags = CreateSlaveFlags();
  flags.http_command_executor = GetParam();

  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), flags);
  ASSERT_SOME(slave);

  // Start the framework without the task killing capability.
  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _));

  // Capture the first batch of offers; later ones are irrelevant here.
  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  EXPECT_EQ(1u, offers->size());

  // Launch a task with the command executor.
  TaskInfo task = createTask(
      offers->front().slave_id(),
      offers->front().resources(),
      "sleep 1000");

  Future<TaskStatus> statusRunning;
  EXPECT_CALL(sched, statusUpdate(_, _))
    .WillOnce(FutureArg<1>(&statusRunning));

  driver.launchTasks(offers->front().id(), {task});

  AWAIT_READY(statusRunning);
  EXPECT_EQ(TASK_RUNNING, statusRunning->state());

  // There should only be a TASK_KILLED update.
  // The expectation accepts exactly one more update, so an intermediate
  // TASK_KILLING would be captured here and fail the state check below.
  Future<TaskStatus> statusKilled;
  EXPECT_CALL(sched, statusUpdate(_, _))
    .WillOnce(FutureArg<1>(&statusKilled));

  driver.killTask(task.task_id());

  AWAIT_READY(statusKilled);
  EXPECT_EQ(TASK_KILLED, statusKilled->state());

  driver.stop();
  driver.join();
}
Exemplo n.º 6
0
// Launches a task that asks for 2.01 cpus against the first received offer
// and expects the scheduler to get a TASK_LOST update whose message is
// "Task uses more resources than offered".
// NOTE(review): this presumably relies on the default agent offering fewer
// than 2.01 cpus -- confirm against the test fixture's slave flags.
TEST_F(ResourceOffersTest, TaskUsesMoreResourcesThanOffered)
{
  Try<PID<Master> > master = StartMaster();
  ASSERT_SOME(master);

  Try<PID<Slave> > slave = StartSlave();
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(&sched, DEFAULT_FRAMEWORK_INFO, master.get());

  EXPECT_CALL(sched, registered(&driver, _, _))
    .Times(1);

  // Capture the first batch of offers only.
  Future<vector<Offer> > offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  EXPECT_NE(0u, offers.get().size());

  // Build a task for the offering agent that uses the default executor.
  TaskInfo task;
  task.set_name("");
  task.mutable_task_id()->set_value("1");
  task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id());
  task.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO);

  // The only resource requested is a scalar "cpus" value of 2.01.
  Resource* cpus = task.add_resources();
  cpus->set_name("cpus");
  cpus->set_type(Value::SCALAR);
  cpus->mutable_scalar()->set_value(2.01);

  vector<TaskInfo> tasks;
  tasks.push_back(task);

  Future<TaskStatus> status;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&status));

  driver.launchTasks(offers.get()[0].id(), tasks);

  AWAIT_READY(status);

  // The launch must be rejected with an explanatory message.
  EXPECT_EQ(task.task_id(), status.get().task_id());
  EXPECT_EQ(TASK_LOST, status.get().state());
  EXPECT_TRUE(status.get().has_message());
  EXPECT_EQ("Task uses more resources than offered", status.get().message());

  driver.stop();
  driver.join();

  Shutdown();
}
Exemplo n.º 7
0
    // Reports `task` as running, waits one second in place of real work,
    // and then reports it as finished.
    virtual void launchTask(ExecutorDriver* driver, const TaskInfo& task)
    {
        cout << "Starting task " << task.task_id().value() << endl;

        // First update: the task is now running.
        TaskStatus running;
        running.set_state(TASK_RUNNING);
        running.mutable_task_id()->MergeFrom(task.task_id());
        driver->sendStatusUpdate(running);

        sleep(1);

        cout << "Finishing task " << task.task_id().value() << endl;

        // Second update: same task id, terminal state.
        TaskStatus finished;
        finished.set_state(TASK_FINISHED);
        finished.mutable_task_id()->MergeFrom(task.task_id());
        driver->sendStatusUpdate(finished);
    }
Exemplo n.º 8
0
  // Reports `task` as running and then immediately as finished; no actual
  // work is performed in between.
  virtual void launchTask(ExecutorDriver* driver, const TaskInfo& task)
  {
    cout << "Starting task " << task.task_id().value() << endl;

    // A single status message is reused for both updates; only its state
    // changes between the two sends.
    TaskStatus update;
    update.mutable_task_id()->MergeFrom(task.task_id());

    update.set_state(TASK_RUNNING);
    driver->sendStatusUpdate(update);

    // This is where one would perform the requested task.

    cout << "Finishing task " << task.task_id().value() << endl;

    update.set_state(TASK_FINISHED);
    driver->sendStatusUpdate(update);
  }
Exemplo n.º 9
0
// Stands in for real work: sleeps for a random 0-9 seconds and then reports
// `task` as TASK_FINISHED through `driver`.
void run(ExecutorDriver* driver, const TaskInfo& task)
{
  const Seconds duration(random() % 10);
  os::sleep(duration);

  TaskStatus finished;
  finished.set_state(TASK_FINISHED);
  finished.mutable_task_id()->MergeFrom(task.task_id());

  driver->sendStatusUpdate(finished);
}
Exemplo n.º 10
0
        // Runs the K3 command line (`k3_cmd`) in a subprocess, echoes every
        // line of its output to stdout (and, on the master, also forwards
        // it as a framework message), then reports TASK_FINISHED when
        // pclose() returns 0 and TASK_FAILED otherwise.
        void operator()() {
          TaskStatus status;
          status.mutable_task_id()->MergeFrom(task.task_id());
          // Currently, just call the K3 executable with the generated
          // command line from task.data()
          try {
            FILE* pipe = popen(k3_cmd.c_str(), "r");
            if (!pipe) {
              status.set_state(TASK_FAILED);
              driver->sendStatusUpdate(status);
              cout << "Failed to open subprocess" << endl;
              // There is nothing to read or close. Returning here avoids
              // passing a NULL stream to fgets()/pclose() and sending a
              // second terminal update.
              return;
            }

            // Loop on fgets() itself rather than on feof(): fgets() returns
            // NULL on both end-of-file and read errors, so a failing stream
            // cannot make this loop spin forever.
            char buffer[256];
            while (fgets(buffer, static_cast<int>(sizeof(buffer)), pipe)
                   != NULL) {
              std::string s = std::string(buffer);
              if (this->isMaster) {
                driver->sendFrameworkMessage(s);
              }
              cout << s << endl;
            }

            // Value returned by pclose() for the finished subprocess.
            int k3 = pclose(pipe);

            if (k3 == 0) {
              status.set_state(TASK_FINISHED);
              cout << "Task " << task.task_id().value() << " Completed!" << endl;
              driver->sendStatusUpdate(status);
            }
            else {
              status.set_state(TASK_FAILED);
              cout << "K3 Task " << task.task_id().value() << " returned error code: " << k3 << endl;
              driver->sendStatusUpdate(status);
            }
          }
          catch (...) {
            status.set_state(TASK_FAILED);
            driver->sendStatusUpdate(status);
          }
          //-------------  END OF TASK  -------------------
        }
Exemplo n.º 11
0
    // Starts a detached pthread running `RunProcess` for `task` and reports
    // TASK_RUNNING when the thread was created, TASK_FAILED otherwise.
    void launchTask (ExecutorDriver* driver, const TaskInfo& task) override {
      cout << "Starting task " << task.task_id().value() << endl;

      // Heap-allocated argument block passed to the new thread.
      StartInfo* startInfo = new StartInfo(driver, task);

      pthread_t worker;
      const bool started =
        pthread_create(&worker, NULL, &RunProcess, startInfo) == 0;

      if (started) {
        pthread_detach(worker);
      } else {
        // No thread received the argument block, so release it here.
        delete startInfo;
      }

      TaskStatus status;
      status.mutable_task_id()->MergeFrom(task.task_id());
      status.set_state(started ? TASK_RUNNING : TASK_FAILED);

      driver->sendStatusUpdate(status);
    }
Exemplo n.º 12
0
  // Runs `task` on a detached pthread: the work (`run(driver, task)`) is
  // bound into a heap-allocated thunk that is passed to `start` as the
  // thread argument. Reports TASK_RUNNING when the thread was created and
  // TASK_FAILED when it could not be.
  virtual void launchTask(ExecutorDriver* driver, const TaskInfo& task)
  {
    cout << "Starting task " << task.task_id().value() << endl;

    lambda::function<void(void)>* thunk =
      new lambda::function<void(void)>(lambda::bind(&run, driver, task));

    TaskStatus status;
    status.mutable_task_id()->MergeFrom(task.task_id());

    pthread_t pthread;
    if (pthread_create(&pthread, NULL, &start, thunk) != 0) {
      // The thread never started, so nothing else will ever see `thunk`;
      // free it here instead of leaking it.
      delete thunk;

      status.set_state(TASK_FAILED);
    } else {
      pthread_detach(pthread);

      status.set_state(TASK_RUNNING);
    }

    driver->sendStatusUpdate(status);
  }
Exemplo n.º 13
0
// Assembles a `Task` from the submitted `TaskInfo`, tagging it with the
// given state and framework id. The supplied `executorId` is recorded only
// for tasks that do not carry a command of their own.
inline Task createTask(const TaskInfo& task,
                       const TaskState& state,
                       const ExecutorID& executorId,
                       const FrameworkID& frameworkId)
{
  Task result;

  // Scalar fields.
  result.set_name(task.name());
  result.set_state(state);

  // Identifier and resource sub-messages.
  result.mutable_task_id()->MergeFrom(task.task_id());
  result.mutable_framework_id()->MergeFrom(frameworkId);
  result.mutable_slave_id()->MergeFrom(task.slave_id());
  result.mutable_resources()->MergeFrom(task.resources());

  const bool commandless = !task.has_command();
  if (commandless) {
    result.mutable_executor_id()->MergeFrom(executorId);
  }

  return result;
}
Exemplo n.º 14
0
// Converts a submitted `TaskInfo` into a `Task` with the given state and
// owning framework.
//
// Name, ids and resources are always copied. Executor id, labels,
// discovery info and container info are copied only when present. The
// `user` is taken from the task's own command when it names one, otherwise
// from the executor's command when the task has an executor that names one.
Task createTask(
    const TaskInfo& task,
    const TaskState& state,
    const FrameworkID& frameworkId)
{
  Task result;

  result.set_name(task.name());
  result.set_state(state);
  result.mutable_task_id()->CopyFrom(task.task_id());
  result.mutable_framework_id()->CopyFrom(frameworkId);
  result.mutable_slave_id()->CopyFrom(task.slave_id());
  result.mutable_resources()->CopyFrom(task.resources());

  // Optional sub-messages: only touch the ones the TaskInfo actually has.
  if (task.has_executor()) {
    result.mutable_executor_id()->CopyFrom(task.executor().executor_id());
  }
  if (task.has_labels()) {
    result.mutable_labels()->CopyFrom(task.labels());
  }
  if (task.has_discovery()) {
    result.mutable_discovery()->CopyFrom(task.discovery());
  }
  if (task.has_container()) {
    result.mutable_container()->CopyFrom(task.container());
  }

  // Copy `user` if set; the task's own command takes precedence.
  const bool commandNamesUser =
    task.has_command() && task.command().has_user();
  const bool executorNamesUser =
    task.has_executor() && task.executor().command().has_user();

  if (commandNamesUser) {
    result.set_user(task.command().user());
  } else if (executorNamesUser) {
    result.set_user(task.executor().command().user());
  }

  return result;
}
Exemplo n.º 15
0
// This test ensures that a killTask() can happen between runTask()
// and _runTask() and then gets "handled properly". This means that
// the task never gets started, but also does not get lost. The end
// result is status TASK_KILLED. Essentially, killing the task is
// realized while preparing to start it. See MESOS-947.
// Temporarily disabled due to MESOS-1945.
//
// Mechanics: the slave is a MockSlave whose runTask() runs the real
// implementation while _runTask() is intercepted and its arguments saved.
// The task is killed while _runTask() is still outstanding, and only then
// is the real _runTask() invoked manually with the saved arguments.
TEST_F(SlaveTest, DISABLED_KillTaskBetweenRunTaskParts)
{
  Try<PID<Master> > master = StartMaster();
  ASSERT_SOME(master);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);

  TestContainerizer containerizer(&exec);

  StandaloneMasterDetector detector(master.get());

  // The slave is spawned by hand (instead of via StartSlave) so that its
  // methods can be mocked.
  MockSlave slave(CreateSlaveFlags(), &detector, &containerizer);
  process::spawn(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _))
    .Times(1);

  Future<vector<Offer> > offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  EXPECT_NE(0u, offers.get().size());

  // The task consumes the whole first offer and uses the default executor.
  TaskInfo task;
  task.set_name("");
  task.mutable_task_id()->set_value("1");
  task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id());
  task.mutable_resources()->MergeFrom(offers.get()[0].resources());
  task.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO);

  vector<TaskInfo> tasks;
  tasks.push_back(task);

  // The executor must never be registered, asked to launch, or shut down.
  EXPECT_CALL(exec, registered(_, _, _, _))
    .Times(0);

  EXPECT_CALL(exec, launchTask(_, _))
    .Times(0);

  EXPECT_CALL(exec, shutdown(_))
    .Times(0);

  Future<TaskStatus> status;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillRepeatedly(FutureArg<1>(&status));

  EXPECT_CALL(slave, runTask(_, _, _, _, _))
    .WillOnce(Invoke(&slave, &MockSlave::unmocked_runTask));

  // Saved arguments from Slave::_runTask().
  Future<bool> future;
  FrameworkInfo frameworkInfo;
  FrameworkID frameworkId;

  // Skip what Slave::_runTask() normally does, save its arguments for
  // later, tie reaching the critical moment when to kill the task to
  // a future.
  Future<Nothing> _runTask;
  EXPECT_CALL(slave, _runTask(_, _, _, _, _))
    .WillOnce(DoAll(FutureSatisfy(&_runTask),
                    SaveArg<0>(&future),
                    SaveArg<1>(&frameworkInfo),
                    SaveArg<2>(&frameworkId)));

  driver.launchTasks(offers.get()[0].id(), tasks);

  AWAIT_READY(_runTask);

  // Kill the task while _runTask() is still pending; the real killTask()
  // implementation runs and then signals `killTask`.
  Future<Nothing> killTask;
  EXPECT_CALL(slave, killTask(_, _, _))
    .WillOnce(DoAll(Invoke(&slave, &MockSlave::unmocked_killTask),
                    FutureSatisfy(&killTask)));
  driver.killTask(task.task_id());

  // Since this is the only task ever for this framework, the
  // framework should get removed in Slave::_runTask().
  // Thus we can observe that this happens before Shutdown().
  Future<Nothing> removeFramework;
  EXPECT_CALL(slave, removeFramework(_))
    .WillOnce(DoAll(Invoke(&slave, &MockSlave::unmocked_removeFramework),
                    FutureSatisfy(&removeFramework)));

  // Now that the kill has been processed, resume the intercepted
  // _runTask() with the arguments saved earlier.
  AWAIT_READY(killTask);
  slave.unmocked__runTask(
      future, frameworkInfo, frameworkId, master.get(), task);

  AWAIT_READY(removeFramework);

  AWAIT_READY(status);
  EXPECT_EQ(TASK_KILLED, status.get().state());

  driver.stop();
  driver.join();

  process::terminate(slave);
  process::wait(slave);

  Shutdown(); // Must shutdown before 'containerizer' gets deallocated.
}
// This test confirms that if a task exceeds configured resource
// limits it is forcibly terminated.
//
// The agent runs with the "posix/rlimits" isolator; the task is a busy
// loop whose container sets RLMT_CPU to 1 (soft and hard), and the test
// expects TASK_RUNNING followed by TASK_FAILED.
TEST_F(PosixRLimitsIsolatorTest, TaskExceedingLimit)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  slave::Flags flags = CreateSlaveFlags();
  flags.isolation = "posix/rlimits";

  Owned<MasterDetector> detector = master.get()->createDetector();

  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), flags);
  ASSERT_SOME(slave);

  MockScheduler sched;

  MesosSchedulerDriver driver(
      &sched,
      DEFAULT_FRAMEWORK_INFO,
      master.get()->pid,
      DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(_, _, _));

  Future<vector<Offer>> offers;

  EXPECT_CALL(sched, resourceOffers(_, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  ASSERT_FALSE(offers->empty());

  // The task attempts to use an infinite amount of CPU time.
  TaskInfo task = createTask(
      offers.get()[0].slave_id(),
      offers.get()[0].resources(),
      "while true; do true; done");

  ContainerInfo* container = task.mutable_container();
  container->set_type(ContainerInfo::MESOS);

  // Limit the process to use maximally 1 second of CPU time.
  RLimitInfo rlimitInfo;
  RLimitInfo::RLimit* cpuLimit = rlimitInfo.add_rlimits();
  cpuLimit->set_type(RLimitInfo::RLimit::RLMT_CPU);
  cpuLimit->set_soft(1);
  cpuLimit->set_hard(1);

  container->mutable_rlimit_info()->CopyFrom(rlimitInfo);

  // Exactly two updates are expected, in this order.
  Future<TaskStatus> statusRunning;
  Future<TaskStatus> statusFailed;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&statusRunning))
    .WillOnce(FutureArg<1>(&statusFailed));

  driver.launchTasks(offers.get()[0].id(), {task});

  AWAIT_READY(statusRunning);
  EXPECT_EQ(task.task_id(), statusRunning->task_id());
  EXPECT_EQ(TASK_RUNNING, statusRunning->state());

  AWAIT_READY(statusFailed);
  EXPECT_EQ(task.task_id(), statusFailed->task_id());
  EXPECT_EQ(TASK_FAILED, statusFailed->state());

  driver.stop();
  driver.join();
}
Exemplo n.º 17
0
// This test has been temporarily disabled due to MESOS-1257.
//
// Launches a command task through the external containerizer (backed by
// the python test-containerizer script), waits for TASK_RUNNING, polls the
// containerizer's usage() until the reported statistics meet the expected
// thresholds (or ten seconds pass), and finally kills the task and expects
// TASK_KILLED.
TEST_F(ExternalContainerizerTest, DISABLED_Launch)
{
  Try<PID<Master> > master = this->StartMaster();
  ASSERT_SOME(master);

  Flags testFlags;

  slave::Flags flags = this->CreateSlaveFlags();

  flags.isolation = "external";
  flags.containerizer_path =
    testFlags.build_dir + "/src/examples/python/test-containerizer";

  MockExternalContainerizer containerizer(flags);

  Try<PID<Slave> > slave = this->StartSlave(&containerizer, flags);
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL);

  Future<FrameworkID> frameworkId;
  EXPECT_CALL(sched, registered(&driver, _, _))
    .WillOnce(FutureArg<1>(&frameworkId));

  Future<vector<Offer> > offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(frameworkId);
  AWAIT_READY(offers);

  EXPECT_NE(0u, offers.get().size());

  TaskInfo task;
  task.set_name("isolator_test");
  task.mutable_task_id()->set_value("1");
  task.mutable_slave_id()->CopyFrom(offers.get()[0].slave_id());
  task.mutable_resources()->CopyFrom(offers.get()[0].resources());

  // Remember the offered cpu/mem so the reported limits can be checked
  // against them further down.
  Resources resources(offers.get()[0].resources());
  Option<Bytes> mem = resources.mem();
  ASSERT_SOME(mem);
  Option<double> cpus = resources.cpus();
  ASSERT_SOME(cpus);

  // Marker file the task touches once `top` has been started.
  const std::string& file = path::join(flags.work_dir, "ready");

  // This task induces user/system load in a child process by
  // running top in a child process for ten seconds.
  task.mutable_command()->set_value(
#ifdef __APPLE__
      // Use logging mode with 30,000 samples with no interval.
      "top -l 30000 -s 0 2>&1 > /dev/null & "
#else
      // Batch mode, with 30,000 samples with no interval.
      "top -b -d 0 -n 30000 2>&1 > /dev/null & "
#endif
      "touch " + file +  "; " // Signals that the top command is running.
      "sleep 60");

  Future<TaskStatus> status;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&status))
    .WillRepeatedly(Return()); // Ignore rest for now.

  // Capture the container id while still delegating to the real launch.
  Future<ContainerID> containerId;
  EXPECT_CALL(containerizer, launch(_, _, _, _, _, _, _, _))
    .WillOnce(DoAll(FutureArg<0>(&containerId),
                    Invoke(&containerizer,
                           &MockExternalContainerizer::_launch)));

  driver.launchTasks(offers.get()[0].id(), {task});

  AWAIT_READY(containerId);

  AWAIT_READY(status);

  EXPECT_EQ(TASK_RUNNING, status.get().state());

  // Wait for the task to begin inducing cpu time. Poll with a short sleep
  // and an upper bound instead of spinning: if the marker file never shows
  // up the test fails here rather than hanging forever at 100% cpu.
  Duration waitedForFile = Duration::zero();
  while (!os::exists(file) && waitedForFile < Seconds(15)) {
    os::sleep(Milliseconds(100));
    waitedForFile += Milliseconds(100);
  }
  ASSERT_TRUE(os::exists(file));

  ExecutorID executorId;
  executorId.set_value(task.task_id().value());

  // We'll wait up to 10 seconds for the child process to induce
  // 1/8 of a second of user and system cpu time in total.
  // TODO(bmahler): Also induce rss memory consumption, by re-using
  // the balloon framework.
  ResourceStatistics statistics;
  Duration waited = Duration::zero();
  do {
    Future<ResourceStatistics> usage = containerizer.usage(containerId.get());
    AWAIT_READY(usage);

    statistics = usage.get();

    // If we meet our usage expectations, we're done!
    // NOTE: We are currently getting dummy-data from the test-
    // containerizer python script matching these expectations.
    // TODO(tillt): Consider working with real data.
    if (statistics.cpus_user_time_secs() >= 0.120 &&
        statistics.cpus_system_time_secs() >= 0.05 &&
        statistics.mem_rss_bytes() >= 1024u) {
      break;
    }

    os::sleep(Milliseconds(100));
    waited += Milliseconds(100);
  } while (waited < Seconds(10));

  // These hold the last sample taken, whether or not the loop timed out.
  EXPECT_GE(statistics.cpus_user_time_secs(), 0.120);
  EXPECT_GE(statistics.cpus_system_time_secs(), 0.05);
  EXPECT_EQ(statistics.cpus_limit(), cpus.get());
  EXPECT_GE(statistics.mem_rss_bytes(), 1024u);
  EXPECT_EQ(statistics.mem_limit_bytes(), mem.get().bytes());

  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&status));

  driver.killTask(task.task_id());

  AWAIT_READY(status);

  EXPECT_EQ(TASK_KILLED, status.get().state());

  driver.stop();
  driver.join();

  this->Shutdown();
}
Exemplo n.º 18
0
// This test launches a container which has an image and joins host
// network, and then verifies that the container can access Internet.
//
// The task runs `/bin/ping -c1 google.com` inside an "alpine" docker image
// and is expected to go through TASK_RUNNING and then TASK_FINISHED.
TEST_F(CniIsolatorTest, ROOT_INTERNET_CURL_LaunchContainerInHostNetwork)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  // Enable docker image provisioning, with the image store kept under the
  // test sandbox.
  slave::Flags flags = CreateSlaveFlags();
  flags.isolation = "docker/runtime,filesystem/linux";
  flags.image_providers = "docker";
  flags.docker_store_dir = path::join(sandbox.get(), "store");

  Owned<MasterDetector> detector = master.get()->createDetector();

  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), flags);
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  ASSERT_EQ(1u, offers->size());

  const Offer& offer = offers.get()[0];

  // NOTE: We use a non-shell command here because 'sh' might not be
  // in the PATH. 'alpine' does not specify env PATH in the image.
  CommandInfo command;
  command.set_shell(false);
  command.set_value("/bin/ping");
  command.add_arguments("/bin/ping");
  command.add_arguments("-c1");
  command.add_arguments("google.com");

  TaskInfo task = createTask(
      offer.slave_id(),
      Resources::parse("cpus:1;mem:128").get(),
      command);

  Image image;
  image.set_type(Image::DOCKER);
  image.mutable_docker()->set_name("alpine");

  // No network info is added to the container; only the image is set.
  ContainerInfo* container = task.mutable_container();
  container->set_type(ContainerInfo::MESOS);
  container->mutable_mesos()->mutable_image()->CopyFrom(image);

  Future<TaskStatus> statusRunning;
  Future<TaskStatus> statusFinished;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&statusRunning))
    .WillOnce(FutureArg<1>(&statusFinished));

  driver.launchTasks(offer.id(), {task});

  // A longer timeout is used for the first update than for the second.
  // NOTE(review): presumably to allow for the image download -- confirm.
  AWAIT_READY_FOR(statusRunning, Seconds(60));
  EXPECT_EQ(task.task_id(), statusRunning->task_id());
  EXPECT_EQ(TASK_RUNNING, statusRunning->state());

  AWAIT_READY(statusFinished);
  EXPECT_EQ(task.task_id(), statusFinished->task_id());
  EXPECT_EQ(TASK_FINISHED, statusFinished->state());

  driver.stop();
  driver.join();
}
Exemplo n.º 19
0
// This test ensures that a task will transition straight from `TASK_KILLING` to
// `TASK_KILLED`, even if the health check begins to fail during the kill policy
// grace period.
//
// The health check is `ls` on a temporary file, so it succeeds until the
// test removes that file after TASK_KILLING has been observed.
//
// TODO(gkleiman): this test takes about 7 seconds to run, consider using mock
// tasks and health checkers to speed it up.
TEST_P(CommandExecutorTest, NoTransitionFromKillingToRunning)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  Owned<MasterDetector> detector = master.get()->createDetector();

  slave::Flags flags = CreateSlaveFlags();
  flags.http_command_executor = GetParam();

  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), flags);
  ASSERT_SOME(slave);

  // Start the framework with the task killing capability.
  FrameworkInfo::Capability capability;
  capability.set_type(FrameworkInfo::Capability::TASK_KILLING_STATE);

  FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;
  frameworkInfo.add_capabilities()->CopyFrom(capability);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, frameworkInfo, master.get()->pid, DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  EXPECT_EQ(1u, offers->size());

  // The task runs the kill-policy test helper with a 15 second sleep
  // duration.
  const string command = strings::format(
      "%s %s --sleep_duration=15",
      getTestHelperPath("test-helper"),
      KillPolicyTestHelper::NAME).get();

  TaskInfo task = createTask(offers->front(), command);

  // Create a health check that succeeds until a temporary file is removed.
  Try<string> temporaryPath = os::mktemp(path::join(os::getcwd(), "XXXXXX"));
  ASSERT_SOME(temporaryPath);
  const string tmpPath = temporaryPath.get();

  // Delay, grace period and interval are all zero.
  HealthCheck healthCheck;
  healthCheck.set_type(HealthCheck::COMMAND);
  healthCheck.mutable_command()->set_value("ls " + tmpPath + " >/dev/null");
  healthCheck.set_delay_seconds(0);
  healthCheck.set_grace_period_seconds(0);
  healthCheck.set_interval_seconds(0);

  task.mutable_health_check()->CopyFrom(healthCheck);

  // Set the kill policy grace period to 5 seconds.
  KillPolicy killPolicy;
  killPolicy.mutable_grace_period()->set_nanoseconds(Seconds(5).ns());

  task.mutable_kill_policy()->CopyFrom(killPolicy);

  vector<TaskInfo> tasks;
  tasks.push_back(task);

  Future<TaskStatus> statusRunning;
  Future<TaskStatus> statusHealthy;
  Future<TaskStatus> statusKilling;
  Future<TaskStatus> statusKilled;

  // Exactly four updates are expected, in this order. Any extra update
  // (e.g. a health-driven TASK_RUNNING during the grace period) would
  // violate the expectation.
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&statusRunning))
    .WillOnce(FutureArg<1>(&statusHealthy))
    .WillOnce(FutureArg<1>(&statusKilling))
    .WillOnce(FutureArg<1>(&statusKilled));

  driver.launchTasks(offers->front().id(), tasks);

  AWAIT_READY(statusRunning);
  EXPECT_EQ(TASK_RUNNING, statusRunning.get().state());

  // The second update is still TASK_RUNNING but now carries `healthy`.
  AWAIT_READY(statusHealthy);
  EXPECT_EQ(TASK_RUNNING, statusHealthy.get().state());
  EXPECT_TRUE(statusHealthy.get().has_healthy());
  EXPECT_TRUE(statusHealthy.get().healthy());

  driver.killTask(task.task_id());

  AWAIT_READY(statusKilling);
  EXPECT_EQ(TASK_KILLING, statusKilling->state());
  EXPECT_FALSE(statusKilling.get().has_healthy());

  // Remove the temporary file, so that the health check fails.
  os::rm(tmpPath);

  AWAIT_READY(statusKilled);
  EXPECT_EQ(TASK_KILLED, statusKilled->state());
  EXPECT_FALSE(statusKilled.get().has_healthy());

  driver.stop();
  driver.join();
}
Exemplo n.º 20
0
// This test verifies that a reconciliation request that comes before
// '_launchTasks()' is ignored.
//
// The authorizer is mocked to return a future that is never completed, so
// the launch stays pending in authorization while the reconciliation
// request is sent.
TEST_F(MasterAuthorizationTest, ReconcileTask)
{
  MockAuthorizer authorizer;
  Try<PID<Master> > master = StartMaster(&authorizer);
  ASSERT_SOME(master);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);

  Try<PID<Slave> > slave = StartSlave(&exec);
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _))
    .Times(1);

  Future<vector<Offer> > offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  EXPECT_NE(0u, offers.get().size());

  TaskInfo task = createTask(offers.get()[0], "", DEFAULT_EXECUTOR_ID);
  vector<TaskInfo> tasks;
  tasks.push_back(task);

  // Return a pending future from authorizer.
  // `promise` is never set in this test, so authorization never finishes.
  Future<Nothing> future;
  Promise<bool> promise;
  EXPECT_CALL(authorizer, authorize(An<const mesos::ACL::RunTasks&>()))
    .WillOnce(DoAll(FutureSatisfy(&future),
                    Return(promise.future())));

  driver.launchTasks(offers.get()[0].id(), tasks);

  // Wait until authorization is in progress.
  AWAIT_READY(future);

  // Scheduler shouldn't get an update from reconciliation.
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .Times(0);

  // Lets the test wait until a ReconcileTasksMessage has been observed.
  Future<ReconcileTasksMessage> reconcileTasksMessage =
    FUTURE_PROTOBUF(ReconcileTasksMessage(), _, _);

  vector<TaskStatus> statuses;

  // Explicit reconciliation request for the pending task, claiming
  // TASK_STAGING on the offering agent.
  TaskStatus status;
  status.mutable_task_id()->CopyFrom(task.task_id());
  status.mutable_slave_id()->CopyFrom(offers.get()[0].slave_id());
  status.set_state(TASK_STAGING);

  statuses.push_back(status);

  driver.reconcileTasks(statuses);

  AWAIT_READY(reconcileTasksMessage);

  // Make sure the framework doesn't receive any update.
  Clock::pause();
  Clock::settle();

  // Now stop the framework.
  driver.stop();
  driver.join();

  Shutdown(); // Must shutdown before 'containerizer' gets deallocated.
}
Exemplo n.º 21
0
// Runs a task that repeatedly writes a 512 MB file under the "cgroups/mem"
// isolator, waits (up to five seconds) until the container reports at
// least one low memory pressure event, kills the task, and then checks
// that the pressure counters are ordered low >= medium >= critical.
TEST_F(MemoryPressureMesosTest, CGROUPS_ROOT_Statistics)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  slave::Flags flags = CreateSlaveFlags();

  // We only care about memory cgroup for this test.
  flags.isolation = "cgroups/mem";
  flags.agent_subsystems = None();

  Fetcher fetcher;

  // The containerizer is created by hand so that the test can query its
  // containers() and usage() directly.
  Try<MesosContainerizer*> _containerizer =
    MesosContainerizer::create(flags, true, &fetcher);

  ASSERT_SOME(_containerizer);
  Owned<MesosContainerizer> containerizer(_containerizer.get());

  Owned<MasterDetector> detector = master.get()->createDetector();

  Try<Owned<cluster::Slave>> slave =
    StartSlave(detector.get(), containerizer.get(), flags);
  ASSERT_SOME(slave);

  MockScheduler sched;

  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(_, _, _));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(_, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return());      // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  EXPECT_NE(0u, offers.get().size());

  Offer offer = offers.get()[0];

  // Run a task that triggers memory pressure event. We request 1G
  // disk because we are going to write a 512 MB file repeatedly.
  TaskInfo task = createTask(
      offer.slave_id(),
      Resources::parse("cpus:1;mem:256;disk:1024").get(),
      "while true; do dd count=512 bs=1M if=/dev/zero of=./temp; done");

  Future<TaskStatus> running;
  Future<TaskStatus> killed;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&running))
    .WillOnce(FutureArg<1>(&killed))
    .WillRepeatedly(Return());       // Ignore subsequent updates.

  driver.launchTasks(offer.id(), {task});

  AWAIT_READY(running);
  EXPECT_EQ(task.task_id(), running.get().task_id());
  EXPECT_EQ(TASK_RUNNING, running.get().state());

  Future<hashset<ContainerID>> containers = containerizer->containers();
  AWAIT_READY(containers);
  ASSERT_EQ(1u, containers.get().size());

  ContainerID containerId = *(containers.get().begin());

  // Wait a while for some memory pressure events to occur.
  Duration waited = Duration::zero();
  do {
    Future<ResourceStatistics> usage = containerizer->usage(containerId);
    AWAIT_READY(usage);

    if (usage.get().mem_low_pressure_counter() > 0) {
      // We will check the correctness of the memory pressure counters
      // later, because the memory-hammering task is still active
      // and potentially incrementing these counters.
      break;
    }

    os::sleep(Milliseconds(100));
    waited += Milliseconds(100);
  } while (waited < Seconds(5));

  // The loop only ends with `waited` equal to 5 seconds when it timed out
  // without seeing a pressure event (a successful `break` always happens
  // with `waited` below the bound), so the comparison must be strict for
  // this check to be able to fail.
  EXPECT_LT(waited, Seconds(5));

  // Pause the clock to ensure that the reaper doesn't reap the exited
  // command executor and inform the containerizer/slave.
  Clock::pause();
  Clock::settle();

  // Stop the memory-hammering task.
  driver.killTask(task.task_id());

  AWAIT_READY_FOR(killed, Seconds(120));
  EXPECT_EQ(task.task_id(), killed->task_id());
  EXPECT_EQ(TASK_KILLED, killed->state());

  // Now check the correctness of the memory pressure counters.
  Future<ResourceStatistics> usage = containerizer->usage(containerId);
  AWAIT_READY(usage);

  EXPECT_GE(usage.get().mem_low_pressure_counter(),
            usage.get().mem_medium_pressure_counter());
  EXPECT_GE(usage.get().mem_medium_pressure_counter(),
            usage.get().mem_critical_pressure_counter());

  Clock::resume();

  driver.stop();
  driver.join();
}
Exemplo n.º 22
0
// Verifies that a task which is still pending in the master (its
// authorization has not completed yet) is reported as TASK_STAGING
// by both implicit and explicit reconciliation.
TEST_F(ReconciliationTest, PendingTask)
{
  MockAuthorizer authorizer;
  Try<PID<Master> > master = StartMaster(&authorizer);
  ASSERT_SOME(master);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);

  // Intercept the registration message; it carries the slave id that
  // the explicit reconciliation request below needs.
  Future<SlaveRegisteredMessage> slaveRegistered =
    FUTURE_PROTOBUF(SlaveRegisteredMessage(), _, _);

  Try<PID<Slave> > slave = StartSlave();
  ASSERT_SOME(slave);

  AWAIT_READY(slaveRegistered);
  const SlaveID slaveId = slaveRegistered.get().slave_id();

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _))
    .Times(1);

  // Only the first batch of offers is captured; later ones are
  // ignored.
  Future<vector<Offer> > offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return());

  driver.start();

  AWAIT_READY(offers);
  EXPECT_NE(0u, offers.get().size());

  const Offer& offer = offers.get()[0];

  // Keep the task pending: the authorizer hands back a future that
  // this test never satisfies.
  Future<Nothing> authorizing;
  Promise<bool> authorization;
  EXPECT_CALL(authorizer, authorize(An<const mesos::ACL::RunTask&>()))
    .WillOnce(DoAll(FutureSatisfy(&authorizing),
                    Return(authorization.future())));

  TaskInfo task = createTask(offer, "", DEFAULT_EXECUTOR_ID);
  vector<TaskInfo> tasks(1, task);

  driver.launchTasks(offer.id(), tasks);

  // Reconcile only once authorization is known to be in progress.
  AWAIT_READY(authorizing);

  // Implicit reconciliation: the list of statuses is empty.
  Future<TaskStatus> implicitUpdate;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&implicitUpdate));

  vector<TaskStatus> statuses;
  driver.reconcileTasks(statuses);

  AWAIT_READY(implicitUpdate);
  EXPECT_EQ(TASK_STAGING, implicitUpdate.get().state());
  EXPECT_TRUE(implicitUpdate.get().has_slave_id());

  // Explicit reconciliation: the request names the task and slave.
  Future<TaskStatus> explicitUpdate;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&explicitUpdate));

  TaskStatus query;
  query.mutable_task_id()->CopyFrom(task.task_id());
  query.mutable_slave_id()->CopyFrom(slaveId);
  query.set_state(TASK_STAGING);
  statuses.push_back(query);

  driver.reconcileTasks(statuses);

  AWAIT_READY(explicitUpdate);
  EXPECT_EQ(TASK_STAGING, explicitUpdate.get().state());
  EXPECT_TRUE(explicitUpdate.get().has_slave_id());

  driver.stop();
  driver.join();

  // Shut down explicitly, before the locals of this test go out of
  // scope.
  Shutdown();
}
Exemplo n.º 23
0
// Verifies that killing a task while its launch is still waiting on
// authorization results in TASK_KILLED, and that the offered
// resources go back to the allocator once authorization completes.
TEST_F(MasterAuthorizationTest, KillTask)
{
  MockAuthorizer authorizer;
  Try<PID<Master> > master = StartMaster(&authorizer);
  ASSERT_SOME(master);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);

  Try<PID<Slave> > slave = StartSlave(&exec);
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _))
    .Times(1);

  // Only the first batch of offers is captured; later ones are
  // ignored.
  Future<vector<Offer> > offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return());

  driver.start();

  AWAIT_READY(offers);
  EXPECT_NE(0u, offers.get().size());

  const Offer& offer = offers.get()[0];

  TaskInfo task = createTask(offer, "", DEFAULT_EXECUTOR_ID);
  vector<TaskInfo> tasks(1, task);

  // Hold the launch back: the authorizer returns a future that stays
  // pending until 'authorization' is set further below.
  Future<Nothing> authorizing;
  Promise<bool> authorization;
  EXPECT_CALL(authorizer, authorize(An<const mesos::ACL::RunTasks&>()))
    .WillOnce(DoAll(FutureSatisfy(&authorizing),
                    Return(authorization.future())));

  driver.launchTasks(offer.id(), tasks);

  // Proceed only once authorization is known to be in progress.
  AWAIT_READY(authorizing);

  Future<TaskStatus> killed;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&killed));

  // Kill the task while it is still being authorized.
  driver.killTask(task.task_id());

  // The framework is expected to see TASK_KILLED without waiting for
  // authorization to finish.
  AWAIT_READY(killed);
  EXPECT_EQ(TASK_KILLED, killed.get().state());

  Future<Nothing> unused =
    FUTURE_DISPATCH(_, &AllocatorProcess::resourcesUnused);

  // Let authorization succeed now.
  authorization.set(true);

  // Since nothing gets launched, all resources are handed back to
  // the allocator.
  AWAIT_READY(unused);

  driver.stop();
  driver.join();

  // Shut down explicitly, before the locals of this test go out of
  // scope.
  Shutdown();
}
Exemplo n.º 24
0
  void launch(const TaskInfo& _task)
  {
    CHECK_EQ(SUBSCRIBED, state);

    if (launched) {
      update(
          _task.task_id(),
          TASK_FAILED,
          None(),
          "Attempted to run multiple tasks using a \"command\" executor");
      return;
    }

    // Capture the task.
    task = _task;

    // Capture the TaskID.
    taskId = task->task_id();

    // Capture the kill policy.
    if (task->has_kill_policy()) {
      killPolicy = task->kill_policy();
    }

    // Determine the command to launch the task.
    CommandInfo command;

    if (taskCommand.isSome()) {
      // Get CommandInfo from a JSON string.
      Try<JSON::Object> object = JSON::parse<JSON::Object>(taskCommand.get());
      if (object.isError()) {
        ABORT("Failed to parse JSON: " + object.error());
      }

      Try<CommandInfo> parse = protobuf::parse<CommandInfo>(object.get());
      if (parse.isError()) {
        ABORT("Failed to parse protobuf: " + parse.error());
      }

      command = parse.get();
    } else if (task->has_command()) {
      command = task->command();
    } else {
      LOG(FATAL) << "Expecting task '" << task->task_id() << "' "
                 << "to have a command";
    }

    // TODO(jieyu): For now, we just fail the executor if the task's
    // CommandInfo is not valid. The framework will receive
    // TASK_FAILED for the task, and will most likely find out the
    // cause with some debugging. This is a temporary solution. A more
    // correct solution is to perform this validation at master side.
    if (command.shell()) {
      CHECK(command.has_value())
        << "Shell command of task '" << task->task_id()
        << "' is not specified!";
    } else {
      CHECK(command.has_value())
        << "Executable of task '" << task->task_id()
        << "' is not specified!";
    }

    cout << "Starting task " << task->task_id() << endl;

    // Prepare the argv before fork as it's not async signal safe.
    char **argv = new char*[command.arguments().size() + 1];
    for (int i = 0; i < command.arguments().size(); i++) {
      argv[i] = (char*) command.arguments(i).c_str();
    }
    argv[command.arguments().size()] = nullptr;

#ifndef __WINDOWS__
    pid = launchTaskPosix(
        task.get(),
        command,
        user,
        argv,
        rootfs,
        sandboxDirectory,
        workingDirectory);
#else
    // A Windows process is started using the `CREATE_SUSPENDED` flag
    // and is part of a job object. While the process handle is kept
    // open the reap function will work.
    PROCESS_INFORMATION processInformation = launchTaskWindows(
        task.get(),
        command,
        argv,
        rootfs);

    pid = processInformation.dwProcessId;
    ::ResumeThread(processInformation.hThread);
    CloseHandle(processInformation.hThread);
    processHandle = processInformation.hProcess;
#endif

    delete[] argv;

    cout << "Forked command at " << pid << endl;

    if (task->has_health_check()) {
      launchHealthCheck(task.get());
    }

    // Monitor this process.
    process::reap(pid)
      .onAny(defer(self(), &Self::reaped, pid, lambda::_1));

    update(task->task_id(), TASK_RUNNING);

    launched = true;
  }
Exemplo n.º 25
0
// Launches a Ceph task inside a docker container.
//
// Before starting, task.data() is inspected to decide what has to be
// done: whether to copy the shared config and which kind of task to
// run (which in turn decides whether the file server is started).
//
// task.data() has the format:
//   <isInitialMonNode>.<TaskType>
//
// A task whose data is missing or malformed, or whose task type is
// not handled here, is reported as TASK_FAILED instead of being
// started with an undefined task type.
void CephExecutor::launchTask(ExecutorDriver* driver, const TaskInfo& task)
{
  // Set class member localSharedConfigDirRoot.
  // NOTE(review): as written this yields "/root" unless the shell
  // printed exactly " "; the condition looks inverted. Confirm the
  // intent (and what runShellCommand() returns for "echo ~") before
  // changing it.
  string cmd = "echo ~";
  string r = runShellCommand(cmd);
  localSharedConfigDirRoot = r == " " ? r :"/root";
  LOG(INFO) << "localSharedConfigDirRoot is " << localSharedConfigDirRoot;

  TaskStatus status;
  status.mutable_task_id()->MergeFrom(task.task_id());

  bool needCopyConfig = true;
  bool needStartFileServer = false;

  // The task type is only trusted once it has been parsed
  // successfully; it must never reach the switch below uninitialized.
  int taskType = -1;
  bool validData = false;
  if (task.has_data()){
    LOG(INFO) << "Got TaskInfo data: " << task.data();
    vector<string> tokens = StringUtil::explode(task.data(),'.');
    // Split by '.': the first part is isInitialMonNode, the second
    // part is the task type. Both parts must be present.
    if (tokens.size() >= 2) {
      if (tokens[0] == "1"){
        needCopyConfig = false;
      }
      try {
        taskType = lexical_cast<int>(tokens[1]);
        validData = true;
      } catch (...) {
        // lexical_cast throws when the token is not an integer; this
        // is handled as malformed data right below.
      }
    }
  }

  if (!validData) {
    LOG(INFO) << "Missing or malformed TaskInfo data, expected "
              << "<isInitialMonNode>.<TaskType>";
    status.set_state(TASK_FAILED);
    driver->sendStatusUpdate(status);
    return;
  }

  string localMountDir = localSharedConfigDirRoot +
      "/" +localConfigDirName;

  // Make the local shared dir, all types of task need this.
  // TODO: check if a valid directory tree already exists
  if (!createLocalSharedConfigDir(localConfigDirName)) {
    LOG(INFO) << "created local shared directory failed!";
    status.set_state(TASK_FAILED);
    driver->sendStatusUpdate(status);
    return;
  }
  LOG(INFO) << "Create directory tree done.";

  // Copy the shared config into the local mount dir when required.
  if (needCopyConfig) {
    string abPath = localMountDir + "/"
        + "/etc/ceph/";
    if (!copySharedConfigDir(abPath)) {
      LOG(INFO) << "Copy shared config file failed!";
      status.set_state(TASK_FAILED);
      driver->sendStatusUpdate(status);
      return;
    }
    LOG(INFO) << "Copy config files done.";
  }


  // Run docker command for MON and RADOSGW.
  string cName = getContainerName(task.task_id().value());
  // Set class members containerName and myTaskId.
  // TODO: see if putting these in registered is more proper
  containerName = cName;
  myTaskId = task.task_id();
  // TODO: kill existing container in case of conflict
  runShellCommand("docker rm -f " + containerName);

  string dockerCommand;
  switch (taskType) {
    case static_cast<int>(TaskType::MON):
      needStartFileServer = true;
      dockerCommand = constructMonCommand(
          localMountDir,
          cName);
      downloadDockerImage("ceph/mon");
      break;
    case static_cast<int>(TaskType::OSD):
      downloadDockerImage("ceph/osd");
      // The osdId arrives later in a FrameworkMessage, so the
      // container is not started here.
      dockerCommand = "";
      status.set_state(TASK_STARTING);
      driver->sendStatusUpdate(status);
      return;
    case static_cast<int>(TaskType::RADOSGW):
      downloadDockerImage("ceph/radosgw");
      dockerCommand = constructRADOSGWCommand(
          localMountDir,
          cName);
      break;
    default:
      // Without a known type there is no docker command to run.
      LOG(INFO) << "Unknown task type " << taskType
                << " for task " << task.task_id().value();
      status.set_state(TASK_FAILED);
      driver->sendStatusUpdate(status);
      return;
  }

  if (needStartFileServer) {
    thread fileServerThread(fileServer,
        7777,
        localSharedConfigDirRoot + "/" + localConfigDirName + "/etc/ceph/");
    fileServerThread.detach();
    LOG(INFO) << "Mon fileserver started";
  }

  LOG(INFO) << "Stating container with command: ";
  LOG(INFO) << dockerCommand;

  // Fork a child process to keep docker running for a long time.
  // TODO: <thread> here seems not working, figure it out
  // to find a better way

  myPID = fork();
  if (myPID < 0) {
    // No child was created, so there is nothing to wait for.
    LOG(INFO) << "Failed to fork for task " << task.task_id().value();
    status.set_state(TASK_FAILED);
    driver->sendStatusUpdate(status);
    return;
  }

  if (0 == myPID){
    // Child: long running docker process.
    // TODO: we use fork here. Need to check why below line will hung the executor
    //thread(&CephExecutor::startLongRunning,*this,"docker", dockerCommand).detach();
    // NOTE(review): if startLongRunning() ever returns, the child
    // continues running the executor code after this function --
    // confirm whether it replaces the process image.
    startLongRunning("docker",dockerCommand);
  } else {
    // Parent: check whether the container started normally.
    bool started = block_until_started(cName, "30");
    if (started) {
      LOG(INFO) << "Starting task " << task.task_id().value();
      status.set_state(TASK_RUNNING);
    } else {
      LOG(INFO) << "Failed to start task " << task.task_id().value();
      status.set_state(TASK_FAILED);
    }
    driver->sendStatusUpdate(status);
  }
}
Exemplo n.º 26
0
  void launch(const TaskInfo& _task)
  {
    CHECK_EQ(SUBSCRIBED, state);

    if (launched) {
      update(
          _task.task_id(),
          TASK_FAILED,
          None(),
          "Attempted to run multiple tasks using a \"command\" executor");
      return;
    }

    // Capture the task.
    task = _task;

    // Capture the TaskID.
    taskId = task->task_id();

    // Capture the kill policy.
    if (task->has_kill_policy()) {
      killPolicy = task->kill_policy();
    }

    // Determine the command to launch the task.
    CommandInfo command;

    if (taskCommand.isSome()) {
      // Get CommandInfo from a JSON string.
      Try<JSON::Object> object = JSON::parse<JSON::Object>(taskCommand.get());
      if (object.isError()) {
        cerr << "Failed to parse JSON: " << object.error() << endl;
        abort();
      }

      Try<CommandInfo> parse = protobuf::parse<CommandInfo>(object.get());
      if (parse.isError()) {
        cerr << "Failed to parse protobuf: " << parse.error() << endl;
        abort();
      }

      command = parse.get();
    } else if (task->has_command()) {
      command = task->command();
    } else {
      CHECK_SOME(override)
        << "Expecting task '" << task->task_id()
        << "' to have a command!";
    }

    if (override.isNone()) {
      // TODO(jieyu): For now, we just fail the executor if the task's
      // CommandInfo is not valid. The framework will receive
      // TASK_FAILED for the task, and will most likely find out the
      // cause with some debugging. This is a temporary solution. A more
      // correct solution is to perform this validation at master side.
      if (command.shell()) {
        CHECK(command.has_value())
          << "Shell command of task '" << task->task_id()
          << "' is not specified!";
      } else {
        CHECK(command.has_value())
          << "Executable of task '" << task->task_id()
          << "' is not specified!";
      }
    }

    cout << "Starting task " << task->task_id() << endl;

    // TODO(benh): Clean this up with the new 'Fork' abstraction.
    // Use pipes to determine which child has successfully changed
    // session. This is needed as the setsid call can fail from other
    // processes having the same group id.
    int pipes[2];
    if (pipe(pipes) < 0) {
      perror("Failed to create a pipe");
      abort();
    }

    // Set the FD_CLOEXEC flags on these pipes.
    Try<Nothing> cloexec = os::cloexec(pipes[0]);
    if (cloexec.isError()) {
      cerr << "Failed to cloexec(pipe[0]): " << cloexec.error() << endl;
      abort();
    }

    cloexec = os::cloexec(pipes[1]);
    if (cloexec.isError()) {
      cerr << "Failed to cloexec(pipe[1]): " << cloexec.error() << endl;
      abort();
    }

    if (rootfs.isSome()) {
      // The command executor is responsible for chrooting into the
      // root filesystem and changing the user before exec-ing the
      // user process.
#ifdef __linux__
      Result<string> user = os::user();
      if (user.isError()) {
        cerr << "Failed to get current user: "******"Current username is not found" << endl;
        abort();
      } else if (user.get() != "root") {
        cerr << "The command executor requires root with rootfs" << endl;
        abort();
      }
#else
      cerr << "Not expecting root volume with non-linux platform." << endl;
      abort();
#endif // __linux__
    }

    // Prepare the argv before fork as it's not async signal safe.
    char **argv = new char*[command.arguments().size() + 1];
    for (int i = 0; i < command.arguments().size(); i++) {
      argv[i] = (char*) command.arguments(i).c_str();
    }
    argv[command.arguments().size()] = NULL;

    // Prepare the command log message.
    string commandString;
    if (override.isSome()) {
      char** argv = override.get();
      // argv is guaranteed to be NULL terminated and we rely on
      // that fact to print command to be executed.
      for (int i = 0; argv[i] != NULL; i++) {
        commandString += string(argv[i]) + " ";
      }
    } else if (command.shell()) {
Exemplo n.º 27
0
// Test that we can run the mesos-executor and specify an "override"
// command to use via the --override argument.
//
// The containerizer is a test double, so the mesos-executor is
// started manually as a subprocess once the slave asks for a launch.
// The overriding command writes a marker string to a temporary file,
// which is verified at the end.
TEST_F(SlaveTest, MesosExecutorWithOverride)
{
  Try<PID<Master> > master = StartMaster();
  ASSERT_SOME(master);

  TestContainerizer containerizer;

  Try<PID<Slave> > slave = StartSlave(&containerizer);
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _))
    .Times(1);

  Future<vector<Offer> > offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  EXPECT_NE(0u, offers.get().size());

  // Launch a task with the command executor.
  TaskInfo task;
  task.set_name("");
  task.mutable_task_id()->set_value("1");
  task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id());
  task.mutable_resources()->MergeFrom(offers.get()[0].resources());

  CommandInfo command;
  command.set_value("sleep 10");

  task.mutable_command()->MergeFrom(command);

  vector<TaskInfo> tasks;
  tasks.push_back(task);

  // Expect the launch and just assume it was successful since we'll
  // be launching the executor ourselves manually below.
  Future<Nothing> launch;
  EXPECT_CALL(containerizer, launch(_, _, _, _, _, _, _))
    .WillOnce(DoAll(FutureSatisfy(&launch),
                    Return(true)));

  // Expect wait after launch is called but don't return anything
  // until after we've finished everything below.
  Future<Nothing> wait;
  process::Promise<containerizer::Termination> promise;
  EXPECT_CALL(containerizer, wait(_))
    .WillOnce(DoAll(FutureSatisfy(&wait),
                    Return(promise.future())));

  driver.launchTasks(offers.get()[0].id(), tasks);

  // Once we get the launch, run the mesos-executor with --override.
  AWAIT_READY(launch);

  // Set up fake environment for executor.
  map<string, string> environment;
  environment["MESOS_SLAVE_PID"] = stringify(slave.get());
  environment["MESOS_SLAVE_ID"] = stringify(offers.get()[0].slave_id());
  environment["MESOS_FRAMEWORK_ID"] = stringify(offers.get()[0].framework_id());
  environment["MESOS_EXECUTOR_ID"] = stringify(task.task_id());
  environment["MESOS_DIRECTORY"] = "";

  // Create temporary file to store validation string. If command is
  // successfully replaced, this file will end up containing the
  // string 'hello world\n'. Otherwise, the original task command i.e.
  // 'sleep' will be called and the test will fail.
  Try<std::string> file = os::mktemp();
  ASSERT_SOME(file);

  string executorCommand =
    path::join(tests::flags.build_dir, "src", "mesos-executor") +
    " --override -- /bin/sh -c 'echo hello world >" + file.get() + "'";

  // Expect two status updates, one for once the mesos-executor says
  // the task is running and one for after our overridden command
  // above finishes.
  Future<TaskStatus> status1, status2;
  EXPECT_CALL(sched, statusUpdate(_, _))
    .WillOnce(FutureArg<1>(&status1))
    .WillOnce(FutureArg<1>(&status2));

  Try<process::Subprocess> executor =
    process::subprocess(
        executorCommand,
        process::Subprocess::PIPE(),
        process::Subprocess::PIPE(),
        process::Subprocess::PIPE(),
        environment);

  ASSERT_SOME(executor);

  // Scheduler should receive the TASK_RUNNING update.
  AWAIT_READY(status1);
  ASSERT_EQ(TASK_RUNNING, status1.get().state());

  AWAIT_READY(status2);
  ASSERT_EQ(TASK_FINISHED, status2.get().state());

  AWAIT_READY(wait);

  // Only now let the containerizer's wait() complete.
  containerizer::Termination termination;
  termination.set_killed(false);
  termination.set_message("Killed executor");
  termination.set_status(0);
  promise.set(termination);

  driver.stop();
  driver.join();

  AWAIT_READY(executor.get().status());

  // Verify file contents.
  Try<std::string> validate = os::read(file.get());
  ASSERT_SOME(validate);

  // Expected value first, like every other assertion in this test,
  // so that a failure labels expected and actual correctly.
  EXPECT_EQ("hello world\n", validate.get());

  os::rm(file.get());

  Shutdown();
}
Exemplo n.º 28
0
// End-to-end check of resource monitoring for a running executor:
// launches a task, queries the "statistics" endpoint of the monitor
// and verifies that the reported cpu and memory limits are the
// task's resources plus the default executor resources.
TEST_F(MonitorIntegrationTest, RunningExecutor)
{
  Try<PID<Master>> master = StartMaster();
  ASSERT_SOME(master);

  Try<PID<Slave>> slave = StartSlave();
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _));

  // Only the first batch of offers is captured; later ones are
  // ignored.
  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return());

  driver.start();

  AWAIT_READY(offers);
  EXPECT_FALSE(offers.get().empty());

  const Offer& offer = offers.get()[0];

  // Start a long running task and wait for it to report RUNNING.
  const Resources taskResources = Resources::parse("cpus:1;mem:32").get();
  TaskInfo task = createTask(offer.slave_id(), taskResources, "sleep 1000");

  Future<TaskStatus> running;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&running));

  driver.launchTasks(offer.id(), {task});

  AWAIT_READY(running);
  EXPECT_EQ(task.task_id(), running.get().task_id());
  EXPECT_EQ(TASK_RUNNING, running.get().state());

  // Query the statistics endpoint; the response is expected to be
  // JSON describing the running container.
  UPID monitor("monitor", process::address());

  Future<http::Response> response = http::get(monitor, "statistics");
  AWAIT_READY(response);

  AWAIT_EXPECT_RESPONSE_STATUS_EQ(http::OK().status, response);
  AWAIT_EXPECT_RESPONSE_HEADER_EQ(
      "application/json", "Content-Type", response);

  Try<JSON::Value> actual = JSON::parse(response.get().body);
  ASSERT_SOME(actual);

  // The limits cover the task (1 cpu, 32MB) and the executor
  // defaults.
  const auto cpusLimit = 1 + slave::DEFAULT_EXECUTOR_CPUS;
  const auto memLimitBytes =
    (Megabytes(32) + slave::DEFAULT_EXECUTOR_MEM).bytes();

  Try<JSON::Value> expected = JSON::parse(strings::format(
      "[{\"statistics\":{\"cpus_limit\":%g,\"mem_limit_bytes\":%lu}}]",
      cpusLimit,
      memLimitBytes).get());

  ASSERT_SOME(expected);
  EXPECT_TRUE(actual.get().contains(expected.get()));

  driver.stop();
  driver.join();

  Shutdown();
}
Exemplo n.º 29
0
  void launchTask(ExecutorDriver* driver, const TaskInfo& task)
  {
    CHECK_EQ(REGISTERED, state);

    if (launched) {
      TaskStatus status;
      status.mutable_task_id()->MergeFrom(task.task_id());
      status.set_state(TASK_FAILED);
      status.set_message(
          "Attempted to run multiple tasks using a \"command\" executor");

      driver->sendStatusUpdate(status);
      return;
    }

    // Capture the TaskID.
    taskId = task.task_id();

    // Determine the command to launch the task.
    CommandInfo command;

    if (taskCommand.isSome()) {
      // Get CommandInfo from a JSON string.
      Try<JSON::Object> object = JSON::parse<JSON::Object>(taskCommand.get());
      if (object.isError()) {
        cerr << "Failed to parse JSON: " << object.error() << endl;
        abort();
      }

      Try<CommandInfo> parse = protobuf::parse<CommandInfo>(object.get());
      if (parse.isError()) {
        cerr << "Failed to parse protobuf: " << parse.error() << endl;
        abort();
      }

      command = parse.get();
    } else if (task.has_command()) {
      command = task.command();
    } else {
      CHECK_SOME(override)
        << "Expecting task '" << task.task_id()
        << "' to have a command!";
    }

    if (override.isNone()) {
      // TODO(jieyu): For now, we just fail the executor if the task's
      // CommandInfo is not valid. The framework will receive
      // TASK_FAILED for the task, and will most likely find out the
      // cause with some debugging. This is a temporary solution. A more
      // correct solution is to perform this validation at master side.
      if (command.shell()) {
        CHECK(command.has_value())
          << "Shell command of task '" << task.task_id()
          << "' is not specified!";
      } else {
        CHECK(command.has_value())
          << "Executable of task '" << task.task_id()
          << "' is not specified!";
      }
    }

    cout << "Starting task " << task.task_id() << endl;

    // TODO(benh): Clean this up with the new 'Fork' abstraction.
    // Use pipes to determine which child has successfully changed
    // session. This is needed as the setsid call can fail from other
    // processes having the same group id.
    int pipes[2];
    if (pipe(pipes) < 0) {
      perror("Failed to create a pipe");
      abort();
    }

    // Set the FD_CLOEXEC flags on these pipes.
    Try<Nothing> cloexec = os::cloexec(pipes[0]);
    if (cloexec.isError()) {
      cerr << "Failed to cloexec(pipe[0]): " << cloexec.error() << endl;
      abort();
    }

    cloexec = os::cloexec(pipes[1]);
    if (cloexec.isError()) {
      cerr << "Failed to cloexec(pipe[1]): " << cloexec.error() << endl;
      abort();
    }

    Option<string> rootfs;
    if (sandboxDirectory.isSome()) {
      // If 'sandbox_diretory' is specified, that means the user
      // task specifies a root filesystem, and that root filesystem has
      // already been prepared at COMMAND_EXECUTOR_ROOTFS_CONTAINER_PATH.
      // The command executor is responsible for mounting the sandbox
      // into the root filesystem, chrooting into it and changing the
      // user before exec-ing the user process.
      //
      // TODO(gilbert): Consider a better way to detect if a root
      // filesystem is specified for the command task.
#ifdef __linux__
      Result<string> user = os::user();
      if (user.isError()) {
        cerr << "Failed to get current user: "******"Current username is not found" << endl;
        abort();
      } else if (user.get() != "root") {
        cerr << "The command executor requires root with rootfs" << endl;
        abort();
      }

      rootfs = path::join(
          os::getcwd(), COMMAND_EXECUTOR_ROOTFS_CONTAINER_PATH);

      string sandbox = path::join(rootfs.get(), sandboxDirectory.get());
      if (!os::exists(sandbox)) {
        Try<Nothing> mkdir = os::mkdir(sandbox);
        if (mkdir.isError()) {
          cerr << "Failed to create sandbox mount point  at '"
               << sandbox << "': " << mkdir.error() << endl;
          abort();
        }
      }

      // Mount the sandbox into the container rootfs.
      // We need to perform a recursive mount because we want all the
      // volume mounts in the sandbox to be also mounted in the container
      // root filesystem. However, since the container root filesystem
      // is also mounted in the sandbox, after the recursive mount we
      // also need to unmount the root filesystem in the mounted sandbox.
      Try<Nothing> mount = fs::mount(
          os::getcwd(),
          sandbox,
          None(),
          MS_BIND | MS_REC,
          NULL);

      if (mount.isError()) {
        cerr << "Unable to mount the work directory into container "
             << "rootfs: " << mount.error() << endl;;
        abort();
      }

      // Umount the root filesystem path in the mounted sandbox after
      // the recursive mount.
      Try<Nothing> unmountAll = fs::unmountAll(path::join(
          sandbox,
          COMMAND_EXECUTOR_ROOTFS_CONTAINER_PATH));
      if (unmountAll.isError()) {
        cerr << "Unable to unmount rootfs under mounted sandbox: "
             << unmountAll.error() << endl;
        abort();
      }
#else
      cerr << "Not expecting root volume with non-linux platform." << endl;
      abort();
#endif // __linux__
    }

    // Prepare the argv before fork as it's not async signal safe.
    char **argv = new char*[command.arguments().size() + 1];
    for (int i = 0; i < command.arguments().size(); i++) {
      argv[i] = (char*) command.arguments(i).c_str();
    }
    argv[command.arguments().size()] = NULL;

    // Prepare the command log message.
    string commandString;
    if (override.isSome()) {
      char** argv = override.get();
      // argv is guaranteed to be NULL terminated and we rely on
      // that fact to print command to be executed.
      for (int i = 0; argv[i] != NULL; i++) {
        commandString += string(argv[i]) + " ";
      }
    } else if (command.shell()) {
Exemplo n.º 30
0
// Verifies that a framework which declares the TASK_KILLING_STATE
// capability receives TASK_KILLING followed by TASK_KILLED when it
// kills a task launched with the command executor.
TEST_F(CommandExecutorTest, TaskKillingCapability)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  Owned<MasterDetector> detector = master.get()->createDetector();
  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get());
  ASSERT_SOME(slave);

  // Advertise the task killing capability in the framework info.
  FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;
  frameworkInfo.add_capabilities()->set_type(
      FrameworkInfo::Capability::TASK_KILLING_STATE);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, frameworkInfo, master.get()->pid, DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _));

  // Only the first batch of offers is captured; later ones are
  // ignored.
  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return());

  driver.start();

  AWAIT_READY(offers);
  EXPECT_EQ(1u, offers->size());

  const Offer& offer = offers->front();

  // Run a long sleeping command so the task is still alive when it
  // gets killed.
  TaskInfo task = createTask(
      offer.slave_id(),
      offer.resources(),
      "sleep 1000");

  Future<TaskStatus> running;
  EXPECT_CALL(sched, statusUpdate(_, _))
    .WillOnce(FutureArg<1>(&running));

  driver.launchTasks(offer.id(), {task});

  AWAIT_READY(running);
  EXPECT_EQ(TASK_RUNNING, running->state());

  // The kill is expected to produce exactly two updates, in this
  // order.
  Future<TaskStatus> killing, killed;
  EXPECT_CALL(sched, statusUpdate(_, _))
    .WillOnce(FutureArg<1>(&killing))
    .WillOnce(FutureArg<1>(&killed));

  driver.killTask(task.task_id());

  AWAIT_READY(killing);
  EXPECT_EQ(TASK_KILLING, killing->state());

  AWAIT_READY(killed);
  EXPECT_EQ(TASK_KILLED, killed->state());

  driver.stop();
  driver.join();
}