Example #1
// This test verifies that the slave will destroy a container if
// updating the container's resources fails when a terminal task
// status update is received.
TEST_F(SlaveTest, TerminalTaskContainerizerUpdateFails)
{
  // Start a master.
  Try<PID<Master> > master = StartMaster();
  ASSERT_SOME(master);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);
  EXPECT_CALL(exec, registered(_, _, _, _));

  TestContainerizer containerizer(&exec);

  // Start a slave.
  Try<PID<Slave> > slave = StartSlave(&containerizer);
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(_, _, _));

  Future<vector<Offer> > offers;

  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  EXPECT_NE(0u, offers.get().size());
  Offer offer = offers.get()[0];

  // Start two tasks.
  vector<TaskInfo> tasks;

  tasks.push_back(createTask(
      offer.slave_id(),
      Resources::parse("cpus:0.1;mem:32").get(),
      "sleep 1000",
      exec.id));

  tasks.push_back(createTask(
      offer.slave_id(),
      Resources::parse("cpus:0.1;mem:32").get(),
      "sleep 1000",
      exec.id));

  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING))
    .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING));

  Future<TaskStatus> status1, status2, status3, status4;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&status1))
    .WillOnce(FutureArg<1>(&status2))
    .WillOnce(FutureArg<1>(&status3))
    .WillOnce(FutureArg<1>(&status4));

  driver.launchTasks(offer.id(), tasks);

  AWAIT_READY(status1);
  EXPECT_EQ(TASK_RUNNING, status1.get().state());

  AWAIT_READY(status2);
  EXPECT_EQ(TASK_RUNNING, status2.get().state());

  // Set up the containerizer so the next update() will fail.
  EXPECT_CALL(containerizer, update(_, _))
    .WillOnce(Return(process::Failure("update() failed")))
    .WillRepeatedly(Return(Nothing()));

  EXPECT_CALL(exec, killTask(_, _))
    .WillOnce(SendStatusUpdateFromTaskID(TASK_KILLED));

  // Kill one of the tasks. The failed update should result in the
  // second task going lost when the container is destroyed.
  driver.killTask(tasks[0].task_id());

  AWAIT_READY(status3);
  EXPECT_EQ(TASK_KILLED, status3.get().state());

  AWAIT_READY(status4);
  EXPECT_EQ(TASK_LOST, status4.get().state());

  driver.stop();
  driver.join();

  Shutdown();
}
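
In the test above, killing the first task surfaces as TASK_KILLED, while the container destruction triggered by the failed update() surfaces as TASK_LOST for the second task. As a hedged aside (not part of the test), a scheduler that only cares about whether an update is terminal could use a small helper like the following; the state list is an assumption based on the standard Mesos task states used above.

bool isTerminal(TaskState state)
{
  // Terminal states observed in the test above plus the other common
  // terminal states; a real scheduler may also need to handle newer
  // states such as TASK_DROPPED or TASK_GONE.
  return state == TASK_FINISHED ||
         state == TASK_FAILED ||
         state == TASK_KILLED ||
         state == TASK_LOST;
}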
Example #2
// In this test, the agent initially doesn't enable disk isolation
// but then restarts with XFS disk isolation enabled. We verify that
// the old container launched before the agent restart is
// successfully recovered.
TEST_F(ROOT_XFS_QuotaTest, RecoverOldContainers)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  Owned<MasterDetector> detector = master.get()->createDetector();

  slave::Flags flags = CreateSlaveFlags();

  // `CreateSlaveFlags()` enables `disk/xfs` so here we reset
  // `isolation` to empty.
  flags.isolation.clear();

  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), flags);
  ASSERT_SOME(slave);

  FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;
  frameworkInfo.set_checkpoint(true);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, frameworkInfo, master.get()->pid, DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(_, _, _));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(_, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  ASSERT_FALSE(offers->empty());

  Offer offer = offers.get()[0];

  TaskInfo task = createTask(
      offer.slave_id(),
      Resources::parse("cpus:1;mem:128;disk:1").get(),
      "dd if=/dev/zero of=file bs=1024 count=1; sleep 1000");

  Future<TaskStatus> startingStatus;
  Future<TaskStatus> runningStatus;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&startingStatus))
    .WillOnce(FutureArg<1>(&runningStatus));

  driver.launchTasks(offer.id(), {task});

  AWAIT_READY(startingStatus);
  EXPECT_EQ(task.task_id(), startingStatus->task_id());
  EXPECT_EQ(TASK_STARTING, startingStatus->state());

  AWAIT_READY(runningStatus);
  EXPECT_EQ(task.task_id(), runningStatus->task_id());
  EXPECT_EQ(TASK_RUNNING, runningStatus->state());

  {
    Future<ResourceUsage> usage =
      process::dispatch(slave.get()->pid, &Slave::usage);
    AWAIT_READY(usage);

    // We should have one executor using resources, but it should not
    // have the disk limit enabled.
    ASSERT_EQ(1, usage->executors().size());
    const ResourceUsage_Executor& executor = usage->executors().Get(0);
    ASSERT_TRUE(executor.has_statistics());
    ASSERT_FALSE(executor.statistics().has_disk_limit_bytes());
  }

  // Restart the slave.
  slave.get()->terminate();

  Future<SlaveReregisteredMessage> slaveReregisteredMessage =
    FUTURE_PROTOBUF(SlaveReregisteredMessage(), _, _);

  // This time use the agent flags that include XFS disk isolation.
  slave = StartSlave(detector.get(), CreateSlaveFlags());
  ASSERT_SOME(slave);

  // Wait for the slave to re-register.
  AWAIT_READY(slaveReregisteredMessage);

  {
    Future<ResourceUsage> usage =
      process::dispatch(slave.get()->pid, &Slave::usage);
    AWAIT_READY(usage);

    // We should still have one executor using resources, and it still
    // should not have the disk limit enabled.
    ASSERT_EQ(1, usage->executors().size());
    const ResourceUsage_Executor& executor = usage->executors().Get(0);
    ASSERT_TRUE(executor.has_statistics());
    ASSERT_FALSE(executor.statistics().has_disk_limit_bytes());
  }

  driver.stop();
  driver.join();
}
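
For contrast with the `flags.isolation.clear()` call above, here is a short sketch of what explicitly enabling the XFS disk isolator for the restarted agent might look like. This is an assumption for illustration only; the test itself simply relies on `CreateSlaveFlags()` already enabling `disk/xfs`, and the `xfsFlags`/`restarted` names are hypothetical.

  slave::Flags xfsFlags = CreateSlaveFlags();
  xfsFlags.isolation = "disk/xfs";  // Enable the XFS disk isolator explicitly.

  Try<Owned<cluster::Slave>> restarted = StartSlave(detector.get(), xfsFlags);
  ASSERT_SOME(restarted);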
Example #3
TEST_F(MemoryPressureMesosTest, CGROUPS_ROOT_Statistics)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  slave::Flags flags = CreateSlaveFlags();

  // We only care about memory cgroup for this test.
  flags.isolation = "cgroups/mem";
  flags.agent_subsystems = None();

  Fetcher fetcher;

  Try<MesosContainerizer*> _containerizer =
    MesosContainerizer::create(flags, true, &fetcher);

  ASSERT_SOME(_containerizer);
  Owned<MesosContainerizer> containerizer(_containerizer.get());

  Owned<MasterDetector> detector = master.get()->createDetector();

  Try<Owned<cluster::Slave>> slave =
    StartSlave(detector.get(), containerizer.get(), flags);
  ASSERT_SOME(slave);

  MockScheduler sched;

  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(_, _, _));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(_, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return());      // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  EXPECT_NE(0u, offers.get().size());

  Offer offer = offers.get()[0];

  // Run a task that triggers a memory pressure event. We request 1 GB
  // of disk because we are going to write a 512 MB file repeatedly.
  TaskInfo task = createTask(
      offer.slave_id(),
      Resources::parse("cpus:1;mem:256;disk:1024").get(),
      "while true; do dd count=512 bs=1M if=/dev/zero of=./temp; done");

  Future<TaskStatus> running;
  Future<TaskStatus> killed;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&running))
    .WillOnce(FutureArg<1>(&killed))
    .WillRepeatedly(Return());       // Ignore subsequent updates.

  driver.launchTasks(offer.id(), {task});

  AWAIT_READY(running);
  EXPECT_EQ(task.task_id(), running.get().task_id());
  EXPECT_EQ(TASK_RUNNING, running.get().state());

  Future<hashset<ContainerID>> containers = containerizer->containers();
  AWAIT_READY(containers);
  ASSERT_EQ(1u, containers.get().size());

  ContainerID containerId = *(containers.get().begin());

  // Wait a while for some memory pressure events to occur.
  Duration waited = Duration::zero();
  do {
    Future<ResourceStatistics> usage = containerizer->usage(containerId);
    AWAIT_READY(usage);

    if (usage.get().mem_low_pressure_counter() > 0) {
      // We will check the correctness of the memory pressure counters
      // later, because the memory-hammering task is still active
      // and potentially incrementing these counters.
      break;
    }

    os::sleep(Milliseconds(100));
    waited += Milliseconds(100);
  } while (waited < Seconds(5));

  EXPECT_LE(waited, Seconds(5));

  // Pause the clock to ensure that the reaper doesn't reap the exited
  // command executor and inform the containerizer/slave.
  Clock::pause();
  Clock::settle();

  // Stop the memory-hammering task.
  driver.killTask(task.task_id());

  AWAIT_READY_FOR(killed, Seconds(120));
  EXPECT_EQ(task.task_id(), killed->task_id());
  EXPECT_EQ(TASK_KILLED, killed->state());

  // Now check the correctness of the memory pressure counters.
  Future<ResourceStatistics> usage = containerizer->usage(containerId);
  AWAIT_READY(usage);

  EXPECT_GE(usage.get().mem_low_pressure_counter(),
            usage.get().mem_medium_pressure_counter());
  EXPECT_GE(usage.get().mem_medium_pressure_counter(),
            usage.get().mem_critical_pressure_counter());

  Clock::resume();

  driver.stop();
  driver.join();
}
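
The polling loop above (wait up to 5 seconds, checking every 100 ms) is a recurring pattern in these tests. Below is a hedged sketch of the same idea factored into a reusable helper; it assumes only the stout `Duration`, `Milliseconds`, and `os::sleep` primitives already used above, and the `waitFor` name is hypothetical.

template <typename Predicate>
bool waitFor(const Predicate& predicate, const Duration& timeout)
{
  Duration waited = Duration::zero();

  while (waited < timeout) {
    if (predicate()) {
      return true;  // The condition became true within the timeout.
    }

    os::sleep(Milliseconds(100));
    waited += Milliseconds(100);
  }

  return false;  // Timed out without the condition becoming true.
}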
Example #4
// In this test, the framework is not checkpointed. This ensures that when we
// stop the slave, the executor is killed and we will need to recover the
// working directories without getting any checkpointed recovery state.
TEST_F(ROOT_XFS_QuotaTest, NoCheckpointRecovery)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  slave::Flags flags = CreateSlaveFlags();

  Fetcher fetcher(flags);
  Try<MesosContainerizer*> _containerizer =
    MesosContainerizer::create(flags, true, &fetcher);

  ASSERT_SOME(_containerizer);

  Owned<MesosContainerizer> containerizer(_containerizer.get());

  Owned<MasterDetector> detector = master.get()->createDetector();

  Try<Owned<cluster::Slave>> slave = StartSlave(
      detector.get(),
      containerizer.get(),
      flags);

  ASSERT_SOME(slave);

  MockScheduler sched;

  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(_, _, _));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(_, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  ASSERT_FALSE(offers->empty());

  Offer offer = offers.get()[0];

  TaskInfo task = createTask(
      offer.slave_id(),
      Resources::parse("cpus:1;mem:128;disk:1").get(),
      "dd if=/dev/zero of=file bs=1048576 count=1; sleep 1000");

  Future<TaskStatus> runningStatus;
  Future<TaskStatus> startingStatus;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&startingStatus))
    .WillOnce(FutureArg<1>(&runningStatus))
    .WillOnce(Return());

  driver.launchTasks(offer.id(), {task});

  AWAIT_READY(startingStatus);
  EXPECT_EQ(task.task_id(), startingStatus->task_id());
  EXPECT_EQ(TASK_STARTING, startingStatus->state());

  AWAIT_READY(runningStatus);
  EXPECT_EQ(task.task_id(), runningStatus->task_id());
  EXPECT_EQ(TASK_RUNNING, runningStatus->state());

  Future<ResourceUsage> usage1 =
    process::dispatch(slave.get()->pid, &Slave::usage);
  AWAIT_READY(usage1);

  // We should have 1 executor using resources.
  ASSERT_EQ(1, usage1->executors().size());

  Future<hashset<ContainerID>> containers = containerizer->containers();

  AWAIT_READY(containers);
  ASSERT_EQ(1u, containers->size());

  ContainerID containerId = *containers->begin();

  // Restart the slave.
  slave.get()->terminate();

  Future<SlaveReregisteredMessage> slaveReregisteredMessage =
    FUTURE_PROTOBUF(SlaveReregisteredMessage(), _, _);

  _containerizer = MesosContainerizer::create(flags, true, &fetcher);
  ASSERT_SOME(_containerizer);

  containerizer.reset(_containerizer.get());

  // Set up the recovery expectation before starting the slave so we
  // don't race with the recovery dispatch.
  Future<Nothing> _recover = FUTURE_DISPATCH(_, &Slave::_recover);

  slave = StartSlave(detector.get(), containerizer.get(), flags);
  ASSERT_SOME(slave);

  // Wait until slave recovery is complete.
  AWAIT_READY_FOR(_recover, Seconds(60));

  // Wait until the orphan containers are cleaned up.
  AWAIT_READY_FOR(containerizer.get()->wait(containerId), Seconds(60));
  AWAIT_READY(slaveReregisteredMessage);

  Future<ResourceUsage> usage2 =
    process::dispatch(slave.get()->pid, &Slave::usage);
  AWAIT_READY(usage2);

  // We should have no executors left because we didn't checkpoint.
  ASSERT_TRUE(usage2->executors().empty());

  Try<std::list<string>> sandboxes = os::glob(path::join(
      slave::paths::getSandboxRootDir(mountPoint.get()),
      "*",
      "frameworks",
      "*",
      "executors",
      "*",
      "runs",
      "*"));

  ASSERT_SOME(sandboxes);

  // One sandbox and one symlink.
  ASSERT_EQ(2u, sandboxes->size());

  // Scan the remaining sandboxes and make sure that no projects are assigned.
  foreach (const string& sandbox, sandboxes.get()) {
    // Skip the "latest" symlink.
    if (os::stat::islink(sandbox)) {
      continue;
    }

    EXPECT_NONE(xfs::getProjectId(sandbox));
  }

  driver.stop();
  driver.join();
}
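
The `EXPECT_NONE(xfs::getProjectId(sandbox))` checks above rely on the helper returning an empty result once recovery has removed the project quota. A minimal sketch of the same check written imperatively, assuming `xfs::getProjectId()` returns a `Result<prid_t>` as its `EXPECT_NONE`/`EXPECT_SOME` usage suggests:

  Result<prid_t> projectId = xfs::getProjectId(sandbox);

  if (projectId.isSome()) {
    // A quota project is still attached to this sandbox, which would
    // mean recovery without checkpointing failed to clean it up.
  }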
Example #5
// In this test, the framework is checkpointed so we expect the executor to
// persist across the slave restart and to have the same resource usage before
// and after.
TEST_F(ROOT_XFS_QuotaTest, CheckpointRecovery)
{
  slave::Flags flags = CreateSlaveFlags();
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  Owned<MasterDetector> detector = master.get()->createDetector();
  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), flags);
  ASSERT_SOME(slave);

  FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;
  frameworkInfo.set_checkpoint(true);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, frameworkInfo, master.get()->pid, DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(_, _, _));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(_, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  ASSERT_FALSE(offers->empty());

  Offer offer = offers.get()[0];

  TaskInfo task = createTask(
      offer.slave_id(),
      Resources::parse("cpus:1;mem:128;disk:1").get(),
      "dd if=/dev/zero of=file bs=1048576 count=1; sleep 1000");

  Future<TaskStatus> startingStatus;
  Future<TaskStatus> runningStatus;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&startingStatus))
    .WillOnce(FutureArg<1>(&runningStatus));

  driver.launchTasks(offer.id(), {task});

  AWAIT_READY(startingStatus);
  EXPECT_EQ(task.task_id(), startingStatus->task_id());
  EXPECT_EQ(TASK_STARTING, startingStatus->state());

  AWAIT_READY(runningStatus);
  EXPECT_EQ(task.task_id(), runningStatus->task_id());
  EXPECT_EQ(TASK_RUNNING, runningStatus->state());

  Future<ResourceUsage> usage1 =
    process::dispatch(slave.get()->pid, &Slave::usage);
  AWAIT_READY(usage1);

  // We should have 1 executor using resources.
  ASSERT_EQ(1, usage1->executors().size());

  // Restart the slave.
  slave.get()->terminate();

  Future<SlaveReregisteredMessage> slaveReregisteredMessage =
    FUTURE_PROTOBUF(SlaveReregisteredMessage(), _, _);

  slave = StartSlave(detector.get(), flags);
  ASSERT_SOME(slave);

  // Wait for the slave to re-register.
  AWAIT_READY(slaveReregisteredMessage);

  Future<ResourceUsage> usage2 =
    process::dispatch(slave.get()->pid, &Slave::usage);
  AWAIT_READY(usage2);

  // We should still have 1 executor using resources.
  ASSERT_EQ(1, usage2->executors().size());

  Try<std::list<string>> sandboxes = os::glob(path::join(
      slave::paths::getSandboxRootDir(mountPoint.get()),
      "*",
      "frameworks",
      "*",
      "executors",
      "*",
      "runs",
      "*"));

  ASSERT_SOME(sandboxes);

  // One sandbox and one symlink.
  ASSERT_EQ(2u, sandboxes->size());

  // Scan the remaining sandboxes. We ought to still have project IDs
  // assigned to them all.
  foreach (const string& sandbox, sandboxes.get()) {
    // Skip the "latest" symlink.
    if (os::stat::islink(sandbox)) {
      continue;
    }

    EXPECT_SOME(xfs::getProjectId(sandbox));
  }

  driver.stop();
  driver.join();
}
Example #6
// This test verifies that persistent volumes are unmounted properly
// after a checkpointed framework disappears and the slave restarts.
//
// TODO(jieyu): Even though the command task specifies a new
// filesystem root, the executor (command executor) itself does not
// change filesystem root (uses the host filesystem). We need to add a
// test to test the scenario that the executor itself changes rootfs.
TEST_F(LinuxFilesystemIsolatorMesosTest,
       ROOT_RecoverOrphanedPersistentVolume)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  string registry = path::join(sandbox.get(), "registry");
  AWAIT_READY(DockerArchive::create(registry, "test_image"));

  slave::Flags flags = CreateSlaveFlags();
  flags.resources = "cpus:2;mem:1024;disk(role1):1024";
  flags.isolation = "filesystem/linux,docker/runtime";
  flags.docker_registry = registry;
  flags.docker_store_dir = path::join(sandbox.get(), "store");
  flags.image_providers = "docker";

  Fetcher fetcher(flags);

  Try<MesosContainerizer*> create =
    MesosContainerizer::create(flags, true, &fetcher);

  ASSERT_SOME(create);

  Owned<Containerizer> containerizer(create.get());

  Owned<MasterDetector> detector = master.get()->createDetector();

  Try<Owned<cluster::Slave>> slave = StartSlave(
      detector.get(),
      containerizer.get(),
      flags);

  ASSERT_SOME(slave);

  MockScheduler sched;
  FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;
  frameworkInfo.set_roles(0, "role1");
  frameworkInfo.set_checkpoint(true);

  MesosSchedulerDriver driver(
      &sched,
      frameworkInfo,
      master.get()->pid,
      DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  ASSERT_FALSE(offers->empty());

  Offer offer = offers.get()[0];

  string dir1 = path::join(sandbox.get(), "dir1");
  ASSERT_SOME(os::mkdir(dir1));

  Resource persistentVolume = createPersistentVolume(
      Megabytes(64),
      "role1",
      "id1",
      "path1",
      None(),
      None(),
      frameworkInfo.principal());

  // Create a task that does nothing for a long time.
  TaskInfo task = createTask(
      offer.slave_id(),
      Resources::parse("cpus:1;mem:512").get() + persistentVolume,
      "sleep 1000");

  task.mutable_container()->CopyFrom(createContainerInfo(
      "test_image",
      {createVolumeHostPath("/tmp", dir1, Volume::RW)}));

  Future<TaskStatus> statusStarting;
  Future<TaskStatus> statusRunning;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&statusStarting))
    .WillOnce(FutureArg<1>(&statusRunning))
    .WillRepeatedly(DoDefault());

  Future<Nothing> ack =
    FUTURE_DISPATCH(_, &Slave::_statusUpdateAcknowledgement);

  // Create the persistent volumes and launch task via `acceptOffers`.
  driver.acceptOffers(
      {offer.id()},
      {CREATE(persistentVolume), LAUNCH({task})});

  AWAIT_READY(statusStarting);
  EXPECT_EQ(TASK_STARTING, statusStarting->state());

  AWAIT_READY(statusRunning);
  EXPECT_EQ(TASK_RUNNING, statusRunning->state());

  // Wait for the ACK to be checkpointed.
  AWAIT_READY(ack);

  Future<hashset<ContainerID>> containers = containerizer->containers();

  AWAIT_READY(containers);
  ASSERT_EQ(1u, containers->size());

  ContainerID containerId = *containers->begin();

  // Restart the slave.
  slave.get()->terminate();

  // Wipe the slave meta directory so that the slave will treat the
  // above running task as an orphan.
  ASSERT_SOME(os::rmdir(slave::paths::getMetaRootDir(flags.work_dir)));

  Future<Nothing> _recover = FUTURE_DISPATCH(_, &Slave::_recover);

  // Recreate the containerizer using the same helper as above.
  containerizer.reset();

  create = MesosContainerizer::create(flags, true, &fetcher);
  ASSERT_SOME(create);

  containerizer.reset(create.get());

  slave = StartSlave(detector.get(), containerizer.get(), flags);
  ASSERT_SOME(slave);

  // Wait until slave recovery is complete.
  AWAIT_READY(_recover);

  // Wait until the orphan containers are cleaned up.
  AWAIT_READY(containerizer->wait(containerId));

  Try<fs::MountInfoTable> table = fs::MountInfoTable::read();
  ASSERT_SOME(table);

  // All mount targets should be under this directory.
  string directory = slave::paths::getSandboxRootDir(flags.work_dir);

  // Verify that the orphaned container's persistent volume and
  // the rootfs are unmounted.
  foreach (const fs::MountInfoTable::Entry& entry, table->entries) {
    EXPECT_FALSE(strings::contains(entry.target, directory))
      << "Target was not unmounted: " << entry.target;
  }

  driver.stop();
  driver.join();
}
Example #7
// This test verifies that the framework can launch a command task
// that specifies both container image and persistent volumes.
TEST_F(LinuxFilesystemIsolatorMesosTest,
       ROOT_ChangeRootFilesystemCommandExecutorPersistentVolume)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  string registry = path::join(sandbox.get(), "registry");
  AWAIT_READY(DockerArchive::create(registry, "test_image"));

  slave::Flags flags = CreateSlaveFlags();
  flags.resources = "cpus:2;mem:1024;disk(role1):1024";
  flags.isolation = "filesystem/linux,docker/runtime";
  flags.docker_registry = registry;
  flags.docker_store_dir = path::join(sandbox.get(), "store");
  flags.image_providers = "docker";

  Owned<MasterDetector> detector = master.get()->createDetector();

  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), flags);
  ASSERT_SOME(slave);

  MockScheduler sched;
  FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;
  frameworkInfo.set_roles(0, "role1");

  MesosSchedulerDriver driver(
      &sched,
      frameworkInfo,
      master.get()->pid,
      DEFAULT_CREDENTIAL);

  Future<FrameworkID> frameworkId;
  EXPECT_CALL(sched, registered(&driver, _, _))
    .WillOnce(FutureArg<1>(&frameworkId));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(frameworkId);

  AWAIT_READY(offers);
  ASSERT_FALSE(offers->empty());

  Offer offer = offers.get()[0];

  string dir1 = path::join(sandbox.get(), "dir1");
  ASSERT_SOME(os::mkdir(dir1));

  Resource persistentVolume = createPersistentVolume(
      Megabytes(64),
      "role1",
      "id1",
      "path1",
      None(),
      None(),
      frameworkInfo.principal());

  // We use the filter explicitly here so that the resources will not
  // be filtered for 5 seconds (the default).
  Filters filters;
  filters.set_refuse_seconds(0);

  TaskInfo task = createTask(
      offer.slave_id(),
      Resources::parse("cpus:1;mem:512").get() + persistentVolume,
      "echo abc > path1/file");

  task.mutable_container()->CopyFrom(createContainerInfo(
      "test_image",
      {createVolumeHostPath("/tmp", dir1, Volume::RW)}));

  Future<TaskStatus> statusStarting;
  Future<TaskStatus> statusRunning;
  Future<TaskStatus> statusFinished;

  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&statusStarting))
    .WillOnce(FutureArg<1>(&statusRunning))
    .WillOnce(FutureArg<1>(&statusFinished));

  // Create the persistent volumes and launch the task via `acceptOffers`.
  driver.acceptOffers(
      {offer.id()},
      {CREATE(persistentVolume), LAUNCH({task})},
      filters);

  AWAIT_READY(statusStarting);
  EXPECT_EQ(TASK_STARTING, statusStarting->state());

  AWAIT_READY(statusRunning);
  EXPECT_EQ(TASK_RUNNING, statusRunning->state());

  AWAIT_READY(statusFinished);
  EXPECT_EQ(TASK_FINISHED, statusFinished->state());

  // NOTE: The command executor's id is the same as the task id.
  ExecutorID executorId;
  executorId.set_value(task.task_id().value());

  string directory = slave::paths::getExecutorLatestRunPath(
      flags.work_dir,
      offer.slave_id(),
      frameworkId.get(),
      executorId);

  EXPECT_FALSE(os::exists(path::join(directory, "path1")));

  string volumePath = slave::paths::getPersistentVolumePath(
      flags.work_dir,
      "role1",
      "id1");

  EXPECT_SOME_EQ("abc\n", os::read(path::join(volumePath, "file")));

  driver.stop();
  driver.join();
}
Example #8
// This test verifies that a task group is launched on the agent if the executor
// provides a valid authentication token specifying its own ContainerID.
TEST_F(ExecutorAuthorizationTest, RunTaskGroup)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  // Start an agent with permissive ACLs so that a task can be launched.
  ACLs acls;
  acls.set_permissive(true);

  slave::Flags flags = CreateSlaveFlags();
  flags.acls = acls;

  Owned<MasterDetector> detector = master.get()->createDetector();
  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), flags);
  ASSERT_SOME(slave);

  FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, frameworkInfo, master.get()->pid, DEFAULT_CREDENTIAL);

  Future<FrameworkID> frameworkId;
  EXPECT_CALL(sched, registered(&driver, _, _))
    .WillOnce(FutureArg<1>(&frameworkId));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(frameworkId);

  AWAIT_READY(offers);
  ASSERT_FALSE(offers->empty());

  Offer offer = offers.get()[0];

  TaskInfo task = createTask(
      offer.slave_id(),
      Resources::parse("cpus:0.5;mem:32").get(),
      "sleep 1000");

  Future<TaskStatus> status;

  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&status));

  Resources executorResources =
    allocatedResources(Resources::parse("cpus:0.1;mem:32;disk:32").get(), "*");

  ExecutorInfo executor;
  executor.mutable_executor_id()->set_value("default");
  executor.set_type(ExecutorInfo::DEFAULT);
  executor.mutable_framework_id()->CopyFrom(frameworkId.get());
  executor.mutable_resources()->CopyFrom(executorResources);

  TaskGroupInfo taskGroup;
  taskGroup.add_tasks()->CopyFrom(task);

  driver.acceptOffers({offer.id()}, {LAUNCH_GROUP(executor, taskGroup)});

  AWAIT_READY(status);

  ASSERT_EQ(task.task_id(), status->task_id());
  EXPECT_EQ(TASK_STARTING, status->state());

  driver.stop();
  driver.join();
}
Example #9
// This test verifies that a task is launched on the agent if the task
// user is authorized based on `run_tasks` ACL configured on the agent
// to only allow whitelisted users to run tasks on the agent.
TYPED_TEST(SlaveAuthorizerTest, AuthorizeRunTaskOnAgent)
{
  // Get the current user.
  Result<string> user = os::user();
  ASSERT_SOME(user) << "Failed to get the current user name"
                    << (user.isError() ? ": " + user.error() : "");

  Try<Owned<cluster::Master>> master = this->StartMaster();
  ASSERT_SOME(master);

  // Start a slave whose ACLs authorize only `bar` and the current user
  // to launch tasks on the agent.
  ACLs acls;
  acls.set_permissive(false); // Restrictive.
  mesos::ACL::RunTask* acl = acls.add_run_tasks();
  acl->mutable_principals()->set_type(ACL::Entity::ANY);
  acl->mutable_users()->add_values("bar");
  acl->mutable_users()->add_values(user.get());

  slave::Flags slaveFlags = this->CreateSlaveFlags();
  slaveFlags.acls = acls;

  Owned<MasterDetector> detector = master.get()->createDetector();
  Try<Owned<cluster::Slave>> slave = this->StartSlave(
      detector.get(), slaveFlags);
  ASSERT_SOME(slave);

  // Create a framework with user `foo`.
  FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;
  frameworkInfo.set_user("foo");

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, frameworkInfo, master.get()->pid, DEFAULT_CREDENTIAL);

  Future<FrameworkID> frameworkId;
  EXPECT_CALL(sched, registered(&driver, _, _))
    .WillOnce(FutureArg<1>(&frameworkId));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  // Framework is registered since the master admits frameworks of any user.
  AWAIT_READY(frameworkId);

  AWAIT_READY(offers);
  ASSERT_FALSE(offers->empty());

  Offer offer = offers.get()[0];

  // Launch the first task with no user, so it defaults to the
  // framework user `foo`.
  TaskInfo task1 = createTask(
      offer.slave_id(),
      Resources::parse("cpus:1;mem:32").get(),
      "sleep 1000");

  // Launch the second task as the current user.
  TaskInfo task2 = createTask(
      offer.slave_id(),
      Resources::parse("cpus:1;mem:32").get(),
      "sleep 1000");
  task2.mutable_command()->set_user(user.get());

  // The first task should fail since the task user `foo` is not an
  // authorized user that can launch a task. However, the second task
  // should succeed.
  Future<TaskStatus> status0;
  Future<TaskStatus> status1;
  Future<TaskStatus> status2;

  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&status0))
    .WillOnce(FutureArg<1>(&status1))
    .WillOnce(FutureArg<1>(&status2));

  driver.acceptOffers(
      {offer.id()},
      {LAUNCH({task1, task2})});

  // Wait for TASK_ERROR for 1st task, and TASK_STARTING followed by
  // TASK_RUNNING for 2nd task.
  AWAIT_READY(status0);
  AWAIT_READY(status1);
  AWAIT_READY(status2);

  // Validate the statuses for both tasks. Note that the order of
  // receiving the status updates for the two tasks is not
  // deterministic, but we know that task2's TASK_RUNNING arrives
  // after its TASK_STARTING.
  hashmap<TaskID, TaskStatus> statuses;
  statuses[status0->task_id()] = status0.get();
  statuses[status1->task_id()] = status1.get();
  statuses[status2->task_id()] = status2.get();

  ASSERT_TRUE(statuses.contains(task1.task_id()));
  EXPECT_EQ(TASK_ERROR, statuses.at(task1.task_id()).state());
  EXPECT_EQ(TaskStatus::SOURCE_SLAVE, statuses.at(task1.task_id()).source());
  EXPECT_EQ(TaskStatus::REASON_TASK_UNAUTHORIZED,
            statuses.at(task1.task_id()).reason());

  ASSERT_TRUE(statuses.contains(task2.task_id()));
  EXPECT_EQ(TASK_RUNNING, statuses.at(task2.task_id()).state());

  driver.stop();
  driver.join();
}
Example #10
TEST_F(FaultToleranceTest, ForwardStatusUpdateUnknownExecutor)
{
  Try<PID<Master> > master = StartMaster();
  ASSERT_SOME(master);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);

  Try<PID<Slave> > slave = StartSlave(&exec);
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(&sched, DEFAULT_FRAMEWORK_INFO, master.get());

  FrameworkID frameworkId;
  EXPECT_CALL(sched, registered(&driver, _, _))
    .WillOnce(SaveArg<1>(&frameworkId));

  Future<vector<Offer> > offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers));

  driver.start();

  AWAIT_READY(offers);
  EXPECT_NE(0u, offers.get().size());
  Offer offer = offers.get()[0];

  TaskInfo task;
  task.set_name("");
  task.mutable_task_id()->set_value("1");
  task.mutable_slave_id()->MergeFrom(offer.slave_id());
  task.mutable_resources()->MergeFrom(offer.resources());
  task.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO);

  vector<TaskInfo> tasks;
  tasks.push_back(task);

  Future<Nothing> statusUpdate;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureSatisfy(&statusUpdate));    // TASK_RUNNING of task1.

  EXPECT_CALL(exec, registered(_, _, _, _));

  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING));

  driver.launchTasks(offer.id(), tasks);

  // Wait until TASK_RUNNING of task1 is received.
  AWAIT_READY(statusUpdate);

  // Simulate the slave receiving status update from an unknown
  // (e.g. exited) executor of the given framework.
  Future<TaskStatus> status;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&status));           // TASK_RUNNING of task2.

  TaskID taskId;
  taskId.set_value("task2");

  StatusUpdate statusUpdate2 = createStatusUpdate(
      frameworkId, offer.slave_id(), taskId, TASK_RUNNING, "Dummy update");

  process::dispatch(slave.get(), &Slave::statusUpdate, statusUpdate2);

  // Ensure that the scheduler receives task2's update.
  AWAIT_READY(status);
  EXPECT_EQ(taskId, status.get().task_id());
  EXPECT_EQ(TASK_RUNNING, status.get().state());

  EXPECT_CALL(exec, shutdown(_))
    .Times(AtMost(1));

  driver.stop();
  driver.join();

  Shutdown();
}