TEST_P(CpuIsolatorTest, ROOT_UserCpuUsage)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  slave::Flags flags = CreateSlaveFlags();
  flags.isolation = GetParam();

  Fetcher fetcher(flags);

  Try<MesosContainerizer*> _containerizer =
    MesosContainerizer::create(flags, true, &fetcher);

  ASSERT_SOME(_containerizer);

  Owned<MesosContainerizer> containerizer(_containerizer.get());

  Owned<MasterDetector> detector = master.get()->createDetector();

  Try<Owned<cluster::Slave>> slave = StartSlave(
      detector.get(),
      containerizer.get());

  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched,
      DEFAULT_FRAMEWORK_INFO,
      master.get()->pid,
      DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  ASSERT_FALSE(offers->empty());

  // Max out a single core in userspace. This will run for at most one
  // second.
  TaskInfo task = createTask(
      offers.get()[0],
      "while true ; do true ; done & sleep 60");

  Future<TaskStatus> statusRunning;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&statusRunning));

  driver.launchTasks(offers.get()[0].id(), {task});

  AWAIT_READY(statusRunning);
  EXPECT_EQ(TASK_RUNNING, statusRunning->state());

  Future<hashset<ContainerID>> containers = containerizer->containers();
  AWAIT_READY(containers);
  ASSERT_EQ(1u, containers->size());

  ContainerID containerId = *(containers->begin());

  // Wait up to 1 second for the child process to induce 1/8 of a
  // second of user cpu time.
  ResourceStatistics statistics;
  Duration waited = Duration::zero();
  do {
    Future<ResourceStatistics> usage = containerizer->usage(containerId);
    AWAIT_READY(usage);

    statistics = usage.get();

    // If we meet our usage expectations, we're done!
    if (statistics.cpus_user_time_secs() >= 0.125) {
      break;
    }

    os::sleep(Milliseconds(200));
    waited += Milliseconds(200);
  } while (waited < Seconds(1));

  EXPECT_LE(0.125, statistics.cpus_user_time_secs());

  driver.stop();
  driver.join();
}
TEST_F(DiskQuotaTest, ResourceStatistics)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  slave::Flags flags = CreateSlaveFlags();
  flags.isolation = "posix/cpu,posix/mem,disk/du";

  flags.resources = strings::format("disk(%s):10", DEFAULT_TEST_ROLE).get();

  // NOTE: We can't pause the clock because we need the reaper to reap
  // the 'du' subprocess.
  flags.container_disk_watch_interval = Milliseconds(1);

  Fetcher fetcher(flags);

  Try<MesosContainerizer*> _containerizer =
    MesosContainerizer::create(flags, true, &fetcher);

  ASSERT_SOME(_containerizer);

  Owned<MesosContainerizer> containerizer(_containerizer.get());

  Owned<MasterDetector> detector = master.get()->createDetector();

  Try<Owned<cluster::Slave>> slave =
    StartSlave(detector.get(), containerizer.get(), flags);

  ASSERT_SOME(slave);

  FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;
  frameworkInfo.set_role(DEFAULT_TEST_ROLE);

  MockScheduler sched;

  MesosSchedulerDriver driver(
      &sched,
      frameworkInfo,
      master.get()->pid,
      DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(_, _, _));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(_, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return());      // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  ASSERT_FALSE(offers->empty());

  const Offer& offer = offers.get()[0];

  Resource volume = createPersistentVolume(
      Megabytes(4),
      DEFAULT_TEST_ROLE,
      "id1",
      "path1",
      None(),
      None(),
      DEFAULT_CREDENTIAL.principal());

  Resources taskResources = Resources::parse("cpus:1;mem:128").get();

  taskResources += createDiskResource(
      "3",
      DEFAULT_TEST_ROLE,
      None(),
      None());

  taskResources += volume;

  // Create a task that uses 2MB disk.
  TaskInfo task = createTask(
      offer.slave_id(),
      taskResources,
      "dd if=/dev/zero of=file bs=1048576 count=2 && "
      "dd if=/dev/zero of=path1/file bs=1048576 count=2 && "
      "sleep 1000");

  Future<TaskStatus> status1;
  Future<TaskStatus> status2;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&status1))
    .WillOnce(FutureArg<1>(&status2))
    .WillRepeatedly(Return());       // Ignore subsequent updates.

  driver.acceptOffers(
      {offer.id()},
      {CREATE(volume),
       LAUNCH({task})});

  AWAIT_READY(status1);
  EXPECT_EQ(task.task_id(), status1->task_id());
  EXPECT_EQ(TASK_RUNNING, status1->state());

  Future<hashset<ContainerID>> containers = containerizer->containers();

  AWAIT_READY(containers);
  ASSERT_EQ(1u, containers->size());

  const ContainerID& containerId = *(containers->begin());

  // Wait until disk usage can be retrieved.
  Duration elapsed = Duration::zero();
  while (true) {
    Future<ResourceStatistics> usage = containerizer->usage(containerId);
    AWAIT_READY(usage);

    ASSERT_TRUE(usage->has_disk_limit_bytes());
    EXPECT_EQ(Megabytes(3), Bytes(usage->disk_limit_bytes()));

    if (usage->has_disk_used_bytes()) {
      EXPECT_LE(usage->disk_used_bytes(), usage->disk_limit_bytes());
    }

    ASSERT_EQ(2u, usage->disk_statistics().size());

    bool done = true;
    foreach (const DiskStatistics& statistics, usage->disk_statistics()) {
      ASSERT_TRUE(statistics.has_limit_bytes());
      EXPECT_EQ(
          statistics.has_persistence() ? Megabytes(4) : Megabytes(3),
          statistics.limit_bytes());

      if (!statistics.has_used_bytes()) {
        done = false;
      } else {
        EXPECT_GT(
            statistics.has_persistence() ? Megabytes(4) : Megabytes(3),
            statistics.used_bytes());
      }
    }

    if (done) {
      break;
    }

    ASSERT_LT(elapsed, Seconds(5));

    os::sleep(Milliseconds(1));
    elapsed += Milliseconds(1);
  }

  driver.killTask(task.task_id());

  AWAIT_READY(status2);
  EXPECT_EQ(task.task_id(), status2->task_id());
  EXPECT_EQ(TASK_KILLED, status2->state());

  driver.stop();
  driver.join();
}
// This test verifies that disk quota isolator recovers properly after
// the slave restarts.
TEST_F(DiskQuotaTest, SlaveRecovery)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  slave::Flags flags = CreateSlaveFlags();
  flags.isolation = "posix/cpu,posix/mem,disk/du";
  flags.container_disk_watch_interval = Milliseconds(1);

  Fetcher fetcher(flags);

  Try<MesosContainerizer*> _containerizer =
    MesosContainerizer::create(flags, true, &fetcher);

  ASSERT_SOME(_containerizer);

  Owned<MesosContainerizer> containerizer(_containerizer.get());

  Owned<MasterDetector> detector = master.get()->createDetector();

  Try<Owned<cluster::Slave>> slave =
    StartSlave(detector.get(), containerizer.get(), flags);

  ASSERT_SOME(slave);

  MockScheduler sched;

  // Enable checkpointing for the framework.
  FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;
  frameworkInfo.set_checkpoint(true);

  MesosSchedulerDriver driver(
      &sched, frameworkInfo, master.get()->pid, DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(_, _, _));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(_, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return());      // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  ASSERT_FALSE(offers->empty());

  const Offer& offer = offers.get()[0];

  // Create a task that uses 2MB disk.
  TaskInfo task = createTask(
      offer.slave_id(),
      Resources::parse("cpus:1;mem:128;disk:3").get(),
      "dd if=/dev/zero of=file bs=1048576 count=2 && sleep 1000");

  Future<TaskStatus> status;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&status))
    .WillRepeatedly(Return());       // Ignore subsequent updates.

  driver.launchTasks(offer.id(), {task});

  AWAIT_READY(status);
  EXPECT_EQ(task.task_id(), status->task_id());
  EXPECT_EQ(TASK_RUNNING, status->state());

  Future<hashset<ContainerID>> containers = containerizer->containers();

  AWAIT_READY(containers);
  ASSERT_EQ(1u, containers->size());

  const ContainerID& containerId = *(containers->begin());

  // Stop the slave.
  slave.get()->terminate();

  Future<ReregisterExecutorMessage> reregisterExecutorMessage =
    FUTURE_PROTOBUF(ReregisterExecutorMessage(), _, _);

  Future<Nothing> _recover = FUTURE_DISPATCH(_, &Slave::_recover);

  _containerizer = MesosContainerizer::create(flags, true, &fetcher);
  ASSERT_SOME(_containerizer);

  containerizer.reset(_containerizer.get());

  detector = master.get()->createDetector();

  slave = StartSlave(detector.get(), containerizer.get(), flags);
  ASSERT_SOME(slave);

  Clock::pause();

  AWAIT_READY(_recover);

  // Wait for slave to schedule reregister timeout.
  Clock::settle();

  // Ensure the executor re-registers before completing recovery.
  AWAIT_READY(reregisterExecutorMessage);

  // Ensure the slave considers itself recovered.
  Clock::advance(flags.executor_reregistration_timeout);

  // NOTE: We resume the clock because we need the reaper to reap the
  // 'du' subprocess.
  Clock::resume();

  // Wait until disk usage can be retrieved.
  Duration elapsed = Duration::zero();
  while (true) {
    Future<ResourceStatistics> usage = containerizer->usage(containerId);
    AWAIT_READY(usage);

    ASSERT_TRUE(usage->has_disk_limit_bytes());
    EXPECT_EQ(Megabytes(3), Bytes(usage->disk_limit_bytes()));

    if (usage->has_disk_used_bytes()) {
      EXPECT_LE(usage->disk_used_bytes(), usage->disk_limit_bytes());

      // NOTE: This is to capture the regression in MESOS-2452. The data
      // stored in the executor meta directory should be less than 64K.
      if (usage->disk_used_bytes() > Kilobytes(64).bytes()) {
        break;
      }
    }

    ASSERT_LT(elapsed, Seconds(15));

    os::sleep(Milliseconds(1));
    elapsed += Milliseconds(1);
  }

  driver.stop();
  driver.join();
}
TEST_F(MemoryPressureMesosTest, CGROUPS_ROOT_Statistics)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  slave::Flags flags = CreateSlaveFlags();

  // We only care about memory cgroup for this test.
  flags.isolation = "cgroups/mem";

  Fetcher fetcher(flags);

  Try<MesosContainerizer*> _containerizer =
    MesosContainerizer::create(flags, true, &fetcher);

  ASSERT_SOME(_containerizer);
  Owned<MesosContainerizer> containerizer(_containerizer.get());

  Owned<MasterDetector> detector = master.get()->createDetector();

  Try<Owned<cluster::Slave>> slave =
    StartSlave(detector.get(), containerizer.get(), flags);
  ASSERT_SOME(slave);

  MockScheduler sched;

  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(_, _, _));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(_, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return());      // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  ASSERT_FALSE(offers->empty());

  Offer offer = offers.get()[0];

  // Run a task that triggers memory pressure event. We request 1G
  // disk because we are going to write a 512 MB file repeatedly.
  TaskInfo task = createTask(
      offer.slave_id(),
      Resources::parse("cpus:1;mem:256;disk:1024").get(),
      "while true; do dd count=512 bs=1M if=/dev/zero of=./temp; done");

  Future<TaskStatus> starting;
  Future<TaskStatus> running;
  Future<TaskStatus> killed;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&starting))
    .WillOnce(FutureArg<1>(&running))
    .WillOnce(FutureArg<1>(&killed))
    .WillRepeatedly(Return());       // Ignore subsequent updates.

  driver.launchTasks(offer.id(), {task});

  AWAIT_READY(starting);
  EXPECT_EQ(task.task_id(), starting->task_id());
  EXPECT_EQ(TASK_STARTING, starting->state());

  AWAIT_READY(running);
  EXPECT_EQ(task.task_id(), running->task_id());
  EXPECT_EQ(TASK_RUNNING, running->state());

  Future<hashset<ContainerID>> containers = containerizer->containers();
  AWAIT_READY(containers);
  ASSERT_EQ(1u, containers->size());

  ContainerID containerId = *(containers->begin());

  // Wait a while for some memory pressure events to occur.
  Duration waited = Duration::zero();
  do {
    Future<ResourceStatistics> usage = containerizer->usage(containerId);
    AWAIT_READY(usage);

    if (usage->mem_low_pressure_counter() > 0) {
      // We will check the correctness of the memory pressure counters
      // later, because the memory-hammering task is still active
      // and potentially incrementing these counters.
      break;
    }

    os::sleep(Milliseconds(100));
    waited += Milliseconds(100);
  } while (waited < Seconds(5));

  EXPECT_LE(waited, Seconds(5));

  // Pause the clock to ensure that the reaper doesn't reap the exited
  // command executor and inform the containerizer/slave.
  Clock::pause();
  Clock::settle();

  // Stop the memory-hammering task.
  driver.killTask(task.task_id());

  AWAIT_READY_FOR(killed, Seconds(120));
  EXPECT_EQ(task.task_id(), killed->task_id());
  EXPECT_EQ(TASK_KILLED, killed->state());

  // Now check the correctness of the memory pressure counters.
  Future<ResourceStatistics> usage = containerizer->usage(containerId);
  AWAIT_READY(usage);

  EXPECT_GE(usage->mem_low_pressure_counter(),
            usage->mem_medium_pressure_counter());
  EXPECT_GE(usage->mem_medium_pressure_counter(),
            usage->mem_critical_pressure_counter());

  Clock::resume();

  driver.stop();
  driver.join();
}
// This test verifies that the container will not be killed if
// disk_enforce_quota flag is false (even if the disk usage exceeds
// its quota).
TEST_F(DiskQuotaTest, NoQuotaEnforcement)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  slave::Flags flags = CreateSlaveFlags();
  flags.isolation = "posix/cpu,posix/mem,disk/du";

  // NOTE: We can't pause the clock because we need the reaper to reap
  // the 'du' subprocess.
  flags.container_disk_watch_interval = Milliseconds(1);
  flags.enforce_container_disk_quota = false;

  Fetcher fetcher(flags);

  Try<MesosContainerizer*> _containerizer =
    MesosContainerizer::create(flags, true, &fetcher);

  ASSERT_SOME(_containerizer);

  Owned<MesosContainerizer> containerizer(_containerizer.get());

  Owned<MasterDetector> detector = master.get()->createDetector();

  Try<Owned<cluster::Slave>> slave =
    StartSlave(detector.get(), containerizer.get(), flags);

  ASSERT_SOME(slave);

  MockScheduler sched;

  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(_, _, _));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(_, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return());      // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  ASSERT_FALSE(offers->empty());

  const Offer& offer = offers.get()[0];

  // Create a task that uses 2MB disk.
  TaskInfo task = createTask(
      offer.slave_id(),
      Resources::parse("cpus:1;mem:128;disk:1").get(),
      "dd if=/dev/zero of=file bs=1048576 count=2 && sleep 1000");

  Future<TaskStatus> status;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&status))
    .WillRepeatedly(Return());       // Ignore subsequent updates.

  driver.launchTasks(offer.id(), {task});

  AWAIT_READY(status);
  EXPECT_EQ(task.task_id(), status->task_id());
  EXPECT_EQ(TASK_RUNNING, status->state());

  Future<hashset<ContainerID>> containers = containerizer->containers();

  AWAIT_READY(containers);
  ASSERT_EQ(1u, containers->size());

  const ContainerID& containerId = *(containers->begin());

  // Wait until disk usage can be retrieved and the usage actually
  // exceeds the limit. If the container is killed due to quota
  // enforcement (which shouldn't happen), the 'usage' call will
  // return a failed future, leading to a failed test.
  Duration elapsed = Duration::zero();
  while (true) {
    Future<ResourceStatistics> usage = containerizer->usage(containerId);
    AWAIT_READY(usage);

    ASSERT_TRUE(usage->has_disk_limit_bytes());
    EXPECT_EQ(Megabytes(1), Bytes(usage->disk_limit_bytes()));

    if (usage->has_disk_used_bytes() &&
        usage->disk_used_bytes() > usage->disk_limit_bytes()) {
      break;
    }

    ASSERT_LT(elapsed, Seconds(5));

    os::sleep(Milliseconds(1));
    elapsed += Milliseconds(1);
  }

  driver.stop();
  driver.join();
}
Exemple #6
0
// In this test, the framework is not checkpointed. This ensures that when we
// stop the slave, the executor is killed and we will need to recover the
// working directories without getting any checkpointed recovery state.
TEST_F(ROOT_XFS_QuotaTest, NoCheckpointRecovery)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  slave::Flags flags = CreateSlaveFlags();

  Fetcher fetcher(flags);
  Try<MesosContainerizer*> _containerizer =
    MesosContainerizer::create(flags, true, &fetcher);

  ASSERT_SOME(_containerizer);

  Owned<MesosContainerizer> containerizer(_containerizer.get());

  Owned<MasterDetector> detector = master.get()->createDetector();

  Try<Owned<cluster::Slave>> slave = StartSlave(
      detector.get(),
      containerizer.get(),
      flags);

  ASSERT_SOME(slave);

  MockScheduler sched;

  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(_, _, _));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(_, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  ASSERT_FALSE(offers->empty());

  Offer offer = offers.get()[0];

  TaskInfo task = createTask(
      offer.slave_id(),
      Resources::parse("cpus:1;mem:128;disk:1").get(),
      "dd if=/dev/zero of=file bs=1048576 count=1; sleep 1000");

  Future<TaskStatus> runningStatus;
  Future<TaskStatus> startingStatus;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&startingStatus))
    .WillOnce(FutureArg<1>(&runningStatus))
    .WillOnce(Return());

  driver.launchTasks(offer.id(), {task});

  AWAIT_READY(startingStatus);
  EXPECT_EQ(task.task_id(), startingStatus->task_id());
  EXPECT_EQ(TASK_STARTING, startingStatus->state());

  AWAIT_READY(runningStatus);
  EXPECT_EQ(task.task_id(), runningStatus->task_id());
  EXPECT_EQ(TASK_RUNNING, runningStatus->state());

  Future<ResourceUsage> usage1 =
    process::dispatch(slave.get()->pid, &Slave::usage);
  AWAIT_READY(usage1);

  // We should have 1 executor using resources.
  ASSERT_EQ(1, usage1->executors().size());

  Future<hashset<ContainerID>> containers = containerizer->containers();

  AWAIT_READY(containers);
  ASSERT_EQ(1u, containers->size());

  ContainerID containerId = *containers->begin();

  // Restart the slave.
  slave.get()->terminate();

  Future<SlaveReregisteredMessage> slaveReregisteredMessage =
    FUTURE_PROTOBUF(SlaveReregisteredMessage(), _, _);

  _containerizer = MesosContainerizer::create(flags, true, &fetcher);
  ASSERT_SOME(_containerizer);

  containerizer.reset(_containerizer.get());

  slave = StartSlave(detector.get(), containerizer.get(), flags);
  ASSERT_SOME(slave);

  // Wait until slave recovery is complete.
  Future<Nothing> _recover = FUTURE_DISPATCH(_, &Slave::_recover);
  AWAIT_READY_FOR(_recover, Seconds(60));

  // Wait until the orphan containers are cleaned up.
  AWAIT_READY_FOR(containerizer.get()->wait(containerId), Seconds(60));
  AWAIT_READY(slaveReregisteredMessage);

  Future<ResourceUsage> usage2 =
    process::dispatch(slave.get()->pid, &Slave::usage);
  AWAIT_READY(usage2);

  // We should have no executors left because we didn't checkpoint.
  ASSERT_TRUE(usage2->executors().empty());

  Try<std::list<string>> sandboxes = os::glob(path::join(
      slave::paths::getSandboxRootDir(mountPoint.get()),
      "*",
      "frameworks",
      "*",
      "executors",
      "*",
      "runs",
      "*"));

  ASSERT_SOME(sandboxes);

  // One sandbox and one symlink.
  ASSERT_EQ(2u, sandboxes->size());

  // Scan the remaining sandboxes and make sure that no projects are assigned.
  foreach (const string& sandbox, sandboxes.get()) {
    // Skip the "latest" symlink.
    if (os::stat::islink(sandbox)) {
      continue;
    }

    EXPECT_NONE(xfs::getProjectId(sandbox));
  }

  driver.stop();
  driver.join();
}
Exemple #7
0
// This is the same logic as ResourceStatistics, except the task should
// be allowed to exceed the disk quota, and usage statistics should report
// that the quota was exceeded.
TEST_F(ROOT_XFS_QuotaTest, ResourceStatisticsNoEnforce)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  slave::Flags flags = CreateSlaveFlags();
  flags.enforce_container_disk_quota = false;

  Fetcher fetcher(flags);
  Owned<MasterDetector> detector = master.get()->createDetector();

  Try<MesosContainerizer*> _containerizer =
    MesosContainerizer::create(flags, true, &fetcher);

  ASSERT_SOME(_containerizer);
  Owned<MesosContainerizer> containerizer(_containerizer.get());

  Try<Owned<cluster::Slave>> slave =
    StartSlave(detector.get(), containerizer.get(), flags);
  ASSERT_SOME(slave);

  MockScheduler sched;

  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL);
  EXPECT_CALL(sched, registered(_, _, _));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(_, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  ASSERT_FALSE(offers->empty());

  Offer offer = offers.get()[0];

  // Create a task that uses 4MB of 3MB disk and fails if it can't
  // write the full amount.
  TaskInfo task = createTask(
      offer.slave_id(),
      Resources::parse("cpus:1;mem:128;disk:3").get(),
      "dd if=/dev/zero of=file bs=1048576 count=4 && sleep 1000");

  Future<TaskStatus> startingStatus;
  Future<TaskStatus> runningStatus;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&startingStatus))
    .WillOnce(FutureArg<1>(&runningStatus))
    .WillRepeatedly(Return()); // Ignore subsequent updates.

  driver.launchTasks(offers.get()[0].id(), {task});

  AWAIT_READY(startingStatus);
  EXPECT_EQ(task.task_id(), startingStatus->task_id());
  EXPECT_EQ(TASK_STARTING, startingStatus->state());

  AWAIT_READY(runningStatus);
  EXPECT_EQ(task.task_id(), runningStatus->task_id());
  EXPECT_EQ(TASK_RUNNING, runningStatus->state());

  Future<hashset<ContainerID>> containers = containerizer.get()->containers();
  AWAIT_READY(containers);
  ASSERT_EQ(1u, containers->size());

  ContainerID containerId = *(containers->begin());
  Duration diskTimeout = Seconds(5);
  Timeout timeout = Timeout::in(diskTimeout);

  while (true) {
    Future<ResourceStatistics> usage = containerizer.get()->usage(containerId);
    AWAIT_READY(usage);

    ASSERT_TRUE(usage->has_disk_limit_bytes());
    EXPECT_EQ(Megabytes(3), Bytes(usage->disk_limit_bytes()));

    if (usage->has_disk_used_bytes()) {
      if (usage->disk_used_bytes() >= Megabytes(4).bytes()) {
        break;
      }
    }

    // The stopping condition for this test is that the isolator is
    // able to report that we wrote the full amount of data without
    // being constrained by the task disk limit.
    EXPECT_LE(usage->disk_used_bytes(), Megabytes(4).bytes());

    ASSERT_FALSE(timeout.expired())
      << "Used " << Bytes(usage->disk_used_bytes())
      << " of expected " << Megabytes(4)
      << " within the " << diskTimeout << " timeout";

    os::sleep(Milliseconds(100));
  }

  driver.stop();
  driver.join();
}
Exemple #8
0
// Verify that we can get accurate resource statistics from the XFS
// disk isolator.
TEST_F(ROOT_XFS_QuotaTest, ResourceStatistics)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  slave::Flags flags = CreateSlaveFlags();

  Fetcher fetcher(flags);
  Owned<MasterDetector> detector = master.get()->createDetector();

  Try<MesosContainerizer*> _containerizer =
    MesosContainerizer::create(flags, true, &fetcher);

  ASSERT_SOME(_containerizer);
  Owned<MesosContainerizer> containerizer(_containerizer.get());

  Try<Owned<cluster::Slave>> slave =
    StartSlave(detector.get(), containerizer.get(), flags);
  ASSERT_SOME(slave);

  MockScheduler sched;

  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(_, _, _));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(_, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return());      // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  ASSERT_FALSE(offers->empty());

  Offer offer = offers.get()[0];

  // Create a task that uses 4 of 3MB disk but doesn't fail. We will verify
  // that the allocated disk is filled.
  TaskInfo task = createTask(
      offer.slave_id(),
      Resources::parse("cpus:1;mem:128;disk:3").get(),
      "dd if=/dev/zero of=file bs=1048576 count=4 || sleep 1000");

  Future<TaskStatus> startingStatus;
  Future<TaskStatus> runningStatus;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&startingStatus))
    .WillOnce(FutureArg<1>(&runningStatus))
    .WillRepeatedly(Return()); // Ignore subsequent updates.

  driver.launchTasks(offers.get()[0].id(), {task});

  AWAIT_READY(startingStatus);
  EXPECT_EQ(task.task_id(), startingStatus->task_id());
  EXPECT_EQ(TASK_STARTING, startingStatus->state());

  AWAIT_READY(runningStatus);
  EXPECT_EQ(task.task_id(), runningStatus->task_id());
  EXPECT_EQ(TASK_RUNNING, runningStatus->state());

  Future<hashset<ContainerID>> containers = containerizer.get()->containers();
  AWAIT_READY(containers);
  ASSERT_EQ(1u, containers->size());

  ContainerID containerId = *(containers->begin());
  Timeout timeout = Timeout::in(Seconds(5));

  while (true) {
    Future<ResourceStatistics> usage = containerizer.get()->usage(containerId);
    AWAIT_READY(usage);

    ASSERT_TRUE(usage->has_disk_limit_bytes());
    EXPECT_EQ(Megabytes(3), Bytes(usage->disk_limit_bytes()));

    if (usage->has_disk_used_bytes()) {
      // Usage must always be <= the limit.
      EXPECT_LE(usage->disk_used_bytes(), usage->disk_limit_bytes());

      // Usage might not be equal to the limit, but it must hit
      // and not exceed the limit.
      if (usage->disk_used_bytes() >= usage->disk_limit_bytes()) {
        EXPECT_EQ(
            usage->disk_used_bytes(), usage->disk_limit_bytes());
        EXPECT_EQ(Megabytes(3), Bytes(usage->disk_used_bytes()));
        break;
      }
    }

    ASSERT_FALSE(timeout.expired());
    os::sleep(Milliseconds(100));
  }

  driver.stop();
  driver.join();
}
// This test verifies that persistent volumes are unmounted properly
// after a checkpointed framework disappears and the slave restarts.
//
// TODO(jieyu): Even though the command task specifies a new
// filesystem root, the executor (command executor) itself does not
// change filesystem root (uses the host filesystem). We need to add a
// test to test the scenario that the executor itself changes rootfs.
TEST_F(LinuxFilesystemIsolatorMesosTest,
       ROOT_RecoverOrphanedPersistentVolume)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  string registry = path::join(sandbox.get(), "registry");
  AWAIT_READY(DockerArchive::create(registry, "test_image"));

  slave::Flags flags = CreateSlaveFlags();
  flags.resources = "cpus:2;mem:1024;disk(role1):1024";
  flags.isolation = "filesystem/linux,docker/runtime";
  flags.docker_registry = registry;
  flags.docker_store_dir = path::join(sandbox.get(), "store");
  flags.image_providers = "docker";

  Fetcher fetcher(flags);

  Try<MesosContainerizer*> create =
    MesosContainerizer::create(flags, true, &fetcher);

  ASSERT_SOME(create);

  Owned<Containerizer> containerizer(create.get());

  Owned<MasterDetector> detector = master.get()->createDetector();

  Try<Owned<cluster::Slave>> slave = StartSlave(
      detector.get(),
      containerizer.get(),
      flags);

  ASSERT_SOME(slave);

  MockScheduler sched;
  FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;
  frameworkInfo.set_roles(0, "role1");
  frameworkInfo.set_checkpoint(true);

  MesosSchedulerDriver driver(
      &sched,
      frameworkInfo,
      master.get()->pid,
      DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  ASSERT_FALSE(offers->empty());

  Offer offer = offers.get()[0];

  string dir1 = path::join(sandbox.get(), "dir1");
  ASSERT_SOME(os::mkdir(dir1));

  Resource persistentVolume = createPersistentVolume(
      Megabytes(64),
      "role1",
      "id1",
      "path1",
      None(),
      None(),
      frameworkInfo.principal());

  // Create a task that does nothing for a long time.
  TaskInfo task = createTask(
      offer.slave_id(),
      Resources::parse("cpus:1;mem:512").get() + persistentVolume,
      "sleep 1000");

  task.mutable_container()->CopyFrom(createContainerInfo(
      "test_image",
      {createVolumeHostPath("/tmp", dir1, Volume::RW)}));

  Future<TaskStatus> statusStarting;
  Future<TaskStatus> statusRunning;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&statusStarting))
    .WillOnce(FutureArg<1>(&statusRunning))
    .WillRepeatedly(DoDefault());

  Future<Nothing> ack =
    FUTURE_DISPATCH(_, &Slave::_statusUpdateAcknowledgement);

  // Create the persistent volumes and launch task via `acceptOffers`.
  driver.acceptOffers(
      {offer.id()},
      {CREATE(persistentVolume), LAUNCH({task})});

  AWAIT_READY(statusStarting);
  EXPECT_EQ(TASK_STARTING, statusStarting->state());

  AWAIT_READY(statusRunning);
  EXPECT_EQ(TASK_RUNNING, statusRunning->state());

  // Wait for the ACK to be checkpointed.
  AWAIT_READY(ack);

  Future<hashset<ContainerID>> containers = containerizer->containers();

  AWAIT_READY(containers);
  ASSERT_EQ(1u, containers->size());

  ContainerID containerId = *containers->begin();

  // Restart the slave.
  slave.get()->terminate();

  // Wipe the slave meta directory so that the slave will treat the
  // above running task as an orphan.
  ASSERT_SOME(os::rmdir(slave::paths::getMetaRootDir(flags.work_dir)));

  Future<Nothing> _recover = FUTURE_DISPATCH(_, &Slave::_recover);

  // Recreate the containerizer using the same helper as above.
  containerizer.reset();

  create = MesosContainerizer::create(flags, true, &fetcher);
  ASSERT_SOME(create);

  containerizer.reset(create.get());

  slave = StartSlave(detector.get(), containerizer.get(), flags);
  ASSERT_SOME(slave);

  // Wait until slave recovery is complete.
  AWAIT_READY(_recover);

  // Wait until the orphan containers are cleaned up.
  AWAIT_READY(containerizer->wait(containerId));

  Try<fs::MountInfoTable> table = fs::MountInfoTable::read();
  ASSERT_SOME(table);

  // All mount targets should be under this directory.
  string directory = slave::paths::getSandboxRootDir(flags.work_dir);

  // Verify that the orphaned container's persistent volume and
  // the rootfs are unmounted.
  foreach (const fs::MountInfoTable::Entry& entry, table->entries) {
    EXPECT_FALSE(strings::contains(entry.target, directory))
      << "Target was not unmounted: " << entry.target;
  }

  driver.stop();
  driver.join();
}
TEST_P(MemoryIsolatorTest, ROOT_MemUsage)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  slave::Flags flags = CreateSlaveFlags();
  flags.isolation = GetParam();

  Fetcher fetcher(flags);

  Try<MesosContainerizer*> _containerizer =
    MesosContainerizer::create(flags, true, &fetcher);

  ASSERT_SOME(_containerizer);

  Owned<MesosContainerizer> containerizer(_containerizer.get());

  Owned<MasterDetector> detector = master.get()->createDetector();

  Try<Owned<cluster::Slave>> slave = StartSlave(
      detector.get(),
      containerizer.get());

  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched,
      DEFAULT_FRAMEWORK_INFO,
      master.get()->pid,
      DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  ASSERT_FALSE(offers->empty());

  TaskInfo task = createTask(offers.get()[0], "sleep 120");

  Future<TaskStatus> statusRunning;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&statusRunning));

  driver.launchTasks(offers.get()[0].id(), {task});

  AWAIT_READY(statusRunning);
  EXPECT_EQ(TASK_RUNNING, statusRunning->state());

  Future<hashset<ContainerID>> containers = containerizer->containers();
  AWAIT_READY(containers);
  ASSERT_EQ(1u, containers->size());

  ContainerID containerId = *(containers->begin());

  Future<ResourceStatistics> usage = containerizer->usage(containerId);
  AWAIT_READY(usage);

  // TODO(jieyu): Consider using a program that predictably increases
  // RSS so that we can set more meaningful expectation here.
  EXPECT_LT(0u, usage->mem_rss_bytes());

  driver.stop();
  driver.join();
}