// This test verifies that persistent volumes are unmounted properly
// after a checkpointed framework disappears and the slave restarts.
//
// TODO(jieyu): Even though the command task specifies a new
// filesystem root, the executor (command executor) itself does not
// change filesystem root (uses the host filesystem). We need to add a
// test to test the scenario that the executor itself changes rootfs.
TEST_F(LinuxFilesystemIsolatorMesosTest,
       ROOT_RecoverOrphanedPersistentVolume)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  string registry = path::join(sandbox.get(), "registry");
  AWAIT_READY(DockerArchive::create(registry, "test_image"));

  slave::Flags flags = CreateSlaveFlags();
  flags.resources = "cpus:2;mem:1024;disk(role1):1024";
  flags.isolation = "filesystem/linux,docker/runtime";
  flags.docker_registry = registry;
  flags.docker_store_dir = path::join(sandbox.get(), "store");
  flags.image_providers = "docker";

  Fetcher fetcher(flags);

  Try<MesosContainerizer*> create =
    MesosContainerizer::create(flags, true, &fetcher);

  ASSERT_SOME(create);

  Owned<Containerizer> containerizer(create.get());

  Owned<MasterDetector> detector = master.get()->createDetector();

  Try<Owned<cluster::Slave>> slave = StartSlave(
      detector.get(),
      containerizer.get(),
      flags);

  ASSERT_SOME(slave);

  MockScheduler sched;
  FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;
  frameworkInfo.set_roles(0, "role1");
  frameworkInfo.set_checkpoint(true);

  MesosSchedulerDriver driver(
      &sched,
      frameworkInfo,
      master.get()->pid,
      DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  ASSERT_FALSE(offers->empty());

  Offer offer = offers.get()[0];

  string dir1 = path::join(sandbox.get(), "dir1");
  ASSERT_SOME(os::mkdir(dir1));

  Resource persistentVolume = createPersistentVolume(
      Megabytes(64),
      "role1",
      "id1",
      "path1",
      None(),
      None(),
      frameworkInfo.principal());

  // Create a task that does nothing for a long time.
  TaskInfo task = createTask(
      offer.slave_id(),
      Resources::parse("cpus:1;mem:512").get() + persistentVolume,
      "sleep 1000");

  task.mutable_container()->CopyFrom(createContainerInfo(
      "test_image",
      {createVolumeHostPath("/tmp", dir1, Volume::RW)}));

  Future<TaskStatus> statusStarting;
  Future<TaskStatus> statusRunning;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&statusStarting))
    .WillOnce(FutureArg<1>(&statusRunning))
    .WillRepeatedly(DoDefault());

  Future<Nothing> ack =
    FUTURE_DISPATCH(_, &Slave::_statusUpdateAcknowledgement);

  // Create the persistent volumes and launch task via `acceptOffers`.
  driver.acceptOffers(
      {offer.id()},
      {CREATE(persistentVolume), LAUNCH({task})});

  AWAIT_READY(statusStarting);
  EXPECT_EQ(TASK_STARTING, statusStarting->state());

  AWAIT_READY(statusRunning);
  EXPECT_EQ(TASK_RUNNING, statusRunning->state());

  // Wait for the ACK to be checkpointed.
  AWAIT_READY(ack);

  Future<hashset<ContainerID>> containers = containerizer->containers();

  AWAIT_READY(containers);
  ASSERT_EQ(1u, containers->size());

  ContainerID containerId = *containers->begin();

  // Restart the slave.
  slave.get()->terminate();

  // Wipe the slave meta directory so that the slave will treat the
  // above running task as an orphan.
  ASSERT_SOME(os::rmdir(slave::paths::getMetaRootDir(flags.work_dir)));

  Future<Nothing> _recover = FUTURE_DISPATCH(_, &Slave::_recover);

  // Recreate the containerizer using the same helper as above.
  containerizer.reset();

  create = MesosContainerizer::create(flags, true, &fetcher);
  ASSERT_SOME(create);

  containerizer.reset(create.get());

  slave = StartSlave(detector.get(), containerizer.get(), flags);
  ASSERT_SOME(slave);

  // Wait until slave recovery is complete.
  AWAIT_READY(_recover);

  // Wait until the orphan containers are cleaned up.
  AWAIT_READY(containerizer->wait(containerId));

  Try<fs::MountInfoTable> table = fs::MountInfoTable::read();
  ASSERT_SOME(table);

  // All mount targets should be under this directory.
  string directory = slave::paths::getSandboxRootDir(flags.work_dir);

  // Verify that the orphaned container's persistent volume and
  // the rootfs are unmounted.
  foreach (const fs::MountInfoTable::Entry& entry, table->entries) {
    EXPECT_FALSE(strings::contains(entry.target, directory))
      << "Target was not unmounted: " << entry.target;
  }

  driver.stop();
  driver.join();
}
// This test verifies that, after a master failover, reconciliation of an
// operation that is still pending on an agent results in `OPERATION_PENDING`.
TEST_P(OperationReconciliationTest, AgentPendingOperationAfterMasterFailover)
{
  Clock::pause();

  mesos::internal::master::Flags masterFlags = CreateMasterFlags();
  Try<Owned<cluster::Master>> master = StartMaster(masterFlags);
  ASSERT_SOME(master);

  Future<UpdateSlaveMessage> updateSlaveMessage =
    FUTURE_PROTOBUF(UpdateSlaveMessage(), _, _);

  auto detector = std::make_shared<StandaloneMasterDetector>(master.get()->pid);

  mesos::internal::slave::Flags slaveFlags = CreateSlaveFlags();

  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), slaveFlags);
  ASSERT_SOME(slave);

  // Advance the clock to trigger agent registration.
  Clock::advance(slaveFlags.registration_backoff_factor);

  // Wait for the agent to register.
  AWAIT_READY(updateSlaveMessage);

  // Start and register a resource provider.

  ResourceProviderInfo resourceProviderInfo;
  resourceProviderInfo.set_type("org.apache.mesos.rp.test");
  resourceProviderInfo.set_name("test");

  Resource disk = createDiskResource(
      "200", "*", None(), None(), createDiskSourceRaw(None(), "profile"));

  Owned<MockResourceProvider> resourceProvider(
      new MockResourceProvider(
          resourceProviderInfo,
          Resources(disk)));

  // We override the mock resource provider's default action, so the operation
  // will stay in `OPERATION_PENDING`.
  Future<resource_provider::Event::ApplyOperation> applyOperation;
  EXPECT_CALL(*resourceProvider, applyOperation(_))
    .WillOnce(FutureArg<0>(&applyOperation));

  Owned<EndpointDetector> endpointDetector(
      mesos::internal::tests::resource_provider::createEndpointDetector(
          slave.get()->pid));

  updateSlaveMessage = FUTURE_PROTOBUF(UpdateSlaveMessage(), _, _);

  // NOTE: We need to resume the clock so that the resource provider can
  // fully register.
  Clock::resume();

  ContentType contentType = GetParam();

  resourceProvider->start(endpointDetector, contentType);

  // Wait until the agent's resources have been updated to include the
  // resource provider resources.
  AWAIT_READY(updateSlaveMessage);
  ASSERT_TRUE(updateSlaveMessage->has_resource_providers());
  ASSERT_EQ(1, updateSlaveMessage->resource_providers().providers_size());

  Clock::pause();

  // Start a v1 framework.
  auto scheduler = std::make_shared<MockHTTPScheduler>();

  FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;
  frameworkInfo.set_roles(0, DEFAULT_TEST_ROLE);

  EXPECT_CALL(*scheduler, connected(_))
    .WillOnce(scheduler::SendSubscribe(frameworkInfo));

  Future<scheduler::Event::Subscribed> subscribed;
  EXPECT_CALL(*scheduler, subscribed(_, _))
    .WillOnce(FutureArg<1>(&subscribed));

  // Ignore heartbeats.
  EXPECT_CALL(*scheduler, heartbeat(_))
    .WillRepeatedly(Return());

  // Decline offers that do not contain wanted resources.
  EXPECT_CALL(*scheduler, offers(_, _))
    .WillRepeatedly(scheduler::DeclineOffers());

  Future<scheduler::Event::Offers> offers;

  auto isRaw = [](const Resource& r) {
    return r.has_disk() &&
      r.disk().has_source() &&
      r.disk().source().type() == Resource::DiskInfo::Source::RAW;
  };

  EXPECT_CALL(*scheduler, offers(_, scheduler::OffersHaveAnyResource(
      std::bind(isRaw, lambda::_1))))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(scheduler::DeclineOffers()); // Decline successive offers.

  scheduler::TestMesos mesos(
      master.get()->pid, contentType, scheduler, detector);

  AWAIT_READY(subscribed);
  FrameworkID frameworkId(subscribed->framework_id());

  // NOTE: If the framework has not declined an unwanted offer yet when
  // the master updates the agent with the RAW disk resource, the new
  // allocation triggered by this update won't generate an allocatable
  // offer due to no CPU and memory resources. So here we first settle
  // the clock to ensure that the unwanted offer has been declined, then
  // advance the clock to trigger another allocation.
  Clock::settle();
  Clock::advance(masterFlags.allocation_interval);

  AWAIT_READY(offers);
  ASSERT_FALSE(offers->offers().empty());

  const Offer& offer = offers->offers(0);
  const AgentID& agentId = offer.agent_id();

  Option<Resource> source;
  Option<ResourceProviderID> resourceProviderId;
  foreach (const Resource& resource, offer.resources()) {
    if (isRaw(resource)) {
      source = resource;

      ASSERT_TRUE(resource.has_provider_id());
      resourceProviderId = resource.provider_id();

      break;
    }
  }

  ASSERT_SOME(source);
  ASSERT_SOME(resourceProviderId);

  OperationID operationId;
  operationId.set_value("operation");

  mesos.send(createCallAccept(
      frameworkId,
      offer,
      {CREATE_DISK(
           source.get(),
           Resource::DiskInfo::Source::MOUNT,
           None(),
           operationId)}));

  AWAIT_READY(applyOperation);

  // Simulate master failover.
  EXPECT_CALL(*scheduler, disconnected(_));

  detector->appoint(None());

  master->reset();
  master = StartMaster();
  ASSERT_SOME(master);

  // Settle the clock to ensure the master finishes recovering the registry.
  Clock::settle();

  Future<SlaveReregisteredMessage> slaveReregistered = FUTURE_PROTOBUF(
      SlaveReregisteredMessage(), master.get()->pid, slave.get()->pid);

  updateSlaveMessage = FUTURE_PROTOBUF(UpdateSlaveMessage(), _, _);

  EXPECT_CALL(*scheduler, connected(_))
    .WillOnce(scheduler::SendSubscribe(frameworkInfo, frameworkId));

  Future<scheduler::Event::Subscribed> frameworkResubscribed;
  EXPECT_CALL(*scheduler, subscribed(_, _))
    .WillOnce(FutureArg<1>(&frameworkResubscribed));

  // Simulate a new master detected event to the agent and the scheduler.
  detector->appoint(master.get()->pid);

  // Advance the clock, so that the agent re-registers.
  Clock::advance(slaveFlags.registration_backoff_factor);

  // Resume the clock to avoid deadlocks related to agent registration.
  // See MESOS-8828.
  Clock::resume();

  // Wait for the framework and agent to re-register.
  AWAIT_READY(slaveReregistered);
  AWAIT_READY(updateSlaveMessage);
  AWAIT_READY(frameworkResubscribed);

  Clock::pause();

  // Test explicit reconciliation
  {
    scheduler::Call::ReconcileOperations::Operation operation;
    operation.mutable_operation_id()->CopyFrom(operationId);
    operation.mutable_agent_id()->CopyFrom(agentId);

    const Future<scheduler::APIResult> result =
      mesos.call({createCallReconcileOperations(frameworkId, {operation})});

    AWAIT_READY(result);

    // The master should respond with '200 OK' and with a `scheduler::Response`.
    ASSERT_EQ(process::http::Status::OK, result->status_code());
    ASSERT_TRUE(result->has_response());

    const scheduler::Response response = result->response();
    ASSERT_EQ(scheduler::Response::RECONCILE_OPERATIONS, response.type());
    ASSERT_TRUE(response.has_reconcile_operations());

    const scheduler::Response::ReconcileOperations& reconcile =
      response.reconcile_operations();
    ASSERT_EQ(1, reconcile.operation_statuses_size());

    const OperationStatus& operationStatus = reconcile.operation_statuses(0);
    EXPECT_EQ(operationId, operationStatus.operation_id());
    EXPECT_EQ(OPERATION_PENDING, operationStatus.state());
    EXPECT_FALSE(operationStatus.has_uuid());
  }

  // Test implicit reconciliation
  {
    const Future<scheduler::APIResult> result =
      mesos.call({createCallReconcileOperations(frameworkId, {})});

    AWAIT_READY(result);

    // The master should respond with '200 OK' and with a `scheduler::Response`.
    ASSERT_EQ(process::http::Status::OK, result->status_code());
    ASSERT_TRUE(result->has_response());

    const scheduler::Response response = result->response();
    ASSERT_EQ(scheduler::Response::RECONCILE_OPERATIONS, response.type());
    ASSERT_TRUE(response.has_reconcile_operations());

    const scheduler::Response::ReconcileOperations& reconcile =
      response.reconcile_operations();
    ASSERT_EQ(1, reconcile.operation_statuses_size());

    const OperationStatus& operationStatus = reconcile.operation_statuses(0);
    EXPECT_EQ(operationId, operationStatus.operation_id());
    EXPECT_EQ(OPERATION_PENDING, operationStatus.state());
    EXPECT_FALSE(operationStatus.has_uuid());
  }
}
// This test verifies that the framework can launch a command task
// that specifies both container image and persistent volumes.
TEST_F(LinuxFilesystemIsolatorMesosTest,
       ROOT_ChangeRootFilesystemCommandExecutorPersistentVolume)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  string registry = path::join(sandbox.get(), "registry");
  AWAIT_READY(DockerArchive::create(registry, "test_image"));

  slave::Flags flags = CreateSlaveFlags();
  flags.resources = "cpus:2;mem:1024;disk(role1):1024";
  flags.isolation = "filesystem/linux,docker/runtime";
  flags.docker_registry = registry;
  flags.docker_store_dir = path::join(sandbox.get(), "store");
  flags.image_providers = "docker";

  Owned<MasterDetector> detector = master.get()->createDetector();

  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), flags);
  ASSERT_SOME(slave);

  MockScheduler sched;
  FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;
  frameworkInfo.set_roles(0, "role1");

  MesosSchedulerDriver driver(
      &sched,
      frameworkInfo,
      master.get()->pid,
      DEFAULT_CREDENTIAL);

  Future<FrameworkID> frameworkId;
  EXPECT_CALL(sched, registered(&driver, _, _))
    .WillOnce(FutureArg<1>(&frameworkId));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(frameworkId);

  AWAIT_READY(offers);
  ASSERT_FALSE(offers->empty());

  Offer offer = offers.get()[0];

  string dir1 = path::join(sandbox.get(), "dir1");
  ASSERT_SOME(os::mkdir(dir1));

  Resource persistentVolume = createPersistentVolume(
      Megabytes(64),
      "role1",
      "id1",
      "path1",
      None(),
      None(),
      frameworkInfo.principal());

  // We use the filter explicitly here so that the resources will not
  // be filtered for 5 seconds (the default).
  Filters filters;
  filters.set_refuse_seconds(0);

  TaskInfo task = createTask(
      offer.slave_id(),
      Resources::parse("cpus:1;mem:512").get() + persistentVolume,
      "echo abc > path1/file");

  task.mutable_container()->CopyFrom(createContainerInfo(
      "test_image",
      {createVolumeHostPath("/tmp", dir1, Volume::RW)}));

  // Create the persistent volumes and launch task via `acceptOffers`.
  driver.acceptOffers(
      {offer.id()},
      {CREATE(persistentVolume), LAUNCH({task})},
      filters);

  Future<TaskStatus> statusStarting;
  Future<TaskStatus> statusRunning;
  Future<TaskStatus> statusFinished;

  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&statusStarting))
    .WillOnce(FutureArg<1>(&statusRunning))
    .WillOnce(FutureArg<1>(&statusFinished));

  AWAIT_READY(statusStarting);
  EXPECT_EQ(TASK_STARTING, statusStarting->state());

  AWAIT_READY(statusRunning);
  EXPECT_EQ(TASK_RUNNING, statusRunning->state());

  AWAIT_READY(statusFinished);
  EXPECT_EQ(TASK_FINISHED, statusFinished->state());

  // NOTE: The command executor's id is the same as the task id.
  ExecutorID executorId;
  executorId.set_value(task.task_id().value());

  string directory = slave::paths::getExecutorLatestRunPath(
      flags.work_dir,
      offer.slave_id(),
      frameworkId.get(),
      executorId);

  EXPECT_FALSE(os::exists(path::join(directory, "path1")));

  string volumePath = slave::paths::getPersistentVolumePath(
      flags.work_dir,
      "role1",
      "id1");

  EXPECT_SOME_EQ("abc\n", os::read(path::join(volumePath, "file")));

  driver.stop();
  driver.join();
}
// Tests that the task fails when it attempts to write to a persistent volume
// mounted as read-only. Note that although we use a shared persistent volume,
// the behavior is the same for non-shared persistent volumes.
TEST_F(LinuxFilesystemIsolatorMesosTest,
       ROOT_WriteAccessSharedPersistentVolumeReadOnlyMode)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  string registry = path::join(sandbox.get(), "registry");
  AWAIT_READY(DockerArchive::create(registry, "test_image"));

  slave::Flags flags = CreateSlaveFlags();
  flags.resources = "cpus:2;mem:128;disk(role1):128";
  flags.isolation = "filesystem/linux,docker/runtime";
  flags.docker_registry = registry;
  flags.docker_store_dir = path::join(sandbox.get(), "store");
  flags.image_providers = "docker";

  Owned<MasterDetector> detector = master.get()->createDetector();

  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), flags);
  ASSERT_SOME(slave);

  MockScheduler sched;
  FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;
  frameworkInfo.set_roles(0, "role1");
  frameworkInfo.add_capabilities()->set_type(
      FrameworkInfo::Capability::SHARED_RESOURCES);

  MesosSchedulerDriver driver(
      &sched,
      frameworkInfo,
      master.get()->pid,
      DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  ASSERT_FALSE(offers->empty());

  // We create a shared volume which shall be used by the task to
  // write to that volume.
  Resource volume = createPersistentVolume(
      Megabytes(4),
      "role1",
      "id1",
      "volume_path",
      None(),
      None(),
      frameworkInfo.principal(),
      true); // Shared volume.

  // The task uses the shared volume as read-only.
  Resource roVolume = volume;
  roVolume.mutable_disk()->mutable_volume()->set_mode(Volume::RO);

  Resources taskResources =
    Resources::parse("cpus:1;mem:64;disk(role1):1").get() + roVolume;

  TaskInfo task = createTask(
      offers.get()[0].slave_id(),
      taskResources,
      "echo hello > volume_path/file");

  // The task fails to write to the volume since the task's resources
  // intends to use the volume as read-only.
  Future<TaskStatus> statusStarting;
  Future<TaskStatus> statusRunning;
  Future<TaskStatus> statusFailed;

  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&statusStarting))
    .WillOnce(FutureArg<1>(&statusRunning))
    .WillOnce(FutureArg<1>(&statusFailed));

  driver.acceptOffers(
      {offers.get()[0].id()},
      {CREATE(volume),
       LAUNCH({task})});

  AWAIT_READY(statusStarting);
  EXPECT_EQ(task.task_id(), statusStarting->task_id());
  EXPECT_EQ(TASK_STARTING, statusStarting->state());

  AWAIT_READY(statusRunning);
  EXPECT_EQ(task.task_id(), statusRunning->task_id());
  EXPECT_EQ(TASK_RUNNING, statusRunning->state());

  AWAIT_READY(statusFailed);
  EXPECT_EQ(task.task_id(), statusFailed->task_id());
  EXPECT_EQ(TASK_FAILED, statusFailed->state());

  driver.stop();
  driver.join();
}
// This test verifies that the volume usage accounting for sandboxes
// with bind-mounted volumes (while linux filesystem isolator is used)
// works correctly by creating a file within the volume the size of
// which exceeds the sandbox quota.
TEST_F(LinuxFilesystemIsolatorMesosTest,
       ROOT_VolumeUsageExceedsSandboxQuota)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  string registry = path::join(sandbox.get(), "registry");
  AWAIT_READY(DockerArchive::create(registry, "test_image"));

  slave::Flags flags = CreateSlaveFlags();
  flags.resources = "cpus:2;mem:128;disk(role1):128";
  flags.isolation = "disk/du,filesystem/linux,docker/runtime";
  flags.docker_registry = registry;
  flags.docker_store_dir = path::join(sandbox.get(), "store");
  flags.image_providers = "docker";

  // NOTE: We can't pause the clock because we need the reaper to reap
  // the 'du' subprocess.
  flags.container_disk_watch_interval = Milliseconds(1);
  flags.enforce_container_disk_quota = true;

  Owned<MasterDetector> detector = master.get()->createDetector();

  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), flags);
  ASSERT_SOME(slave);

  MockScheduler sched;
  FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;
  frameworkInfo.set_roles(0, "role1");

  MesosSchedulerDriver driver(
      &sched,
      frameworkInfo,
      master.get()->pid,
      DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  ASSERT_FALSE(offers->empty());

  // We request a sandbox (1MB) that is smaller than the persistent
  // volume (4MB) and attempt to create a file in that volume that is
  // twice the size of the sanbox (2MB).
  Resources volume = createPersistentVolume(
      Megabytes(4),
      "role1",
      "id1",
      "volume_path",
      None(),
      None(),
      frameworkInfo.principal());

  Resources taskResources =
      Resources::parse("cpus:1;mem:64;disk(role1):1").get() + volume;

  // We sleep to give quota enforcement (du) a chance to kick in.
  TaskInfo task = createTask(
      offers.get()[0].slave_id(),
      taskResources,
      "dd if=/dev/zero of=volume_path/file bs=1048576 count=2 && sleep 1");

  Future<TaskStatus> statusStarting;
  Future<TaskStatus> statusRunning;
  Future<TaskStatus> statusFinished;

  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&statusStarting))
    .WillOnce(FutureArg<1>(&statusRunning))
    .WillOnce(FutureArg<1>(&statusFinished));

  driver.acceptOffers(
      {offers.get()[0].id()},
      {CREATE(volume),
      LAUNCH({task})});

  AWAIT_READY(statusStarting);
  EXPECT_EQ(task.task_id(), statusStarting->task_id());
  EXPECT_EQ(TASK_STARTING, statusStarting->state());

  AWAIT_READY(statusRunning);
  EXPECT_EQ(task.task_id(), statusRunning->task_id());
  EXPECT_EQ(TASK_RUNNING, statusRunning->state());

  AWAIT_READY(statusFinished);
  EXPECT_EQ(task.task_id(), statusFinished->task_id());
  EXPECT_EQ(TASK_FINISHED, statusFinished->state());

  driver.stop();
  driver.join();
}
Esempio n. 6
0
// This test verifies that authorization based endpoint filtering
// works correctly on the /state endpoint.
// Both default users are allowed to view high level frameworks, but only
// one is allowed to view the tasks.
// After launching a single task per each framework, one for role "superhero"
// and the other for role "muggle", this test verifies that each of two
// default users can view resource allocations and resource reservations for
// corresponding allowed roles only.
TYPED_TEST(SlaveAuthorizerTest, FilterStateEndpoint)
{
  ACLs acls;

  const string roleSuperhero = "superhero";
  const string roleMuggle = "muggle";

  {
    // Default principal can see all frameworks.
    mesos::ACL::ViewFramework* acl = acls.add_view_frameworks();
    acl->mutable_principals()->add_values(DEFAULT_CREDENTIAL.principal());
    acl->mutable_users()->set_type(ACL::Entity::ANY);
  }

  {
    // Second default principal can see all frameworks.
    mesos::ACL::ViewFramework* acl = acls.add_view_frameworks();
    acl->mutable_principals()->add_values(DEFAULT_CREDENTIAL_2.principal());
    acl->mutable_users()->set_type(ACL::Entity::ANY);
  }

  {
    // No other principal can see frameworks running under any user.
    ACL::ViewFramework* acl = acls.add_view_frameworks();
    acl->mutable_principals()->set_type(ACL::Entity::ANY);
    acl->mutable_users()->set_type(ACL::Entity::NONE);
  }

  {
    // Default principal can see all executors.
    mesos::ACL::ViewExecutor* acl = acls.add_view_executors();
    acl->mutable_principals()->add_values(DEFAULT_CREDENTIAL.principal());
    acl->mutable_users()->set_type(ACL::Entity::ANY);
  }

  {
    // No other principal can see executors running under any user.
    ACL::ViewExecutor* acl = acls.add_view_executors();
    acl->mutable_principals()->set_type(ACL::Entity::ANY);
    acl->mutable_users()->set_type(ACL::Entity::NONE);
  }

  {
    // Default principal can see all tasks.
    mesos::ACL::ViewTask* acl = acls.add_view_tasks();
    acl->mutable_principals()->add_values(DEFAULT_CREDENTIAL.principal());
    acl->mutable_users()->set_type(ACL::Entity::ANY);
  }

  {
    // No other principal can see tasks running under any user.
    ACL::ViewTask* acl = acls.add_view_tasks();
    acl->mutable_principals()->set_type(ACL::Entity::ANY);
    acl->mutable_users()->set_type(ACL::Entity::NONE);
  }

  {
    // Default principal can view "superhero" role only.
    ACL::ViewRole* acl = acls.add_view_roles();
    acl->mutable_principals()->add_values(DEFAULT_CREDENTIAL.principal());
    acl->mutable_roles()->add_values(roleSuperhero);

    acl = acls.add_view_roles();
    acl->mutable_principals()->add_values(DEFAULT_CREDENTIAL.principal());
    acl->mutable_roles()->set_type(mesos::ACL::Entity::NONE);
  }

  {
    // Second default principal can view "muggle" role only.
    ACL::ViewRole* acl = acls.add_view_roles();
    acl->mutable_principals()->add_values(DEFAULT_CREDENTIAL_2.principal());
    acl->mutable_roles()->add_values(roleMuggle);

    acl = acls.add_view_roles();
    acl->mutable_principals()->add_values(DEFAULT_CREDENTIAL_2.principal());
    acl->mutable_roles()->set_type(mesos::ACL::Entity::NONE);
  }

  // Create an `Authorizer` with the ACLs.
  Try<Authorizer*> create = TypeParam::create(parameterize(acls));
  ASSERT_SOME(create);
  Owned<Authorizer> authorizer(create.get());

  Try<Owned<cluster::Master>> master = this->StartMaster(authorizer.get());
  ASSERT_SOME(master);

  // Register framework with user "bar" and role "superhero".
  FrameworkInfo frameworkSuperhero = DEFAULT_FRAMEWORK_INFO;
  frameworkSuperhero.set_name("framework-" + roleSuperhero);
  frameworkSuperhero.set_roles(0, roleSuperhero);
  frameworkSuperhero.set_user("bar");

  // Create an executor with user "bar".
  ExecutorInfo executorSuperhero =
    createExecutorInfo("test-executor-" + roleSuperhero, "sleep 2");
  executorSuperhero.mutable_command()->set_user("bar");
  MockExecutor execSuperhero(executorSuperhero.executor_id());

  // Register framework with user "foo" and role "muggle".
  FrameworkInfo frameworkMuggle = DEFAULT_FRAMEWORK_INFO;
  frameworkMuggle.set_name("framework-" + roleMuggle);
  frameworkMuggle.set_principal(DEFAULT_CREDENTIAL_2.principal());
  frameworkMuggle.set_roles(0, roleMuggle);
  frameworkMuggle.set_user("foo");

  // Create an executor with user "foo".
  ExecutorInfo executorMuggle =
    createExecutorInfo("test-executor-" + roleMuggle, "sleep 2");
  executorMuggle.mutable_command()->set_user("foo");
  MockExecutor execMuggle(executorMuggle.executor_id());

  TestContainerizer containerizer(
      {{executorSuperhero.executor_id(), &execSuperhero},
       {executorMuggle.executor_id(), &execMuggle}});

  slave::Flags flags = this->CreateSlaveFlags();
  // Statically reserve resources for each role.
  flags.resources = "cpus(" + roleSuperhero + "):2;" + "cpus(" + roleMuggle +
    "):3;mem(" + roleSuperhero + "):512;" + "mem(" + roleMuggle + "):1024;";

  Owned<MasterDetector> detector = master.get()->createDetector();
  Try<Owned<cluster::Slave>> slave = this->StartSlave(
      detector.get(), &containerizer, authorizer.get(), flags);

  ASSERT_SOME(slave);

  MockScheduler schedSuperhero;
  MesosSchedulerDriver driverSuperhero(
      &schedSuperhero,
      frameworkSuperhero,
      master.get()->pid,
      DEFAULT_CREDENTIAL);

  EXPECT_CALL(execSuperhero, registered(_, _, _, _))
    .Times(AtMost(1));

  Future<FrameworkID> frameworkIdSuperhero;
  EXPECT_CALL(schedSuperhero, registered(&driverSuperhero, _, _))
    .WillOnce(FutureArg<1>(&frameworkIdSuperhero));

  Future<vector<Offer>> offersSuperhero;
  EXPECT_CALL(schedSuperhero, resourceOffers(&driverSuperhero, _))
    .WillOnce(FutureArg<1>(&offersSuperhero))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driverSuperhero.start();

  AWAIT_READY(frameworkIdSuperhero);

  AWAIT_READY(offersSuperhero);
  ASSERT_FALSE(offersSuperhero->empty());

  // Define a task which will run on executorSuperhero of frameworkSuperhero.
  TaskInfo taskSuperhero;
  taskSuperhero.set_name("test-" + roleSuperhero);
  taskSuperhero.mutable_task_id()->set_value("1");
  taskSuperhero.mutable_slave_id()->MergeFrom(
      offersSuperhero.get()[0].slave_id());
  taskSuperhero.mutable_resources()->MergeFrom(
      offersSuperhero.get()[0].resources());
  taskSuperhero.mutable_executor()->MergeFrom(executorSuperhero);

  EXPECT_CALL(execSuperhero, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING))
    .WillRepeatedly(Return());

  Future<TaskStatus> statusSuperhero;
  EXPECT_CALL(schedSuperhero, statusUpdate(&driverSuperhero, _))
    .WillOnce(FutureArg<1>(&statusSuperhero));

  driverSuperhero.launchTasks(offersSuperhero.get()[0].id(), {taskSuperhero});

  AWAIT_READY(statusSuperhero);
  EXPECT_EQ(TASK_RUNNING, statusSuperhero->state());

  MockScheduler schedMuggle;
  MesosSchedulerDriver driverMuggle(
      &schedMuggle,
      frameworkMuggle,
      master.get()->pid,
      DEFAULT_CREDENTIAL_2);

  EXPECT_CALL(execMuggle, registered(_, _, _, _))
    .Times(AtMost(1));

  Future<FrameworkID> frameworkIdMuggle;
  EXPECT_CALL(schedMuggle, registered(&driverMuggle, _, _))
    .WillOnce(FutureArg<1>(&frameworkIdMuggle));

  Future<vector<Offer>> offersMuggle;
  EXPECT_CALL(schedMuggle, resourceOffers(&driverMuggle, _))
    .WillOnce(FutureArg<1>(&offersMuggle))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driverMuggle.start();

  AWAIT_READY(frameworkIdMuggle);

  AWAIT_READY(offersMuggle);
  ASSERT_FALSE(offersMuggle->empty());

  // Define a task which will run on executorMuggle of frameworkMuggle.
  TaskInfo taskMuggle;
  taskMuggle.set_name("test-" + roleMuggle);
  taskMuggle.mutable_task_id()->set_value("2");
  taskMuggle.mutable_slave_id()->MergeFrom(
      offersMuggle.get()[0].slave_id());
  taskMuggle.mutable_resources()->MergeFrom(
      offersMuggle.get()[0].resources());
  taskMuggle.mutable_executor()->MergeFrom(executorMuggle);

  EXPECT_CALL(execMuggle, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING))
    .WillRepeatedly(Return());

  Future<TaskStatus> statusMuggle;
  EXPECT_CALL(schedMuggle, statusUpdate(&driverMuggle, _))
    .WillOnce(FutureArg<1>(&statusMuggle));

  driverMuggle.launchTasks(offersMuggle.get()[0].id(), {taskMuggle});

  AWAIT_READY(statusMuggle);
  ASSERT_EQ(TASK_RUNNING, statusMuggle->state());

  // Retrieve endpoint with the user allowed to view the frameworks.
  // The default user allowed to view role "superhero" only.
  {
    Future<Response> response = http::get(
        slave.get()->pid,
        "state",
        None(),
        createBasicAuthHeaders(DEFAULT_CREDENTIAL));

    AWAIT_EXPECT_RESPONSE_STATUS_EQ(OK().status, response);

    Try<JSON::Object> parse = JSON::parse<JSON::Object>(response->body);
    ASSERT_SOME(parse);

    JSON::Object state = parse.get();

    ASSERT_TRUE(state.values["frameworks"].is<JSON::Array>());

    JSON::Array frameworks = state.values["frameworks"].as<JSON::Array>();
    EXPECT_EQ(2u, frameworks.values.size());

    foreach (const JSON::Value& value, frameworks.values) {
      JSON::Object framework = value.as<JSON::Object>();
      EXPECT_FALSE(framework.values.empty());
      ASSERT_TRUE(framework.values["executors"].is<JSON::Array>());

      JSON::Array executors = framework.values["executors"].as<JSON::Array>();
      EXPECT_EQ(1u, executors.values.size());

      JSON::Object executor = executors.values.front().as<JSON::Object>();
      EXPECT_EQ(1u, executor.values["tasks"].as<JSON::Array>().values.size());
    }

    ASSERT_TRUE(state.values["reserved_resources"].is<JSON::Object>());

    JSON::Object reserved_resources =
      state.values["reserved_resources"].as<JSON::Object>();
    EXPECT_TRUE(reserved_resources.values[roleSuperhero].is<JSON::Object>());
    EXPECT_FALSE(reserved_resources.values[roleMuggle].is<JSON::Object>());

    ASSERT_TRUE(
        state.values["reserved_resources_allocated"].is<JSON::Object>());

    JSON::Object reserved_resources_allocated =
      state.values["reserved_resources_allocated"].as<JSON::Object>();
    EXPECT_TRUE(
        reserved_resources_allocated.values[roleSuperhero].is<JSON::Object>());
    EXPECT_FALSE(
        reserved_resources_allocated.values[roleMuggle].is<JSON::Object>());

    ASSERT_TRUE(state.values["reserved_resources_full"].is<JSON::Object>());

    JSON::Object reserved_resources_full =
      state.values["reserved_resources_full"].as<JSON::Object>();
    EXPECT_TRUE(
        reserved_resources_full.values[roleSuperhero].is<JSON::Array>());
    EXPECT_FALSE(
        reserved_resources_full.values[roleMuggle].is<JSON::Array>());
  }

  // Retrieve endpoint with the user allowed to view the frameworks,
  // but not the executors.
  // The second default user allowed to view role "muggle" only.
  {
    Future<Response> response = http::get(
        slave.get()->pid,
        "state",
        None(),
        createBasicAuthHeaders(DEFAULT_CREDENTIAL_2));

    AWAIT_EXPECT_RESPONSE_STATUS_EQ(OK().status, response);

    Try<JSON::Object> parse = JSON::parse<JSON::Object>(response->body);
    ASSERT_SOME(parse);

    JSON::Object state = parse.get();
    ASSERT_TRUE(state.values["frameworks"].is<JSON::Array>());

    JSON::Array frameworks = state.values["frameworks"].as<JSON::Array>();
    EXPECT_EQ(2u, frameworks.values.size());

    foreach (const JSON::Value& value, frameworks.values) {
      JSON::Object framework = value.as<JSON::Object>();
      EXPECT_FALSE(framework.values.empty());
      EXPECT_TRUE(
          framework.values["executors"].as<JSON::Array>().values.empty());
    }

    ASSERT_TRUE(state.values["reserved_resources"].is<JSON::Object>());

    JSON::Object reserved_resources =
      state.values["reserved_resources"].as<JSON::Object>();
    EXPECT_TRUE(reserved_resources.values[roleMuggle].is<JSON::Object>());
    EXPECT_FALSE(reserved_resources.values[roleSuperhero].is<JSON::Object>());

    ASSERT_TRUE(
        state.values["reserved_resources_allocated"].is<JSON::Object>());

    JSON::Object reserved_resources_allocated =
      state.values["reserved_resources_allocated"].as<JSON::Object>();
    EXPECT_TRUE(
        reserved_resources_allocated.values[roleMuggle].is<JSON::Object>());
    EXPECT_FALSE(
        reserved_resources_allocated.values[roleSuperhero].is<JSON::Object>());

    ASSERT_TRUE(state.values["reserved_resources_full"].is<JSON::Object>());

    JSON::Object reserved_resources_full =
      state.values["reserved_resources_full"].as<JSON::Object>();
    EXPECT_TRUE(
        reserved_resources_full.values[roleMuggle].is<JSON::Array>());
    EXPECT_FALSE(
        reserved_resources_full.values[roleSuperhero].is<JSON::Array>());
  }

  EXPECT_CALL(execSuperhero, shutdown(_))
    .Times(AtMost(1));

  EXPECT_CALL(execMuggle, shutdown(_))
    .Times(AtMost(1));

  driverSuperhero.stop();
  driverSuperhero.join();

  driverMuggle.stop();
  driverMuggle.join();
}
// This test verifies that the master reconciles operations that are missing
// from a reregistering slave. In this case, we drop the ApplyOperationMessage
// and expect the master to send a ReconcileOperationsMessage after the slave
// reregisters.
TEST_F(MasterSlaveReconciliationTest, ReconcileDroppedOperation)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  StandaloneMasterDetector detector(master.get()->pid);

  Future<UpdateSlaveMessage> updateSlaveMessage =
    FUTURE_PROTOBUF(UpdateSlaveMessage(), _, _);

  Try<Owned<cluster::Slave>> slave = StartSlave(&detector);
  ASSERT_SOME(slave);

  // Since any out-of-sync operation state in `UpdateSlaveMessage` triggers a
  // reconciliation, await the message from the initial agent registration
  // sequence beforce continuing. Otherwise we risk the master reconciling with
  // the agent before we fail over the master.
  AWAIT_READY(updateSlaveMessage);

  // Register the framework in a non-`*` role so it can reserve resources.
  FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;
  frameworkInfo.set_roles(0, DEFAULT_TEST_ROLE);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, frameworkInfo, master.get()->pid, DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);

  // We prevent the operation from reaching the agent.
  Future<ApplyOperationMessage> applyOperationMessage =
    DROP_PROTOBUF(ApplyOperationMessage(), _, _);

  // Perform a reserve operation on the offered resources.
  // This will trigger an `ApplyOperationMessage`.
  ASSERT_FALSE(offers->empty());
  const Offer& offer = offers->at(0);

  Resources reservedResources = offer.resources();
  reservedResources =
    reservedResources.pushReservation(createDynamicReservationInfo(
        frameworkInfo.roles(0), frameworkInfo.principal()));

  driver.acceptOffers({offer.id()}, {RESERVE(reservedResources)});

  AWAIT_READY(applyOperationMessage);

  // We expect the master to detect the missing operation when the
  // slave reregisters and to reconcile the operations on that slave.
  Future<ReconcileOperationsMessage> reconcileOperationsMessage =
    FUTURE_PROTOBUF(ReconcileOperationsMessage(), _, _);

  // Simulate a master failover to trigger slave reregistration.
  detector.appoint(master.get()->pid);

  AWAIT_READY(reconcileOperationsMessage);

  ASSERT_EQ(1, reconcileOperationsMessage->operations_size());
  EXPECT_EQ(
      applyOperationMessage->operation_uuid(),
      reconcileOperationsMessage->operations(0).operation_uuid());
}