Ejemplo n.º 1
0
// Verifies that a scheduler is told about a lost slave (via the
// `slaveLost` callback) when the slave is partitioned from the master.
TEST_F(PartitionTest, PartitionedSlave)
{
  master::Flags flags = CreateMasterFlags();
  Try<Owned<cluster::Master>> master = StartMaster(flags);
  ASSERT_SOME(master);

  // Install the PING expectation before the slave is spawned;
  // otherwise the very first PING could slip by unobserved.
  Future<Message> nextPing =
    FUTURE_MESSAGE(Eq(PingSlaveMessage().GetTypeName()), _, _);

  // The partition is simulated by dropping every PONG.
  DROP_PROTOBUFS(PongSlaveMessage(), _, _);

  Owned<MasterDetector> detector = master.get()->createDetector();
  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get());
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _));

  Future<Nothing> firstOffer;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureSatisfy(&firstOffer))
    .WillRepeatedly(Return()); // Any later offers are ignored.

  driver.start();

  // Both the framework and the slave must be registered with the
  // master before continuing; waiting for an offer should cover both.
  AWAIT_READY(firstOffer);

  Clock::pause();

  EXPECT_CALL(sched, offerRescinded(&driver, _))
    .Times(AtMost(1));

  Future<Nothing> lost;
  EXPECT_CALL(sched, slaveLost(&driver, _))
    .WillOnce(FutureSatisfy(&lost));

  // Step through `max_slave_ping_timeouts` PINGs, advancing the clock
  // by one ping timeout after each PING except the last one.
  for (size_t observed = 1; ; ++observed) {
    AWAIT_READY(nextPing);

    if (observed == flags.max_slave_ping_timeouts) {
      break;
    }

    nextPing = FUTURE_MESSAGE(Eq(PingSlaveMessage().GetTypeName()), _, _);
    Clock::advance(flags.slave_ping_timeout);
  }

  // Let the timeout of the last PING elapse as well.
  Clock::advance(flags.slave_ping_timeout);

  AWAIT_READY(lost);

  slave.get()->terminate();
  slave->reset();

  // Exactly one slave removal is expected, attributed to the
  // "unhealthy" reason.
  JSON::Object stats = Metrics();
  EXPECT_EQ(1, stats.values["master/slave_removals"]);
  EXPECT_EQ(1, stats.values["master/slave_removals/reason_unhealthy"]);

  driver.stop();
  driver.join();

  Clock::resume();
}
Ejemplo n.º 2
0
// Checks that the scheduler driver handles a SUBSCRIBED event, both
// for the initial registration and again after a master failover.
TEST_F(SchedulerDriverEventTest, SubscribedMasterFailover)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  FrameworkInfo info = DEFAULT_FRAMEWORK_INFO;
  info.set_failover_timeout(Weeks(2).secs());

  // The initial registration is expected to invoke 'registered'.
  MockScheduler sched;
  StandaloneMasterDetector detector(master.get()->pid);
  TestingMesosSchedulerDriver driver(&sched, &detector, info);

  // Drop the registration message; a SUBSCRIBED event is posted to
  // the driver in its place below.
  Future<Message> droppedRegistration =
    DROP_MESSAGE(Eq(FrameworkRegisteredMessage().GetTypeName()), _, _);

  // Pause the clock so that the scheduler driver performs no
  // (re-)registration retries.
  Clock::pause();

  driver.start();

  AWAIT_READY(droppedRegistration);
  UPID schedulerPid = droppedRegistration.get().to;

  FrameworkRegisteredMessage registration;
  ASSERT_TRUE(registration.ParseFromString(droppedRegistration.get().body));

  FrameworkID frameworkId = registration.framework_id();
  info.mutable_id()->CopyFrom(frameworkId);

  // Build the SUBSCRIBED event carrying the framework ID taken from
  // the dropped message.
  Event subscribedEvent;
  subscribedEvent.set_type(Event::SUBSCRIBED);
  subscribedEvent.mutable_subscribed()->mutable_framework_id()->CopyFrom(
      frameworkId);

  Future<Nothing> registeredCalled;
  EXPECT_CALL(sched, registered(&driver, frameworkId, _))
    .WillOnce(FutureSatisfy(&registeredCalled));

  process::post(master.get()->pid, schedulerPid, subscribedEvent);

  AWAIT_READY(registeredCalled);

  EXPECT_CALL(sched, disconnected(&driver));

  // Now fail the master over; a 'reregistered' call is expected.
  // Note that the master sends a registered message in this
  // situation (see MESOS-786).
  master->reset();
  master = StartMaster();
  ASSERT_SOME(master);

  Future<Message> droppedAfterFailover =
    DROP_MESSAGE(Eq(FrameworkRegisteredMessage().GetTypeName()), _, _);

  detector.appoint(master.get()->pid);

  AWAIT_READY(droppedAfterFailover);

  Future<Nothing> reregisteredCalled;
  EXPECT_CALL(sched, reregistered(&driver, _))
    .WillOnce(FutureSatisfy(&reregisteredCalled));

  // The very same SUBSCRIBED event is posted a second time.
  process::post(master.get()->pid, schedulerPid, subscribedEvent);

  AWAIT_READY(reregisteredCalled);

  driver.stop();
  driver.join();
}
Ejemplo n.º 3
0
// Verifies that the provisioner is able to recover the rootfses of
// child containers whose parent container has no image specified.
TEST_F(ProvisionerAppcTest, RecoverNestedContainerNoParentImage)
{
  slave::Flags flags;
  flags.image_providers = "APPC";
  flags.appc_store_dir = path::join(os::getcwd(), "store");
  flags.image_provisioner_backend = COPY_BACKEND;
  flags.work_dir = path::join(sandbox.get(), "work_dir");

  Try<Owned<Provisioner>> provisioner = Provisioner::create(flags);
  ASSERT_SOME(provisioner);

  Try<string> testImage = createTestImage(flags.appc_store_dir, getManifest());
  ASSERT_SOME(testImage);

  // The image placed in the store gets loaded during this recovery.
  AWAIT_READY(provisioner.get()->recover({}));

  Image image;
  image.mutable_appc()->CopyFrom(getTestImage());

  ContainerID parentId;
  parentId.set_value(UUID::random().toString());

  ContainerID childId;
  childId.set_value(UUID::random().toString());
  childId.mutable_parent()->CopyFrom(parentId);

  // Only the child is provisioned; nothing is provisioned for the parent.
  AWAIT_READY(provisioner.get()->provision(childId, image));

  provisioner->reset();

  // A new provisioner has to recover the state from the container.
  provisioner = Provisioner::create(flags);
  ASSERT_SOME(provisioner);

  AWAIT_READY(provisioner.get()->recover({parentId, childId}));
  AWAIT_READY(provisioner.get()->provision(childId, image));

  const string provisionerDir = slave::paths::getProvisionerDir(flags.work_dir);

  // Destroy the child first and check that its directory is gone.
  const string childDir =
    slave::provisioner::paths::getContainerDir(provisionerDir, childId);

  Future<bool> childDestroyed = provisioner.get()->destroy(childId);
  AWAIT_READY(childDestroyed);
  EXPECT_TRUE(childDestroyed.get());
  EXPECT_FALSE(os::exists(childDir));

  // Then repeat the same checks for the parent.
  const string parentDir =
    slave::provisioner::paths::getContainerDir(provisionerDir, parentId);

  Future<bool> parentDestroyed = provisioner.get()->destroy(parentId);
  AWAIT_READY(parentDestroyed);
  EXPECT_TRUE(parentDestroyed.get());
  EXPECT_FALSE(os::exists(parentDir));
}
// This test verifies that, after a master failover, reconciliation of an
// operation that is still pending on an agent results in `OPERATION_PENDING`.
//
// Outline: an agent with a mock resource provider registers, a v1 framework
// issues a `CREATE_DISK` operation that the mock never completes, the master
// is restarted, and both explicit and implicit operation reconciliation are
// then checked against the new master.
TEST_P(OperationReconciliationTest, AgentPendingOperationAfterMasterFailover)
{
  Clock::pause();

  mesos::internal::master::Flags masterFlags = CreateMasterFlags();
  Try<Owned<cluster::Master>> master = StartMaster(masterFlags);
  ASSERT_SOME(master);

  // Captures the first `UpdateSlaveMessage`; it is awaited below as the
  // signal that the agent has registered.
  Future<UpdateSlaveMessage> updateSlaveMessage =
    FUTURE_PROTOBUF(UpdateSlaveMessage(), _, _);

  // The detector is handed to both the agent and the scheduler library
  // (`scheduler::TestMesos` below), so a single `appoint()` call reaches
  // both of them.
  auto detector = std::make_shared<StandaloneMasterDetector>(master.get()->pid);

  mesos::internal::slave::Flags slaveFlags = CreateSlaveFlags();

  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), slaveFlags);
  ASSERT_SOME(slave);

  // Advance the clock to trigger agent registration.
  Clock::advance(slaveFlags.registration_backoff_factor);

  // Wait for the agent to register.
  AWAIT_READY(updateSlaveMessage);

  // Start and register a resource provider.

  ResourceProviderInfo resourceProviderInfo;
  resourceProviderInfo.set_type("org.apache.mesos.rp.test");
  resourceProviderInfo.set_name("test");

  Resource disk = createDiskResource(
      "200", "*", None(), None(), createDiskSourceRaw(None(), "profile"));

  Owned<MockResourceProvider> resourceProvider(
      new MockResourceProvider(
          resourceProviderInfo,
          Resources(disk)));

  // We override the mock resource provider's default action, so the operation
  // will stay in `OPERATION_PENDING`.
  Future<resource_provider::Event::ApplyOperation> applyOperation;
  EXPECT_CALL(*resourceProvider, applyOperation(_))
    .WillOnce(FutureArg<0>(&applyOperation));

  Owned<EndpointDetector> endpointDetector(
      mesos::internal::tests::resource_provider::createEndpointDetector(
          slave.get()->pid));

  // Re-arm the future for the update that follows the resource provider's
  // registration.
  updateSlaveMessage = FUTURE_PROTOBUF(UpdateSlaveMessage(), _, _);

  // NOTE: We need to resume the clock so that the resource provider can
  // fully register.
  Clock::resume();

  // The content type is the test parameter; it is used for both the
  // resource provider and the scheduler.
  ContentType contentType = GetParam();

  resourceProvider->start(endpointDetector, contentType);

  // Wait until the agent's resources have been updated to include the
  // resource provider resources.
  AWAIT_READY(updateSlaveMessage);
  ASSERT_TRUE(updateSlaveMessage->has_resource_providers());
  ASSERT_EQ(1, updateSlaveMessage->resource_providers().providers_size());

  Clock::pause();

  // Start a v1 framework.
  auto scheduler = std::make_shared<MockHTTPScheduler>();

  FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;
  frameworkInfo.set_roles(0, DEFAULT_TEST_ROLE);

  EXPECT_CALL(*scheduler, connected(_))
    .WillOnce(scheduler::SendSubscribe(frameworkInfo));

  Future<scheduler::Event::Subscribed> subscribed;
  EXPECT_CALL(*scheduler, subscribed(_, _))
    .WillOnce(FutureArg<1>(&subscribed));

  // Ignore heartbeats.
  EXPECT_CALL(*scheduler, heartbeat(_))
    .WillRepeatedly(Return());

  // Decline offers that do not contain wanted resources.
  EXPECT_CALL(*scheduler, offers(_, _))
    .WillRepeatedly(scheduler::DeclineOffers());

  Future<scheduler::Event::Offers> offers;

  // Predicate: true for resources whose disk source is of type RAW.
  auto isRaw = [](const Resource& r) {
    return r.has_disk() &&
      r.disk().has_source() &&
      r.disk().source().type() == Resource::DiskInfo::Source::RAW;
  };

  // Capture the first offer that contains a RAW disk resource. gMock tries
  // expectations from the most recently declared one backwards, so this more
  // specific expectation takes precedence over the catch-all above.
  EXPECT_CALL(*scheduler, offers(_, scheduler::OffersHaveAnyResource(
      std::bind(isRaw, lambda::_1))))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(scheduler::DeclineOffers()); // Decline successive offers.

  scheduler::TestMesos mesos(
      master.get()->pid, contentType, scheduler, detector);

  AWAIT_READY(subscribed);
  FrameworkID frameworkId(subscribed->framework_id());

  // NOTE: If the framework has not declined an unwanted offer yet when
  // the master updates the agent with the RAW disk resource, the new
  // allocation triggered by this update won't generate an allocatable
  // offer due to no CPU and memory resources. So here we first settle
  // the clock to ensure that the unwanted offer has been declined, then
  // advance the clock to trigger another allocation.
  Clock::settle();
  Clock::advance(masterFlags.allocation_interval);

  AWAIT_READY(offers);
  ASSERT_FALSE(offers->offers().empty());

  const Offer& offer = offers->offers(0);
  const AgentID& agentId = offer.agent_id();

  // Pick the RAW disk resource out of the offer, together with the ID of
  // the resource provider it carries.
  Option<Resource> source;
  Option<ResourceProviderID> resourceProviderId;
  foreach (const Resource& resource, offer.resources()) {
    if (isRaw(resource)) {
      source = resource;

      ASSERT_TRUE(resource.has_provider_id());
      resourceProviderId = resource.provider_id();

      break;
    }
  }

  ASSERT_SOME(source);
  ASSERT_SOME(resourceProviderId);

  // The operation ID is what the reconciliation calls below refer to.
  OperationID operationId;
  operationId.set_value("operation");

  // Accept the offer with a `CREATE_DISK` operation on the RAW disk, tagged
  // with `operationId`.
  mesos.send(createCallAccept(
      frameworkId,
      offer,
      {CREATE_DISK(
           source.get(),
           Resource::DiskInfo::Source::MOUNT,
           None(),
           operationId)}));

  // The operation has reached the resource provider. Since the mock's
  // action was overridden above, the operation is left pending.
  AWAIT_READY(applyOperation);

  // Simulate master failover.
  EXPECT_CALL(*scheduler, disconnected(_));

  // NOTE(review): appointing `None()` is presumably what makes the scheduler
  // observe the disconnection expected above — confirm against the detector.
  detector->appoint(None());

  master->reset();
  master = StartMaster();
  ASSERT_SOME(master);

  // Settle the clock to ensure the master finishes recovering the registry.
  Clock::settle();

  // Futures for the messages that signal the agent's re-registration with
  // the new master; both are awaited below.
  Future<SlaveReregisteredMessage> slaveReregistered = FUTURE_PROTOBUF(
      SlaveReregisteredMessage(), master.get()->pid, slave.get()->pid);

  updateSlaveMessage = FUTURE_PROTOBUF(UpdateSlaveMessage(), _, _);

  // On reconnecting, the scheduler subscribes again, this time passing the
  // framework ID obtained from the first subscription.
  EXPECT_CALL(*scheduler, connected(_))
    .WillOnce(scheduler::SendSubscribe(frameworkInfo, frameworkId));

  Future<scheduler::Event::Subscribed> frameworkResubscribed;
  EXPECT_CALL(*scheduler, subscribed(_, _))
    .WillOnce(FutureArg<1>(&frameworkResubscribed));

  // Simulate a new master detected event to the agent and the scheduler.
  detector->appoint(master.get()->pid);

  // Advance the clock, so that the agent re-registers.
  Clock::advance(slaveFlags.registration_backoff_factor);

  // Resume the clock to avoid deadlocks related to agent registration.
  // See MESOS-8828.
  Clock::resume();

  // Wait for the framework and agent to re-register.
  AWAIT_READY(slaveReregistered);
  AWAIT_READY(updateSlaveMessage);
  AWAIT_READY(frameworkResubscribed);

  Clock::pause();

  // Test explicit reconciliation
  {
    // Name the operation and the agent whose offer it was accepted on.
    scheduler::Call::ReconcileOperations::Operation operation;
    operation.mutable_operation_id()->CopyFrom(operationId);
    operation.mutable_agent_id()->CopyFrom(agentId);

    const Future<scheduler::APIResult> result =
      mesos.call({createCallReconcileOperations(frameworkId, {operation})});

    AWAIT_READY(result);

    // The master should respond with '200 OK' and with a `scheduler::Response`.
    ASSERT_EQ(process::http::Status::OK, result->status_code());
    ASSERT_TRUE(result->has_response());

    const scheduler::Response response = result->response();
    ASSERT_EQ(scheduler::Response::RECONCILE_OPERATIONS, response.type());
    ASSERT_TRUE(response.has_reconcile_operations());

    // Exactly one status is expected: the pending operation, with no
    // status UUID set.
    const scheduler::Response::ReconcileOperations& reconcile =
      response.reconcile_operations();
    ASSERT_EQ(1, reconcile.operation_statuses_size());

    const OperationStatus& operationStatus = reconcile.operation_statuses(0);
    EXPECT_EQ(operationId, operationStatus.operation_id());
    EXPECT_EQ(OPERATION_PENDING, operationStatus.state());
    EXPECT_FALSE(operationStatus.has_uuid());
  }

  // Test implicit reconciliation
  {
    // The call carries an empty operation list; the same single pending
    // status is expected in the response.
    const Future<scheduler::APIResult> result =
      mesos.call({createCallReconcileOperations(frameworkId, {})});

    AWAIT_READY(result);

    // The master should respond with '200 OK' and with a `scheduler::Response`.
    ASSERT_EQ(process::http::Status::OK, result->status_code());
    ASSERT_TRUE(result->has_response());

    const scheduler::Response response = result->response();
    ASSERT_EQ(scheduler::Response::RECONCILE_OPERATIONS, response.type());
    ASSERT_TRUE(response.has_reconcile_operations());

    const scheduler::Response::ReconcileOperations& reconcile =
      response.reconcile_operations();
    ASSERT_EQ(1, reconcile.operation_statuses_size());

    const OperationStatus& operationStatus = reconcile.operation_statuses(0);
    EXPECT_EQ(operationId, operationStatus.operation_id());
    EXPECT_EQ(OPERATION_PENDING, operationStatus.state());
    EXPECT_FALSE(operationStatus.has_uuid());
  }
}
Ejemplo n.º 5
0
// Verifies that a provisioner is able to recover a rootfs that was
// provisioned by a previous provisioner and to destroy it afterwards.
// The copy backend is used here, so Linux is not required.
TEST_F(ProvisionerAppcTest, Recover)
{
  // Set up the first provisioner.
  slave::Flags flags;
  flags.image_providers = "APPC";
  flags.appc_store_dir = path::join(os::getcwd(), "store");
  flags.image_provisioner_backend = COPY_BACKEND;
  flags.work_dir = path::join(sandbox.get(), "work_dir");

  Try<Owned<Provisioner>> provisioner = Provisioner::create(flags);
  ASSERT_SOME(provisioner);

  Try<string> testImage = createTestImage(flags.appc_store_dir, getManifest());
  ASSERT_SOME(testImage);

  // The image placed in the store gets loaded during this recovery.
  AWAIT_READY(provisioner.get()->recover({}));

  Image image;
  image.mutable_appc()->CopyFrom(getTestImage());

  ContainerID id;
  id.set_value(UUID::random().toString());

  Future<slave::ProvisionInfo> firstProvision =
    provisioner.get()->provision(id, image);
  AWAIT_READY(firstProvision);

  provisioner->reset();

  // A new provisioner has to recover the state from the container.
  provisioner = Provisioner::create(flags);
  ASSERT_SOME(provisioner);

  AWAIT_READY(provisioner.get()->recover({id}));

  // Provisioning the same image again is fine: the user may provision
  // two different rootfses from one image.
  AWAIT_READY(provisioner.get()->provision(id, image));

  const string provisionerDir = slave::paths::getProvisionerDir(flags.work_dir);

  const string containerDir =
    slave::provisioner::paths::getContainerDir(provisionerDir, id);

  Try<hashmap<string, hashset<string>>> rootfses =
    slave::provisioner::paths::listContainerRootfses(provisionerDir, id);

  ASSERT_SOME(rootfses);

  // Both provisioned rootfses are expected under the configured backend.
  ASSERT_TRUE(rootfses->contains(flags.image_provisioner_backend));
  EXPECT_EQ(2u, rootfses->get(flags.image_provisioner_backend)->size());

  Future<bool> destroyed = provisioner.get()->destroy(id);
  AWAIT_READY(destroyed);
  EXPECT_TRUE(destroyed.get());

  // Destroying the container must also remove its directory.
  EXPECT_FALSE(os::exists(containerDir));
}