Пример #1
0
TEST_F(ResourceOffersTest, Request)
{
  TestAllocator<master::allocator::HierarchicalDRFAllocator> allocator;

  EXPECT_CALL(allocator, initialize(_, _, _, _))
    .Times(1);

  Try<PID<Master>> master = StartMaster(&allocator);
  ASSERT_SOME(master);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL);

  EXPECT_CALL(allocator, addFramework(_, _, _))
    .Times(1);

  Future<Nothing> registered;
  EXPECT_CALL(sched, registered(&driver, _, _))
    .WillOnce(FutureSatisfy(&registered));

  driver.start();

  AWAIT_READY(registered);

  vector<Request> sent;
  Request request;
  request.mutable_slave_id()->set_value("test");
  sent.push_back(request);

  Future<vector<Request>> received;
  EXPECT_CALL(allocator, requestResources(_, _))
    .WillOnce(FutureArg<1>(&received));

  driver.requestResources(sent);

  AWAIT_READY(received);
  EXPECT_EQ(sent.size(), received.get().size());
  EXPECT_NE(0u, received.get().size());
  EXPECT_EQ(request.slave_id(), received.get()[0].slave_id());

  driver.stop();
  driver.join();

  Shutdown();
}
// Ensures that the driver can handle the SUBSCRIBED event.
TEST_F(SchedulerDriverEventTest, Subscribed)
{
  Try<PID<Master>> master = StartMaster();
  ASSERT_SOME(master);

  FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;
  frameworkInfo.set_failover_timeout(Weeks(2).secs());

  // Make sure the initial registration calls 'registered'.
  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, frameworkInfo, master.get(), DEFAULT_CREDENTIAL);

  // Intercept the registration message, send a SUBSCRIBED instead.
  Future<Message> frameworkRegisteredMessage =
    DROP_MESSAGE(Eq(FrameworkRegisteredMessage().GetTypeName()), _, _);

  // Ensure that there will be no (re-)registration retries
  // from the scheduler driver.
  Clock::pause();

  driver.start();

  AWAIT_READY(frameworkRegisteredMessage);
  UPID frameworkPid = frameworkRegisteredMessage.get().to;

  FrameworkRegisteredMessage message;
  ASSERT_TRUE(message.ParseFromString(frameworkRegisteredMessage.get().body));

  FrameworkID frameworkId = message.framework_id();
  frameworkInfo.mutable_id()->CopyFrom(frameworkId);

  Event event;
  event.set_type(Event::SUBSCRIBED);
  event.mutable_subscribed()->mutable_framework_id()->CopyFrom(frameworkId);

  Future<Nothing> registered;
  EXPECT_CALL(sched, registered(&driver, frameworkId, _))
    .WillOnce(FutureSatisfy(&registered));

  process::post(master.get(), frameworkPid, event);

  AWAIT_READY(registered);
}
Пример #3
0
// Ensures that the driver can handle the FAILURE event.
TEST_F(SchedulerDriverEventTest, Failure)
{
  Try<PID<Master>> master = StartMaster();
  ASSERT_SOME(master);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _));

  Future<Message> frameworkRegisteredMessage =
    FUTURE_MESSAGE(Eq(FrameworkRegisteredMessage().GetTypeName()), _, _);

  driver.start();

  AWAIT_READY(frameworkRegisteredMessage);
  UPID frameworkPid = frameworkRegisteredMessage.get().to;

  // Send a failure for an executor, this should be dropped
  // to match the existing behavior of the scheduler driver.
  SlaveID slaveId;
  slaveId.set_value("S");

  Event event;
  event.set_type(Event::FAILURE);
  event.mutable_failure()->mutable_slave_id()->CopyFrom(slaveId);
  event.mutable_failure()->mutable_executor_id()->set_value("E");

  process::post(master.get(), frameworkPid, event);

  // Now, post a failure for a slave and expect a 'slaveLost'.
  event.mutable_failure()->clear_executor_id();

  Future<Nothing> slaveLost;
  EXPECT_CALL(sched, slaveLost(&driver, slaveId))
    .WillOnce(FutureSatisfy(&slaveLost));

  process::post(master.get(), frameworkPid, event);

  AWAIT_READY(slaveLost);
}
Пример #4
0
// Ensures that the driver can handle the MESSAGE event.
TEST_F(SchedulerDriverEventTest, Message)
{
    Try<Owned<cluster::Master>> master = StartMaster();
    ASSERT_SOME(master);

    MockScheduler sched;
    MesosSchedulerDriver driver(
        &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL);

    EXPECT_CALL(sched, registered(&driver, _, _));

    Future<Message> frameworkRegisteredMessage =
        FUTURE_MESSAGE(Eq(FrameworkRegisteredMessage().GetTypeName()), _, _);

    driver.start();

    AWAIT_READY(frameworkRegisteredMessage);
    UPID frameworkPid = frameworkRegisteredMessage.get().to;

    Event event;
    event.set_type(Event::MESSAGE);
    event.mutable_message()->mutable_slave_id()->set_value("S");
    event.mutable_message()->mutable_executor_id()->set_value("E");
    event.mutable_message()->set_data("data");

    Future<Nothing> frameworkMessage;
    EXPECT_CALL(sched, frameworkMessage(
                    &driver,
                    event.message().executor_id(),
                    event.message().slave_id(),
                    event.message().data()))
    .WillOnce(FutureSatisfy(&frameworkMessage));

    process::post(master.get()->pid, frameworkPid, event);

    AWAIT_READY(frameworkMessage);

    driver.stop();
    driver.join();
}
Пример #5
0
TEST_F(MesosSchedulerDriverTest, MetricsEndpoint)
{
  Try<PID<Master>> master = StartMaster();
  ASSERT_SOME(master);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL);

  Future<Nothing> registered;
  EXPECT_CALL(sched, registered(&driver, _, _))
    .WillOnce(FutureSatisfy(&registered));

  ASSERT_EQ(DRIVER_RUNNING, driver.start());

  AWAIT_READY(registered);

  Future<process::http::Response> response =
    process::http::get(MetricsProcess::instance()->self(), "/snapshot");

  AWAIT_EXPECT_RESPONSE_STATUS_EQ(OK().status, response);
  AWAIT_EXPECT_RESPONSE_HEADER_EQ(APPLICATION_JSON, "Content-Type", response);

  Try<JSON::Object> parse = JSON::parse<JSON::Object>(response.get().body);

  ASSERT_SOME(parse);

  JSON::Object metrics = parse.get();

  EXPECT_EQ(1u, metrics.values.count("scheduler/event_queue_messages"));
  EXPECT_EQ(1u, metrics.values.count("scheduler/event_queue_dispatches"));

  driver.stop();
  driver.join();

  Shutdown();
}
// Ensures the scheduler driver can handle the UPDATE event.
TEST_F(SchedulerDriverEventTest, Update)
{
  Try<PID<Master>> master = StartMaster();
  ASSERT_SOME(master);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _));

  Future<Message> frameworkRegisteredMessage =
    FUTURE_MESSAGE(Eq(FrameworkRegisteredMessage().GetTypeName()), _, _);

  driver.start();

  AWAIT_READY(frameworkRegisteredMessage);
  UPID frameworkPid = frameworkRegisteredMessage.get().to;

  FrameworkRegisteredMessage message;
  ASSERT_TRUE(message.ParseFromString(frameworkRegisteredMessage.get().body));

  FrameworkID frameworkId = message.framework_id();

  SlaveID slaveId;
  slaveId.set_value("S");

  TaskID taskId;
  taskId.set_value("T");

  ExecutorID executorId;
  executorId.set_value("E");

  // Generate an update that needs no acknowledgement.
  Event event;
  event.set_type(Event::UPDATE);
  event.mutable_update()->mutable_status()->CopyFrom(
      protobuf::createStatusUpdate(
          frameworkId,
          slaveId,
          taskId,
          TASK_RUNNING,
          TaskStatus::SOURCE_MASTER,
          None(),
          "message",
          None(),
          executorId).status());

  Future<Nothing> statusUpdate;
  Future<Nothing> statusUpdate2;
  EXPECT_CALL(sched, statusUpdate(&driver, event.update().status()))
    .WillOnce(FutureSatisfy(&statusUpdate))
    .WillOnce(FutureSatisfy(&statusUpdate2));

  process::post(master.get(), frameworkPid, event);

  AWAIT_READY(statusUpdate);

  // Generate an update that requires acknowledgement.
  event.mutable_update()->mutable_status()->set_uuid(UUID::random().toBytes());

  Future<mesos::scheduler::Call> acknowledgement = DROP_CALL(
      mesos::scheduler::Call(), mesos::scheduler::Call::ACKNOWLEDGE, _, _);

  process::post(master.get(), frameworkPid, event);

  AWAIT_READY(statusUpdate2);
  AWAIT_READY(acknowledgement);
}
// Ensures that the driver can handle an OFFERS event.
// Note that this includes the ability to bypass the
// master when sending framework messages.
TEST_F(SchedulerDriverEventTest, Offers)
{
  Try<PID<Master>> master = StartMaster();
  ASSERT_SOME(master);

  MockScheduler sched;
  MesosSchedulerDriver schedDriver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&schedDriver, _, _));

  Future<Message> frameworkRegisteredMessage =
    FUTURE_MESSAGE(Eq(FrameworkRegisteredMessage().GetTypeName()), _, _);

  schedDriver.start();

  AWAIT_READY(frameworkRegisteredMessage);
  UPID frameworkPid = frameworkRegisteredMessage.get().to;

  // Start a slave and capture the offers.
  Future<ResourceOffersMessage> resourceOffersMessage =
    DROP_PROTOBUF(ResourceOffersMessage(), _, _);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);
  Try<PID<Slave>> slave = StartSlave(&exec);
  ASSERT_SOME(slave);

  AWAIT_READY(resourceOffersMessage);

  google::protobuf::RepeatedPtrField<Offer> offers =
    resourceOffersMessage.get().offers();

  ASSERT_EQ(1, offers.size());

  // Ignore future offer messages.
  DROP_PROTOBUFS(ResourceOffersMessage(), _, _);

  // Send the offers event and expect a 'resourceOffers' call.
  Event event;
  event.set_type(Event::OFFERS);
  event.mutable_offers()->mutable_offers()->CopyFrom(offers);

  Future<Nothing> resourceOffers;
  EXPECT_CALL(sched, resourceOffers(&schedDriver, _))
    .WillOnce(FutureSatisfy(&resourceOffers));

  process::post(master.get(), frameworkPid, event);

  AWAIT_READY(resourceOffers);

  // To test that the framework -> executor messages are
  // sent directly to the slave, launch a task and send
  // the executor a message.
  EXPECT_CALL(exec, registered(_, _, _, _));

  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING));

  Future<TaskStatus> status;
  EXPECT_CALL(sched, statusUpdate(&schedDriver, _))
    .WillOnce(FutureArg<1>(&status));

  TaskInfo task = createTask(offers.Get(0), "", DEFAULT_EXECUTOR_ID);

  schedDriver.launchTasks(offers.Get(0).id(), {task});

  AWAIT_READY(status);
  EXPECT_EQ(TASK_RUNNING, status.get().state());

  // This message should skip the master!
  Future<FrameworkToExecutorMessage> frameworkToExecutorMessage =
    FUTURE_PROTOBUF(FrameworkToExecutorMessage(), frameworkPid, slave.get());

  Future<string> data;
  EXPECT_CALL(exec, frameworkMessage(_, _))
    .WillOnce(FutureArg<1>(&data));

  schedDriver.sendFrameworkMessage(
      DEFAULT_EXECUTOR_ID, offers.Get(0).slave_id(), "hello");

  AWAIT_READY(frameworkToExecutorMessage);
  AWAIT_EXPECT_EQ("hello", data);

  EXPECT_CALL(exec, shutdown(_))
    .Times(AtMost(1));

  schedDriver.stop();
  schedDriver.join();

  Shutdown();
}
// This test verifies that a framework attempting to subscribe
// after its failover timeout has elapsed is disallowed.
TEST_F(HttpFaultToleranceTest, SchedulerSubscribeAfterFailoverTimeout)
{
  master::Flags flags = CreateMasterFlags();
  flags.authenticate_frameworks = false;

  v1::FrameworkInfo frameworkInfo = v1::DEFAULT_FRAMEWORK_INFO;
  frameworkInfo.set_failover_timeout(Weeks(2).secs());

  Try<Owned<cluster::Master>> master = StartMaster(flags);
  ASSERT_SOME(master);

  Future<Nothing> deactivateFramework = FUTURE_DISPATCH(
      _, &master::allocator::MesosAllocatorProcess::deactivateFramework);

  v1::FrameworkID frameworkId;

  ContentType contentType = ContentType::PROTOBUF;

  // Launch the first (i.e., failing) scheduler and wait until it receives
  // a `SUBSCRIBED` event to launch the second (i.e., failover) scheduler.
  {
    auto scheduler = std::make_shared<v1::MockHTTPScheduler>();

    Future<Nothing> connected;
    EXPECT_CALL(*scheduler, connected(_))
      .WillOnce(FutureSatisfy(&connected));

    v1::scheduler::TestMesos schedulerLibrary(
        master.get()->pid,
        contentType,
        scheduler);

    AWAIT_READY(connected);

    Future<Event::Subscribed> subscribed;
    EXPECT_CALL(*scheduler, subscribed(_, _))
      .WillOnce(FutureArg<1>(&subscribed));

    EXPECT_CALL(*scheduler, heartbeat(_))
      .WillRepeatedly(Return()); // Ignore heartbeats.

    {
      Call call;
      call.set_type(Call::SUBSCRIBE);
      Call::Subscribe* subscribe = call.mutable_subscribe();
      subscribe->mutable_framework_info()->CopyFrom(frameworkInfo);

      schedulerLibrary.send(call);
    }

    AWAIT_READY(subscribed);

    frameworkId = subscribed->framework_id();
  }

  // Wait until master schedules the framework for removal.
  AWAIT_READY(deactivateFramework);

  // Simulate framework failover timeout.
  Clock::pause();
  Clock::settle();

  Try<Duration> failoverTimeout =
    Duration::create(frameworkInfo.failover_timeout());

  ASSERT_SOME(failoverTimeout);

  Future<Nothing> frameworkFailoverTimeout =
    FUTURE_DISPATCH(_, &Master::frameworkFailoverTimeout);

  Clock::advance(failoverTimeout.get());
  Clock::resume();

  // Wait until master actually marks the framework as completed.
  AWAIT_READY(frameworkFailoverTimeout);

  // Now launch the second (i.e., failover) scheduler using the
  // framework id recorded from the first scheduler.
  {
    auto scheduler = std::make_shared<v1::MockHTTPScheduler>();

    Future<Nothing> connected;
    EXPECT_CALL(*scheduler, connected(_))
      .WillOnce(FutureSatisfy(&connected))
      .WillRepeatedly(Return()); // Ignore future invocations.

    v1::scheduler::TestMesos schedulerLibrary(
        master.get()->pid,
        contentType,
        scheduler);

    AWAIT_READY(connected);

    // Framework should get `Error` event because the framework with this id
    // is marked as completed.
    Future<Nothing> error;
    EXPECT_CALL(*scheduler, error(_, _))
      .WillOnce(FutureSatisfy(&error));

    EXPECT_CALL(*scheduler, disconnected(_))
      .Times(AtMost(1));

    {
      Call call;
      call.mutable_framework_id()->CopyFrom(frameworkId);
      call.set_type(Call::SUBSCRIBE);

      Call::Subscribe* subscribe = call.mutable_subscribe();
      subscribe->mutable_framework_info()->CopyFrom(v1::DEFAULT_FRAMEWORK_INFO);
      subscribe->mutable_framework_info()->mutable_id()->CopyFrom(frameworkId);

      schedulerLibrary.send(call);
    }

    AWAIT_READY(error);
  }
}
// This test verifies that the master reconciles tasks that are
// missing from a re-registering slave. In this case, we trigger
// a race between the slave re-registration message and the launch
// message. There should be no TASK_LOST.
// This was motivated by MESOS-1696.
TEST_F(MasterSlaveReconciliationTest, ReconcileRace)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);
  TestContainerizer containerizer(&exec);

  StandaloneMasterDetector detector(master.get()->pid);

  Future<SlaveRegisteredMessage> slaveRegisteredMessage =
    FUTURE_PROTOBUF(SlaveRegisteredMessage(), master.get()->pid, _);

  Try<Owned<cluster::Slave>> slave = StartSlave(&detector, &containerizer);
  ASSERT_SOME(slave);

  AWAIT_READY(slaveRegisteredMessage);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _));

  Future<vector<Offer> > offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  // Since the agent may have retried registration, we want to
  // ensure that any duplicate registrations are flushed before
  // we appoint the master again. Otherwise, the agent may
  // receive a stale registration message.
  Clock::pause();
  Clock::settle();
  Clock::resume();

  // Trigger a re-registration of the slave and capture the message
  // so that we can spoof a race with a launch task message.
  DROP_PROTOBUFS(ReregisterSlaveMessage(), slave.get()->pid, master.get()->pid);

  Future<ReregisterSlaveMessage> reregisterSlaveMessage =
    DROP_PROTOBUF(
        ReregisterSlaveMessage(),
        slave.get()->pid,
        master.get()->pid);

  detector.appoint(master.get()->pid);

  AWAIT_READY(reregisterSlaveMessage);

  AWAIT_READY(offers);
  EXPECT_NE(0u, offers.get().size());

  TaskInfo task;
  task.set_name("test task");
  task.mutable_task_id()->set_value("1");
  task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id());
  task.mutable_resources()->MergeFrom(offers.get()[0].resources());
  task.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO);

  ExecutorDriver* executorDriver;
  EXPECT_CALL(exec, registered(_, _, _, _))
    .WillOnce(SaveArg<0>(&executorDriver));

  // Leave the task in TASK_STAGING.
  Future<Nothing> launchTask;
  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(FutureSatisfy(&launchTask));

  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .Times(0);

  driver.launchTasks(offers.get()[0].id(), {task});

  AWAIT_READY(launchTask);

  // Send the stale re-registration message, which does not contain
  // the task we just launched. This will trigger a reconciliation
  // by the master.
  Future<SlaveReregisteredMessage> slaveReregisteredMessage =
    FUTURE_PROTOBUF(SlaveReregisteredMessage(), _, _);

  // Prevent this from being dropped per the DROP_PROTOBUFS above.
  FUTURE_PROTOBUF(
      ReregisterSlaveMessage(),
      slave.get()->pid,
      master.get()->pid);

  process::post(
      slave.get()->pid,
      master.get()->pid,
      reregisterSlaveMessage.get());

  AWAIT_READY(slaveReregisteredMessage);

  // Neither the master nor the slave should send a TASK_LOST
  // as part of the reconciliation. We check this by calling
  // Clock::settle() to flush all pending events.
  Clock::pause();
  Clock::settle();
  Clock::resume();

  // Now send TASK_FINISHED and make sure it's the only message
  // received by the scheduler.
  Future<TaskStatus> status;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&status));

  TaskStatus taskStatus;
  taskStatus.mutable_task_id()->CopyFrom(task.task_id());
  taskStatus.set_state(TASK_FINISHED);
  executorDriver->sendStatusUpdate(taskStatus);

  AWAIT_READY(status);
  ASSERT_EQ(TASK_FINISHED, status.get().state());

  EXPECT_CALL(exec, shutdown(_))
    .Times(AtMost(1));

  driver.stop();
  driver.join();
}
Пример #10
0
// This test checks that a scheduler gets a slave lost
// message for a partitioned slave.
TEST_F(PartitionTest, PartitionedSlave)
{
  master::Flags masterFlags = CreateMasterFlags();
  Try<Owned<cluster::Master>> master = StartMaster(masterFlags);
  ASSERT_SOME(master);

  // Set these expectations up before we spawn the slave so that we
  // don't miss the first PING.
  Future<Message> ping = FUTURE_MESSAGE(
      Eq(PingSlaveMessage().GetTypeName()), _, _);

  // Drop all the PONGs to simulate slave partition.
  DROP_PROTOBUFS(PongSlaveMessage(), _, _);

  Owned<MasterDetector> detector = master.get()->createDetector();
  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get());
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _));

  Future<Nothing> resourceOffers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureSatisfy(&resourceOffers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  // Need to make sure the framework AND slave have registered with
  // master. Waiting for resource offers should accomplish both.
  AWAIT_READY(resourceOffers);

  Clock::pause();

  EXPECT_CALL(sched, offerRescinded(&driver, _))
    .Times(AtMost(1));

  Future<Nothing> slaveLost;
  EXPECT_CALL(sched, slaveLost(&driver, _))
    .WillOnce(FutureSatisfy(&slaveLost));

  // Now advance through the PINGs.
  size_t pings = 0;
  while (true) {
    AWAIT_READY(ping);
    pings++;
    if (pings == masterFlags.max_slave_ping_timeouts) {
     break;
    }
    ping = FUTURE_MESSAGE(Eq(PingSlaveMessage().GetTypeName()), _, _);
    Clock::advance(masterFlags.slave_ping_timeout);
  }

  Clock::advance(masterFlags.slave_ping_timeout);

  AWAIT_READY(slaveLost);

  slave.get()->terminate();
  slave->reset();

  JSON::Object stats = Metrics();
  EXPECT_EQ(1, stats.values["master/slave_removals"]);
  EXPECT_EQ(1, stats.values["master/slave_removals/reason_unhealthy"]);

  driver.stop();
  driver.join();

  Clock::resume();
}
Пример #11
0
// The purpose of this test is to ensure that when slaves are removed
// from the master, and then attempt to send status updates, we send
// a ShutdownMessage to the slave. Why? Because during a network
// partition, the master will remove a partitioned slave, thus sending
// its tasks to LOST. At this point, when the partition is removed,
// the slave may attempt to send updates if it was unaware that the
// master removed it. We've already notified frameworks that these
// tasks were LOST, so we have to have the slave shut down.
TEST_F(PartitionTest, PartitionedSlaveStatusUpdates)
{
  master::Flags masterFlags = CreateMasterFlags();
  Try<Owned<cluster::Master>> master = StartMaster(masterFlags);
  ASSERT_SOME(master);

  // Allow the master to PING the slave, but drop all PONG messages
  // from the slave. Note that we don't match on the master / slave
  // PIDs because it's actually the SlaveObserver Process that sends
  // the pings.
  Future<Message> ping = FUTURE_MESSAGE(
      Eq(PingSlaveMessage().GetTypeName()), _, _);

  DROP_PROTOBUFS(PongSlaveMessage(), _, _);

  Future<SlaveRegisteredMessage> slaveRegisteredMessage =
    FUTURE_PROTOBUF(SlaveRegisteredMessage(), _, _);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);
  TestContainerizer containerizer(&exec);

  Owned<MasterDetector> detector = master.get()->createDetector();
  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), &containerizer);
  ASSERT_SOME(slave);

  AWAIT_READY(slaveRegisteredMessage);
  SlaveID slaveId = slaveRegisteredMessage.get().slave_id();

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL);

  Future<FrameworkID> frameworkId;
  EXPECT_CALL(sched, registered(&driver, _, _))
    .WillOnce(FutureArg<1>(&frameworkId));

  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillRepeatedly(Return());

  driver.start();

  AWAIT_READY(frameworkId);

  // Drop the first shutdown message from the master (simulated
  // partition), allow the second shutdown message to pass when
  // the slave sends an update.
  Future<ShutdownMessage> shutdownMessage =
    DROP_PROTOBUF(ShutdownMessage(), _, slave.get()->pid);

  EXPECT_CALL(sched, offerRescinded(&driver, _))
    .WillRepeatedly(Return());

  Future<Nothing> slaveLost;
  EXPECT_CALL(sched, slaveLost(&driver, _))
    .WillOnce(FutureSatisfy(&slaveLost));

  Clock::pause();

  // Now, induce a partition of the slave by having the master
  // timeout the slave.
  size_t pings = 0;
  while (true) {
    AWAIT_READY(ping);
    pings++;
    if (pings == masterFlags.max_slave_ping_timeouts) {
     break;
    }
    ping = FUTURE_MESSAGE(Eq(PingSlaveMessage().GetTypeName()), _, _);
    Clock::advance(masterFlags.slave_ping_timeout);
    Clock::settle();
  }

  Clock::advance(masterFlags.slave_ping_timeout);
  Clock::settle();

  // Wait for the master to attempt to shut down the slave.
  AWAIT_READY(shutdownMessage);

  // The master will notify the framework that the slave was lost.
  AWAIT_READY(slaveLost);

  shutdownMessage = FUTURE_PROTOBUF(ShutdownMessage(), _, slave.get()->pid);

  // At this point, the slave still thinks it's registered, so we
  // simulate a status update coming from the slave.
  TaskID taskId;
  taskId.set_value("task_id");
  const StatusUpdate& update = protobuf::createStatusUpdate(
      frameworkId.get(),
      slaveId,
      taskId,
      TASK_RUNNING,
      TaskStatus::SOURCE_SLAVE,
      UUID::random());

  StatusUpdateMessage message;
  message.mutable_update()->CopyFrom(update);
  message.set_pid(stringify(slave.get()->pid));

  process::post(master.get()->pid, message);

  // The master should shutdown the slave upon receiving the update.
  AWAIT_READY(shutdownMessage);

  Clock::resume();

  driver.stop();
  driver.join();
}
Пример #12
0
// Ensures that the driver can handle the SUBSCRIBED event
// after a master failover.
TEST_F(SchedulerDriverEventTest, SubscribedMasterFailover)
{
    Try<Owned<cluster::Master>> master = StartMaster();
    ASSERT_SOME(master);

    FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;
    frameworkInfo.set_failover_timeout(Weeks(2).secs());

    // Make sure the initial registration calls 'registered'.
    MockScheduler sched;
    StandaloneMasterDetector detector(master.get()->pid);
    TestingMesosSchedulerDriver driver(&sched, &detector, frameworkInfo);

    // Intercept the registration message, send a SUBSCRIBED instead.
    Future<Message> frameworkRegisteredMessage =
        DROP_MESSAGE(Eq(FrameworkRegisteredMessage().GetTypeName()), _, _);

    // Ensure that there will be no (re-)registration retries
    // from the scheduler driver.
    Clock::pause();

    driver.start();

    AWAIT_READY(frameworkRegisteredMessage);
    UPID frameworkPid = frameworkRegisteredMessage.get().to;

    FrameworkRegisteredMessage message;
    ASSERT_TRUE(message.ParseFromString(frameworkRegisteredMessage.get().body));

    FrameworkID frameworkId = message.framework_id();
    frameworkInfo.mutable_id()->CopyFrom(frameworkId);

    Event event;
    event.set_type(Event::SUBSCRIBED);
    event.mutable_subscribed()->mutable_framework_id()->CopyFrom(frameworkId);

    Future<Nothing> registered;
    EXPECT_CALL(sched, registered(&driver, frameworkId, _))
    .WillOnce(FutureSatisfy(&registered));

    process::post(master.get()->pid, frameworkPid, event);

    AWAIT_READY(registered);

    EXPECT_CALL(sched, disconnected(&driver));

    // Fail over the master and expect a 'reregistered' call.
    // Note that the master sends a registered message for
    // this case (see MESOS-786).
    master->reset();
    master = StartMaster();
    ASSERT_SOME(master);

    frameworkRegisteredMessage =
        DROP_MESSAGE(Eq(FrameworkRegisteredMessage().GetTypeName()), _, _);

    detector.appoint(master.get()->pid);

    AWAIT_READY(frameworkRegisteredMessage);

    Future<Nothing> reregistered;
    EXPECT_CALL(sched, reregistered(&driver, _))
    .WillOnce(FutureSatisfy(&reregistered));

    process::post(master.get()->pid, frameworkPid, event);

    AWAIT_READY(reregistered);

    driver.stop();
    driver.join();
}
// This is a simple end to end test that makes sure a master using log
// storage with ZooKeeper can successfully launch a task.
TEST_F(RegistrarZooKeeperTest, TaskRunning)
{
    Try<PID<Master> > master = StartMaster();
    ASSERT_SOME(master);

    MockExecutor exec(DEFAULT_EXECUTOR_ID);

    TestContainerizer containerizer(&exec);

    Try<PID<Slave> > slave = StartSlave(&containerizer);
    ASSERT_SOME(slave);

    MockScheduler sched;
    MesosSchedulerDriver driver(
        &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL);

    EXPECT_CALL(sched, registered(&driver, _, _))
    .Times(1);

    Future<vector<Offer> > offers;
    EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

    driver.start();

    AWAIT_READY(offers);
    EXPECT_NE(0u, offers.get().size());

    TaskInfo task = createTask(offers.get()[0], "dummy", DEFAULT_EXECUTOR_ID);

    EXPECT_CALL(exec, registered(_, _, _, _))
    .Times(1);

    EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING));

    Future<Nothing> resourcesUpdated;
    EXPECT_CALL(containerizer,
                update(_, Resources(offers.get()[0].resources())))
    .WillOnce(DoAll(FutureSatisfy(&resourcesUpdated),
                    Return(Nothing())));

    Future<TaskStatus> status;
    EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&status));

    driver.launchTasks(offers.get()[0].id(), {task});

    AWAIT_READY(status);
    EXPECT_EQ(TASK_RUNNING, status.get().state());

    AWAIT_READY(resourcesUpdated);

    EXPECT_CALL(exec, shutdown(_))
    .Times(AtMost(1));

    driver.stop();
    driver.join();

    Shutdown(); // Must shutdown before 'containerizer' gets deallocated.
}
// This test ensures that destroy can be called while in the
// launch loop. The composing containerizer still calls the
// underlying containerizer's destroy (because it's not sure
// if the containerizer can handle the type of container being
// launched). If the launch is not supported by the 1st containerizer,
// the composing containerizer should stop the launch loop and
// set the value of destroy future to true.
TEST_F(ComposingContainerizerTest, DestroyDuringUnsupportedLaunchLoop)
{
  vector<Containerizer*> containerizers;

  MockContainerizer* mockContainerizer1 = new MockContainerizer();
  MockContainerizer* mockContainerizer2 = new MockContainerizer();

  containerizers.push_back(mockContainerizer1);
  containerizers.push_back(mockContainerizer2);

  ComposingContainerizer containerizer(containerizers);
  ContainerID containerId;
  containerId.set_value("container");
  TaskInfo taskInfo;
  ExecutorInfo executorInfo;
  SlaveID slaveId;
  std::map<std::string, std::string> environment;

  Promise<bool> launchPromise;

  EXPECT_CALL(*mockContainerizer1, launch(_, _, _, _, _, _, _, _))
    .WillOnce(Return(launchPromise.future()));

  Future<Nothing> destroy;
  Promise<bool> destroyPromise;
  EXPECT_CALL(*mockContainerizer1, destroy(_))
    .WillOnce(DoAll(FutureSatisfy(&destroy),
                    Return(destroyPromise.future())));

  Future<bool> launched = containerizer.launch(
      containerId,
      taskInfo,
      executorInfo,
      "dir",
      "user",
      slaveId,
      environment,
      false);

  Resources resources = Resources::parse("cpus:1;mem:256").get();

  EXPECT_TRUE(launched.isPending());

  Future<bool> destroyed = containerizer.destroy(containerId);

  EXPECT_CALL(*mockContainerizer2, launch(_, _, _, _, _, _, _, _))
    .Times(0);

  // We make sure the destroy is being called on the first containerizer.
  // The second containerizer shouldn't be called as well since the
  // container is already destroyed.
  AWAIT_READY(destroy);

  launchPromise.set(false);
  destroyPromise.set(false);

  // `launched` should be a failure and `destroyed` should be true
  // because the launch was stopped from being tried on the 2nd
  // containerizer because of the destroy.
  AWAIT_FAILED(launched);
  AWAIT_EXPECT_EQ(true, destroyed);
}
// This test checks that a failed over scheduler gets the retried status update
// when the original instance dies without acknowledging the update.
TEST_F(HttpFaultToleranceTest, SchedulerFailoverStatusUpdate)
{
  master::Flags flags = CreateMasterFlags();
  flags.authenticate_frameworks = false;

  Try<Owned<cluster::Master>> master = StartMaster(flags);
  ASSERT_SOME(master);

  auto scheduler = std::make_shared<v1::MockHTTPScheduler>();
  auto executor = std::make_shared<v1::MockHTTPExecutor>();

  ExecutorID executorId = DEFAULT_EXECUTOR_ID;
  TestContainerizer containerizer(executorId, executor);

  Owned<MasterDetector> detector = master.get()->createDetector();

  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), &containerizer);
  ASSERT_SOME(slave);

  Future<Nothing> connected;
  EXPECT_CALL(*scheduler, connected(_))
    .WillOnce(FutureSatisfy(&connected))
    .WillRepeatedly(Return()); // Ignore future invocations.

  ContentType contentType = ContentType::PROTOBUF;

  v1::scheduler::TestMesos schedulerLibrary(
      master.get()->pid,
      contentType,
      scheduler);

  AWAIT_READY(connected);

  Future<Event::Subscribed> subscribed;
  EXPECT_CALL(*scheduler, subscribed(_, _))
    .WillOnce(FutureArg<1>(&subscribed));

  EXPECT_CALL(*scheduler, heartbeat(_))
    .WillRepeatedly(Return()); // Ignore heartbeats.

  Future<Event::Offers> offers;
  EXPECT_CALL(*scheduler, offers(_, _))
    .WillOnce(FutureArg<1>(&offers));

  {
    Call call;
    call.set_type(Call::SUBSCRIBE);

    Call::Subscribe* subscribe = call.mutable_subscribe();
    subscribe->mutable_framework_info()->CopyFrom(v1::DEFAULT_FRAMEWORK_INFO);

    schedulerLibrary.send(call);
  }

  AWAIT_READY(subscribed);

  v1::FrameworkID frameworkId(subscribed->framework_id());

  AWAIT_READY(offers);
  EXPECT_NE(0, offers->offers().size());

  EXPECT_CALL(*executor, connected(_))
    .WillOnce(v1::executor::SendSubscribe(frameworkId, evolve(executorId)));

  EXPECT_CALL(*executor, subscribed(_, _));

  EXPECT_CALL(*executor, launch(_, _))
    .WillOnce(v1::executor::SendUpdateFromTask(
        frameworkId, evolve(executorId), v1::TASK_RUNNING));

  Future<Nothing> acknowledged;
  EXPECT_CALL(*executor, acknowledged(_, _))
    .WillOnce(FutureSatisfy(&acknowledged));

  Future<Event::Update> update;
  EXPECT_CALL(*scheduler, update(_, _))
    .WillOnce(FutureArg<1>(&update));

  const v1::Offer& offer = offers->offers(0);

  v1::TaskInfo taskInfo =
    evolve(createTask(devolve(offer), "", executorId));

  {
    Call call;
    call.mutable_framework_id()->CopyFrom(frameworkId);
    call.set_type(Call::ACCEPT);

    Call::Accept* accept = call.mutable_accept();
    accept->add_offer_ids()->CopyFrom(offer.id());

    v1::Offer::Operation* operation = accept->add_operations();
    operation->set_type(v1::Offer::Operation::LAUNCH);
    operation->mutable_launch()->add_task_infos()->CopyFrom(taskInfo);

    schedulerLibrary.send(call);
  }

  AWAIT_READY(acknowledged);
  AWAIT_READY(update);

  EXPECT_EQ(v1::TASK_RUNNING, update->status().state());
  EXPECT_EQ(executorId, devolve(update->status().executor_id()));

  EXPECT_TRUE(update->status().has_executor_id());
  EXPECT_TRUE(update->status().has_uuid());

  // Failover the scheduler without acknowledging the status update.

  auto scheduler2 = std::make_shared<v1::MockHTTPScheduler>();

  Future<Nothing> connected2;
  EXPECT_CALL(*scheduler2, connected(_))
    .WillOnce(FutureSatisfy(&connected2));

  // Failover to another scheduler instance.
  v1::scheduler::TestMesos schedulerLibrary2(
      master.get()->pid,
      contentType,
      scheduler2);

  AWAIT_READY(connected2);

  // The previously connected scheduler instance should receive an
  // error/disconnected event.
  Future<Nothing> error;
  EXPECT_CALL(*scheduler, error(_, _))
    .WillOnce(FutureSatisfy(&error));

  Future<Nothing> disconnected;
  EXPECT_CALL(*scheduler, disconnected(_))
    .WillOnce(FutureSatisfy(&disconnected));

  EXPECT_CALL(*scheduler2, subscribed(_, _))
    .WillOnce(FutureArg<1>(&subscribed));

  EXPECT_CALL(*scheduler2, heartbeat(_))
    .WillRepeatedly(Return()); // Ignore heartbeats.

  // Scheduler2 should receive the retried status update.
  Future<Nothing> update2;
  EXPECT_CALL(*scheduler2, update(_, _))
    .WillOnce(FutureSatisfy(&update2))
    .WillRepeatedly(Return()); // Ignore subsequent updates.

  {
    Call call;
    call.mutable_framework_id()->CopyFrom(frameworkId);
    call.set_type(Call::SUBSCRIBE);

    Call::Subscribe* subscribe = call.mutable_subscribe();
    subscribe->mutable_framework_info()->CopyFrom(v1::DEFAULT_FRAMEWORK_INFO);
    subscribe->mutable_framework_info()->mutable_id()->CopyFrom(frameworkId);

    schedulerLibrary2.send(call);
  }

  AWAIT_READY(error);
  AWAIT_READY(disconnected);
  AWAIT_READY(subscribed);

  EXPECT_EQ(frameworkId, subscribed->framework_id());

  Clock::pause();

  // Now advance time enough for the reliable timeout to kick in and
  // another status update to be sent.
  Clock::advance(slave::STATUS_UPDATE_RETRY_INTERVAL_MIN);

  AWAIT_READY(update2);

  EXPECT_CALL(*executor, shutdown(_))
    .Times(AtMost(1));

  EXPECT_CALL(*executor, disconnected(_))
    .Times(AtMost(1));
}
Пример #16
0
// The purpose of this test is to ensure that when slaves are removed
// from the master, and then attempt to re-register, we deny the
// re-registration by sending a ShutdownMessage to the slave.
// Why? Because during a network partition, the master will remove a
// partitioned slave, thus sending its tasks to LOST. At this point,
// when the partition is removed, the slave will attempt to
// re-register with its running tasks. We've already notified
// frameworks that these tasks were LOST, so we have to have the slave
// slave shut down.
TEST_F(PartitionTest, PartitionedSlaveReregistration)
{
  master::Flags masterFlags = CreateMasterFlags();
  Try<Owned<cluster::Master>> master = StartMaster(masterFlags);
  ASSERT_SOME(master);

  // Allow the master to PING the slave, but drop all PONG messages
  // from the slave. Note that we don't match on the master / slave
  // PIDs because it's actually the SlaveObserver Process that sends
  // the pings.
  Future<Message> ping = FUTURE_MESSAGE(
      Eq(PingSlaveMessage().GetTypeName()), _, _);

  DROP_PROTOBUFS(PongSlaveMessage(), _, _);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);
  TestContainerizer containerizer(&exec);

  StandaloneMasterDetector detector(master.get()->pid);

  Try<Owned<cluster::Slave>> slave = StartSlave(&detector, &containerizer);
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return());

  driver.start();

  AWAIT_READY(offers);
  ASSERT_NE(0u, offers.get().size());

  // Launch a task. This is to ensure the task is killed by the slave,
  // during shutdown.
  TaskID taskId;
  taskId.set_value("1");

  TaskInfo task;
  task.set_name("");
  task.mutable_task_id()->MergeFrom(taskId);
  task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id());
  task.mutable_resources()->MergeFrom(offers.get()[0].resources());
  task.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO);
  task.mutable_executor()->mutable_command()->set_value("sleep 60");

  // Set up the expectations for launching the task.
  EXPECT_CALL(exec, registered(_, _, _, _));
  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING));

  Future<TaskStatus> runningStatus;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&runningStatus));

  Future<Nothing> statusUpdateAck = FUTURE_DISPATCH(
      slave.get()->pid, &Slave::_statusUpdateAcknowledgement);

  driver.launchTasks(offers.get()[0].id(), {task});

  AWAIT_READY(runningStatus);
  EXPECT_EQ(TASK_RUNNING, runningStatus.get().state());

  // Wait for the slave to have handled the acknowledgment prior
  // to pausing the clock.
  AWAIT_READY(statusUpdateAck);

  // Drop the first shutdown message from the master (simulated
  // partition), allow the second shutdown message to pass when
  // the slave re-registers.
  Future<ShutdownMessage> shutdownMessage =
    DROP_PROTOBUF(ShutdownMessage(), _, slave.get()->pid);

  Future<TaskStatus> lostStatus;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&lostStatus));

  Future<Nothing> slaveLost;
  EXPECT_CALL(sched, slaveLost(&driver, _))
    .WillOnce(FutureSatisfy(&slaveLost));

  Clock::pause();

  // Now, induce a partition of the slave by having the master
  // timeout the slave.
  size_t pings = 0;
  while (true) {
    AWAIT_READY(ping);
    pings++;
    if (pings == masterFlags.max_slave_ping_timeouts) {
     break;
    }
    ping = FUTURE_MESSAGE(Eq(PingSlaveMessage().GetTypeName()), _, _);
    Clock::advance(masterFlags.slave_ping_timeout);
    Clock::settle();
  }

  Clock::advance(masterFlags.slave_ping_timeout);
  Clock::settle();

  // The master will have notified the framework of the lost task.
  AWAIT_READY(lostStatus);
  EXPECT_EQ(TASK_LOST, lostStatus.get().state());

  // Wait for the master to attempt to shut down the slave.
  AWAIT_READY(shutdownMessage);

  // The master will notify the framework that the slave was lost.
  AWAIT_READY(slaveLost);

  Clock::resume();

  // We now complete the partition on the slave side as well. This
  // is done by simulating a master loss event which would normally
  // occur during a network partition.
  detector.appoint(None());

  Future<Nothing> shutdown;
  EXPECT_CALL(exec, shutdown(_))
    .WillOnce(FutureSatisfy(&shutdown));

  shutdownMessage = FUTURE_PROTOBUF(ShutdownMessage(), _, slave.get()->pid);

  // Have the slave re-register with the master.
  detector.appoint(master.get()->pid);

  // Upon re-registration, the master will shutdown the slave.
  // The slave will then shut down the executor.
  AWAIT_READY(shutdownMessage);
  AWAIT_READY(shutdown);

  driver.stop();
  driver.join();
}
// This test ensures that the failed over scheduler is able to send a message
// to the executor.
TEST_F(HttpFaultToleranceTest, SchedulerFailoverFrameworkToExecutorMessage)
{
  master::Flags flags = CreateMasterFlags();
  flags.authenticate_frameworks = false;

  Try<Owned<cluster::Master>> master = StartMaster(flags);
  ASSERT_SOME(master);

  auto scheduler = std::make_shared<v1::MockHTTPScheduler>();
  auto executor = std::make_shared<v1::MockHTTPExecutor>();

  ExecutorID executorId = DEFAULT_EXECUTOR_ID;
  TestContainerizer containerizer(executorId, executor);

  Owned<MasterDetector> detector = master.get()->createDetector();

  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), &containerizer);
  ASSERT_SOME(slave);

  Future<Nothing> connected;
  EXPECT_CALL(*scheduler, connected(_))
    .WillOnce(FutureSatisfy(&connected))
    .WillRepeatedly(Return()); // Ignore future invocations.

  ContentType contentType = ContentType::PROTOBUF;

  v1::scheduler::TestMesos schedulerLibrary(
      master.get()->pid,
      contentType,
      scheduler);

  AWAIT_READY(connected);

  Future<Event::Subscribed> subscribed;
  EXPECT_CALL(*scheduler, subscribed(_, _))
    .WillOnce(FutureArg<1>(&subscribed));

  EXPECT_CALL(*scheduler, heartbeat(_))
    .WillRepeatedly(Return()); // Ignore heartbeats.

  Future<Event::Offers> offers;
  EXPECT_CALL(*scheduler, offers(_, _))
    .WillOnce(FutureArg<1>(&offers));

  {
    Call call;
    call.set_type(Call::SUBSCRIBE);

    Call::Subscribe* subscribe = call.mutable_subscribe();
    subscribe->mutable_framework_info()->CopyFrom(v1::DEFAULT_FRAMEWORK_INFO);

    schedulerLibrary.send(call);
  }

  AWAIT_READY(subscribed);

  v1::FrameworkID frameworkId(subscribed->framework_id());

  AWAIT_READY(offers);
  EXPECT_NE(0, offers->offers().size());

  EXPECT_CALL(*executor, connected(_))
    .WillOnce(v1::executor::SendSubscribe(frameworkId, evolve(executorId)));

  EXPECT_CALL(*executor, subscribed(_, _));

  Future<Nothing> launch;
  EXPECT_CALL(*executor, launch(_, _))
    .WillOnce(FutureSatisfy(&launch));

  const v1::Offer& offer = offers->offers(0);

  v1::TaskInfo taskInfo =
    evolve(createTask(devolve(offer), "", executorId));

  {
    Call call;
    call.mutable_framework_id()->CopyFrom(frameworkId);
    call.set_type(Call::ACCEPT);

    Call::Accept* accept = call.mutable_accept();
    accept->add_offer_ids()->CopyFrom(offer.id());

    v1::Offer::Operation* operation = accept->add_operations();
    operation->set_type(v1::Offer::Operation::LAUNCH);
    operation->mutable_launch()->add_task_infos()->CopyFrom(taskInfo);

    schedulerLibrary.send(call);
  }

  AWAIT_READY(launch);

  auto scheduler2 = std::make_shared<v1::MockHTTPScheduler>();

  Future<Nothing> connected2;
  EXPECT_CALL(*scheduler2, connected(_))
    .WillOnce(FutureSatisfy(&connected2));

  // Failover to another scheduler instance.
  v1::scheduler::TestMesos schedulerLibrary2(
      master.get()->pid,
      contentType,
      scheduler2);

  AWAIT_READY(connected2);

  // The previously connected scheduler instance should receive an
  // error/disconnected event.
  Future<Nothing> error;
  EXPECT_CALL(*scheduler, error(_, _))
    .WillOnce(FutureSatisfy(&error));

  Future<Nothing> disconnected;
  EXPECT_CALL(*scheduler, disconnected(_))
    .WillOnce(FutureSatisfy(&disconnected));

  EXPECT_CALL(*scheduler2, subscribed(_, _))
    .WillOnce(FutureArg<1>(&subscribed));

  EXPECT_CALL(*scheduler2, heartbeat(_))
    .WillRepeatedly(Return()); // Ignore heartbeats.

  {
    Call call;
    call.mutable_framework_id()->CopyFrom(frameworkId);
    call.set_type(Call::SUBSCRIBE);

    Call::Subscribe* subscribe = call.mutable_subscribe();
    subscribe->mutable_framework_info()->CopyFrom(v1::DEFAULT_FRAMEWORK_INFO);
    subscribe->mutable_framework_info()->mutable_id()->CopyFrom(frameworkId);

    schedulerLibrary2.send(call);
  }

  AWAIT_READY(error);
  AWAIT_READY(disconnected);
  AWAIT_READY(subscribed);

  EXPECT_EQ(frameworkId, subscribed->framework_id());

  Future<v1::executor::Event::Message> message;
  EXPECT_CALL(*executor, message(_, _))
    .WillOnce(FutureArg<1>(&message));

  {
    Call call;
    call.mutable_framework_id()->CopyFrom(frameworkId);
    call.set_type(Call::MESSAGE);

    Call::Message* message = call.mutable_message();
    message->mutable_agent_id()->CopyFrom(offer.agent_id());
    message->mutable_executor_id()->CopyFrom(v1::DEFAULT_EXECUTOR_ID);
    message->set_data("hello world");

    schedulerLibrary2.send(call);
  }

  AWAIT_READY(message);
  ASSERT_EQ("hello world", message->data());

  EXPECT_CALL(*executor, shutdown(_))
    .Times(AtMost(1));

  EXPECT_CALL(*executor, disconnected(_))
    .Times(AtMost(1));
}
Пример #18
0
// The purpose of this test is to ensure that when slaves are removed
// from the master, and then attempt to send exited executor messages,
// we send a ShutdownMessage to the slave. Why? Because during a
// network partition, the master will remove a partitioned slave, thus
// sending its tasks to LOST. At this point, when the partition is
// removed, the slave may attempt to send exited executor messages if
// it was unaware that the master removed it. We've already
// notified frameworks that the tasks under the executors were LOST,
// so we have to have the slave shut down.
TEST_F(PartitionTest, PartitionedSlaveExitedExecutor)
{
  master::Flags masterFlags = CreateMasterFlags();
  Try<Owned<cluster::Master>> master = StartMaster(masterFlags);
  ASSERT_SOME(master);

  // Allow the master to PING the slave, but drop all PONG messages
  // from the slave. Note that we don't match on the master / slave
  // PIDs because it's actually the SlaveObserver Process that sends
  // the pings.
  Future<Message> ping = FUTURE_MESSAGE(
      Eq(PingSlaveMessage().GetTypeName()), _, _);

  DROP_PROTOBUFS(PongSlaveMessage(), _, _);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);
  TestContainerizer containerizer(&exec);

  Owned<MasterDetector> detector = master.get()->createDetector();
  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), &containerizer);
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL);

  Future<FrameworkID> frameworkId;
  EXPECT_CALL(sched, registered(&driver, _, _))
    .WillOnce(FutureArg<1>(&frameworkId));\

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return());

  driver.start();

  AWAIT_READY(frameworkId);
  AWAIT_READY(offers);
  ASSERT_NE(0u, offers.get().size());

  // Launch a task. This allows us to have the slave send an
  // ExitedExecutorMessage.
  TaskID taskId;
  taskId.set_value("1");

  TaskInfo task;
  task.set_name("");
  task.mutable_task_id()->MergeFrom(taskId);
  task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id());
  task.mutable_resources()->MergeFrom(offers.get()[0].resources());
  task.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO);
  task.mutable_executor()->mutable_command()->set_value("sleep 60");

  // Set up the expectations for launching the task.
  EXPECT_CALL(exec, registered(_, _, _, _));

  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING));

  // Drop all the status updates from the slave, so that we can
  // ensure the ExitedExecutorMessage is what triggers the slave
  // shutdown.
  DROP_PROTOBUFS(StatusUpdateMessage(), _, master.get()->pid);

  driver.launchTasks(offers.get()[0].id(), {task});

  // Drop the first shutdown message from the master (simulated
  // partition) and allow the second shutdown message to pass when
  // triggered by the ExitedExecutorMessage.
  Future<ShutdownMessage> shutdownMessage =
    DROP_PROTOBUF(ShutdownMessage(), _, slave.get()->pid);

  Future<TaskStatus> lostStatus;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&lostStatus));

  Future<Nothing> slaveLost;
  EXPECT_CALL(sched, slaveLost(&driver, _))
    .WillOnce(FutureSatisfy(&slaveLost));

  Clock::pause();

  // Now, induce a partition of the slave by having the master
  // timeout the slave.
  size_t pings = 0;
  while (true) {
    AWAIT_READY(ping);
    pings++;
    if (pings == masterFlags.max_slave_ping_timeouts) {
     break;
    }
    ping = FUTURE_MESSAGE(Eq(PingSlaveMessage().GetTypeName()), _, _);
    Clock::advance(masterFlags.slave_ping_timeout);
    Clock::settle();
  }

  Clock::advance(masterFlags.slave_ping_timeout);
  Clock::settle();

  // The master will have notified the framework of the lost task.
  AWAIT_READY(lostStatus);
  EXPECT_EQ(TASK_LOST, lostStatus.get().state());

  // Wait for the master to attempt to shut down the slave.
  AWAIT_READY(shutdownMessage);

  // The master will notify the framework that the slave was lost.
  AWAIT_READY(slaveLost);

  shutdownMessage = FUTURE_PROTOBUF(ShutdownMessage(), _, slave.get()->pid);

  // Induce an ExitedExecutorMessage from the slave.
  containerizer.destroy(
      frameworkId.get(), DEFAULT_EXECUTOR_INFO.executor_id());

  // Upon receiving the message, the master will shutdown the slave.
  AWAIT_READY(shutdownMessage);

  Clock::resume();

  driver.stop();
  driver.join();
}
// This test checks that a scheduler exit shuts down the executor.
TEST_F(HttpFaultToleranceTest, SchedulerExit)
{
  master::Flags flags = CreateMasterFlags();
  flags.authenticate_frameworks = false;

  Try<Owned<cluster::Master>> master = StartMaster(flags);
  ASSERT_SOME(master);

  auto scheduler = std::make_shared<v1::MockHTTPScheduler>();
  auto executor = std::make_shared<v1::MockHTTPExecutor>();

  ExecutorID executorId = DEFAULT_EXECUTOR_ID;
  TestContainerizer containerizer(executorId, executor);

  Owned<MasterDetector> detector = master.get()->createDetector();

  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), &containerizer);
  ASSERT_SOME(slave);

  Future<Nothing> connected;
  EXPECT_CALL(*scheduler, connected(_))
    .WillOnce(FutureSatisfy(&connected))
    .WillRepeatedly(Return()); // Ignore future invocations.

  ContentType contentType = ContentType::PROTOBUF;

  v1::scheduler::TestMesos schedulerLibrary(
      master.get()->pid,
      contentType,
      scheduler);

  AWAIT_READY(connected);

  Future<Event::Subscribed> subscribed;
  EXPECT_CALL(*scheduler, subscribed(_, _))
    .WillOnce(FutureArg<1>(&subscribed));

  EXPECT_CALL(*scheduler, heartbeat(_))
    .WillRepeatedly(Return()); // Ignore heartbeats.

  Future<Event::Offers> offers;
  EXPECT_CALL(*scheduler, offers(_, _))
    .WillOnce(FutureArg<1>(&offers));

  {
    Call call;
    call.set_type(Call::SUBSCRIBE);

    Call::Subscribe* subscribe = call.mutable_subscribe();
    subscribe->mutable_framework_info()->CopyFrom(v1::DEFAULT_FRAMEWORK_INFO);

    schedulerLibrary.send(call);
  }

  AWAIT_READY(subscribed);

  v1::FrameworkID frameworkId(subscribed->framework_id());

  AWAIT_READY(offers);
  EXPECT_NE(0, offers->offers().size());

  EXPECT_CALL(*executor, connected(_))
    .WillOnce(v1::executor::SendSubscribe(frameworkId, evolve(executorId)));

  EXPECT_CALL(*executor, subscribed(_, _));

  Future<Nothing> launch;
  EXPECT_CALL(*executor, launch(_, _))
    .WillOnce(FutureSatisfy(&launch));

  const v1::Offer& offer = offers->offers(0);

  v1::TaskInfo taskInfo =
    evolve(createTask(devolve(offer), "", DEFAULT_EXECUTOR_ID));

  {
    Call call;
    call.mutable_framework_id()->CopyFrom(frameworkId);
    call.set_type(Call::ACCEPT);

    Call::Accept* accept = call.mutable_accept();
    accept->add_offer_ids()->CopyFrom(offer.id());

    v1::Offer::Operation* operation = accept->add_operations();
    operation->set_type(v1::Offer::Operation::LAUNCH);
    operation->mutable_launch()->add_task_infos()->CopyFrom(taskInfo);

    schedulerLibrary.send(call);
  }

  AWAIT_READY(launch);

  EXPECT_CALL(*scheduler, disconnected(_))
    .Times(AtMost(1));

  Future<Nothing> shutdown;
  EXPECT_CALL(*executor, shutdown(_))
    .WillOnce(FutureSatisfy(&shutdown));

  {
    Call call;
    call.mutable_framework_id()->CopyFrom(frameworkId);
    call.set_type(Call::TEARDOWN);

    schedulerLibrary.send(call);
  }

  // Ensure that the executor receives a `Event::Shutdown` after the
  // scheduler exit.
  AWAIT_READY(shutdown);
}
Пример #20
0
// This test does not set any Accept header for the subscribe call.
// The default response media type should be "application/json" in
// this case.
TEST_P(ExecutorHttpApiTest, NoAcceptHeader)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  ExecutorID executorId = DEFAULT_EXECUTOR_ID;
  MockExecutor exec(executorId);
  TestContainerizer containerizer(&exec);

  Owned<MasterDetector> detector = master.get()->createDetector();
  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), &containerizer);
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL);

  Future<FrameworkID> frameworkId;
  EXPECT_CALL(sched, registered(&driver, _, _))
    .WillOnce(FutureArg<1>(&frameworkId));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers));

  Future<Nothing> statusUpdate;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureSatisfy(&statusUpdate));

  driver.start();

  AWAIT_READY(frameworkId);
  AWAIT_READY(offers);

  ASSERT_EQ(1u, offers.get().size());

  EXPECT_CALL(exec, registered(_, _, _, _))
    .Times(1);

  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING));

  TaskInfo taskInfo = createTask(offers.get()[0], "", executorId);
  driver.launchTasks(offers.get()[0].id(), {taskInfo});

  // Wait until status update is received on the scheduler before sending
  // an executor subscribe request.
  AWAIT_READY(statusUpdate);

  // Only subscribe needs to 'Accept' JSON or protobuf.
  Call call;
  call.mutable_framework_id()->CopyFrom(evolve(frameworkId.get()));
  call.mutable_executor_id()->CopyFrom(evolve(executorId));

  call.set_type(Call::SUBSCRIBE);

  call.mutable_subscribe();

  // Retrieve the parameter passed as content type to this test.
  const ContentType contentType = GetParam();

  // No 'Accept' header leads to all media types considered
  // acceptable. JSON will be chosen by default.
  process::http::Headers headers;

  Future<Response> response = process::http::streaming::post(
      slave.get()->pid,
      "api/v1/executor",
      headers,
      serialize(contentType, call),
      stringify(contentType));

  AWAIT_EXPECT_RESPONSE_STATUS_EQ(OK().status, response);
  AWAIT_EXPECT_RESPONSE_HEADER_EQ(APPLICATION_JSON, "Content-Type", response);

  driver.stop();
  driver.join();
}
Пример #21
0
// This test checks that a scheduler gets a slave lost
// message for a partitioned slave.
TEST_F(PartitionTest, PartitionedSlave)
{
  Try<PID<Master> > master = StartMaster();
  ASSERT_SOME(master);

  // Set these expectations up before we spawn the slave so that we
  // don't miss the first PING.
  Future<Message> ping = FUTURE_MESSAGE(Eq("PING"), _, _);

  // Drop all the PONGs to simulate slave partition.
  DROP_MESSAGES(Eq("PONG"), _, _);

  Try<PID<Slave> > slave = StartSlave();
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _));

  Future<Nothing> resourceOffers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureSatisfy(&resourceOffers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  // Need to make sure the framework AND slave have registered with
  // master. Waiting for resource offers should accomplish both.
  AWAIT_READY(resourceOffers);

  Clock::pause();

  EXPECT_CALL(sched, offerRescinded(&driver, _))
    .Times(AtMost(1));

  Future<Nothing> slaveLost;
  EXPECT_CALL(sched, slaveLost(&driver, _))
    .WillOnce(FutureSatisfy(&slaveLost));

  // Now advance through the PINGs.
  uint32_t pings = 0;
  while (true) {
    AWAIT_READY(ping);
    pings++;
    if (pings == master::MAX_SLAVE_PING_TIMEOUTS) {
     break;
    }
    ping = FUTURE_MESSAGE(Eq("PING"), _, _);
    Clock::advance(master::SLAVE_PING_TIMEOUT);
  }

  Clock::advance(master::SLAVE_PING_TIMEOUT);

  AWAIT_READY(slaveLost);

  this->Stop(slave.get());

  JSON::Object stats = Metrics();
  EXPECT_EQ(1, stats.values["master/slave_removals"]);
  EXPECT_EQ(1, stats.values["master/slave_removals/reason_unhealthy"]);

  driver.stop();
  driver.join();

  Shutdown();

  Clock::resume();
}