Ejemplo n.º 1
0
  void sendStatusUpdate(const TaskStatus& status)
  {
    StatusUpdateMessage message;
    StatusUpdate* update = message.mutable_update();
    update->mutable_framework_id()->MergeFrom(frameworkId);
    update->mutable_executor_id()->MergeFrom(executorId);
    update->mutable_slave_id()->MergeFrom(slaveId);
    update->mutable_status()->MergeFrom(status);
    update->set_timestamp(Clock::now().secs());
    update->mutable_status()->set_timestamp(update->timestamp());
    message.set_pid(self());

    // We overwrite the UUID for this status update, however with
    // the HTTP API, the executor will have to generate a UUID
    // (which needs to be validated to be RFC-4122 compliant).
    UUID uuid = UUID::random();
    update->set_uuid(uuid.toBytes());
    update->mutable_status()->set_uuid(uuid.toBytes());

    // We overwrite the SlaveID for this status update, however with
    // the HTTP API, this can be overwritten by the slave instead.
    update->mutable_status()->mutable_slave_id()->CopyFrom(slaveId);

    VLOG(1) << "Executor sending status update " << *update;

    // Capture the status update.
    updates[uuid] = *update;

    send(slave, message);
  }
Ejemplo n.º 2
0
  void sendStatusUpdate(const TaskStatus& status)
  {
    if (status.state() == TASK_STAGING) {
      VLOG(1) << "Executor is not allowed to send "
              << "TASK_STAGING status update. Aborting!";

      driver->abort();

      Stopwatch stopwatch;
      if (FLAGS_v >= 1) {
        stopwatch.start();
      }

      executor->error(driver, "Attempted to send TASK_STAGING status update");

      VLOG(1) << "Executor::error took " << stopwatch.elapsed();

      return;
    }

    StatusUpdateMessage message;
    StatusUpdate* update = message.mutable_update();
    update->mutable_framework_id()->MergeFrom(frameworkId);
    update->mutable_executor_id()->MergeFrom(executorId);
    update->mutable_slave_id()->MergeFrom(slaveId);
    update->mutable_status()->MergeFrom(status);
    update->set_timestamp(Clock::now().secs());
    update->set_uuid(UUID::random().toBytes());
    message.set_pid(self());

    VLOG(1) << "Executor sending status update " << *update;

    // Capture the status update.
    updates[UUID::fromBytes(update->uuid())] = *update;

    send(slave, message);
  }
Ejemplo n.º 3
0
  void sendStatusUpdate(const TaskStatus& status)
  {
    StatusUpdateMessage message;
    StatusUpdate* update = message.mutable_update();
    update->mutable_framework_id()->MergeFrom(frameworkId);
    update->mutable_executor_id()->MergeFrom(executorId);
    update->mutable_slave_id()->MergeFrom(slaveId);
    update->mutable_status()->MergeFrom(status);
    update->set_timestamp(Clock::now().secs());
    update->mutable_status()->set_timestamp(update->timestamp());
    update->set_uuid(UUID::random().toBytes());
    message.set_pid(self());

    // Incoming status update might come from an executor which has not set
    // slave id in TaskStatus. Set/overwrite slave id.
    update->mutable_status()->mutable_slave_id()->CopyFrom(slaveId);

    VLOG(1) << "Executor sending status update " << *update;

    // Capture the status update.
    updates[UUID::fromBytes(update->uuid())] = *update;

    send(slave, message);
  }
Ejemplo n.º 4
0
// The purpose of this test is to ensure that when slaves are removed
// from the master, and then attempt to send status updates, we send
// a ShutdownMessage to the slave. Why? Because during a network
// partition, the master will remove a partitioned slave, thus sending
// its tasks to LOST. At this point, when the partition is removed,
// the slave may attempt to send updates if it was unaware that the
// master removed it. We've already notified frameworks that these
// tasks were LOST, so we have to have the slave shut down.
TEST_F(PartitionTest, PartitionedSlaveStatusUpdates)
{
  master::Flags masterFlags = CreateMasterFlags();
  Try<Owned<cluster::Master>> master = StartMaster(masterFlags);
  ASSERT_SOME(master);

  // Allow the master to PING the slave, but drop all PONG messages
  // from the slave. Note that we don't match on the master / slave
  // PIDs because it's actually the SlaveObserver Process that sends
  // the pings.
  Future<Message> ping = FUTURE_MESSAGE(
      Eq(PingSlaveMessage().GetTypeName()), _, _);

  DROP_PROTOBUFS(PongSlaveMessage(), _, _);

  Future<SlaveRegisteredMessage> slaveRegisteredMessage =
    FUTURE_PROTOBUF(SlaveRegisteredMessage(), _, _);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);
  TestContainerizer containerizer(&exec);

  Owned<MasterDetector> detector = master.get()->createDetector();
  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), &containerizer);
  ASSERT_SOME(slave);

  AWAIT_READY(slaveRegisteredMessage);
  SlaveID slaveId = slaveRegisteredMessage.get().slave_id();

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL);

  Future<FrameworkID> frameworkId;
  EXPECT_CALL(sched, registered(&driver, _, _))
    .WillOnce(FutureArg<1>(&frameworkId));

  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillRepeatedly(Return());

  driver.start();

  AWAIT_READY(frameworkId);

  // Drop the first shutdown message from the master (simulated
  // partition), allow the second shutdown message to pass when
  // the slave sends an update.
  Future<ShutdownMessage> shutdownMessage =
    DROP_PROTOBUF(ShutdownMessage(), _, slave.get()->pid);

  EXPECT_CALL(sched, offerRescinded(&driver, _))
    .WillRepeatedly(Return());

  Future<Nothing> slaveLost;
  EXPECT_CALL(sched, slaveLost(&driver, _))
    .WillOnce(FutureSatisfy(&slaveLost));

  Clock::pause();

  // Now, induce a partition of the slave by having the master
  // timeout the slave.
  size_t pings = 0;
  while (true) {
    AWAIT_READY(ping);
    pings++;
    if (pings == masterFlags.max_slave_ping_timeouts) {
     break;
    }
    ping = FUTURE_MESSAGE(Eq(PingSlaveMessage().GetTypeName()), _, _);
    Clock::advance(masterFlags.slave_ping_timeout);
    Clock::settle();
  }

  Clock::advance(masterFlags.slave_ping_timeout);
  Clock::settle();

  // Wait for the master to attempt to shut down the slave.
  AWAIT_READY(shutdownMessage);

  // The master will notify the framework that the slave was lost.
  AWAIT_READY(slaveLost);

  shutdownMessage = FUTURE_PROTOBUF(ShutdownMessage(), _, slave.get()->pid);

  // At this point, the slave still thinks it's registered, so we
  // simulate a status update coming from the slave.
  TaskID taskId;
  taskId.set_value("task_id");
  const StatusUpdate& update = protobuf::createStatusUpdate(
      frameworkId.get(),
      slaveId,
      taskId,
      TASK_RUNNING,
      TaskStatus::SOURCE_SLAVE,
      UUID::random());

  StatusUpdateMessage message;
  message.mutable_update()->CopyFrom(update);
  message.set_pid(stringify(slave.get()->pid));

  process::post(master.get()->pid, message);

  // The master should shutdown the slave upon receiving the update.
  AWAIT_READY(shutdownMessage);

  Clock::resume();

  driver.stop();
  driver.join();
}