Beispiel #1
0
  void statusUpdate(const StatusUpdate& update, const UPID& pid)
  {
    const TaskStatus& status = update.status();

    VLOG(1) << "Status update: task " << status.task_id()
            << " of framework " << update.framework_id()
            << " is now in state " << status.state();

    CHECK(frameworkId == update.framework_id());

    // TODO(benh): Note that this maybe a duplicate status update!
    // Once we get support to try and have a more consistent view
    // of what's running in the cluster, we'll just let this one
    // slide. The alternative is possibly dealing with a scheduler
    // failover and not correctly giving the scheduler it's status
    // update, which seems worse than giving a status update
    // multiple times (of course, if a scheduler re-uses a TaskID,
    // that could be bad.

    invoke(bind(&Scheduler::statusUpdate, sched, driver, cref(status)));

    if (pid) {
      // Acknowledge the message (we do this last, after we invoked
      // the scheduler, if we did at all, in case it causes a crash,
      // since this way the message might get resent/routed after the
      // scheduler comes back online).
      StatusUpdateAcknowledgementMessage message;
      message.mutable_framework_id()->MergeFrom(frameworkId);
      message.mutable_slave_id()->MergeFrom(update.slave_id());
      message.mutable_task_id()->MergeFrom(status.task_id());
      message.set_uuid(update.uuid());
      send(pid, message);
    }
  }
  void _handle(const StatusUpdate& update, const StatusUpdateRecord::Type& type)
  {
    CHECK(error.isNone());

    if (type == StatusUpdateRecord::UPDATE) {
      // Record this update.
      received.insert(UUID::fromBytes(update.uuid()));

      // Add it to the pending updates queue.
      pending.push(update);
    } else {
      // Record this ACK.
      acknowledged.insert(UUID::fromBytes(update.uuid()));

      // Remove the corresponding update from the pending queue.
      pending.pop();

      if (!terminated) {
        terminated = protobuf::isTerminalState(update.status().state());
      }
    }
  }
Beispiel #3
0
  void statusUpdateAcknowledgement(const StatusUpdate& update, const UPID& pid)
  {
    if (aborted) {
      VLOG(1) << "Not sending status update acknowledgment message because "
              << "the driver is aborted!";
      return;
    }

    VLOG(2) << "Sending ACK for status update " << update << " to " << pid;

    StatusUpdateAcknowledgementMessage message;
    message.mutable_framework_id()->MergeFrom(framework.id());
    message.mutable_slave_id()->MergeFrom(update.slave_id());
    message.mutable_task_id()->MergeFrom(update.status().task_id());
    message.set_uuid(update.uuid());
    send(pid, message);
  }
Beispiel #4
0
  void sendStatusUpdate(const TaskStatus& status)
  {
    if (status.state() == TASK_STAGING) {
      VLOG(1) << "Executor is not allowed to send "
              << "TASK_STAGING status update. Aborting!";

      driver->abort();

      Stopwatch stopwatch;
      if (FLAGS_v >= 1) {
        stopwatch.start();
      }

      executor->error(driver, "Attempted to send TASK_STAGING status update");

      VLOG(1) << "Executor::error took " << stopwatch.elapsed();

      return;
    }

    StatusUpdateMessage message;
    StatusUpdate* update = message.mutable_update();
    update->mutable_framework_id()->MergeFrom(frameworkId);
    update->mutable_executor_id()->MergeFrom(executorId);
    update->mutable_slave_id()->MergeFrom(slaveId);
    update->mutable_status()->MergeFrom(status);
    update->set_timestamp(Clock::now().secs());
    update->set_uuid(UUID::random().toBytes());
    message.set_pid(self());

    VLOG(1) << "Executor sending status update " << *update;

    // Capture the status update.
    updates[UUID::fromBytes(update->uuid())] = *update;

    send(slave, message);
  }
Beispiel #5
0
  void sendStatusUpdate(const TaskStatus& status)
  {
    StatusUpdateMessage message;
    StatusUpdate* update = message.mutable_update();
    update->mutable_framework_id()->MergeFrom(frameworkId);
    update->mutable_executor_id()->MergeFrom(executorId);
    update->mutable_slave_id()->MergeFrom(slaveId);
    update->mutable_status()->MergeFrom(status);
    update->set_timestamp(Clock::now().secs());
    update->mutable_status()->set_timestamp(update->timestamp());
    update->set_uuid(UUID::random().toBytes());
    message.set_pid(self());

    // Incoming status update might come from an executor which has not set
    // slave id in TaskStatus. Set/overwrite slave id.
    update->mutable_status()->mutable_slave_id()->CopyFrom(slaveId);

    VLOG(1) << "Executor sending status update " << *update;

    // Capture the status update.
    updates[UUID::fromBytes(update->uuid())] = *update;

    send(slave, message);
  }
// This test verifies that status update manager ignores
// duplicate ACK for an earlier update when it is waiting
// for an ACK for a later update. This could happen when the
// duplicate ACK is for a retried update.
TEST_F(StatusUpdateManagerTest, IgnoreDuplicateStatusUpdateAck)
{
  Try<PID<Master> > master = StartMaster();
  ASSERT_SOME(master);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);

  slave::Flags flags = CreateSlaveFlags();
  flags.checkpoint = true;

  Try<PID<Slave> > slave = StartSlave(&exec, flags);
  ASSERT_SOME(slave);

  FrameworkInfo frameworkInfo; // Bug in gcc 4.1.*, must assign on next line.
  frameworkInfo = DEFAULT_FRAMEWORK_INFO;
  frameworkInfo.set_checkpoint(true); // Enable checkpointing.

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, frameworkInfo, master.get(), DEFAULT_CREDENTIAL);

  FrameworkID frameworkId;
  EXPECT_CALL(sched, registered(_, _, _))
    .WillOnce(SaveArg<1>(&frameworkId));

  Future<vector<Offer> > offers;
  EXPECT_CALL(sched, resourceOffers(_, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  EXPECT_NE(0u, offers.get().size());

  ExecutorDriver* execDriver;
  EXPECT_CALL(exec, registered(_, _, _, _))
      .WillOnce(SaveArg<0>(&execDriver));

  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING));

  // Drop the first update, so that status update manager
  // resends the update.
  Future<StatusUpdateMessage> statusUpdateMessage =
    DROP_PROTOBUF(StatusUpdateMessage(), master.get(), _);

  Clock::pause();

  driver.launchTasks(offers.get()[0].id(), createTasks(offers.get()[0]));

  AWAIT_READY(statusUpdateMessage);
  StatusUpdate update = statusUpdateMessage.get().update();

  Future<TaskStatus> status;
  EXPECT_CALL(sched, statusUpdate(_, _))
    .WillOnce(FutureArg<1>(&status));

  // This is the ACK for the retried update.
  Future<Nothing> ack =
    FUTURE_DISPATCH(_, &Slave::_statusUpdateAcknowledgement);

  Clock::advance(slave::STATUS_UPDATE_RETRY_INTERVAL);

  AWAIT_READY(status);

  EXPECT_EQ(TASK_RUNNING, status.get().state());

  AWAIT_READY(ack);

  // Now send TASK_FINISHED update so that the status update manager
  // is waiting for its ACK, which it never gets because we drop the
  // update.
  DROP_PROTOBUFS(StatusUpdateMessage(), master.get(), _);

  Future<Nothing> update2 = FUTURE_DISPATCH(_, &Slave::_statusUpdate);

  TaskStatus status2 = status.get();
  status2.set_state(TASK_FINISHED);

  execDriver->sendStatusUpdate(status2);

  AWAIT_READY(update2);

  // This is to catch the duplicate ack for TASK_RUNNING.
  Future<Nothing> duplicateAck =
      FUTURE_DISPATCH(_, &Slave::_statusUpdateAcknowledgement);

  // Now send a duplicate ACK for the TASK_RUNNING update.
  process::dispatch(
      slave.get(),
      &Slave::statusUpdateAcknowledgement,
      update.slave_id(),
      frameworkId,
      update.status().task_id(),
      update.uuid());

  AWAIT_READY(duplicateAck);

  Clock::resume();

  EXPECT_CALL(exec, shutdown(_))
    .Times(AtMost(1));

  driver.stop();
  driver.join();

  Shutdown();
}