Beispiel #1
0
  // Handles a task status update delivered through the scheduler driver.
  //
  // Unless `flags.long_running` is set, the driver is shut down first: it is
  // aborted for a TASK_FAILED whose reason is the container memory limit and
  // stopped for any other TASK_FAILED, TASK_FINISHED, TASK_KILLED, TASK_LOST
  // or TASK_ERROR. This happens before the task ID is examined.
  //
  // Afterwards, updates whose task ID differs from `tasksLaunched - 1` are
  // logged and dropped; the remaining ones update `taskActive` and the
  // metrics counters.
  void statusUpdate(SchedulerDriver* driver, const TaskStatus& status)
  {
    const auto taskState = status.state();

    if (!flags.long_running) {
      switch (taskState) {
        case TASK_FAILED:
          if (status.reason() ==
              TaskStatus::REASON_CONTAINER_LIMITATION_MEMORY) {
            // NOTE: We expect TASK_FAILED when this scheduler is launched by
            // the balloon_framework_test.sh shell script. The abort here
            // ensures the script considers the test result as "PASS".
            driver->abort();
          } else {
            driver->stop();
          }
          break;
        case TASK_FINISHED:
        case TASK_KILLED:
        case TASK_LOST:
        case TASK_ERROR:
          driver->stop();
          break;
        default:
          break;
      }
    }

    const bool fromCurrentTask =
      status.task_id().value() == stringify(tasksLaunched - 1);

    if (!fromCurrentTask) {
      // Updates for previously launched tasks can still arrive; skip them.
      LOG(INFO) << "Ignoring status update from older task "
                << status.task_id();
      return;
    }

    if (taskState == TASK_FINISHED) {
      taskActive = false;
      ++metrics.tasks_finished;
      return;
    }

    const bool endedWithoutFinishing =
      taskState == TASK_FAILED ||
      taskState == TASK_KILLED ||
      taskState == TASK_LOST ||
      taskState == TASK_ERROR;

    if (!endedWithoutFinishing) {
      // Every other state leaves the bookkeeping untouched.
      return;
    }

    taskActive = false;

    if (taskState == TASK_FAILED) {
      // Failures caused by the memory limit are counted on their own and
      // are not treated as abnormal terminations.
      if (status.reason() == TaskStatus::REASON_CONTAINER_LIMITATION_MEMORY) {
        ++metrics.tasks_oomed;
        return;
      }

      // NOTE: Fetching the executor (e.g. `--executor_uri`) may fail
      // occasionally if the URI is rate limited. This case is common
      // enough that it makes sense to track this failure metric separately.
      if (status.reason() == TaskStatus::REASON_CONTAINER_LAUNCH_FAILED) {
        ++metrics.launch_failures;
        return;
      }
    }

    // Any remaining TASK_FAILED, and every TASK_KILLED / TASK_LOST /
    // TASK_ERROR, counts as an abnormal termination.
    ++metrics.abnormal_terminations;
  }
Beispiel #2
0
// Equality for two TaskStatus messages: returns true only when every field
// compared below is equal on both sides. Fields are checked in a fixed order
// and the first mismatch ends the comparison.
bool operator == (const TaskStatus& left, const TaskStatus& right)
{
    if (!(left.task_id() == right.task_id())) {
        return false;
    }

    if (!(left.state() == right.state())) {
        return false;
    }

    if (!(left.data() == right.data())) {
        return false;
    }

    if (!(left.message() == right.message())) {
        return false;
    }

    if (!(left.slave_id() == right.slave_id())) {
        return false;
    }

    if (!(left.timestamp() == right.timestamp())) {
        return false;
    }

    if (!(left.executor_id() == right.executor_id())) {
        return false;
    }

    if (!(left.healthy() == right.healthy())) {
        return false;
    }

    if (!(left.source() == right.source())) {
        return false;
    }

    if (!(left.reason() == right.reason())) {
        return false;
    }

    return left.uuid() == right.uuid();
}
Beispiel #3
0
  void update(const TaskStatus& status)
  {
    CHECK_EQ(SUBSCRIBED, state);

    LOG(INFO)
      << "Task " << status.task_id().value()
      << " is in state " << TaskState_Name(status.state())
      << (status.has_message() ? " with message: " + status.message() : "");

    if (status.has_uuid()) {
      Call call;
      call.set_type(Call::ACKNOWLEDGE);

      CHECK(framework.has_id());
      call.mutable_framework_id()->CopyFrom(framework.id());

      Call::Acknowledge* acknowledge = call.mutable_acknowledge();
      acknowledge->mutable_agent_id()->CopyFrom(status.agent_id());
      acknowledge->mutable_task_id()->CopyFrom(status.task_id());
      acknowledge->set_uuid(status.uuid());

      mesos->send(call);
    }

    if (status.state() == TaskState::TASK_KILLED ||
        status.state() == TaskState::TASK_LOST ||
        status.state() == TaskState::TASK_FAILED ||
        status.state() == TaskState::TASK_ERROR) {
      // Launch on an invalid offer should not be
      // counted as abnormal termination.
      if (status.reason() != TaskStatus::REASON_INVALID_OFFERS) {
        ++metrics.abnormal_terminations;
      }
    }
  }
Beispiel #4
0
  // Prints the state, source, reason and (if set) message of a task status
  // update to stdout. Once the task reaches a terminal state, as decided by
  // `protobuf::isTerminalState`, the driver is aborted for TASK_FAILED and
  // stopped for every other terminal state.
  virtual void statusUpdate(SchedulerDriver* driver, const TaskStatus& status)
  {
    const auto taskState = status.state();

    std::cout << "Task in state " << taskState << std::endl;
    std::cout << "Source: " << status.source() << std::endl;
    std::cout << "Reason: " << status.reason() << std::endl;
    if (status.has_message()) {
      std::cout << "Message: " << status.message() << std::endl;
    }

    if (!protobuf::isTerminalState(taskState)) {
      // The task is still in progress; nothing else to do.
      return;
    }

    if (taskState != TASK_FAILED) {
      driver->stop();
      return;
    }

    // NOTE: We expect TASK_FAILED here. The abort here ensures the shell
    // script invoking this test, considers the test result as 'PASS'.
    driver->abort();
  }
  // Handles a task status update delivered through the scheduler driver.
  //
  // Updates whose task ID differs from `tasksLaunched - 1` are logged and
  // dropped. For the remaining ones:
  //   - with `flags.run_once` set, TASK_FINISHED stops the driver and the
  //     other terminating states listed below abort it;
  //   - otherwise `taskActive` is cleared and the matching metrics counter
  //     is incremented.
  void statusUpdate(SchedulerDriver* driver, const TaskStatus& status)
  {
    const bool fromCurrentTask =
      status.task_id().value() == stringify(tasksLaunched - 1);

    if (!fromCurrentTask) {
      // Updates for previously launched tasks can still arrive; skip them.
      LOG(INFO) << "Ignoring status update from older task "
                << status.task_id();
      return;
    }

    // Each handled state is listed explicitly; there is no `default` label.
    switch (status.state()) {
      case TASK_STAGING:
      case TASK_STARTING:
      case TASK_RUNNING:
      case TASK_KILLING:
      case TASK_UNKNOWN:
        // Nothing to record for these states.
        break;

      case TASK_FINISHED:
        if (flags.run_once) {
          driver->stop();
        } else {
          taskActive = false;
          ++metrics.tasks_finished;
        }
        break;

      case TASK_FAILED: {
        if (flags.run_once) {
          driver->abort();
          break;
        }

        taskActive = false;

        const bool hitDiskLimit =
          status.reason() == TaskStatus::REASON_CONTAINER_LIMITATION_DISK;

        if (hitDiskLimit) {
          ++metrics.tasks_disk_full;
        }

        // A disk-limit failure only counts as an abnormal termination when
        // the task wasn't supposed to consume beyond its disk quota but
        // still got terminated because of disk overuse. Every other failure
        // always counts.
        if (!hitDiskLimit || flags.disk_use_limit >= DISK_PER_TASK) {
          ++metrics.abnormal_terminations;
        }

        break;
      }

      case TASK_KILLED:
      case TASK_LOST:
      case TASK_ERROR:
      case TASK_DROPPED:
      case TASK_UNREACHABLE:
      case TASK_GONE:
      case TASK_GONE_BY_OPERATOR:
        // Unlike the branches above, the bookkeeping below still runs after
        // the driver has been aborted.
        if (flags.run_once) {
          driver->abort();
        }

        taskActive = false;
        ++metrics.abnormal_terminations;
        break;
    }
  }