void update(const TaskStatus& status)
  {
    CHECK_EQ(SUBSCRIBED, state);

    LOG(INFO)
      << "Task " << status.task_id().value()
      << " is in state " << TaskState_Name(status.state())
      << (status.has_message() ? " with message: " + status.message() : "");

    if (status.has_uuid()) {
      Call call;
      call.set_type(Call::ACKNOWLEDGE);

      CHECK(framework.has_id());
      call.mutable_framework_id()->CopyFrom(framework.id());

      Call::Acknowledge* acknowledge = call.mutable_acknowledge();
      acknowledge->mutable_agent_id()->CopyFrom(status.agent_id());
      acknowledge->mutable_task_id()->CopyFrom(status.task_id());
      acknowledge->set_uuid(status.uuid());

      mesos->send(call);
    }

    if (status.state() == TaskState::TASK_KILLED ||
        status.state() == TaskState::TASK_LOST ||
        status.state() == TaskState::TASK_FAILED ||
        status.state() == TaskState::TASK_ERROR) {
      ++metrics.abnormal_terminations;
    }
  }
// Example 2
  // Processes task status updates from the master.
  //
  // In non-long-running mode the framework exits on the first terminal
  // update: a TASK_FAILED caused by the memory limit "passes" the test
  // (abort); any other terminal state stops the driver.
  void statusUpdate(SchedulerDriver* driver, const TaskStatus& status)
  {
    if (!flags.long_running) {
      if (status.state() == TASK_FAILED &&
          status.reason() == TaskStatus::REASON_CONTAINER_LIMITATION_MEMORY) {
        // NOTE: We expect TASK_FAILED when this scheduler is launched by the
        // balloon_framework_test.sh shell script. The abort here ensures the
        // script considers the test result as "PASS".
        driver->abort();
      } else if (status.state() == TASK_FAILED ||
          status.state() == TASK_FINISHED ||
          status.state() == TASK_KILLED ||
          status.state() == TASK_LOST ||
          status.state() == TASK_ERROR) {
        driver->stop();
      }
    }

    // NOTE(review): this staleness filter runs *after* the shutdown logic
    // above, so a terminal update from an older task can still stop/abort
    // the driver in non-long-running mode — presumably intentional since
    // any terminal update ends the test run; confirm.
    if (stringify(tasksLaunched - 1) != status.task_id().value()) {
      // We might receive messages from older tasks. Ignore them.
      LOG(INFO) << "Ignoring status update from older task "
                << status.task_id();
      return;
    }

    // Update per-outcome metrics for the current task.
    switch (status.state()) {
      case TASK_FINISHED:
        taskActive = false;
        ++metrics.tasks_finished;
        break;
      case TASK_FAILED:
        taskActive = false;
        // An OOM kill is the outcome this framework deliberately provokes;
        // track it separately from other failures.
        if (status.reason() == TaskStatus::REASON_CONTAINER_LIMITATION_MEMORY) {
          ++metrics.tasks_oomed;
          break;
        }

        // NOTE: Fetching the executor (e.g. `--executor_uri`) may fail
        // occasionally if the URI is rate limited. This case is common
        // enough that it makes sense to track this failure metric separately.
        if (status.reason() == TaskStatus::REASON_CONTAINER_LAUNCH_FAILED) {
          ++metrics.launch_failures;
          break;
        }
        // Deliberate fall-through: a TASK_FAILED with any other reason is
        // counted as an abnormal termination below.
      case TASK_KILLED:
      case TASK_LOST:
      case TASK_ERROR:
        taskActive = false;

        ++metrics.abnormal_terminations;
        break;
      default:
        break;
    }
  }
// Example 3
 virtual void statusUpdate(
     SchedulerDriver* driver,
     const TaskStatus& status)
 {
   // This scheduler runs a single task; any other id is a bug.
   CHECK_EQ(name, status.task_id().value());

   cout << "Received status update " << status.state()
        << " for task " << status.task_id() << endl;

   // Nothing more will arrive after a terminal state, so shut down.
   const bool terminal = protobuf::isTerminalState(status.state());
   if (terminal) {
     driver->stop();
   }
 }
// Example 4
bool operator == (const TaskStatus& left, const TaskStatus& right)
{
    // Two statuses are equal iff every field compares equal. Only
    // operator== is assumed on the field types, hence the negated form.
    if (!(left.task_id() == right.task_id())) return false;
    if (!(left.state() == right.state())) return false;
    if (!(left.data() == right.data())) return false;
    if (!(left.message() == right.message())) return false;
    if (!(left.slave_id() == right.slave_id())) return false;
    if (!(left.timestamp() == right.timestamp())) return false;
    if (!(left.executor_id() == right.executor_id())) return false;
    if (!(left.healthy() == right.healthy())) return false;
    if (!(left.source() == right.source())) return false;
    if (!(left.reason() == right.reason())) return false;
    return left.uuid() == right.uuid();
}
// Verifies that launching a task that declares no resources is rejected:
// the scheduler should receive a TASK_LOST update with an explanatory
// message rather than the task being run.
TEST(ResourceOffersTest, TaskUsesNoResources)
{
  ASSERT_TRUE(GTEST_IS_THREADSAFE);

  // NOTE(review): launch() arguments presumably mean (numSlaves, cpus,
  // mem, quiet) — confirm against local::launch's declaration.
  PID<Master> master = local::launch(1, 2, 1 * Gigabyte, false);

  MockScheduler sched;
  MesosSchedulerDriver driver(&sched, "", DEFAULT_EXECUTOR_INFO, master);

  vector<Offer> offers;

  trigger resourceOffersCall;

  EXPECT_CALL(sched, registered(&driver, _))
    .Times(1);

  // Capture the first offer batch and signal the trigger; ignore later ones.
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(DoAll(SaveArg<1>(&offers),
                    Trigger(&resourceOffersCall)))
    .WillRepeatedly(Return());

  driver.start();

  WAIT_UNTIL(resourceOffersCall);

  EXPECT_NE(0, offers.size());

  // Deliberately omit any resources from the task description.
  TaskDescription task;
  task.set_name("");
  task.mutable_task_id()->set_value("1");
  task.mutable_slave_id()->MergeFrom(offers[0].slave_id());

  vector<TaskDescription> tasks;
  tasks.push_back(task);

  TaskStatus status;

  trigger statusUpdateCall;

  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(DoAll(SaveArg<1>(&status),
                    Trigger(&statusUpdateCall)));

  driver.launchTasks(offers[0].id(), tasks);

  WAIT_UNTIL(statusUpdateCall);

  // The resourceless task must be dropped with TASK_LOST and a reason.
  EXPECT_EQ(task.task_id(), status.task_id());
  EXPECT_EQ(TASK_LOST, status.state());
  EXPECT_TRUE(status.has_message());
  EXPECT_EQ("Task uses no resources", status.message());

  driver.stop();
  driver.join();

  local::shutdown();
}
// Example 6
  virtual void statusUpdate(SchedulerDriver* driver, const TaskStatus& status)
  {
    const int taskId = lexical_cast<int>(status.task_id().value());

    cout << "Task " << taskId << " is in state " << status.state() << endl;

    // Count completions; once every launched task has finished, exit.
    if (status.state() == TASK_FINISHED) {
      tasksFinished++;
    }

    if (tasksFinished == totalTasks) {
      driver->stop();
    }
  }
// Example 7
  virtual void statusUpdate(
      SchedulerDriver* driver,
      const TaskStatus& status)
  {
    // This scheduler runs exactly one task; anything else is a bug.
    CHECK_EQ(name, status.task_id().value());

    cout << "Received status update " << status.state()
         << " for task " << status.task_id() << endl;

    cout << "RECEIVED UPDATE:" << endl;
    cout << "Message: " << status.message() << endl;

    // A finished task carries its result in the data field; dump it.
    const bool finished = (status.state() == TASK_FINISHED);
    if (finished) {
      cout << "=== Dumping data ===" << endl;
      cout << status.data() << endl;
      cout << "====================" << endl;
    }

    // Terminal state: nothing more will arrive, shut the driver down.
    if (mesos::internal::protobuf::isTerminalState(status.state())) {
      driver->stop();
    }
  }
// Example 8
void CephSchedulerAgent<T>::statusUpdate(
      T* driver,
      const TaskStatus& status)
{
  LOG(INFO) << "Got status update from " << status.source();
  string taskId = status.task_id().value();

  if (status.state() == TASK_RUNNING) {
    LOG(INFO) << taskId << " is Running!";
    stateMachine->updateTaskToRunning(taskId);

    // A running task may piggyback a "<messageId>.<payload>" string.
    if (status.has_message()) {
      vector<string> tokens = StringUtil::explode(status.message(), '.');
      if ((MessageToScheduler)lexical_cast<int>(tokens[0])
          == MessageToScheduler::CONSUMED_OSD_ID) {
        string consumedOSDId = tokens[1];
        LOG(INFO) << "Got message of \"consumed_OSD_ID\": "<<consumedOSDId;
      }
    }
  } else if (status.state() == TASK_STARTING) {
    LOG(INFO) << taskId << " is Waiting OSDID, ready for assign osd id!";
    stateMachine->updateTaskToWaitingOSDID(taskId);
  } else if (status.state() == TASK_FAILED) {
    LOG(INFO) << taskId << " failed";
    stateMachine->updateTaskToFailed(taskId);
    //TODO: if has message , add the OSD ID back to StateMachine
  } else if (status.state() == TASK_FINISHED) {
    // Only the disk executor reports a finished status.
    if (status.has_message()) {
      vector<string> tokens = StringUtil::explode(status.message(), '.');
      if ((MessageToScheduler)lexical_cast<int>(tokens[0])
          == MessageToScheduler::DISK_READY) {
        string failedDevsStr = tokens[1];
        LOG(INFO) << "Got message of \"DISK_READY\": "<<failedDevsStr;

        // Payload format (presumably): "<hostname>:<dev1,dev2,...>",
        // where "-" means no device list — TODO confirm with executor.
        vector<string> failedDevs = StringUtil::explode(failedDevsStr, ':');
        string hostname = failedDevs[0];

        // BUGFIX: the original declared a second, shadowing `devs`
        // inside the if-branch, so the parsed device list was always
        // discarded and an empty vector was handed to
        // updateDiskPartition(). Assign to the outer variable instead.
        vector<string> devs;
        if ("-" != failedDevs[1]) {
          devs = StringUtil::explode(failedDevs[1], ',');
        }

        HostConfig* hostconfig = stateMachine->getConfig(hostname);
        //TODO: get this "4" from yml config
        hostconfig->updateDiskPartition(devs, lexical_cast<int>("4"));
        hostconfig->setDiskPreparationDone();
      }
    }
  }
}
// Example 9
// Handles task status updates for the Chapel batch job. Any task failure,
// loss, or kill tears down the entire job (all tasks are terminated); a
// normal run ends when the finished count reaches the launched count.
void ChapelScheduler::statusUpdate(SchedulerDriver* driver, const TaskStatus& status) {

   if (status.state() == TASK_FINISHED) {
      tasksFinished+=1;
      cout << "ChapelScheduler::statusUpdate\tTask " << status.task_id().value() << " finished of # tasksLaunched " << tasksLaunched << " # finished " << tasksFinished << endl;
   }

   if (status.state() == TASK_FAILED) {
      cout << "ChapelScheduler::statusUpdate\tTask " << status.task_id().value() << " FAILED!" << endl;
      terminateAllTasks(schedulerDriver);
      taskExecError=true;
      driver->stop();
   }

   if (status.state() == TASK_LOST) {
      cout << "ChapelScheduler::statusUpdate\tTask " << status.task_id().value() << " LOST!" << endl;
      terminateAllTasks(schedulerDriver);
      taskExecError=true;
      // Forget the lost task so it is no longer tracked as launched.
      map<string, TaskInfo>::iterator rm = launchedTsks.find(status.task_id().value());
      if(rm != launchedTsks.end()) { launchedTsks.erase(rm); }
   }

   if (status.state() == TASK_KILLED) {
      cout << "ChapelScheduler::statusUpdate\tTask " << status.task_id().value() << " KILLED!" << endl;
      terminateAllTasks(schedulerDriver);
      taskExecError=true;
      map<string, TaskInfo>::iterator rm = launchedTsks.find(status.task_id().value());
      if(rm != launchedTsks.end()) { launchedTsks.erase(rm); }
   }

   cout << "ChapelScheduler::statusUpdate\tMet termination criteria?\t" << (tasksFinished == tasksLaunched) << " " << tasksFinished << " " << tasksLaunched << " " << taskExecError << endl;

   if( taskExecError || ((tasksFinished == tasksLaunched) || (tasksFinished == cpusReq))) {

      // NOTE(review): the message below talks about too few nodes being
      // launched, but the condition reads tasksLaunched < tasksFinished
      // (i.e. more finished than launched) — looks inverted; confirm
      // intended meaning before changing.
      if(tasksLaunched < tasksFinished) {
         cout << "ChapelScheduler::statusUpdate\tError getting nodes launched for the batch job! Try re-running the code!" << endl;
      }

      // Wait to receive any pending framework messages
      //
      // If some framework messages are lost, it may hang indefinitely
      // to solve the indefinite "hang", numAttempts caps out and then 
      // terminates the while loop.
      //
      // NOTE(review): the loop body never updates tasksFinished or
      // tasksLaunched itself — presumably they are mutated by concurrent
      // callback invocations while this thread sleeps; verify the driver's
      // threading model.
      int attempts = 0;
      while(tasksFinished != tasksLaunched && attempts < numAttempts) {
         cout << "ChapelScheduler::statusUpdate\tExecution halted! Waiting for remote nodes to catch up! Attempts\t" << attempts << endl;
         sleep(1);
         attempts+=1;
      }

      cout << "All Chapel task for this framework instance are complete! Shutting down!" << endl;
      driver->stop();
   }
}
// Example 10
	virtual void statusUpdate(SchedulerDriver* driver, const TaskStatus& status)  {
		const string taskId = status.task_id().value();

		cout << "Container " << taskId << " is in state " << taskState[status.state()] << endl;

		// Track completed containers; a single failure aborts the run.
		if (status.state() == TASK_FINISHED) {
			containersFinished++;
		}

		if (status.state() == TASK_FAILED) {
			driver->stop();
		}

		cout << "Total complete: " << stringify(containersFinished) << " out of " << stringify(containersAssigned) << endl;

		// Every assigned container has completed: we are done.
		if (containersFinished == containersAssigned) {
			driver->stop();
		}
	}
// Example 11
  virtual void statusUpdate(SchedulerDriver* driver, const TaskStatus& status)
  {
    // Record each task that completes.
    if (status.state() == TASK_FINISHED) {
      cout << "Task " << status.task_id().value() << " finished" << endl;
      tasksFinished++;
    }

    // Shut down only when all work is done and no URLs remain queued.
    const bool allWorkDone =
      tasksFinished == tasksLaunched &&
      crawlQueue.empty() &&
      renderQueue.empty();

    if (allWorkDone) {
      // Wait to receive any pending framework messages.
      // NOTE: if some framework messages are lost, this may hang indefinitely.
      while (frameworkMessagesReceived != tasksFinished) {
        sleep(1);
      }
      shutdown();
      driver->stop();
    }
  }
// Example 12
  virtual void statusUpdate(SchedulerDriver* driver, const TaskStatus& status)
  {
    const int taskId = lexical_cast<int>(status.task_id().value());

    cout << "Task " << taskId << " is in state " << status.state() << endl;

    // Only terminal states need bookkeeping.
    switch (status.state()) {
      case TASK_FINISHED:
      case TASK_FAILED:
      case TASK_KILLED:
      case TASK_LOST:
        tasks[taskId].finished = true;
        tasksFinished++;

        if (status.state() == TASK_FINISHED) {
          successfulTasks++;
        }

        // Stop once every task has reached a terminal state.
        if (tasksFinished == tasks.size()) {
          driver->stop();
        }
        break;
      default:
        break;
    }
  }
// Example 13
 virtual void statusUpdate(SchedulerDriver* driver, const TaskStatus& status)
 {
   // Purely informational: log the task's numeric id and its new state.
   const int taskId = lexical_cast<int>(status.task_id().value());
   cout << "Task " << taskId << " is in state " << status.state() << endl;
 }
// Example 14
  // Processes task status updates, keeping per-outcome metrics.
  //
  // In `run_once` mode the driver is stopped on success and aborted on any
  // failure; otherwise the task slot is simply marked inactive so a new
  // task can be launched.
  void statusUpdate(SchedulerDriver* driver, const TaskStatus& status)
  {
    if (stringify(tasksLaunched - 1) != status.task_id().value()) {
      // We might receive messages from older tasks. Ignore them.
      LOG(INFO) << "Ignoring status update from older task "
                << status.task_id();
      return;
    }

    switch (status.state()) {
    case TASK_FINISHED:
      if (flags.run_once) {
          driver->stop();
          break;
      }

      taskActive = false;
      ++metrics.tasks_finished;
      break;
    case TASK_FAILED:
      if (flags.run_once) {
          driver->abort();
          break;
      }

      taskActive = false;

      // Hitting the disk quota is the outcome this framework provokes;
      // count it separately from other failures.
      if (status.reason() == TaskStatus::REASON_CONTAINER_LIMITATION_DISK) {
        ++metrics.tasks_disk_full;

        // Increment abnormal_termination metric counter in case the task
        // wasn't supposed to consume beyond its disk quota but still got
        // terminated because of disk overuse.
        if (flags.disk_use_limit >= DISK_PER_TASK) {
          ++metrics.abnormal_terminations;
        }

        break;
      }

      // Any other failure reason counts as an abnormal termination.
      ++metrics.abnormal_terminations;
      break;
    case TASK_KILLED:
    case TASK_LOST:
    case TASK_ERROR:
    case TASK_DROPPED:
    case TASK_UNREACHABLE:
    case TASK_GONE:
    case TASK_GONE_BY_OPERATOR:
      if (flags.run_once) {
        driver->abort();
      }

      taskActive = false;
      ++metrics.abnormal_terminations;
      break;
    // Non-terminal states require no action.
    case TASK_STARTING:
    case TASK_RUNNING:
    case TASK_STAGING:
    case TASK_KILLING:
    case TASK_UNKNOWN:
      break;
    }
  }
// Verifies that resources consumed by an invalid task description (zero
// CPUs here) are returned to the pool: the first scheduler gets TASK_LOST,
// and a second scheduler subsequently receives an offer for the same
// resources.
TEST(ResourceOffersTest, ResourcesGetReofferedAfterTaskDescriptionError)
{
  ASSERT_TRUE(GTEST_IS_THREADSAFE);

  // NOTE(review): launch() arguments presumably mean (numSlaves, cpus,
  // mem, quiet) — confirm against local::launch's declaration.
  PID<Master> master = local::launch(1, 2, 1 * Gigabyte, false);

  MockScheduler sched1;
  MesosSchedulerDriver driver1(&sched1, "", DEFAULT_EXECUTOR_INFO, master);

  vector<Offer> offers;

  trigger sched1ResourceOffersCall;

  EXPECT_CALL(sched1, registered(&driver1, _))
    .Times(1);

  // Capture the first offer batch and signal the trigger; ignore later ones.
  EXPECT_CALL(sched1, resourceOffers(&driver1, _))
    .WillOnce(DoAll(SaveArg<1>(&offers),
                    Trigger(&sched1ResourceOffersCall)))
    .WillRepeatedly(Return());

  driver1.start();

  WAIT_UNTIL(sched1ResourceOffersCall);

  EXPECT_NE(0, offers.size());

  TaskDescription task;
  task.set_name("");
  task.mutable_task_id()->set_value("1");
  task.mutable_slave_id()->MergeFrom(offers[0].slave_id());

  // Zero CPUs makes the task description invalid.
  Resource* cpus = task.add_resources();
  cpus->set_name("cpus");
  cpus->set_type(Value::SCALAR);
  cpus->mutable_scalar()->set_value(0);

  Resource* mem = task.add_resources();
  mem->set_name("mem");
  mem->set_type(Value::SCALAR);
  mem->mutable_scalar()->set_value(1 * Gigabyte);

  vector<TaskDescription> tasks;
  tasks.push_back(task);

  TaskStatus status;

  trigger sched1StatusUpdateCall;

  EXPECT_CALL(sched1, statusUpdate(&driver1, _))
    .WillOnce(DoAll(SaveArg<1>(&status),
                    Trigger(&sched1StatusUpdateCall)));

  driver1.launchTasks(offers[0].id(), tasks);

  WAIT_UNTIL(sched1StatusUpdateCall);

  // The invalid task must be dropped with TASK_LOST and a reason message.
  EXPECT_EQ(task.task_id(), status.task_id());
  EXPECT_EQ(TASK_LOST, status.state());
  EXPECT_TRUE(status.has_message());
  EXPECT_EQ("Task uses invalid resources", status.message());

  driver1.stop();
  driver1.join();

  // A second framework should now be offered the freed resources.
  MockScheduler sched2;
  MesosSchedulerDriver driver2(&sched2, "", DEFAULT_EXECUTOR_INFO, master);

  trigger sched2ResourceOffersCall;

  EXPECT_CALL(sched2, registered(&driver2, _))
    .Times(1);

  EXPECT_CALL(sched2, resourceOffers(&driver2, _))
    .WillOnce(Trigger(&sched2ResourceOffersCall))
    .WillRepeatedly(Return());

  // The offer may be rescinded during teardown; tolerate at most one.
  EXPECT_CALL(sched2, offerRescinded(&driver2, _))
    .Times(AtMost(1));

  driver2.start();

  WAIT_UNTIL(sched2ResourceOffersCall);

  driver2.stop();
  driver2.join();

  local::shutdown();
}