void update(const TaskStatus& status)
  {
    CHECK_EQ(SUBSCRIBED, state);

    LOG(INFO)
      << "Task " << status.task_id().value()
      << " is in state " << TaskState_Name(status.state())
      << (status.has_message() ? " with message: " + status.message() : "");

    if (status.has_uuid()) {
      Call call;
      call.set_type(Call::ACKNOWLEDGE);

      CHECK(framework.has_id());
      call.mutable_framework_id()->CopyFrom(framework.id());

      Call::Acknowledge* acknowledge = call.mutable_acknowledge();
      acknowledge->mutable_agent_id()->CopyFrom(status.agent_id());
      acknowledge->mutable_task_id()->CopyFrom(status.task_id());
      acknowledge->set_uuid(status.uuid());

      mesos->send(call);
    }

    if (status.state() == TaskState::TASK_KILLED ||
        status.state() == TaskState::TASK_LOST ||
        status.state() == TaskState::TASK_FAILED ||
        status.state() == TaskState::TASK_ERROR) {
      ++metrics.abnormal_terminations;
    }
  }
Example #2
0
  void statusUpdate(SchedulerDriver* driver, const TaskStatus& status)
  {
    if (!flags.long_running) {
      if (status.state() == TASK_FAILED &&
          status.reason() == TaskStatus::REASON_CONTAINER_LIMITATION_MEMORY) {
        // NOTE: We expect TASK_FAILED when this scheduler is launched by the
        // balloon_framework_test.sh shell script. The abort here ensures the
        // script considers the test result as "PASS".
        driver->abort();
      } else if (status.state() == TASK_FAILED ||
          status.state() == TASK_FINISHED ||
          status.state() == TASK_KILLED ||
          status.state() == TASK_LOST ||
          status.state() == TASK_ERROR) {
        driver->stop();
      }
    }

    if (stringify(tasksLaunched - 1) != status.task_id().value()) {
      // We might receive messages from older tasks. Ignore them.
      LOG(INFO) << "Ignoring status update from older task "
                << status.task_id();
      return;
    }

    switch (status.state()) {
      case TASK_FINISHED:
        taskActive = false;
        ++metrics.tasks_finished;
        break;
      case TASK_FAILED:
        taskActive = false;
        if (status.reason() == TaskStatus::REASON_CONTAINER_LIMITATION_MEMORY) {
          ++metrics.tasks_oomed;
          break;
        }

        // NOTE: Fetching the executor (e.g. `--executor_uri`) may fail
        // occasionally if the URI is rate limited. This case is common
        // enough that it makes sense to track this failure metric separately.
        if (status.reason() == TaskStatus::REASON_CONTAINER_LAUNCH_FAILED) {
          ++metrics.launch_failures;
          break;
        }
      case TASK_KILLED:
      case TASK_LOST:
      case TASK_ERROR:
        taskActive = false;

        ++metrics.abnormal_terminations;
        break;
      default:
        break;
    }
  }
Example #3
0
void ChapelScheduler::statusUpdate(SchedulerDriver* driver, const TaskStatus& status) {

   if (status.state() == TASK_FINISHED) {
      tasksFinished+=1;
      cout << "ChapelScheduler::statusUpdate\tTask " << status.task_id().value() << " finished of # tasksLaunched " << tasksLaunched << " # finished " << tasksFinished << endl;
   }

   if (status.state() == TASK_FAILED) {
      cout << "ChapelScheduler::statusUpdate\tTask " << status.task_id().value() << " FAILED!" << endl;
      terminateAllTasks(schedulerDriver);
      taskExecError=true;
      driver->stop();
   }

   if (status.state() == TASK_LOST) {
      cout << "ChapelScheduler::statusUpdate\tTask " << status.task_id().value() << " LOST!" << endl;
      terminateAllTasks(schedulerDriver);
      taskExecError=true;
      map<string, TaskInfo>::iterator rm = launchedTsks.find(status.task_id().value());
      if(rm != launchedTsks.end()) { launchedTsks.erase(rm); }
   }

   if (status.state() == TASK_KILLED) {
      cout << "ChapelScheduler::statusUpdate\tTask " << status.task_id().value() << " KILLED!" << endl;
      terminateAllTasks(schedulerDriver);
      taskExecError=true;
      map<string, TaskInfo>::iterator rm = launchedTsks.find(status.task_id().value());
      if(rm != launchedTsks.end()) { launchedTsks.erase(rm); }
   }

   cout << "ChapelScheduler::statusUpdate\tMet termination criteria?\t" << (tasksFinished == tasksLaunched) << " " << tasksFinished << " " << tasksLaunched << " " << taskExecError << endl;

   if( taskExecError || ((tasksFinished == tasksLaunched) || (tasksFinished == cpusReq))) {

      if(tasksLaunched < tasksFinished) {
         cout << "ChapelScheduler::statusUpdate\tError getting nodes launched for the batch job! Try re-running the code!" << endl;
      }

      // Wait to receive any pending framework messages
      //
      // If some framework messages are lost, it may hang indefinitely
      // to solve the indefinite "hang", numAttempts caps out and then 
      // terminates the while loop.
      //
      int attempts = 0;
      while(tasksFinished != tasksLaunched && attempts < numAttempts) {
         cout << "ChapelScheduler::statusUpdate\tExecution halted! Waiting for remote nodes to catch up! Attempts\t" << attempts << endl;
         sleep(1);
         attempts+=1;
      }

      cout << "All Chapel task for this framework instance are complete! Shutting down!" << endl;
      driver->stop();
   }
}
Example #4
0
 virtual void statusUpdate(
     SchedulerDriver* driver,
     const TaskStatus& status)
 {
   CHECK_EQ(name, status.task_id().value());
   cout << "Received status update " << status.state()
        << " for task " << status.task_id() << endl;
   if (protobuf::isTerminalState(status.state())) {
     driver->stop();
   }
 }
  virtual void statusUpdate(SchedulerDriver* driver, const TaskStatus& status)
  {
    int taskId = lexical_cast<int>(status.task_id().value());

    cout << "Task " << taskId << " is in state " << status.state() << endl;

    if (status.state() == TASK_FINISHED)
      tasksFinished++;

    if (tasksFinished == totalTasks)
      driver->stop();
  }
Example #6
0
bool operator == (const TaskStatus& left, const TaskStatus& right)
{
    return left.task_id() == right.task_id() &&
           left.state() == right.state() &&
           left.data() == right.data() &&
           left.message() == right.message() &&
           left.slave_id() == right.slave_id() &&
           left.timestamp() == right.timestamp() &&
           left.executor_id() == right.executor_id() &&
           left.healthy() == right.healthy() &&
           left.source() == right.source() &&
           left.reason() == right.reason() &&
           left.uuid() == right.uuid();
}
Example #7
0
void CephSchedulerAgent<T>::statusUpdate(
      T* driver,
      const TaskStatus& status)
{
  LOG(INFO) << "Got status update from " << status.source();
  string taskId = status.task_id().value();
  if (status.state() == TASK_RUNNING) {
    LOG(INFO) << taskId << " is Running!";
    stateMachine->updateTaskToRunning(taskId);
    if (status.has_message()){
      vector<string> tokens = StringUtil::explode(status.message(), '.');
      if ((MessageToScheduler)lexical_cast<int>(tokens[0])
          == MessageToScheduler::CONSUMED_OSD_ID
          ){
        string consumedOSDId = tokens[1];
        LOG(INFO) << "Got message of \"consumed_OSD_ID\": "<<consumedOSDId;

      }
    }
  } else if (status.state() == TASK_STARTING) {
    LOG(INFO) << taskId << " is Waiting OSDID, ready for assign osd id!";
    stateMachine->updateTaskToWaitingOSDID(taskId);
  } else if (status.state() == TASK_FAILED) {
    LOG(INFO) << taskId << " failed";
    stateMachine->updateTaskToFailed(taskId);
    //TODO: if has message , add the OSD ID back to StateMachine
  } else if (status.state() == TASK_FINISHED) {
    //only disk executor will have this finished status
    if (status.has_message()){
      vector<string> tokens = StringUtil::explode(status.message(), '.');
      if ((MessageToScheduler)lexical_cast<int>(tokens[0])
          == MessageToScheduler::DISK_READY
          ){
        string failedDevsStr = tokens[1];
        LOG(INFO) << "Got message of \"DISK_READY\": "<<failedDevsStr;
        vector<string> failedDevs = StringUtil::explode(failedDevsStr, ':');
        string hostname = failedDevs[0];
        vector<string> devs; 
        if ("-" != failedDevs[1]) {
          vector<string> devs = StringUtil::explode(failedDevs[1], ',');
        }
        HostConfig* hostconfig = stateMachine->getConfig(hostname);
        //TODO: get this "4" from yml config
        hostconfig->updateDiskPartition(devs,lexical_cast<int>("4"));
        hostconfig->setDiskPreparationDone();
      }
    }
  }
}
Example #8
0
	virtual void statusUpdate(SchedulerDriver* driver, const TaskStatus& status)  {
		string taskId = status.task_id().value();

		cout << "Container " << taskId << " is in state " << taskState[status.state()] << endl;
		

		if (status.state() == TASK_FINISHED)
			containersFinished++;
			
		if (status.state() == TASK_FAILED)
			driver->stop();
		
		cout << "Total complete: " << stringify(containersFinished) << " out of " << stringify(containersAssigned) << endl;
		
		if (containersFinished == containersAssigned)
			driver->stop();
	}
Example #9
0
// Returns a JSON object modeled on a TaskStatus.
JSON::Object model(const TaskStatus& status)
{
  JSON::Object object;
  object.values["state"] = TaskState_Name(status.state());
  object.values["timestamp"] = status.timestamp();

  return object;
}
Example #10
0
  virtual void statusUpdate(SchedulerDriver* driver, const TaskStatus& status)
  {
    std::cout << "Task in state " << status.state() << std::endl;
    if (status.has_message()) {
      std::cout << "Reason: " << status.message() << std::endl;
    }

    if (protobuf::isTerminalState(status.state())) {
      // NOTE: We expect TASK_FAILED here. The abort here ensures the shell
      // script invoking this test, considers the test result as 'PASS'.
      if (status.state() == TASK_FAILED) {
        driver->abort();
      } else {
        driver->stop();
      }
    }
  }
TEST(ResourceOffersTest, TaskUsesNoResources)
{
  ASSERT_TRUE(GTEST_IS_THREADSAFE);

  PID<Master> master = local::launch(1, 2, 1 * Gigabyte, false);

  MockScheduler sched;
  MesosSchedulerDriver driver(&sched, "", DEFAULT_EXECUTOR_INFO, master);

  vector<Offer> offers;

  trigger resourceOffersCall;

  EXPECT_CALL(sched, registered(&driver, _))
    .Times(1);

  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(DoAll(SaveArg<1>(&offers),
                    Trigger(&resourceOffersCall)))
    .WillRepeatedly(Return());

  driver.start();

  WAIT_UNTIL(resourceOffersCall);

  EXPECT_NE(0, offers.size());

  TaskDescription task;
  task.set_name("");
  task.mutable_task_id()->set_value("1");
  task.mutable_slave_id()->MergeFrom(offers[0].slave_id());

  vector<TaskDescription> tasks;
  tasks.push_back(task);

  TaskStatus status;

  trigger statusUpdateCall;

  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(DoAll(SaveArg<1>(&status),
                    Trigger(&statusUpdateCall)));

  driver.launchTasks(offers[0].id(), tasks);

  WAIT_UNTIL(statusUpdateCall);

  EXPECT_EQ(task.task_id(), status.task_id());
  EXPECT_EQ(TASK_LOST, status.state());
  EXPECT_TRUE(status.has_message());
  EXPECT_EQ("Task uses no resources", status.message());

  driver.stop();
  driver.join();

  local::shutdown();
}
Example #12
0
  virtual void statusUpdate(
      SchedulerDriver* driver,
      const TaskStatus& status)
  {
    CHECK_EQ(name, status.task_id().value());
    cout << "Received status update " << status.state()
         << " for task " << status.task_id() << endl;

    cout << "RECEIVED UPDATE:" << endl;
    cout << "Message: " << status.message() << endl;

    if(status.state() == TASK_FINISHED) {
      cout << "=== Dumping data ===" << endl;
      cout << status.data() << endl;
      cout << "====================" << endl;
    }

    if (mesos::internal::protobuf::isTerminalState(status.state())) {
      driver->stop();
    }
  }
Example #13
0
  virtual void statusUpdate(SchedulerDriver* driver, const TaskStatus& status)
  {
    int taskId = lexical_cast<int>(status.task_id().value());

    cout << "Task " << taskId << " is in state " << status.state() << endl;

    if (status.state() == TASK_FINISHED ||
        status.state() == TASK_FAILED ||
        status.state() == TASK_KILLED ||
        status.state() == TASK_LOST) {
      tasks[taskId].finished = true;
      tasksFinished++;

      if (status.state() == TASK_FINISHED) {
        successfulTasks++;
      }

      if (tasksFinished == tasks.size()) {
        driver->stop();
      }
    }
  }
Example #14
0
  virtual void statusUpdate(SchedulerDriver* driver, const TaskStatus& status)
  {
    if (status.state() == TASK_FINISHED) {
      cout << "Task " << status.task_id().value() << " finished" << endl;
      tasksFinished++;
    }

    if (tasksFinished == tasksLaunched &&
        crawlQueue.empty() &&
        renderQueue.empty()) {
      // Wait to receive any pending framework messages
      // If some framework messages are lost, it may hang indefinitely.
      while (frameworkMessagesReceived != tasksFinished) {
        sleep(1);
      }
      shutdown();
      driver->stop();
    }
  }
Example #15
0
  void sendStatusUpdate(const TaskStatus& status)
  {
    if (status.state() == TASK_STAGING) {
      VLOG(1) << "Executor is not allowed to send "
              << "TASK_STAGING status update. Aborting!";

      driver->abort();

      Stopwatch stopwatch;
      if (FLAGS_v >= 1) {
        stopwatch.start();
      }

      executor->error(driver, "Attempted to send TASK_STAGING status update");

      VLOG(1) << "Executor::error took " << stopwatch.elapsed();

      return;
    }

    StatusUpdateMessage message;
    StatusUpdate* update = message.mutable_update();
    update->mutable_framework_id()->MergeFrom(frameworkId);
    update->mutable_executor_id()->MergeFrom(executorId);
    update->mutable_slave_id()->MergeFrom(slaveId);
    update->mutable_status()->MergeFrom(status);
    update->set_timestamp(Clock::now().secs());
    update->set_uuid(UUID::random().toBytes());
    message.set_pid(self());

    VLOG(1) << "Executor sending status update " << *update;

    // Capture the status update.
    updates[UUID::fromBytes(update->uuid())] = *update;

    send(slave, message);
  }
Example #16
0
TEST(FaultToleranceTest, SchedulerFailoverFrameworkMessage)
{
  ASSERT_TRUE(GTEST_IS_THREADSAFE);

  SimpleAllocator a;
  Master m(&a);
  PID<Master> master = process::spawn(&m);

  MockExecutor exec;

  ExecutorDriver* execDriver;

  EXPECT_CALL(exec, registered(_, _, _, _))
    .WillOnce(SaveArg<0>(&execDriver));

  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(SendStatusUpdate(TASK_RUNNING));

  EXPECT_CALL(exec, shutdown(_))
    .Times(AtMost(1));

  map<ExecutorID, Executor*> execs;
  execs[DEFAULT_EXECUTOR_ID] = &exec;

  TestingIsolationModule isolationModule(execs);

  Resources resources = Resources::parse("cpus:2;mem:1024");

  Slave s(resources, true, &isolationModule);
  PID<Slave> slave = process::spawn(&s);

  BasicMasterDetector detector(master, slave, true);

  MockScheduler sched1;

  MesosSchedulerDriver driver1(&sched1, DEFAULT_FRAMEWORK_INFO, master);

  FrameworkID frameworkId;

  vector<Offer> offers;
  TaskStatus status;
  trigger sched1ResourceOfferCall, sched1StatusUpdateCall;

  EXPECT_CALL(sched1, registered(&driver1, _, _))
    .WillOnce(SaveArg<1>(&frameworkId));
  EXPECT_CALL(sched1, statusUpdate(&driver1, _))
    .WillOnce(DoAll(SaveArg<1>(&status), Trigger(&sched1StatusUpdateCall)));

  EXPECT_CALL(sched1, resourceOffers(&driver1, _))
    .WillOnce(DoAll(SaveArg<1>(&offers),
                    Trigger(&sched1ResourceOfferCall)))
    .WillRepeatedly(Return());

  EXPECT_CALL(sched1, error(&driver1, "Framework failed over"))
    .Times(1);

  driver1.start();

  WAIT_UNTIL(sched1ResourceOfferCall);

  EXPECT_NE(0, offers.size());

  TaskInfo task;
  task.set_name("");
  task.mutable_task_id()->set_value("1");
  task.mutable_slave_id()->MergeFrom(offers[0].slave_id());
  task.mutable_resources()->MergeFrom(offers[0].resources());
  task.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO);

  vector<TaskInfo> tasks;
  tasks.push_back(task);

  driver1.launchTasks(offers[0].id(), tasks);

  WAIT_UNTIL(sched1StatusUpdateCall);

  EXPECT_EQ(TASK_RUNNING, status.state());

  MockScheduler sched2;

  FrameworkInfo framework2; // Bug in gcc 4.1.*, must assign on next line.
  framework2 = DEFAULT_FRAMEWORK_INFO;
  framework2.mutable_id()->MergeFrom(frameworkId);

  MesosSchedulerDriver driver2(&sched2, framework2, master);

  trigger sched2RegisteredCall, sched2FrameworkMessageCall;

  EXPECT_CALL(sched2, registered(&driver2, frameworkId, _))
    .WillOnce(Trigger(&sched2RegisteredCall));

  EXPECT_CALL(sched2, frameworkMessage(&driver2, _, _, _))
    .WillOnce(Trigger(&sched2FrameworkMessageCall));

  driver2.start();

  WAIT_UNTIL(sched2RegisteredCall);

  execDriver->sendFrameworkMessage("");

  WAIT_UNTIL(sched2FrameworkMessageCall);

  driver1.stop();
  driver2.stop();

  driver1.join();
  driver2.join();

  process::terminate(slave);
  process::wait(slave);

  process::terminate(master);
  process::wait(master);
}
TEST(MasterTest, TaskRunning)
{
  ASSERT_TRUE(GTEST_IS_THREADSAFE);

  SimpleAllocator a;
  Master m(&a);
  PID<Master> master = process::spawn(&m);

  MockExecutor exec;

  trigger shutdownCall;

  EXPECT_CALL(exec, registered(_, _, _, _, _, _))
    .Times(1);

  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(SendStatusUpdate(TASK_RUNNING));

  EXPECT_CALL(exec, shutdown(_))
    .WillOnce(Trigger(&shutdownCall));

  map<ExecutorID, Executor*> execs;
  execs[DEFAULT_EXECUTOR_ID] = &exec;

  TestingIsolationModule isolationModule(execs);

  Resources resources = Resources::parse("cpus:2;mem:1024");

  Slave s(resources, true, &isolationModule);
  PID<Slave> slave = process::spawn(&s);

  BasicMasterDetector detector(master, slave, true);

  MockScheduler sched;
  MesosSchedulerDriver driver(&sched, "", DEFAULT_EXECUTOR_INFO, master);

  vector<Offer> offers;
  TaskStatus status;

  trigger resourceOffersCall, statusUpdateCall, resourcesChangedCall;

  EXPECT_CALL(sched, registered(&driver, _))
    .Times(1);

  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(DoAll(SaveArg<1>(&offers),
                    Trigger(&resourceOffersCall)))
    .WillRepeatedly(Return());

  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(DoAll(SaveArg<1>(&status), Trigger(&statusUpdateCall)));

  driver.start();

  WAIT_UNTIL(resourceOffersCall);

  EXPECT_NE(0, offers.size());

  TaskDescription task;
  task.set_name("");
  task.mutable_task_id()->set_value("1");
  task.mutable_slave_id()->MergeFrom(offers[0].slave_id());
  task.mutable_resources()->MergeFrom(offers[0].resources());

  vector<TaskDescription> tasks;
  tasks.push_back(task);

  EXPECT_CALL(isolationModule,
              resourcesChanged(_, _, Resources(offers[0].resources())))
    .WillOnce(Trigger(&resourcesChangedCall));

  driver.launchTasks(offers[0].id(), tasks);

  WAIT_UNTIL(statusUpdateCall);

  EXPECT_EQ(TASK_RUNNING, status.state());

  WAIT_UNTIL(resourcesChangedCall);

  driver.stop();
  driver.join();

  WAIT_UNTIL(shutdownCall); // To ensure can deallocate MockExecutor.

  process::terminate(slave);
  process::wait(slave);

  process::terminate(master);
  process::wait(master);
}
TEST(MasterTest, FrameworkMessage)
{
  ASSERT_TRUE(GTEST_IS_THREADSAFE);

  SimpleAllocator a;
  Master m(&a);
  PID<Master> master = process::spawn(&m);

  MockExecutor exec;

  ExecutorDriver* execDriver;
  string execData;

  trigger execFrameworkMessageCall, shutdownCall;

  EXPECT_CALL(exec, registered(_, _, _, _, _, _))
    .WillOnce(SaveArg<0>(&execDriver));

  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(SendStatusUpdate(TASK_RUNNING));

  EXPECT_CALL(exec, frameworkMessage(_, _))
    .WillOnce(DoAll(SaveArg<1>(&execData),
                    Trigger(&execFrameworkMessageCall)));

  EXPECT_CALL(exec, shutdown(_))
    .WillOnce(Trigger(&shutdownCall));

  map<ExecutorID, Executor*> execs;
  execs[DEFAULT_EXECUTOR_ID] = &exec;

  TestingIsolationModule isolationModule(execs);

  Resources resources = Resources::parse("cpus:2;mem:1024");

  Slave s(resources, true, &isolationModule);
  PID<Slave> slave = process::spawn(&s);

  BasicMasterDetector detector(master, slave, true);

  // Launch the first (i.e., failing) scheduler and wait until the
  // first status update message is sent to it (drop the message).

  MockScheduler sched;
  MesosSchedulerDriver schedDriver(&sched, "", DEFAULT_EXECUTOR_INFO, master);

  vector<Offer> offers;
  TaskStatus status;
  string schedData;

  trigger resourceOffersCall, statusUpdateCall, schedFrameworkMessageCall;

  EXPECT_CALL(sched, registered(&schedDriver, _))
    .Times(1);

  EXPECT_CALL(sched, resourceOffers(&schedDriver, _))
    .WillOnce(DoAll(SaveArg<1>(&offers),
                    Trigger(&resourceOffersCall)))
    .WillRepeatedly(Return());

  EXPECT_CALL(sched, statusUpdate(&schedDriver, _))
    .WillOnce(DoAll(SaveArg<1>(&status), Trigger(&statusUpdateCall)));

  EXPECT_CALL(sched, frameworkMessage(&schedDriver, _, _, _))
    .WillOnce(DoAll(SaveArg<3>(&schedData),
                    Trigger(&schedFrameworkMessageCall)));

  schedDriver.start();

  WAIT_UNTIL(resourceOffersCall);

  EXPECT_NE(0, offers.size());

  TaskDescription task;
  task.set_name("");
  task.mutable_task_id()->set_value("1");
  task.mutable_slave_id()->MergeFrom(offers[0].slave_id());
  task.mutable_resources()->MergeFrom(offers[0].resources());

  vector<TaskDescription> tasks;
  tasks.push_back(task);

  schedDriver.launchTasks(offers[0].id(), tasks);

  WAIT_UNTIL(statusUpdateCall);

  EXPECT_EQ(TASK_RUNNING, status.state());

  string hello = "hello";

  schedDriver.sendFrameworkMessage(offers[0].slave_id(),
				   DEFAULT_EXECUTOR_ID,
				   hello);

  WAIT_UNTIL(execFrameworkMessageCall);

  EXPECT_EQ(hello, execData);

  string reply = "reply";

  execDriver->sendFrameworkMessage(reply);

  WAIT_UNTIL(schedFrameworkMessageCall);

  EXPECT_EQ(reply, schedData);

  schedDriver.stop();
  schedDriver.join();

  WAIT_UNTIL(shutdownCall); // To ensure can deallocate MockExecutor.

  process::terminate(slave);
  process::wait(slave);

  process::terminate(master);
  process::wait(master);
}
Example #19
0
 virtual void statusUpdate(SchedulerDriver* driver, const TaskStatus& status)
 {
   int taskId = lexical_cast<int>(status.task_id().value());
   cout << "Task " << taskId << " is in state " << status.state() << endl;
 }
TEST(ResourceOffersTest, ResourcesGetReofferedAfterTaskDescriptionError)
{
  ASSERT_TRUE(GTEST_IS_THREADSAFE);

  PID<Master> master = local::launch(1, 2, 1 * Gigabyte, false);

  MockScheduler sched1;
  MesosSchedulerDriver driver1(&sched1, "", DEFAULT_EXECUTOR_INFO, master);

  vector<Offer> offers;

  trigger sched1ResourceOffersCall;

  EXPECT_CALL(sched1, registered(&driver1, _))
    .Times(1);

  EXPECT_CALL(sched1, resourceOffers(&driver1, _))
    .WillOnce(DoAll(SaveArg<1>(&offers),
                    Trigger(&sched1ResourceOffersCall)))
    .WillRepeatedly(Return());

  driver1.start();

  WAIT_UNTIL(sched1ResourceOffersCall);

  EXPECT_NE(0, offers.size());

  TaskDescription task;
  task.set_name("");
  task.mutable_task_id()->set_value("1");
  task.mutable_slave_id()->MergeFrom(offers[0].slave_id());

  Resource* cpus = task.add_resources();
  cpus->set_name("cpus");
  cpus->set_type(Value::SCALAR);
  cpus->mutable_scalar()->set_value(0);

  Resource* mem = task.add_resources();
  mem->set_name("mem");
  mem->set_type(Value::SCALAR);
  mem->mutable_scalar()->set_value(1 * Gigabyte);

  vector<TaskDescription> tasks;
  tasks.push_back(task);

  TaskStatus status;

  trigger sched1StatusUpdateCall;

  EXPECT_CALL(sched1, statusUpdate(&driver1, _))
    .WillOnce(DoAll(SaveArg<1>(&status),
                    Trigger(&sched1StatusUpdateCall)));

  driver1.launchTasks(offers[0].id(), tasks);

  WAIT_UNTIL(sched1StatusUpdateCall);

  EXPECT_EQ(task.task_id(), status.task_id());
  EXPECT_EQ(TASK_LOST, status.state());
  EXPECT_TRUE(status.has_message());
  EXPECT_EQ("Task uses invalid resources", status.message());

  driver1.stop();
  driver1.join();

  MockScheduler sched2;
  MesosSchedulerDriver driver2(&sched2, "", DEFAULT_EXECUTOR_INFO, master);

  trigger sched2ResourceOffersCall;

  EXPECT_CALL(sched2, registered(&driver2, _))
    .Times(1);

  EXPECT_CALL(sched2, resourceOffers(&driver2, _))
    .WillOnce(Trigger(&sched2ResourceOffersCall))
    .WillRepeatedly(Return());

  EXPECT_CALL(sched2, offerRescinded(&driver2, _))
    .Times(AtMost(1));

  driver2.start();

  WAIT_UNTIL(sched2ResourceOffersCall);

  driver2.stop();
  driver2.join();

  local::shutdown();
}
Example #21
0
// TOOD(vinod): Disabling this test for now because
// of the following race condition breaking this test:
// We do a driver.launchTasks() after post(noMasterDetected)
// but since dispatch (which is used by launchTasks()) uses
// a different queue than post, it might so happen that the latter
// message is dequeued before the former, thus breaking the test.
TEST(FaultToleranceTest, DISABLED_TaskLost)
{
  ASSERT_TRUE(GTEST_IS_THREADSAFE);

  MockFilter filter;
  process::filter(&filter);

  EXPECT_MESSAGE(filter, _, _, _)
    .WillRepeatedly(Return(false));

  SimpleAllocator a;
  Master m(&a);
  PID<Master> master = process::spawn(&m);

  MockExecutor exec;
  EXPECT_CALL(exec, registered(_, _, _, _))
    .Times(0);

  EXPECT_CALL(exec, launchTask(_, _))
    .Times(0);

  EXPECT_CALL(exec, shutdown(_))
    .Times(0);

  map<ExecutorID, Executor*> execs;
  execs[DEFAULT_EXECUTOR_ID] = &exec;

  TestingIsolationModule isolationModule(execs);

  Resources resources = Resources::parse("cpus:2;mem:1024");

  Slave s(resources, true, &isolationModule);
  PID<Slave> slave = process::spawn(&s);

  BasicMasterDetector detector(master, slave, true);

  MockScheduler sched;
  MesosSchedulerDriver driver(&sched, DEFAULT_FRAMEWORK_INFO, master);
  vector<Offer> offers;
  trigger statusUpdateCall, resourceOffersCall;
  TaskStatus status;

  EXPECT_CALL(sched, registered(&driver, _, _))
    .Times(1);

  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(DoAll(SaveArg<1>(&offers),
                    Trigger(&resourceOffersCall)))
    .WillRepeatedly(Return());

  EXPECT_CALL(sched, offerRescinded(&driver, _))
    .Times(AtMost(1));

  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(DoAll(SaveArg<1>(&status),
                    Trigger(&statusUpdateCall)));

  process::Message message;

  EXPECT_MESSAGE(filter, Eq(FrameworkRegisteredMessage().GetTypeName()), _, _)
    .WillOnce(DoAll(SaveArgField<0>(&process::MessageEvent::message, &message),
                    Return(false)));

  driver.start();

  WAIT_UNTIL(resourceOffersCall);

  // Simulate a spurious noMasterDetected event at the scheduler.
  NoMasterDetectedMessage noMasterDetectedMsg;
  process::post(message.to, noMasterDetectedMsg);

  EXPECT_NE(0, offers.size());

  TaskInfo task;
  task.set_name("test task");
  task.mutable_task_id()->set_value("1");
  task.mutable_slave_id()->MergeFrom(offers[0].slave_id());
  task.mutable_resources()->MergeFrom(offers[0].resources());
  task.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO);

  vector<TaskInfo> tasks;
  tasks.push_back(task);

  driver.launchTasks(offers[0].id(), tasks);

  WAIT_UNTIL(statusUpdateCall);

  EXPECT_EQ(status.state(), TASK_LOST);

  driver.stop();
  driver.join();

  process::terminate(slave);
  process::wait(slave);

  process::terminate(master);
  process::wait(master);

  process::filter(NULL);
}
Example #22
0
  void statusUpdate(SchedulerDriver* driver, const TaskStatus& status)
  {
    if (stringify(tasksLaunched - 1) != status.task_id().value()) {
      // We might receive messages from older tasks. Ignore them.
      LOG(INFO) << "Ignoring status update from older task "
                << status.task_id();
      return;
    }

    switch (status.state()) {
    case TASK_FINISHED:
      if (flags.run_once) {
          driver->stop();
          break;
      }

      taskActive = false;
      ++metrics.tasks_finished;
      break;
    case TASK_FAILED:
      if (flags.run_once) {
          driver->abort();
          break;
      }

      taskActive = false;

      if (status.reason() == TaskStatus::REASON_CONTAINER_LIMITATION_DISK) {
        ++metrics.tasks_disk_full;

        // Increment abnormal_termination metric counter in case the task
        // wasn't supposed to consume beyond its disk quota but still got
        // terminated because of disk overuse.
        if (flags.disk_use_limit >= DISK_PER_TASK) {
          ++metrics.abnormal_terminations;
        }

        break;
      }

      ++metrics.abnormal_terminations;
      break;
    case TASK_KILLED:
    case TASK_LOST:
    case TASK_ERROR:
    case TASK_DROPPED:
    case TASK_UNREACHABLE:
    case TASK_GONE:
    case TASK_GONE_BY_OPERATOR:
      if (flags.run_once) {
        driver->abort();
      }

      taskActive = false;
      ++metrics.abnormal_terminations;
      break;
    case TASK_STARTING:
    case TASK_RUNNING:
    case TASK_STAGING:
    case TASK_KILLING:
    case TASK_UNKNOWN:
      break;
    }
  }