Example #1
0
// Builds a `Task` from the given `TaskInfo`, stamped with `state` and
// `frameworkId`.
//
// The name, task id, slave id and resources are always copied from
// `task`. The executor id, labels, discovery info and container info
// are copied only when `task` carries them. The `user` is taken from
// the task's own command when that names one, otherwise from the
// executor's command when that names one; it is left unset otherwise.
Task createTask(
    const TaskInfo& task,
    const TaskState& state,
    const FrameworkID& frameworkId)
{
  Task result;

  // Fields that are copied unconditionally.
  result.set_name(task.name());
  result.set_state(state);
  *result.mutable_framework_id() = frameworkId;
  *result.mutable_task_id() = task.task_id();
  *result.mutable_slave_id() = task.slave_id();
  *result.mutable_resources() = task.resources();

  // Optional fields: only mirrored when present on the `TaskInfo`.
  if (task.has_executor()) {
    *result.mutable_executor_id() = task.executor().executor_id();
  }

  if (task.has_labels()) {
    *result.mutable_labels() = task.labels();
  }

  if (task.has_discovery()) {
    *result.mutable_discovery() = task.discovery();
  }

  if (task.has_container()) {
    *result.mutable_container() = task.container();
  }

  // The task's own command wins over the executor's command when both
  // specify a user.
  const bool taskCommandHasUser =
    task.has_command() && task.command().has_user();

  if (taskCommandHasUser) {
    result.set_user(task.command().user());
  } else if (task.has_executor() && task.executor().command().has_user()) {
    result.set_user(task.executor().command().user());
  }

  return result;
}
Example #2
0
  void launchTask(ExecutorDriver* driver, const TaskInfo& task)
  {
    CHECK_EQ(REGISTERED, state);

    if (launched) {
      TaskStatus status;
      status.mutable_task_id()->MergeFrom(task.task_id());
      status.set_state(TASK_FAILED);
      status.set_message(
          "Attempted to run multiple tasks using a \"command\" executor");

      driver->sendStatusUpdate(status);
      return;
    }

    // Capture the TaskID.
    taskId = task.task_id();

    // Determine the command to launch the task.
    CommandInfo command;

    if (taskCommand.isSome()) {
      // Get CommandInfo from a JSON string.
      Try<JSON::Object> object = JSON::parse<JSON::Object>(taskCommand.get());
      if (object.isError()) {
        cerr << "Failed to parse JSON: " << object.error() << endl;
        abort();
      }

      Try<CommandInfo> parse = protobuf::parse<CommandInfo>(object.get());
      if (parse.isError()) {
        cerr << "Failed to parse protobuf: " << parse.error() << endl;
        abort();
      }

      command = parse.get();
    } else if (task.has_command()) {
      command = task.command();
    } else {
      CHECK_SOME(override)
        << "Expecting task '" << task.task_id()
        << "' to have a command!";
    }

    if (override.isNone()) {
      // TODO(jieyu): For now, we just fail the executor if the task's
      // CommandInfo is not valid. The framework will receive
      // TASK_FAILED for the task, and will most likely find out the
      // cause with some debugging. This is a temporary solution. A more
      // correct solution is to perform this validation at master side.
      if (command.shell()) {
        CHECK(command.has_value())
          << "Shell command of task '" << task.task_id()
          << "' is not specified!";
      } else {
        CHECK(command.has_value())
          << "Executable of task '" << task.task_id()
          << "' is not specified!";
      }
    }

    cout << "Starting task " << task.task_id() << endl;

    // TODO(benh): Clean this up with the new 'Fork' abstraction.
    // Use pipes to determine which child has successfully changed
    // session. This is needed as the setsid call can fail from other
    // processes having the same group id.
    int pipes[2];
    if (pipe(pipes) < 0) {
      perror("Failed to create a pipe");
      abort();
    }

    // Set the FD_CLOEXEC flags on these pipes.
    Try<Nothing> cloexec = os::cloexec(pipes[0]);
    if (cloexec.isError()) {
      cerr << "Failed to cloexec(pipe[0]): " << cloexec.error() << endl;
      abort();
    }

    cloexec = os::cloexec(pipes[1]);
    if (cloexec.isError()) {
      cerr << "Failed to cloexec(pipe[1]): " << cloexec.error() << endl;
      abort();
    }

    Option<string> rootfs;
    if (sandboxDirectory.isSome()) {
      // If 'sandbox_directory' is specified, that means the user
      // task specifies a root filesystem, and that root filesystem has
      // already been prepared at COMMAND_EXECUTOR_ROOTFS_CONTAINER_PATH.
      // The command executor is responsible for mounting the sandbox
      // into the root filesystem, chrooting into it and changing the
      // user before exec-ing the user process.
      //
      // TODO(gilbert): Consider a better way to detect if a root
      // filesystem is specified for the command task.
#ifdef __linux__
      Result<string> user = os::user();
      if (user.isError()) {
        cerr << "Failed to get current user: " << user.error() << endl;
        abort();
      } else if (user.isNone()) {
        cerr << "Current username is not found" << endl;
        abort();
      } else if (user.get() != "root") {
        cerr << "The command executor requires root with rootfs" << endl;
        abort();
      }

      rootfs = path::join(
          os::getcwd(), COMMAND_EXECUTOR_ROOTFS_CONTAINER_PATH);

      string sandbox = path::join(rootfs.get(), sandboxDirectory.get());
      if (!os::exists(sandbox)) {
        Try<Nothing> mkdir = os::mkdir(sandbox);
        if (mkdir.isError()) {
          cerr << "Failed to create sandbox mount point  at '"
               << sandbox << "': " << mkdir.error() << endl;
          abort();
        }
      }

      // Mount the sandbox into the container rootfs.
      // We need to perform a recursive mount because we want all the
      // volume mounts in the sandbox to be also mounted in the container
      // root filesystem. However, since the container root filesystem
      // is also mounted in the sandbox, after the recursive mount we
      // also need to unmount the root filesystem in the mounted sandbox.
      Try<Nothing> mount = fs::mount(
          os::getcwd(),
          sandbox,
          None(),
          MS_BIND | MS_REC,
          NULL);

      if (mount.isError()) {
        cerr << "Unable to mount the work directory into container "
             << "rootfs: " << mount.error() << endl;;
        abort();
      }

      // Umount the root filesystem path in the mounted sandbox after
      // the recursive mount.
      Try<Nothing> unmountAll = fs::unmountAll(path::join(
          sandbox,
          COMMAND_EXECUTOR_ROOTFS_CONTAINER_PATH));
      if (unmountAll.isError()) {
        cerr << "Unable to unmount rootfs under mounted sandbox: "
             << unmountAll.error() << endl;
        abort();
      }
#else
      cerr << "Not expecting root volume with non-linux platform." << endl;
      abort();
#endif // __linux__
    }

    // Prepare the argv before fork as it's not async signal safe.
    char **argv = new char*[command.arguments().size() + 1];
    for (int i = 0; i < command.arguments().size(); i++) {
      argv[i] = (char*) command.arguments(i).c_str();
    }
    argv[command.arguments().size()] = NULL;

    // Prepare the command log message.
    string commandString;
    if (override.isSome()) {
      char** argv = override.get();
      // argv is guaranteed to be NULL terminated and we rely on
      // that fact to print command to be executed.
      for (int i = 0; argv[i] != NULL; i++) {
        commandString += string(argv[i]) + " ";
      }
    } else if (command.shell()) {
Example #3
0
  // Launches the task by forking a child that runs the task's command
  // through `/bin/sh -c`.
  //
  // If a task was already launched, a TASK_FAILED update is sent for
  // the new task and nothing else happens. Otherwise the forked command
  // is put into its own session, its pid is handed back to this process
  // over a pipe, that pid is passed to `process::reap` (with `reaped`
  // deferred as the callback) and a TASK_RUNNING update is sent.
  //
  // Any failure while creating the pipe, forking, or obtaining the
  // child's pid aborts this process.
  void launchTask(ExecutorDriver* driver, const TaskInfo& task)
  {
    if (launched) {
      TaskStatus status;
      status.mutable_task_id()->MergeFrom(task.task_id());
      status.set_state(TASK_FAILED);
      status.set_message(
          "Attempted to run multiple tasks using a \"command\" executor");

      driver->sendStatusUpdate(status);
      return;
    }

    CHECK(task.has_command()) << "Expecting task " << task.task_id()
                              << " to have a command!";

    std::cout << "Starting task " << task.task_id() << std::endl;

    // TODO(benh): Clean this up with the new 'Fork' abstraction.
    // Use pipes to determine which child has successfully changed
    // session. This is needed as the setsid call can fail from other
    // processes having the same group id.
    int pipes[2];
    if (pipe(pipes) < 0) {
      perror("Failed to create a pipe");
      abort();
    }

    // Set the FD_CLOEXEC flags on these pipes so that the exec'ed
    // command does not inherit them.
    Try<Nothing> cloexec = os::cloexec(pipes[0]);
    if (cloexec.isError()) {
      std::cerr << "Failed to cloexec(pipe[0]): " << cloexec.error()
                << std::endl;
      abort();
    }

    cloexec = os::cloexec(pipes[1]);
    if (cloexec.isError()) {
      std::cerr << "Failed to cloexec(pipe[1]): " << cloexec.error()
                << std::endl;
      abort();
    }

    if ((pid = fork()) == -1) {
      std::cerr << "Failed to fork to run '" << task.command().value() << "': "
                << strerror(errno) << std::endl;
      abort();
    }

    if (pid == 0) {
      // In child process, we make cleanup easier by putting process
      // into its own session.
      os::close(pipes[0]);

      // NOTE: We setsid() in a loop because setsid() might fail if another
      // process has the same process group id as the calling process.
      while ((pid = setsid()) == -1) {
        perror("Could not put command in its own session, setsid");

        std::cout << "Forking another process and retrying" << std::endl;

        if ((pid = fork()) == -1) {
          perror("Failed to fork to launch command");
          abort();
        }

        if (pid > 0) {
          // In parent process. It is ok to suicide here, because
          // we're not watching this process.
          exit(0);
        }
      }

      // Report the pid of the process that actually owns the new
      // session; after a retry this differs from the pid the executor
      // got back from its own fork().
      if (write(pipes[1], &pid, sizeof(pid)) != sizeof(pid)) {
        perror("Failed to write PID on pipe");
        abort();
      }

      os::close(pipes[1]);

      // The child has successfully setsid, now run the command.
      std::cout << "sh -c '" << task.command().value() << "'" << std::endl;
      execl("/bin/sh", "sh", "-c",
            task.command().value().c_str(), (char*) NULL);
      perror("Failed to exec");
      abort();
    }

    // In parent process.
    os::close(pipes[1]);

    // Get the child's pid via the pipe. Retry if interrupted by a
    // signal, and insist on receiving a complete pid: a return value
    // of 0 means every writer went away without reporting a pid, in
    // which case `pid` would still hold the value returned by fork()
    // above, which is not necessarily the process running the command.
    ssize_t length;
    do {
      length = read(pipes[0], &pid, sizeof(pid));
    } while (length == -1 && errno == EINTR);

    if (length == -1) {
      std::cerr << "Failed to get child PID from pipe, read: "
                << strerror(errno) << std::endl;
      abort();
    } else if (static_cast<size_t>(length) != sizeof(pid)) {
      std::cerr << "Failed to get child PID from pipe, read: "
                << (length == 0 ? "unexpected end of file" : "short read")
                << std::endl;
      abort();
    }

    os::close(pipes[0]);

    std::cout << "Forked command at " << pid << std::endl;

    // Monitor this process.
    process::reap(pid)
      .onAny(defer(self(),
                   &Self::reaped,
                   driver,
                   task.task_id(),
                   pid,
                   lambda::_1));

    TaskStatus status;
    status.mutable_task_id()->MergeFrom(task.task_id());
    status.set_state(TASK_RUNNING);
    driver->sendStatusUpdate(status);

    launched = true;
  }
Example #4
0
  // Launches the task of this docker executor.
  //
  // A launch attempt while a `docker run` is already recorded is
  // rejected with TASK_FAILED. Otherwise the task id and kill policy
  // are captured, the run options are built from the task, the
  // container is started, and TASK_RUNNING is sent only once
  // `docker inspect` has produced information about the container.
  void launchTask(ExecutorDriver* driver, const TaskInfo& task)
  {
    // Sends a TASK_FAILED update for `task` carrying `message`.
    //
    // TODO(alexr): Use `protobuf::createTaskStatus()`
    // instead of manually setting fields.
    auto sendTaskFailed = [driver, &task](const string& message) {
      TaskStatus failed;
      failed.mutable_task_id()->CopyFrom(task.task_id());
      failed.set_state(TASK_FAILED);
      failed.set_message(message);

      driver->sendStatusUpdate(failed);
    };

    if (run.isSome()) {
      sendTaskFailed(
          "Attempted to run multiple tasks using a \"docker\" executor");
      return;
    }

    // Remember which task we are running and how it wants to be killed.
    taskId = task.task_id();

    if (task.has_kill_policy()) {
      killPolicy = task.kill_policy();
    }

    LOG(INFO) << "Starting task " << taskId.get();

    CHECK(task.has_container());
    CHECK(task.has_command());

    CHECK(task.container().type() == ContainerInfo::DOCKER);

    // We're adding task and executor resources to launch docker since
    // the DockerContainerizer updates the container cgroup limits
    // directly and it expects it to be the sum of both task and
    // executor resources. This does lead to a bit of unaccounted
    // resources for running this executor, but we are assuming
    // this is just a very small amount of overcommit.
    Try<Docker::RunOptions> options = Docker::RunOptions::create(
        task.container(),
        task.command(),
        containerName,
        sandboxDirectory,
        mappedDirectory,
        task.resources() + task.executor().resources(),
        cgroupsEnableCfs,
        taskEnvironment,
        None(), // No extra devices.
        defaultContainerDNS);

    if (options.isError()) {
      sendTaskFailed(
          "Failed to create docker run options: " + options.error());

      _stop();
      return;
    }

    // Start the container, forwarding its output to our own
    // stdout/stderr, and get notified when the run terminates.
    run = docker->run(
        options.get(),
        Subprocess::FD(STDOUT_FILENO),
        Subprocess::FD(STDERR_FILENO));

    run->onAny(defer(self(), &Self::reaped, lambda::_1));

    // Delay sending TASK_RUNNING status update until we receive
    // inspect output. Note that we store a future that completes
    // after the sending of the running update. This allows us to
    // ensure that the terminal update is sent after the running
    // update (see `reaped()`).
    inspect = docker->inspect(containerName, DOCKER_INSPECT_DELAY)
      .then(defer(self(), [=](const Docker::Container& container) {
        // Nothing to report if the task was killed in the meantime.
        if (killed) {
          return Nothing();
        }

        containerPid = container.pid;

        // TODO(alexr): Use `protobuf::createTaskStatus()`
        // instead of manually setting fields.
        TaskStatus running;
        running.mutable_task_id()->CopyFrom(taskId.get());
        running.set_state(TASK_RUNNING);
        running.set_data(container.output);

        if (container.ipAddress.isSome()) {
          // TODO(karya): Deprecated -- Remove after 0.25.0 has shipped.
          Label* ipLabel = running.mutable_labels()->add_labels();
          ipLabel->set_key("Docker.NetworkSettings.IPAddress");
          ipLabel->set_value(container.ipAddress.get());

          NetworkInfo* info =
            running.mutable_container_status()->add_network_infos();

          // Copy the NetworkInfo if it is specified in the
          // ContainerInfo. A Docker container has at most one
          // NetworkInfo, which is validated in containerizer.
          if (task.container().network_infos_size() > 0) {
            info->CopyFrom(task.container().network_infos(0));
            info->clear_ip_addresses();
          }

          // The address reported by inspect is the only one we expose.
          info->add_ip_addresses()->set_ip_address(
              container.ipAddress.get());

          containerNetworkInfo = *info;
        }

        driver->sendStatusUpdate(running);

        return Nothing();
      }));

    inspect.onFailed(defer(self(), [=](const string& reason) {
      LOG(ERROR) << "Failed to inspect container '" << containerName << "'"
                 << ": " << reason;

      // TODO(bmahler): This is fatal, try to shut down cleanly.
      // Since we don't have a container id, we can only discard
      // the run future.
    }));

    // Once inspect has succeeded, start the checks for the task.
    inspect.onReady(defer(self(), &Self::launchCheck, task));

    inspect.onReady(
        defer(self(), &Self::launchHealthCheck, containerName, task));
  }