Ejemplo n.º 1
0
Future<Termination> ExternalContainerizerProcess::wait(
    const ContainerID& containerId)
{
  VLOG(1) << "Wait triggered on container '" << containerId << "'";

  if (!containers.contains(containerId)) {
    LOG(ERROR) << "not running";
    return Failure("Container '" + containerId.value() + "' not running");
  }

  Try<Subprocess> invoked = invoke("wait", containerId);

  if (invoked.isError()) {
    LOG(ERROR) << "not running";
    terminate(containerId);
    return Failure("Wait on container '" + containerId.value()
      + "' failed (error: " + invoked.error() + ")");
  }

  // Await both, input from the pipe as well as an exit of the
  // process.
  await(read(invoked.get().out()), invoked.get().status())
    .onAny(defer(
        PID<ExternalContainerizerProcess>(this),
        &ExternalContainerizerProcess::_wait,
        containerId,
        lambda::_1));

  return containers[containerId]->termination.future();
}
Ejemplo n.º 2
0
Future<Nothing> NetworkCniIsolatorProcess::detach(
    const ContainerID& containerId,
    const std::string& networkName)
{
  CHECK(infos.contains(containerId));
  CHECK(infos[containerId]->containerNetworks.contains(networkName));

  const ContainerNetwork& containerNetwork =
      infos[containerId]->containerNetworks[networkName];

  // Prepare environment variables for CNI plugin.
  map<string, string> environment;
  environment["CNI_COMMAND"] = "DEL";
  environment["CNI_CONTAINERID"] = containerId.value();
  environment["CNI_PATH"] = pluginDir.get();
  environment["CNI_IFNAME"] = containerNetwork.ifName;
  environment["CNI_NETNS"] =
      paths::getNamespacePath(rootDir.get(), containerId.value());

  // Some CNI plugins need to run "iptables" to set up IP Masquerade, so we
  // need to set the "PATH" environment variable so that the plugin can locate
  // the "iptables" executable file.
  Option<string> value = os::getenv("PATH");
  if (value.isSome()) {
    environment["PATH"] = value.get();
  } else {
    environment["PATH"] =
        "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin";
  }

  const NetworkConfigInfo& networkConfig = networkConfigs[networkName];

  // Invoke the CNI plugin.
  const string& plugin = networkConfig.config.type();
  Try<Subprocess> s = subprocess(
      path::join(pluginDir.get(), plugin),
      {plugin},
      Subprocess::PATH(networkConfig.path),
      Subprocess::PIPE(),
      Subprocess::PATH("/dev/null"),
      NO_SETSID,
      None(),
      environment);

  if (s.isError()) {
    return Failure(
        "Failed to execute the CNI plugin '" + plugin + "': " + s.error());
  }

  return await(s->status(), io::read(s->out().get()))
    .then(defer(
        PID<NetworkCniIsolatorProcess>(this),
        &NetworkCniIsolatorProcess::_detach,
        containerId,
        networkName,
        plugin,
        lambda::_1));
}
Ejemplo n.º 3
0
Future<Nothing> NetworkCniIsolatorProcess::_cleanup(
    const ContainerID& containerId,
    const list<Future<Nothing>>& detaches)
{
  CHECK(infos.contains(containerId));

  vector<string> messages;
  foreach (const Future<Nothing>& detach, detaches) {
    if (!detach.isReady()) {
      messages.push_back(
          detach.isFailed() ? detach.failure() : "discarded");
    }
  }

  if (!messages.empty()) {
    return Failure(strings::join("\n", messages));
  }

  const string containerDir =
      paths::getContainerDir(rootDir.get(), containerId.value());

  const string target =
      paths::getNamespacePath(rootDir.get(), containerId.value());

  if (os::exists(target)) {
    Try<Nothing> unmount = fs::unmount(target);
    if (unmount.isError()) {
      return Failure(
          "Failed to unmount the network namespace handle '" +
          target + "': " + unmount.error());
    }

    LOG(INFO) << "Unmounted the network namespace handle '"
              << target << "' for container " << containerId;
  }

  Try<Nothing> rmdir = os::rmdir(containerDir);
  if (rmdir.isError()) {
    return Failure(
        "Failed to remove the container directory '" +
        containerDir + "': " + rmdir.error());
  }

  LOG(INFO) << "Removed the container directory '" << containerDir << "'";

  infos.erase(containerId);

  return Nothing();
}
Ejemplo n.º 4
0
Future<Nothing> NetworkCniIsolatorProcess::_detach(
    const ContainerID& containerId,
    const std::string& networkName,
    const string& plugin,
    const tuple<Future<Option<int>>, Future<string>>& t)
{
  CHECK(infos.contains(containerId));
  CHECK(infos[containerId]->containerNetworks.contains(networkName));

  Future<Option<int>> status = std::get<0>(t);
  if (!status.isReady()) {
    return Failure(
        "Failed to get the exit status of the CNI plugin '" +
        plugin + "' subprocess: " +
        (status.isFailed() ? status.failure() : "discarded"));
  }

  if (status->isNone()) {
    return Failure(
        "Failed to reap the CNI plugin '" + plugin + "' subprocess");
  }

  if (status.get() == 0) {
    const string ifDir = paths::getInterfaceDir(
        rootDir.get(),
        containerId.value(),
        networkName,
        infos[containerId]->containerNetworks[networkName].ifName);

    Try<Nothing> rmdir = os::rmdir(ifDir);
    if (rmdir.isError()) {
      return Failure(
          "Failed to remove interface directory '" +
          ifDir + "': " + rmdir.error());
    }

    return Nothing();
  }

  // CNI plugin will print result (in case of success) or error (in
  // case of failure) to stdout.
  Future<string> output = std::get<1>(t);
  if (!output.isReady()) {
    return Failure(
        "Failed to read stdout from the CNI plugin '" +
        plugin + "' subprocess: " +
        (output.isFailed() ? output.failure() : "discarded"));
  }

  return Failure(
      "The CNI plugin '" + plugin + "' failed to detach container "
      "from network '" + networkName + "': " + output.get());
}
Ejemplo n.º 5
0
Future<Option<ContainerLaunchInfo>> CgroupsNetClsIsolatorProcess::prepare(
    const ContainerID& containerId,
    const ContainerConfig& containerConfig)
{
  if (infos.contains(containerId)) {
    return Failure("Container has already been prepared");
  }

  // Use this info to create the cgroup, but do not insert it into
  // infos till the cgroup has been created successfully.
  Info info(path::join(flags.cgroups_root, containerId.value()));

  // Create a cgroup for this container.
  Try<bool> exists = cgroups::exists(hierarchy, info.cgroup);
  if (exists.isError()) {
    return Failure("Failed to check if the cgroup already exists: " +
                   exists.error());
  } else if (exists.get()) {
    return Failure("The cgroup already exists");
  }

  Try<Nothing> create = cgroups::create(hierarchy, info.cgroup);
  if (create.isError()) {
    return Failure("Failed to create the cgroup: " + create.error());
  }

  // 'chown' the cgroup so the executor can create nested cgroups. Do
  // not recurse so the control files are still owned by the slave
  // user and thus cannot be changed by the executor.
  if (containerConfig.has_user()) {
    Try<Nothing> chown = os::chown(
        containerConfig.user(),
        path::join(hierarchy, info.cgroup),
        false);

    if (chown.isError()) {
      return Failure("Failed to change ownership of cgroup hierarchy: " +
                     chown.error());
    }
  }

  infos.emplace(containerId, info);

  return update(containerId, containerConfig.executorinfo().resources())
    .then([]() -> Future<Option<ContainerLaunchInfo>> {
      return None();
    });
}
Ejemplo n.º 6
0
Option<Error> validateContainerId(const ContainerID& containerId)
{
  // Slashes are disallowed as these IDs are mapped to directories.
  //
  // Periods are disallowed because our string representation of
  // ContainerID uses periods: <uuid>.<child>.<grandchild>.
  // For example: <uuid>.redis.backup
  //
  // Spaces are disallowed as they can render logs confusing and
  // need escaping on terminals when dealing with paths.
  //
  // TODO(bmahler): Add common/validation.hpp to share ID validation.
  // Note that this however is slightly stricter than other IDs in
  // that we do not allow periods or spaces.
  auto invalidCharacter = [](char c) {
    return iscntrl(c) ||
      c == os::POSIX_PATH_SEPARATOR ||
      c == os::WINDOWS_PATH_SEPARATOR ||
      c == '.' ||
      c == ' ';
  };

  const string& id = containerId.value();

  if (id.empty()) {
    return Error("'ContainerID.value' must be non-empty");
  }

  if (std::any_of(id.begin(), id.end(), invalidCharacter)) {
    return Error("'ContainerID.value' '" + id + "'"
                 " contains invalid characters");
  }

  // TODO(bmahler): Print the invalid field nicely within the error
  // (e.g. 'parent.parent.parent.value'). For now we only have one
  // level of nesting so it's ok.
  if (containerId.has_parent()) {
    Option<Error> parentError = validateContainerId(containerId.parent());

    if (parentError.isSome()) {
      return Error("'ContainerID.parent' is invalid: " + parentError->message);
    }
  }

  return None();
}
Ejemplo n.º 7
0
Future<Nothing> ExternalContainerizerProcess::update(
    const ContainerID& containerId,
    const Resources& resources)
{
  VLOG(1) << "Update triggered on container '" << containerId << "'";

  if (!containers.contains(containerId)) {
    return Failure("Container '" + containerId.value() + "'' not running");
  }

  containers[containerId]->resources = resources;

  // Wrap the Resource protobufs into a ResourceArray protobuf to
  // avoid any problems with streamed protobufs.
  // See http://goo.gl/d1x14F for more on that issue.
  ResourceArray resourceArray;
  foreach (const Resource& r, resources) {
    Resource *resource = resourceArray.add_resource();
    resource->CopyFrom(r);
  }
Ejemplo n.º 8
0
Option<Error> validateContainerId(const ContainerID& containerId)
{
  const string& id = containerId.value();

  // Check common Mesos ID rules.
  Option<Error> error = common::validation::validateID(id);
  if (error.isSome()) {
    return Error(error->message);
  }

  // Check ContainerID specific rules.
  //
  // Periods are disallowed because our string representation of
  // ContainerID uses periods: <uuid>.<child>.<grandchild>.
  // For example: <uuid>.redis.backup
  //
  // Spaces are disallowed as they can render logs confusing and
  // need escaping on terminals when dealing with paths.
  auto invalidCharacter = [](char c) {
    return  c == '.' || c == ' ';
  };

  if (std::any_of(id.begin(), id.end(), invalidCharacter)) {
    return Error("'ContainerID.value' '" + id + "'"
                 " contains invalid characters");
  }

  // TODO(bmahler): Print the invalid field nicely within the error
  // (e.g. 'parent.parent.parent.value'). For now we only have one
  // level of nesting so it's ok.
  if (containerId.has_parent()) {
    Option<Error> parentError = validateContainerId(containerId.parent());

    if (parentError.isSome()) {
      return Error("'ContainerID.parent' is invalid: " + parentError->message);
    }
  }

  return None();
}
Ejemplo n.º 9
0
process::Future<Nothing> CalicoIsolatorProcess::isolate(
    const ContainerID& containerId,
    pid_t pid)
{
    const Info* info = (*infos)[containerId];
    foreach (const Parameter& parameter, parameters.parameter()) {
        if (parameter.key() == isolateKey) {
            std::vector<std::string> argv(7);
            argv[0] = "python";
            argv[1] = parameter.value();
            argv[2] = "isolate";
            argv[3] = stringify(pid);
            argv[4] = containerId.value();
            argv[5] = stringify(info->ipAddress.get());
            argv[6] = stringify(info->profile.get());
            Try<process::Subprocess> child = process::subprocess(pythonPath, argv);
            CHECK_SOME(child);
            waitpid(child.get().pid(), NULL, 0);
            break;
        }
    }
    return Nothing();
}
Ejemplo n.º 10
0
process::Future<Nothing> CalicoIsolatorProcess::cleanup(
    const ContainerID& containerId)
{
    if (!infos->contains(containerId)) {
        LOG(WARNING) << "Ignoring cleanup for unknown container " << containerId;
        return Nothing();
    }
    infos->erase(containerId);
    foreach (const Parameter& parameter, parameters.parameter()) {
        if (parameter.key() == cleanupKey) {
            std::vector<std::string> argv(4);
            argv[0] = "python";
            argv[1] = parameter.value();
            argv[2] = "cleanup";
            argv[3] = containerId.value();
            Try<process::Subprocess> child = process::subprocess(pythonPath, argv);
            CHECK_SOME(child);
            waitpid(child.get().pid(), NULL, 0);
            break;
        }
    }
    return Nothing();
}
Ejemplo n.º 11
0
Try<RunState> RunState::recover(
    const string& rootDir,
    const SlaveID& slaveId,
    const FrameworkID& frameworkId,
    const ExecutorID& executorId,
    const ContainerID& containerId,
    bool strict)
{
  RunState state;
  state.id = containerId;
  string message;

  // Find the tasks.
  Try<list<string> > tasks = os::glob(strings::format(
      paths::TASK_PATH,
      rootDir,
      slaveId,
      frameworkId,
      executorId,
      containerId,
      "*").get());

  if (tasks.isError()) {
    return Error(
        "Failed to find tasks for executor run " + containerId.value() +
        ": " + tasks.error());
  }

  // Recover tasks.
  foreach (const string& path, tasks.get()) {
    TaskID taskId;
    taskId.set_value(os::basename(path).get());

    Try<TaskState> task = TaskState::recover(
        rootDir, slaveId, frameworkId, executorId, containerId, taskId, strict);

    if (task.isError()) {
      return Error(
          "Failed to recover task " + taskId.value() + ": " + task.error());
    }

    state.tasks[taskId] = task.get();
    state.errors += task.get().errors;
  }

  // Read the forked pid.
  string path = paths::getForkedPidPath(
      rootDir, slaveId, frameworkId, executorId, containerId);
  if (!os::exists(path)) {
    // This could happen if the slave died before the isolator
    // checkpointed the forked pid.
    LOG(WARNING) << "Failed to find executor forked pid file '" << path << "'";
    return state;
  }

  Try<string> pid = os::read(path);

  if (pid.isError()) {
    message = "Failed to read executor forked pid from '" + path +
              "': " + pid.error();

    if (strict) {
      return Error(message);
    } else {
      LOG(WARNING) << message;
      state.errors++;
      return state;
    }
  }

  if (pid.get().empty()) {
    // This could happen if the slave died after opening the file for
    // writing but before it checkpointed anything.
    LOG(WARNING) << "Found empty executor forked pid file '" << path << "'";
    return state;
  }

  Try<pid_t> forkedPid = numify<pid_t>(pid.get());
  if (forkedPid.isError()) {
    return Error("Failed to parse forked pid " + pid.get() +
                 ": " + forkedPid.error());
  }

  state.forkedPid = forkedPid.get();

  // Read the libprocess pid.
  path = paths::getLibprocessPidPath(
      rootDir, slaveId, frameworkId, executorId, containerId);

  if (!os::exists(path)) {
    // This could happen if the slave died before the executor
    // registered with the slave.
    LOG(WARNING)
      << "Failed to find executor libprocess pid file '" << path << "'";
    return state;
  }

  pid = os::read(path);

  if (pid.isError()) {
    message = "Failed to read executor libprocess pid from '" + path +
              "': " + pid.error();

    if (strict) {
      return Error(message);
    } else {
      LOG(WARNING) << message;
      state.errors++;
      return state;
    }
  }

  if (pid.get().empty()) {
    // This could happen if the slave died after opening the file for
    // writing but before it checkpointed anything.
    LOG(WARNING) << "Found empty executor libprocess pid file '" << path << "'";
    return state;
  }

  state.libprocessPid = process::UPID(pid.get());

  // See if the sentinel file exists.
  path = paths::getExecutorSentinelPath(
      rootDir, slaveId, frameworkId, executorId, containerId);

  state.completed = os::exists(path);

  return state;
}
Ejemplo n.º 12
0
Future<ExecutorInfo> ExternalContainerizerProcess::launch(
    const ContainerID& containerId,
    const TaskInfo& taskInfo,
    const FrameworkID& frameworkId,
    const std::string& directory,
    const Option<std::string>& user,
    const SlaveID& slaveId,
    const PID<Slave>& slavePid,
    bool checkpoint)
{
  LOG(INFO) << "Launching container '" << containerId << "'";

  // Get the executor from our task. If no executor is associated with
  // the given task, this function renders an ExecutorInfo using the
  // mesos-executor as its command.
  ExecutorInfo executor = containerExecutorInfo(flags, taskInfo, frameworkId);
  executor.mutable_resources()->MergeFrom(taskInfo.resources());

  if (containers.contains(containerId)) {
    return Failure("Cannot start already running container '"
      + containerId.value() + "'");
  }

  sandboxes.put(containerId, Owned<Sandbox>(new Sandbox(directory, user)));

  map<string, string> environment = executorEnvironment(
      executor,
      directory,
      slaveId,
      slavePid,
      checkpoint,
      flags.recovery_timeout);

  if (!flags.hadoop_home.empty()) {
    environment["HADOOP_HOME"] = flags.hadoop_home;
  }

  TaskInfo task;
  task.CopyFrom(taskInfo);
  CommandInfo* command = task.has_executor()
    ? task.mutable_executor()->mutable_command()
    : task.mutable_command();
  // When the selected command has no container attached, use the
  // default from the slave startup flags, if available.
  if (!command->has_container()) {
    if (flags.default_container_image.isSome()) {
      command->mutable_container()->set_image(
          flags.default_container_image.get());
    } else {
      LOG(INFO) << "No container specified in task and no default given. "
                << "The external containerizer will have to fill in "
                << "defaults.";
    }
  }

  ExternalTask external;
  external.mutable_task()->CopyFrom(task);
  external.set_mesos_executor_path(
      path::join(flags.launcher_dir, "mesos-executor"));

  stringstream output;
  external.SerializeToOstream(&output);

  Try<Subprocess> invoked = invoke(
      "launch",
      containerId,
      output.str(),
      environment);

  if (invoked.isError()) {
    return Failure("Launch of container '" + containerId.value()
      + "' failed (error: " + invoked.error() + ")");
  }

  // Record the process.
  containers.put(
      containerId,
      Owned<Container>(new Container(invoked.get().pid())));

  VLOG(2) << "Now awaiting data from pipe...";

  // Read from the result-pipe and invoke callbacks when reaching EOF.
  return await(read(invoked.get().out()), invoked.get().status())
    .then(defer(
        PID<ExternalContainerizerProcess>(this),
        &ExternalContainerizerProcess::_launch,
        containerId,
        frameworkId,
        executor,
        slaveId,
        checkpoint,
        lambda::_1));
}
Ejemplo n.º 13
0
Future<ResourceStatistics> CpuacctSubsystem::usage(
    const ContainerID& containerId)
{
  ResourceStatistics result;

  // TODO(chzhcn): Getting the number of processes and threads is
  // available as long as any cgroup subsystem is used so this best
  // not be tied to a specific cgroup subsystem. A better place is
  // probably Linux Launcher, which uses the cgroup freezer subsystem.
  // That requires some change for it to adopt the new semantics of
  // reporting subsystem-independent cgroup usage.
  // Note: The complexity of this operation is linear to the number of
  // processes and threads in a container: the kernel has to allocate
  // memory to contain the list of pids or tids; the userspace has to
  // parse the cgroup files to get the size. If this proves to be a
  // performance bottleneck, some kind of rate limiting mechanism
  // needs to be employed.
  if (flags.cgroups_cpu_enable_pids_and_tids_count) {
    Try<set<pid_t>> pids = cgroups::processes(
        hierarchy,
        path::join(flags.cgroups_root, containerId.value()));

    if (pids.isError()) {
      return Failure("Failed to get number of processes: " + pids.error());
    }

    result.set_processes(pids.get().size());

    Try<set<pid_t>> tids = cgroups::threads(
        hierarchy,
        path::join(flags.cgroups_root, containerId.value()));

    if (tids.isError()) {
      return Failure("Failed to get number of threads: " + tids.error());
    }

    result.set_threads(tids.get().size());
  }

  // Get the number of clock ticks, used for cpu accounting.
  static long ticks = sysconf(_SC_CLK_TCK);

  PCHECK(ticks > 0) << "Failed to get sysconf(_SC_CLK_TCK)";

  // Add the cpuacct.stat information.
  Try<hashmap<string, uint64_t>> stat = cgroups::stat(
      hierarchy,
      path::join(flags.cgroups_root, containerId.value()),
      "cpuacct.stat");

  if (stat.isError()) {
    return Failure("Failed to read 'cpuacct.stat': " + stat.error());
  }

  // TODO(bmahler): Add namespacing to cgroups to enforce the expected
  // structure, e.g., cgroups::cpuacct::stat.
  Option<uint64_t> user = stat.get().get("user");
  Option<uint64_t> system = stat.get().get("system");

  if (user.isSome() && system.isSome()) {
    result.set_cpus_user_time_secs((double) user.get() / (double) ticks);
    result.set_cpus_system_time_secs((double) system.get() / (double) ticks);
  }

  return result;
}
Ejemplo n.º 14
0
Try<RunState> RunState::recover(
    const string& rootDir,
    const SlaveID& slaveId,
    const FrameworkID& frameworkId,
    const ExecutorID& executorId,
    const ContainerID& containerId,
    bool strict,
    bool rebooted)
{
  RunState state;
  state.id = containerId;
  string message;

  // See if the sentinel file exists. This is done first so it is
  // known even if partial state is returned, e.g., if the libprocess
  // pid file is not recovered. It indicates the slave removed the
  // executor.
  string path = paths::getExecutorSentinelPath(
      rootDir, slaveId, frameworkId, executorId, containerId);

  state.completed = os::exists(path);

  // Find the tasks.
  Try<list<string>> tasks = paths::getTaskPaths(
      rootDir,
      slaveId,
      frameworkId,
      executorId,
      containerId);

  if (tasks.isError()) {
    return Error(
        "Failed to find tasks for executor run " + containerId.value() +
        ": " + tasks.error());
  }

  // Recover tasks.
  foreach (const string& path, tasks.get()) {
    TaskID taskId;
    taskId.set_value(Path(path).basename());

    Try<TaskState> task = TaskState::recover(
        rootDir, slaveId, frameworkId, executorId, containerId, taskId, strict);

    if (task.isError()) {
      return Error(
          "Failed to recover task " + taskId.value() + ": " + task.error());
    }

    state.tasks[taskId] = task.get();
    state.errors += task->errors;
  }

  path = paths::getForkedPidPath(
      rootDir, slaveId, frameworkId, executorId, containerId);

  // If agent host is rebooted, we do not read the forked pid and libprocess pid
  // since those two pids are obsolete after reboot. And we remove the forked
  // pid file to make sure we will not read it in the case the agent process is
  // restarted after we checkpoint the new boot ID in `Slave::__recover` (i.e.,
  // agent recovery is done after the reboot).
  if (rebooted) {
    if (os::exists(path)) {
      Try<Nothing> rm = os::rm(path);
      if (rm.isError()) {
        return Error(
            "Failed to remove executor forked pid file '" + path + "': " +
            rm.error());
      }
    }

    return state;
  }

  if (!os::exists(path)) {
    // This could happen if the slave died before the containerizer checkpointed
    // the forked pid or agent process is restarted after agent host is rebooted
    // since we remove this file in the above code.
    LOG(WARNING) << "Failed to find executor forked pid file '" << path << "'";
    return state;
  }

  // Read the forked pid.
  Result<string> pid = state::read<string>(path);
  if (pid.isError()) {
    message = "Failed to read executor forked pid from '" + path +
              "': " + pid.error();

    if (strict) {
      return Error(message);
    } else {
      LOG(WARNING) << message;
      state.errors++;
      return state;
    }
  }

  if (pid->empty()) {
    // This could happen if the slave is hard rebooted after the file is created
    // but before the data is synced on disk.
    LOG(WARNING) << "Found empty executor forked pid file '" << path << "'";
    return state;
  }

  Try<pid_t> forkedPid = numify<pid_t>(pid.get());
  if (forkedPid.isError()) {
    return Error("Failed to parse forked pid '" + pid.get() + "' "
                 "from pid file '" + path + "': " +
                 forkedPid.error());
  }

  state.forkedPid = forkedPid.get();

  // Read the libprocess pid.
  path = paths::getLibprocessPidPath(
      rootDir, slaveId, frameworkId, executorId, containerId);

  if (os::exists(path)) {
    pid = state::read<string>(path);

    if (pid.isError()) {
      message = "Failed to read executor libprocess pid from '" + path +
                "': " + pid.error();

      if (strict) {
        return Error(message);
      } else {
        LOG(WARNING) << message;
        state.errors++;
        return state;
      }
    }

    if (pid->empty()) {
      // This could happen if the slave is hard rebooted after the file is
      // created but before the data is synced on disk.
      LOG(WARNING) << "Found empty executor libprocess pid file '" << path
                   << "'";
      return state;
    }

    state.libprocessPid = process::UPID(pid.get());
    state.http = false;

    return state;
  }

  path = paths::getExecutorHttpMarkerPath(
      rootDir, slaveId, frameworkId, executorId, containerId);

  // The marker could be absent if the slave died before the executor
  // registered with the slave.
  if (!os::exists(path)) {
    LOG(WARNING) << "Failed to find '" << paths::LIBPROCESS_PID_FILE
                 << "' or '" << paths::HTTP_MARKER_FILE
                 << "' for container " << containerId
                 << " of executor '" << executorId
                 << "' of framework " << frameworkId;
    return state;
  }

  state.http = true;
  return state;
}
Ejemplo n.º 15
0
Try<ExecutorState> ExecutorState::recover(
    const string& rootDir,
    const SlaveID& slaveId,
    const FrameworkID& frameworkId,
    const ExecutorID& executorId,
    bool strict,
    bool rebooted)
{
  ExecutorState state;
  state.id = executorId;
  string message;

  // Find the runs.
  Try<list<string>> runs = paths::getExecutorRunPaths(
      rootDir,
      slaveId,
      frameworkId,
      executorId);

  if (runs.isError()) {
    return Error("Failed to find runs for executor '" + executorId.value() +
                 "': " + runs.error());
  }

  // Recover the runs.
  foreach (const string& path, runs.get()) {
    if (Path(path).basename() == paths::LATEST_SYMLINK) {
      const Result<string>& latest = os::realpath(path);
      if (!latest.isSome()) {
        return Error(
            "Failed to find latest run of executor '" +
            executorId.value() + "': " +
            (latest.isError()
             ? latest.error()
             : "No such file or directory"));
      }

      // Store the ContainerID of the latest executor run.
      ContainerID containerId;
      containerId.set_value(Path(latest.get()).basename());
      state.latest = containerId;
    } else {
      ContainerID containerId;
      containerId.set_value(Path(path).basename());

      Try<RunState> run = RunState::recover(
          rootDir,
          slaveId,
          frameworkId,
          executorId,
          containerId,
          strict,
          rebooted);

      if (run.isError()) {
        return Error(
            "Failed to recover run " + containerId.value() +
            " of executor '" + executorId.value() +
            "': " + run.error());
      }

      state.runs[containerId] = run.get();
      state.errors += run->errors;
    }
  }

  // Find the latest executor.
  // It is possible that we cannot find the "latest" executor if the
  // slave died before it created the "latest" symlink.
  if (state.latest.isNone()) {
    LOG(WARNING) << "Failed to find the latest run of executor '"
                 << executorId << "' of framework " << frameworkId;
    return state;
  }

  // Read the executor info.
  const string& path =
    paths::getExecutorInfoPath(rootDir, slaveId, frameworkId, executorId);
  if (!os::exists(path)) {
    // This could happen if the slave died after creating the executor
    // directory but before it checkpointed the executor info.
    LOG(WARNING) << "Failed to find executor info file '" << path << "'";
    return state;
  }

  Result<ExecutorInfo> executorInfo = state::read<ExecutorInfo>(path);

  if (executorInfo.isError()) {
    message = "Failed to read executor info from '" + path + "': " +
              executorInfo.error();

    if (strict) {
      return Error(message);
    } else {
      LOG(WARNING) << message;
      state.errors++;
      return state;
    }
  }

  if (executorInfo.isNone()) {
    // This could happen if the slave is hard rebooted after the file is created
    // but before the data is synced on disk.
    LOG(WARNING) << "Found empty executor info file '" << path << "'";
    return state;
  }

  state.info = executorInfo.get();

  return state;
}
Ejemplo n.º 16
0
Future<ExecutorInfo> ExternalContainerizerProcess::_launch(
    const ContainerID& containerId,
    const FrameworkID& frameworkId,
    const ExecutorInfo executorInfo,
    const SlaveID& slaveId,
    bool checkpoint,
    const Future<ResultFutures>& future)
{
  VLOG(1) << "Launch callback triggered on container '" << containerId << "'";

  if (!containers.contains(containerId)) {
    return Failure("Container '" + containerId.value() + "' not running");
  }

  string result;
  Try<bool> support = commandSupported(future, result);
  if (support.isError()) {
    terminate(containerId);
    return Failure(support.error());
  }

  if (!support.get()) {
    // We generally need to use an internal implementation in these
    // cases.
    // For the specific case of a launch however, there can not be an
    // internal implementation for a external containerizer, hence
    // we need to fail or even abort at this point.
    // TODO(tillt): Consider using posix-isolator as a fall back.
    terminate(containerId);
    return Failure("External containerizer does not support launch");
  }

  VLOG(1) << "Launch supported by external containerizer";

  ExternalStatus ps;
  if (!ps.ParseFromString(result)) {
    // TODO(tillt): Consider not terminating the containerizer due
    // to protocol breach but only fail the operation.
    terminate(containerId);
    return Failure("Could not parse launch result protobuf (error: "
      + protobufError(ps) + ")");
  }

  VLOG(2) << "Launch result: '" << ps.message() << "'";
  VLOG(2) << "Executor pid: " << ps.pid();

  containers[containerId]->pid = ps.pid();

  // Observe the executor process and install a callback for status
  // changes.
  process::reap(ps.pid())
    .onAny(defer(
        PID<ExternalContainerizerProcess>(this),
        &ExternalContainerizerProcess::reaped,
        containerId,
        lambda::_1));

  // Checkpoint the container's pid if requested.
  if (checkpoint) {
    const string& path = slave::paths::getForkedPidPath(
        slave::paths::getMetaRootDir(flags.work_dir),
        slaveId,
        frameworkId,
        executorInfo.executor_id(),
        containerId);

    LOG(INFO) << "Checkpointing containerized executor '" << containerId
              << "' pid " << ps.pid() << " to '" << path <<  "'";

    Try<Nothing> checkpointed =
        slave::state::checkpoint(path, stringify(ps.pid()));

    if (checkpointed.isError()) {
      terminate(containerId);
      return Failure("Failed to checkpoint containerized executor '"
        + containerId.value() + "' pid " + stringify(ps.pid()) + " to '" + path
        + "'");
    }
  }

  VLOG(1) << "Launch finishing up for container '" << containerId << "'";
  return executorInfo;
}
Ejemplo n.º 17
0
inline bool operator==(const ContainerID& left, const std::string& right)
{
  return left.value() == right;
}
Ejemplo n.º 18
0
bool operator==(const ContainerID& left, const ContainerID& right)
{
  return left.value() == right.value() &&
    left.has_parent() == right.has_parent() &&
    (!left.has_parent() || left.parent() == right.parent());
}
Ejemplo n.º 19
0
string LinuxLauncher::cgroup(const ContainerID& containerId)
{
  return path::join(flags.cgroups_root, containerId.value());
}
Ejemplo n.º 20
0
inline std::size_t hash_value(const ContainerID& containerId)
{
  size_t seed = 0;
  boost::hash_combine(seed, containerId.value());
  return seed;
}
Ejemplo n.º 21
0
inline bool operator==(const ContainerID& left, const ContainerID& right)
{
  return left.value() == right.value();
}
Ejemplo n.º 22
0
Future<Nothing> NetworkCniIsolatorProcess::_attach(
    const ContainerID& containerId,
    const string& networkName,
    const string& plugin,
    const tuple<Future<Option<int>>, Future<string>>& t)
{
  CHECK(infos.contains(containerId));
  CHECK(infos[containerId]->containerNetworks.contains(networkName));

  Future<Option<int>> status = std::get<0>(t);
  if (!status.isReady()) {
    return Failure(
        "Failed to get the exit status of the CNI plugin '" +
        plugin + "' subprocess: " +
        (status.isFailed() ? status.failure() : "discarded"));
  }

  if (status->isNone()) {
    return Failure(
        "Failed to reap the CNI plugin '" + plugin + "' subprocess");
  }

  // CNI plugin will print result (in case of success) or error (in
  // case of failure) to stdout.
  Future<string> output = std::get<1>(t);
  if (!output.isReady()) {
    return Failure(
        "Failed to read stdout from the CNI plugin '" +
        plugin + "' subprocess: " +
        (output.isFailed() ? output.failure() : "discarded"));
  }

  if (status.get() != 0) {
    return Failure(
        "The CNI plugin '" + plugin + "' failed to attach container " +
        containerId.value() + " to CNI network '" + networkName +
        "': " + output.get());
  }

  // Parse the output of CNI plugin.
  Try<spec::NetworkInfo> parse = spec::parseNetworkInfo(output.get());
  if (parse.isError()) {
    return Failure(
        "Failed to parse the output of the CNI plugin '" +
        plugin + "': " + parse.error());
  }

  if (parse.get().has_ip4()) {
    LOG(INFO) << "Got assigned IPv4 address '" << parse.get().ip4().ip()
              << "' from CNI network '" << networkName
              << "' for container " << containerId;
  }

  if (parse.get().has_ip6()) {
    LOG(INFO) << "Got assigned IPv6 address '" << parse.get().ip6().ip()
              << "' from CNI network '" << networkName
              << "' for container " << containerId;
  }

  // Checkpoint the output of CNI plugin.
  // The destruction of the container cannot happen in the middle of
  // 'attach()' and '_attach()' because the containerizer will wait
  // for 'isolate()' to finish before destroying the container.
  ContainerNetwork& containerNetwork =
      infos[containerId]->containerNetworks[networkName];

  const string networkInfoPath = paths::getNetworkInfoPath(
      rootDir.get(),
      containerId.value(),
      networkName,
      containerNetwork.ifName);

  Try<Nothing> write = os::write(networkInfoPath, output.get());
  if (write.isError()) {
    return Failure(
        "Failed to checkpoint the output of CNI plugin'" +
        output.get() + "': " + write.error());
  }

  containerNetwork.cniNetworkInfo = parse.get();

  return Nothing();
}