// Waits on the given container.
//
// Fails immediately if the container is not tracked in 'containers'
// or if the external "wait" command cannot be invoked (in the latter
// case the container is also terminated). Otherwise '_wait' is
// installed as a callback on the invoked command and the container's
// termination future is returned.
Future<Termination> ExternalContainerizerProcess::wait(
    const ContainerID& containerId)
{
  VLOG(1) << "Wait triggered on container '" << containerId << "'";

  if (!containers.contains(containerId)) {
    LOG(ERROR) << "Container '" << containerId << "' not running";
    return Failure("Container '" + containerId.value() + "' not running");
  }

  Try<Subprocess> invoked = invoke("wait", containerId);

  if (invoked.isError()) {
    // Log the actual cause; the container itself is known at this
    // point, it is the invocation of the command that failed.
    LOG(ERROR) << "Wait on container '" << containerId
               << "' failed (error: " << invoked.error() << ")";
    terminate(containerId);
    return Failure("Wait on container '" + containerId.value() +
                   "' failed (error: " + invoked.error() + ")");
  }

  // Await both, input from the pipe as well as an exit of the
  // process.
  await(read(invoked.get().out()), invoked.get().status())
    .onAny(defer(
        PID<ExternalContainerizerProcess>(this),
        &ExternalContainerizerProcess::_wait,
        containerId,
        lambda::_1));

  return containers[containerId]->termination.future();
}
// Detaches the container from the named CNI network by running the
// network's CNI plugin with 'CNI_COMMAND=DEL'. The plugin's exit
// status and output are handed to '_detach' for evaluation.
//
// Both the container and its attachment to 'networkName' must already
// be recorded in 'infos' (enforced via CHECK).
Future<Nothing> NetworkCniIsolatorProcess::detach(
    const ContainerID& containerId,
    const std::string& networkName)
{
  CHECK(infos.contains(containerId));
  CHECK(infos[containerId]->containerNetworks.contains(networkName));

  const ContainerNetwork& network =
    infos[containerId]->containerNetworks[networkName];

  // Environment handed to the CNI plugin.
  map<string, string> pluginEnv;
  pluginEnv["CNI_COMMAND"] = "DEL";
  pluginEnv["CNI_CONTAINERID"] = containerId.value();
  pluginEnv["CNI_PATH"] = pluginDir.get();
  pluginEnv["CNI_IFNAME"] = network.ifName;
  pluginEnv["CNI_NETNS"] =
    paths::getNamespacePath(rootDir.get(), containerId.value());

  // Plugins may have to run "iptables" (e.g., for IP Masquerade), so
  // they need a usable "PATH": inherit ours when it is set, otherwise
  // fall back to a conventional default.
  Option<string> inheritedPath = os::getenv("PATH");
  if (inheritedPath.isNone()) {
    pluginEnv["PATH"] =
      "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin";
  } else {
    pluginEnv["PATH"] = inheritedPath.get();
  }

  const NetworkConfigInfo& networkConfig = networkConfigs[networkName];
  const string& plugin = networkConfig.config.type();

  // Launch the plugin binary located in the plugin directory.
  Try<Subprocess> child = subprocess(
      path::join(pluginDir.get(), plugin),
      {plugin},
      Subprocess::PATH(networkConfig.path),
      Subprocess::PIPE(),
      Subprocess::PATH("/dev/null"),
      NO_SETSID,
      None(),
      pluginEnv);

  if (child.isError()) {
    return Failure(
        "Failed to execute the CNI plugin '" + plugin + "': " + child.error());
  }

  return await(child->status(), io::read(child->out().get()))
    .then(defer(
        PID<NetworkCniIsolatorProcess>(this),
        &NetworkCniIsolatorProcess::_detach,
        containerId,
        networkName,
        plugin,
        lambda::_1));
}
// Second stage of cleanup, run with the results of all network
// detaches of the container.
//
// If any detach did not complete successfully, the collected reasons
// are returned as a single failure and nothing is removed. Otherwise
// the network namespace handle is unmounted (if present), the
// container directory is removed and the container is dropped from
// 'infos'.
Future<Nothing> NetworkCniIsolatorProcess::_cleanup(
    const ContainerID& containerId,
    const list<Future<Nothing>>& detaches)
{
  CHECK(infos.contains(containerId));

  // Gather a reason for every detach that is failed or discarded.
  vector<string> reasons;
  foreach (const Future<Nothing>& future, detaches) {
    if (future.isReady()) {
      continue;
    }

    if (future.isFailed()) {
      reasons.push_back(future.failure());
    } else {
      reasons.push_back("discarded");
    }
  }

  if (!reasons.empty()) {
    return Failure(strings::join("\n", reasons));
  }

  const string containerDir =
    paths::getContainerDir(rootDir.get(), containerId.value());

  const string target =
    paths::getNamespacePath(rootDir.get(), containerId.value());

  if (os::exists(target)) {
    Try<Nothing> unmounted = fs::unmount(target);
    if (unmounted.isError()) {
      return Failure(
          "Failed to unmount the network namespace handle '" +
          target + "': " + unmounted.error());
    }

    LOG(INFO) << "Unmounted the network namespace handle '"
              << target << "' for container " << containerId;
  }

  Try<Nothing> removed = os::rmdir(containerDir);
  if (removed.isError()) {
    return Failure(
        "Failed to remove the container directory '" +
        containerDir + "': " + removed.error());
  }

  LOG(INFO) << "Removed the container directory '" << containerDir << "'";

  infos.erase(containerId);

  return Nothing();
}
// Continuation of 'detach': evaluates the exit status and stdout of
// the CNI plugin.
//
// On a zero exit status the checkpointed interface directory for this
// network is removed. On a non-zero exit status the plugin's stdout
// is reported as part of the failure.
Future<Nothing> NetworkCniIsolatorProcess::_detach(
    const ContainerID& containerId,
    const std::string& networkName,
    const string& plugin,
    const tuple<Future<Option<int>>, Future<string>>& t)
{
  CHECK(infos.contains(containerId));
  CHECK(infos[containerId]->containerNetworks.contains(networkName));

  Future<Option<int>> exitStatus = std::get<0>(t);
  if (!exitStatus.isReady()) {
    const string reason =
      exitStatus.isFailed() ? exitStatus.failure() : "discarded";

    return Failure(
        "Failed to get the exit status of the CNI plugin '" +
        plugin + "' subprocess: " + reason);
  }

  if (exitStatus->isNone()) {
    return Failure(
        "Failed to reap the CNI plugin '" + plugin + "' subprocess");
  }

  if (!(exitStatus.get() == 0)) {
    // CNI plugin will print result (in case of success) or error (in
    // case of failure) to stdout.
    Future<string> pluginOutput = std::get<1>(t);
    if (!pluginOutput.isReady()) {
      const string reason =
        pluginOutput.isFailed() ? pluginOutput.failure() : "discarded";

      return Failure(
          "Failed to read stdout from the CNI plugin '" +
          plugin + "' subprocess: " + reason);
    }

    return Failure(
        "The CNI plugin '" + plugin + "' failed to detach container "
        "from network '" + networkName + "': " + pluginOutput.get());
  }

  // The plugin succeeded: drop the interface directory.
  const string ifDir = paths::getInterfaceDir(
      rootDir.get(),
      containerId.value(),
      networkName,
      infos[containerId]->containerNetworks[networkName].ifName);

  Try<Nothing> removed = os::rmdir(ifDir);
  if (removed.isError()) {
    return Failure(
        "Failed to remove interface directory '" +
        ifDir + "': " + removed.error());
  }

  return Nothing();
}
// Creates the cgroup for a new container and registers the container
// in 'infos'.
//
// Fails if the container was already prepared, if the cgroup already
// exists, or if creating/chowning the cgroup fails. On success the
// result of 'update()' is awaited and no launch info is returned.
Future<Option<ContainerLaunchInfo>> CgroupsNetClsIsolatorProcess::prepare(
    const ContainerID& containerId,
    const ContainerConfig& containerConfig)
{
  if (infos.contains(containerId)) {
    return Failure("Container has already been prepared");
  }

  // The info is only stored in 'infos' once the cgroup has been
  // created successfully; until then it is just a local description.
  Info pending(path::join(flags.cgroups_root, containerId.value()));

  Try<bool> present = cgroups::exists(hierarchy, pending.cgroup);
  if (present.isError()) {
    return Failure("Failed to check if the cgroup already exists: " +
                   present.error());
  }

  if (present.get()) {
    return Failure("The cgroup already exists");
  }

  Try<Nothing> created = cgroups::create(hierarchy, pending.cgroup);
  if (created.isError()) {
    return Failure("Failed to create the cgroup: " + created.error());
  }

  // 'chown' the cgroup so the executor can create nested cgroups. Do
  // not recurse so the control files are still owned by the slave
  // user and thus cannot be changed by the executor.
  if (containerConfig.has_user()) {
    Try<Nothing> chowned = os::chown(
        containerConfig.user(),
        path::join(hierarchy, pending.cgroup),
        false);

    if (chowned.isError()) {
      return Failure("Failed to change ownership of cgroup hierarchy: " +
                     chowned.error());
    }
  }

  infos.emplace(containerId, pending);

  return update(containerId, containerConfig.executorinfo().resources())
    .then([]() -> Future<Option<ContainerLaunchInfo>> {
      return None();
    });
}
Option<Error> validateContainerId(const ContainerID& containerId) { // Slashes are disallowed as these IDs are mapped to directories. // // Periods are disallowed because our string representation of // ContainerID uses periods: <uuid>.<child>.<grandchild>. // For example: <uuid>.redis.backup // // Spaces are disallowed as they can render logs confusing and // need escaping on terminals when dealing with paths. // // TODO(bmahler): Add common/validation.hpp to share ID validation. // Note that this however is slightly stricter than other IDs in // that we do not allow periods or spaces. auto invalidCharacter = [](char c) { return iscntrl(c) || c == os::POSIX_PATH_SEPARATOR || c == os::WINDOWS_PATH_SEPARATOR || c == '.' || c == ' '; }; const string& id = containerId.value(); if (id.empty()) { return Error("'ContainerID.value' must be non-empty"); } if (std::any_of(id.begin(), id.end(), invalidCharacter)) { return Error("'ContainerID.value' '" + id + "'" " contains invalid characters"); } // TODO(bmahler): Print the invalid field nicely within the error // (e.g. 'parent.parent.parent.value'). For now we only have one // level of nesting so it's ok. if (containerId.has_parent()) { Option<Error> parentError = validateContainerId(containerId.parent()); if (parentError.isSome()) { return Error("'ContainerID.parent' is invalid: " + parentError->message); } } return None(); }
Future<Nothing> ExternalContainerizerProcess::update( const ContainerID& containerId, const Resources& resources) { VLOG(1) << "Update triggered on container '" << containerId << "'"; if (!containers.contains(containerId)) { return Failure("Container '" + containerId.value() + "'' not running"); } containers[containerId]->resources = resources; // Wrap the Resource protobufs into a ResourceArray protobuf to // avoid any problems with streamed protobufs. // See http://goo.gl/d1x14F for more on that issue. ResourceArray resourceArray; foreach (const Resource& r, resources) { Resource *resource = resourceArray.add_resource(); resource->CopyFrom(r); }
Option<Error> validateContainerId(const ContainerID& containerId) { const string& id = containerId.value(); // Check common Mesos ID rules. Option<Error> error = common::validation::validateID(id); if (error.isSome()) { return Error(error->message); } // Check ContainerID specific rules. // // Periods are disallowed because our string representation of // ContainerID uses periods: <uuid>.<child>.<grandchild>. // For example: <uuid>.redis.backup // // Spaces are disallowed as they can render logs confusing and // need escaping on terminals when dealing with paths. auto invalidCharacter = [](char c) { return c == '.' || c == ' '; }; if (std::any_of(id.begin(), id.end(), invalidCharacter)) { return Error("'ContainerID.value' '" + id + "'" " contains invalid characters"); } // TODO(bmahler): Print the invalid field nicely within the error // (e.g. 'parent.parent.parent.value'). For now we only have one // level of nesting so it's ok. if (containerId.has_parent()) { Option<Error> parentError = validateContainerId(containerId.parent()); if (parentError.isSome()) { return Error("'ContainerID.parent' is invalid: " + parentError->message); } } return None(); }
// Runs the configured "isolate" script for the given container.
//
// The first module parameter whose key equals 'isolateKey' names the
// script; it is run through 'pythonPath' with the container's pid, ID,
// IP address and profile as arguments. Containers that are not
// tracked in 'infos' are ignored (with a warning), mirroring
// 'cleanup()'.
//
// NOTE: 'waitpid()' is called with no options, so this call blocks
// until the script's process changes state.
process::Future<Nothing> CalicoIsolatorProcess::isolate(
    const ContainerID& containerId,
    pid_t pid)
{
  // Guard against unknown containers: indexing 'infos' with a missing
  // key would default-insert an entry and the resulting 'info' would
  // then be dereferenced below.
  if (!infos->contains(containerId)) {
    LOG(WARNING) << "Ignoring isolate for unknown container " << containerId;
    return Nothing();
  }

  const Info* info = (*infos)[containerId];

  foreach (const Parameter& parameter, parameters.parameter()) {
    if (parameter.key() == isolateKey) {
      std::vector<std::string> argv(7);
      argv[0] = "python";
      argv[1] = parameter.value();
      argv[2] = "isolate";
      argv[3] = stringify(pid);
      argv[4] = containerId.value();
      argv[5] = stringify(info->ipAddress.get());
      argv[6] = stringify(info->profile.get());

      Try<process::Subprocess> child = process::subprocess(pythonPath, argv);
      CHECK_SOME(child);

      waitpid(child.get().pid(), NULL, 0);

      // Only the first matching parameter is honored.
      break;
    }
  }

  return Nothing();
}
// Forgets the given container and runs the configured "cleanup"
// script for it.
//
// Unknown containers are ignored with a warning. The script is taken
// from the first module parameter whose key equals 'cleanupKey' and
// is run through 'pythonPath'; the call waits for it via 'waitpid()'.
process::Future<Nothing> CalicoIsolatorProcess::cleanup(
    const ContainerID& containerId)
{
  if (!infos->contains(containerId)) {
    LOG(WARNING) << "Ignoring cleanup for unknown container " << containerId;
    return Nothing();
  }

  infos->erase(containerId);

  foreach (const Parameter& parameter, parameters.parameter()) {
    if (parameter.key() != cleanupKey) {
      continue;
    }

    std::vector<std::string> argv;
    argv.reserve(4);
    argv.push_back("python");
    argv.push_back(parameter.value());
    argv.push_back("cleanup");
    argv.push_back(containerId.value());

    Try<process::Subprocess> child = process::subprocess(pythonPath, argv);
    CHECK_SOME(child);

    waitpid(child.get().pid(), NULL, 0);

    // Only the first matching parameter is honored.
    break;
  }

  return Nothing();
}
// Recovers the checkpointed state of a single executor run.
//
// Tasks are recovered first, followed by the forked pid, the
// libprocess pid and finally the sentinel marker. A missing or empty
// pid file yields the partial state recovered so far. Read errors are
// fatal when 'strict' is set; otherwise they are counted in
// 'state.errors' and the partial state is returned.
Try<RunState> RunState::recover(
    const string& rootDir,
    const SlaveID& slaveId,
    const FrameworkID& frameworkId,
    const ExecutorID& executorId,
    const ContainerID& containerId,
    bool strict)
{
  RunState state;
  state.id = containerId;

  // Find the tasks.
  const string taskPattern = strings::format(
      paths::TASK_PATH,
      rootDir,
      slaveId,
      frameworkId,
      executorId,
      containerId,
      "*").get();

  Try<list<string> > taskPaths = os::glob(taskPattern);

  if (taskPaths.isError()) {
    return Error(
        "Failed to find tasks for executor run " + containerId.value() +
        ": " + taskPaths.error());
  }

  // Recover tasks.
  foreach (const string& taskPath, taskPaths.get()) {
    TaskID taskId;
    taskId.set_value(os::basename(taskPath).get());

    Try<TaskState> recovered = TaskState::recover(
        rootDir, slaveId, frameworkId, executorId, containerId, taskId, strict);

    if (recovered.isError()) {
      return Error(
          "Failed to recover task " + taskId.value() + ": " +
          recovered.error());
    }

    state.tasks[taskId] = recovered.get();
    state.errors += recovered.get().errors;
  }

  // Read the forked pid.
  const string forkedPidPath = paths::getForkedPidPath(
      rootDir, slaveId, frameworkId, executorId, containerId);

  if (!os::exists(forkedPidPath)) {
    // This could happen if the slave died before the isolator
    // checkpointed the forked pid.
    LOG(WARNING) << "Failed to find executor forked pid file '"
                 << forkedPidPath << "'";
    return state;
  }

  Try<string> forkedPidData = os::read(forkedPidPath);

  if (forkedPidData.isError()) {
    const string message =
      "Failed to read executor forked pid from '" + forkedPidPath +
      "': " + forkedPidData.error();

    if (strict) {
      return Error(message);
    }

    LOG(WARNING) << message;
    state.errors++;
    return state;
  }

  if (forkedPidData.get().empty()) {
    // This could happen if the slave died after opening the file for
    // writing but before it checkpointed anything.
    LOG(WARNING) << "Found empty executor forked pid file '"
                 << forkedPidPath << "'";
    return state;
  }

  Try<pid_t> forkedPid = numify<pid_t>(forkedPidData.get());
  if (forkedPid.isError()) {
    return Error("Failed to parse forked pid " + forkedPidData.get() +
                 ": " + forkedPid.error());
  }

  state.forkedPid = forkedPid.get();

  // Read the libprocess pid.
  const string libprocessPidPath = paths::getLibprocessPidPath(
      rootDir, slaveId, frameworkId, executorId, containerId);

  if (!os::exists(libprocessPidPath)) {
    // This could happen if the slave died before the executor
    // registered with the slave.
    LOG(WARNING) << "Failed to find executor libprocess pid file '"
                 << libprocessPidPath << "'";
    return state;
  }

  Try<string> libprocessPidData = os::read(libprocessPidPath);

  if (libprocessPidData.isError()) {
    const string message =
      "Failed to read executor libprocess pid from '" + libprocessPidPath +
      "': " + libprocessPidData.error();

    if (strict) {
      return Error(message);
    }

    LOG(WARNING) << message;
    state.errors++;
    return state;
  }

  if (libprocessPidData.get().empty()) {
    // This could happen if the slave died after opening the file for
    // writing but before it checkpointed anything.
    LOG(WARNING) << "Found empty executor libprocess pid file '"
                 << libprocessPidPath << "'";
    return state;
  }

  state.libprocessPid = process::UPID(libprocessPidData.get());

  // See if the sentinel file exists.
  const string sentinelPath = paths::getExecutorSentinelPath(
      rootDir, slaveId, frameworkId, executorId, containerId);

  state.completed = os::exists(sentinelPath);

  return state;
}
// Launches a container for the given task via the external
// containerizer's "launch" command.
//
// The task (with a default container image filled in when the flags
// provide one) is serialized as an 'ExternalTask' and passed to the
// command. The invoked process is recorded in 'containers' and
// '_launch' continues once its output and exit status are available.
Future<ExecutorInfo> ExternalContainerizerProcess::launch(
    const ContainerID& containerId,
    const TaskInfo& taskInfo,
    const FrameworkID& frameworkId,
    const std::string& directory,
    const Option<std::string>& user,
    const SlaveID& slaveId,
    const PID<Slave>& slavePid,
    bool checkpoint)
{
  LOG(INFO) << "Launching container '" << containerId << "'";

  // Get the executor from our task. If no executor is associated with
  // the given task, this function renders an ExecutorInfo using the
  // mesos-executor as its command.
  ExecutorInfo executor = containerExecutorInfo(flags, taskInfo, frameworkId);
  executor.mutable_resources()->MergeFrom(taskInfo.resources());

  if (containers.contains(containerId)) {
    return Failure("Cannot start already running container '" +
                   containerId.value() + "'");
  }

  sandboxes.put(containerId, Owned<Sandbox>(new Sandbox(directory, user)));

  map<string, string> env = executorEnvironment(
      executor,
      directory,
      slaveId,
      slavePid,
      checkpoint,
      flags.recovery_timeout);

  if (!flags.hadoop_home.empty()) {
    env["HADOOP_HOME"] = flags.hadoop_home;
  }

  // Work on a copy so the default container can be filled in.
  TaskInfo task;
  task.CopyFrom(taskInfo);

  CommandInfo* command = NULL;
  if (task.has_executor()) {
    command = task.mutable_executor()->mutable_command();
  } else {
    command = task.mutable_command();
  }

  // When the selected command has no container attached, use the
  // default from the slave startup flags, if available.
  if (!command->has_container()) {
    if (flags.default_container_image.isSome()) {
      command->mutable_container()->set_image(
          flags.default_container_image.get());
    } else {
      LOG(INFO) << "No container specified in task and no default given. "
                << "The external containerizer will have to fill in "
                << "defaults.";
    }
  }

  ExternalTask external;
  external.mutable_task()->CopyFrom(task);
  external.set_mesos_executor_path(
      path::join(flags.launcher_dir, "mesos-executor"));

  stringstream serialized;
  external.SerializeToOstream(&serialized);

  Try<Subprocess> invoked =
    invoke("launch", containerId, serialized.str(), env);

  if (invoked.isError()) {
    return Failure("Launch of container '" + containerId.value() +
                   "' failed (error: " + invoked.error() + ")");
  }

  // Record the process.
  containers.put(
      containerId,
      Owned<Container>(new Container(invoked.get().pid())));

  VLOG(2) << "Now awaiting data from pipe...";

  // Read from the result-pipe and invoke callbacks when reaching EOF.
  return await(read(invoked.get().out()), invoked.get().status())
    .then(defer(
        PID<ExternalContainerizerProcess>(this),
        &ExternalContainerizerProcess::_launch,
        containerId,
        frameworkId,
        executor,
        slaveId,
        checkpoint,
        lambda::_1));
}
// Collects cpuacct statistics for the container's cgroup.
//
// When 'cgroups_cpu_enable_pids_and_tids_count' is set, the numbers
// of processes and threads in the cgroup are reported as well. User
// and system CPU times are read from 'cpuacct.stat' and converted
// from clock ticks to seconds; they are only set when both values are
// present.
Future<ResourceStatistics> CpuacctSubsystem::usage(
    const ContainerID& containerId)
{
  ResourceStatistics result;

  // The container's cgroup, relative to the hierarchy.
  const string cgroup = path::join(flags.cgroups_root, containerId.value());

  // TODO(chzhcn): Getting the number of processes and threads is
  // available as long as any cgroup subsystem is used so this best
  // not be tied to a specific cgroup subsystem. A better place is
  // probably Linux Launcher, which uses the cgroup freezer subsystem.
  // That requires some change for it to adopt the new semantics of
  // reporting subsystem-independent cgroup usage.
  // Note: The complexity of this operation is linear to the number of
  // processes and threads in a container: the kernel has to allocate
  // memory to contain the list of pids or tids; the userspace has to
  // parse the cgroup files to get the size. If this proves to be a
  // performance bottleneck, some kind of rate limiting mechanism
  // needs to be employed.
  if (flags.cgroups_cpu_enable_pids_and_tids_count) {
    Try<set<pid_t>> processes = cgroups::processes(hierarchy, cgroup);
    if (processes.isError()) {
      return Failure(
          "Failed to get number of processes: " + processes.error());
    }

    result.set_processes(processes.get().size());

    Try<set<pid_t>> threads = cgroups::threads(hierarchy, cgroup);
    if (threads.isError()) {
      return Failure("Failed to get number of threads: " + threads.error());
    }

    result.set_threads(threads.get().size());
  }

  // Get the number of clock ticks, used for cpu accounting.
  static long ticks = sysconf(_SC_CLK_TCK);

  PCHECK(ticks > 0) << "Failed to get sysconf(_SC_CLK_TCK)";

  // Add the cpuacct.stat information.
  Try<hashmap<string, uint64_t>> stat =
    cgroups::stat(hierarchy, cgroup, "cpuacct.stat");

  if (stat.isError()) {
    return Failure("Failed to read 'cpuacct.stat': " + stat.error());
  }

  // TODO(bmahler): Add namespacing to cgroups to enforce the expected
  // structure, e.g., cgroups::cpuacct::stat.
  Option<uint64_t> userTicks = stat.get().get("user");
  Option<uint64_t> systemTicks = stat.get().get("system");

  if (userTicks.isSome() && systemTicks.isSome()) {
    const double ticksPerSecond = static_cast<double>(ticks);

    result.set_cpus_user_time_secs(
        static_cast<double>(userTicks.get()) / ticksPerSecond);

    result.set_cpus_system_time_secs(
        static_cast<double>(systemTicks.get()) / ticksPerSecond);
  }

  return result;
}
// Recovers the checkpointed state of a single executor run.
//
// The sentinel marker is evaluated first so that 'completed' is known
// even when only partial state is returned. Tasks are recovered next,
// followed by the forked pid and then either the libprocess pid or
// the HTTP marker. When 'rebooted' is set the pids are not read and
// the forked pid file is removed. Read errors are fatal when 'strict'
// is set; otherwise they are counted in 'state.errors' and the
// partial state is returned.
Try<RunState> RunState::recover(
    const string& rootDir,
    const SlaveID& slaveId,
    const FrameworkID& frameworkId,
    const ExecutorID& executorId,
    const ContainerID& containerId,
    bool strict,
    bool rebooted)
{
  RunState state;
  state.id = containerId;

  // See if the sentinel file exists. This is done first so it is
  // known even if partial state is returned, e.g., if the libprocess
  // pid file is not recovered. It indicates the slave removed the
  // executor.
  const string sentinelPath = paths::getExecutorSentinelPath(
      rootDir, slaveId, frameworkId, executorId, containerId);

  state.completed = os::exists(sentinelPath);

  // Find the tasks.
  Try<list<string>> taskPaths = paths::getTaskPaths(
      rootDir, slaveId, frameworkId, executorId, containerId);

  if (taskPaths.isError()) {
    return Error(
        "Failed to find tasks for executor run " + containerId.value() +
        ": " + taskPaths.error());
  }

  // Recover tasks.
  foreach (const string& taskPath, taskPaths.get()) {
    TaskID taskId;
    taskId.set_value(Path(taskPath).basename());

    Try<TaskState> recovered = TaskState::recover(
        rootDir, slaveId, frameworkId, executorId, containerId, taskId, strict);

    if (recovered.isError()) {
      return Error(
          "Failed to recover task " + taskId.value() + ": " +
          recovered.error());
    }

    state.tasks[taskId] = recovered.get();
    state.errors += recovered->errors;
  }

  const string forkedPidPath = paths::getForkedPidPath(
      rootDir, slaveId, frameworkId, executorId, containerId);

  // If agent host is rebooted, we do not read the forked pid and libprocess pid
  // since those two pids are obsolete after reboot. And we remove the forked
  // pid file to make sure we will not read it in the case the agent process is
  // restarted after we checkpoint the new boot ID in `Slave::__recover` (i.e.,
  // agent recovery is done after the reboot).
  if (rebooted) {
    if (os::exists(forkedPidPath)) {
      Try<Nothing> removed = os::rm(forkedPidPath);
      if (removed.isError()) {
        return Error(
            "Failed to remove executor forked pid file '" + forkedPidPath +
            "': " + removed.error());
      }
    }

    return state;
  }

  if (!os::exists(forkedPidPath)) {
    // This could happen if the slave died before the containerizer checkpointed
    // the forked pid or agent process is restarted after agent host is rebooted
    // since we remove this file in the above code.
    LOG(WARNING) << "Failed to find executor forked pid file '"
                 << forkedPidPath << "'";
    return state;
  }

  // Read the forked pid.
  Result<string> forkedPidData = state::read<string>(forkedPidPath);
  if (forkedPidData.isError()) {
    const string message =
      "Failed to read executor forked pid from '" + forkedPidPath +
      "': " + forkedPidData.error();

    if (strict) {
      return Error(message);
    }

    LOG(WARNING) << message;
    state.errors++;
    return state;
  }

  if (forkedPidData->empty()) {
    // This could happen if the slave is hard rebooted after the file is created
    // but before the data is synced on disk.
    LOG(WARNING) << "Found empty executor forked pid file '"
                 << forkedPidPath << "'";
    return state;
  }

  Try<pid_t> forkedPid = numify<pid_t>(forkedPidData.get());
  if (forkedPid.isError()) {
    return Error("Failed to parse forked pid '" + forkedPidData.get() + "' "
                 "from pid file '" + forkedPidPath + "': " +
                 forkedPid.error());
  }

  state.forkedPid = forkedPid.get();

  // Read the libprocess pid.
  const string libprocessPidPath = paths::getLibprocessPidPath(
      rootDir, slaveId, frameworkId, executorId, containerId);

  if (os::exists(libprocessPidPath)) {
    Result<string> libprocessPidData =
      state::read<string>(libprocessPidPath);

    if (libprocessPidData.isError()) {
      const string message =
        "Failed to read executor libprocess pid from '" + libprocessPidPath +
        "': " + libprocessPidData.error();

      if (strict) {
        return Error(message);
      }

      LOG(WARNING) << message;
      state.errors++;
      return state;
    }

    if (libprocessPidData->empty()) {
      // This could happen if the slave is hard rebooted after the file is
      // created but before the data is synced on disk.
      LOG(WARNING) << "Found empty executor libprocess pid file '"
                   << libprocessPidPath << "'";
      return state;
    }

    state.libprocessPid = process::UPID(libprocessPidData.get());
    state.http = false;

    return state;
  }

  const string httpMarkerPath = paths::getExecutorHttpMarkerPath(
      rootDir, slaveId, frameworkId, executorId, containerId);

  // The marker could be absent if the slave died before the executor
  // registered with the slave.
  if (!os::exists(httpMarkerPath)) {
    LOG(WARNING) << "Failed to find '" << paths::LIBPROCESS_PID_FILE
                 << "' or '" << paths::HTTP_MARKER_FILE
                 << "' for container " << containerId
                 << " of executor '" << executorId
                 << "' of framework " << frameworkId;
    return state;
  }

  state.http = true;
  return state;
}
// Recovers the checkpointed state of an executor.
//
// Every run directory is recovered through 'RunState::recover'; the
// "latest" symlink is resolved to determine the latest run. The
// executor info is read last. A missing "latest" symlink or a missing
// or empty info file yields the partial state recovered so far. A
// failure to read the info is fatal when 'strict' is set and is
// otherwise counted in 'state.errors'.
Try<ExecutorState> ExecutorState::recover(
    const string& rootDir,
    const SlaveID& slaveId,
    const FrameworkID& frameworkId,
    const ExecutorID& executorId,
    bool strict,
    bool rebooted)
{
  ExecutorState state;
  state.id = executorId;

  // Find the runs.
  Try<list<string>> runPaths = paths::getExecutorRunPaths(
      rootDir,
      slaveId,
      frameworkId,
      executorId);

  if (runPaths.isError()) {
    return Error("Failed to find runs for executor '" + executorId.value() +
                 "': " + runPaths.error());
  }

  // Recover the runs.
  foreach (const string& runPath, runPaths.get()) {
    if (Path(runPath).basename() == paths::LATEST_SYMLINK) {
      const Result<string>& latest = os::realpath(runPath);
      if (!latest.isSome()) {
        return Error(
            "Failed to find latest run of executor '" +
            executorId.value() + "': " +
            (latest.isError()
             ? latest.error()
             : "No such file or directory"));
      }

      // Store the ContainerID of the latest executor run.
      ContainerID latestId;
      latestId.set_value(Path(latest.get()).basename());
      state.latest = latestId;

      continue;
    }

    // Any other entry is the directory of a run.
    ContainerID containerId;
    containerId.set_value(Path(runPath).basename());

    Try<RunState> run = RunState::recover(
        rootDir,
        slaveId,
        frameworkId,
        executorId,
        containerId,
        strict,
        rebooted);

    if (run.isError()) {
      return Error(
          "Failed to recover run " + containerId.value() +
          " of executor '" + executorId.value() +
          "': " + run.error());
    }

    state.runs[containerId] = run.get();
    state.errors += run->errors;
  }

  // Find the latest executor.
  // It is possible that we cannot find the "latest" executor if the
  // slave died before it created the "latest" symlink.
  if (state.latest.isNone()) {
    LOG(WARNING) << "Failed to find the latest run of executor '"
                 << executorId << "' of framework " << frameworkId;
    return state;
  }

  // Read the executor info.
  const string& infoPath =
    paths::getExecutorInfoPath(rootDir, slaveId, frameworkId, executorId);

  if (!os::exists(infoPath)) {
    // This could happen if the slave died after creating the executor
    // directory but before it checkpointed the executor info.
    LOG(WARNING) << "Failed to find executor info file '" << infoPath << "'";
    return state;
  }

  Result<ExecutorInfo> executorInfo = state::read<ExecutorInfo>(infoPath);

  if (executorInfo.isError()) {
    const string message =
      "Failed to read executor info from '" + infoPath + "': " +
      executorInfo.error();

    if (strict) {
      return Error(message);
    }

    LOG(WARNING) << message;
    state.errors++;
    return state;
  }

  if (executorInfo.isNone()) {
    // This could happen if the slave is hard rebooted after the file is created
    // but before the data is synced on disk.
    LOG(WARNING) << "Found empty executor info file '" << infoPath << "'";
    return state;
  }

  state.info = executorInfo.get();

  return state;
}
// Continuation of 'launch': evaluates the result of the external
// "launch" command.
//
// The container is terminated and a failure is returned when the
// command result cannot be evaluated, the command is not supported,
// its output cannot be parsed as an 'ExternalStatus', or the
// checkpointing of the pid fails. Otherwise the reported pid is
// stored for the container, a reaper callback is installed on it, the
// pid is checkpointed if requested and 'executorInfo' is returned.
Future<ExecutorInfo> ExternalContainerizerProcess::_launch(
    const ContainerID& containerId,
    const FrameworkID& frameworkId,
    const ExecutorInfo executorInfo,
    const SlaveID& slaveId,
    bool checkpoint,
    const Future<ResultFutures>& future)
{
  VLOG(1) << "Launch callback triggered on container '" << containerId << "'";

  if (!containers.contains(containerId)) {
    return Failure("Container '" + containerId.value() + "' not running");
  }

  string result;
  Try<bool> support = commandSupported(future, result);
  if (support.isError()) {
    terminate(containerId);
    return Failure(support.error());
  }

  if (!support.get()) {
    // We generally need to use an internal implementation in these
    // cases.
    // For the specific case of a launch however, there can not be an
    // internal implementation for a external containerizer, hence
    // we need to fail or even abort at this point.
    // TODO(tillt): Consider using posix-isolator as a fall back.
    terminate(containerId);
    return Failure("External containerizer does not support launch");
  }

  VLOG(1) << "Launch supported by external containerizer";

  ExternalStatus ps;
  if (!ps.ParseFromString(result)) {
    // TODO(tillt): Consider not terminating the containerizer due
    // to protocol breach but only fail the operation.
    terminate(containerId);
    return Failure("Could not parse launch result protobuf (error: " +
                   protobufError(ps) + ")");
  }

  VLOG(2) << "Launch result: '" << ps.message() << "'";
  VLOG(2) << "Executor pid: " << ps.pid();

  containers[containerId]->pid = ps.pid();

  // Observe the executor process and install a callback for status
  // changes.
  process::reap(ps.pid())
    .onAny(defer(
        PID<ExternalContainerizerProcess>(this),
        &ExternalContainerizerProcess::reaped,
        containerId,
        lambda::_1));

  // Checkpoint the container's pid if requested.
  if (checkpoint) {
    const string& path = slave::paths::getForkedPidPath(
        slave::paths::getMetaRootDir(flags.work_dir),
        slaveId,
        frameworkId,
        executorInfo.executor_id(),
        containerId);

    LOG(INFO) << "Checkpointing containerized executor '" << containerId
              << "' pid " << ps.pid() << " to '" << path << "'";

    Try<Nothing> checkpointed =
      slave::state::checkpoint(path, stringify(ps.pid()));

    if (checkpointed.isError()) {
      terminate(containerId);

      // Include the underlying error so the cause is not lost.
      return Failure("Failed to checkpoint containerized executor '" +
                     containerId.value() + "' pid " + stringify(ps.pid()) +
                     " to '" + path + "': " + checkpointed.error());
    }
  }

  VLOG(1) << "Launch finishing up for container '" << containerId << "'";

  return executorInfo;
}
// Compares the value of a ContainerID against a plain string.
inline bool operator==(const ContainerID& left, const std::string& right)
{
  const std::string& value = left.value();
  return value == right;
}
// Two ContainerIDs are equal when their values match, both either
// have or lack a parent, and (if present) their parents are equal.
bool operator==(const ContainerID& left, const ContainerID& right)
{
  if (left.value() != right.value()) {
    return false;
  }

  if (left.has_parent() != right.has_parent()) {
    return false;
  }

  // Both sides agree on having a parent at this point.
  if (!left.has_parent()) {
    return true;
  }

  return left.parent() == right.parent();
}
// Returns the container's cgroup: the container ID value joined onto
// the 'cgroups_root' flag.
string LinuxLauncher::cgroup(const ContainerID& containerId)
{
  const string& id = containerId.value();
  return path::join(flags.cgroups_root, id);
}
// Hashes a ContainerID based on its value.
inline std::size_t hash_value(const ContainerID& containerId)
{
  std::size_t hash = 0;
  boost::hash_combine(hash, containerId.value());
  return hash;
}
// Two ContainerIDs are equal when their values are equal.
inline bool operator==(const ContainerID& left, const ContainerID& right)
{
  return right.value() == left.value();
}
// Continuation of 'attach': evaluates the exit status and stdout of
// the CNI plugin.
//
// On a zero exit status the plugin's output is parsed as network
// info, written to the network info path of the container's interface
// and stored in the container's network record. Any other outcome is
// reported as a failure.
Future<Nothing> NetworkCniIsolatorProcess::_attach(
    const ContainerID& containerId,
    const string& networkName,
    const string& plugin,
    const tuple<Future<Option<int>>, Future<string>>& t)
{
  CHECK(infos.contains(containerId));
  CHECK(infos[containerId]->containerNetworks.contains(networkName));

  Future<Option<int>> exitStatus = std::get<0>(t);
  if (!exitStatus.isReady()) {
    const string reason =
      exitStatus.isFailed() ? exitStatus.failure() : "discarded";

    return Failure(
        "Failed to get the exit status of the CNI plugin '" +
        plugin + "' subprocess: " + reason);
  }

  if (exitStatus->isNone()) {
    return Failure(
        "Failed to reap the CNI plugin '" + plugin + "' subprocess");
  }

  // CNI plugin will print result (in case of success) or error (in
  // case of failure) to stdout.
  Future<string> pluginOutput = std::get<1>(t);
  if (!pluginOutput.isReady()) {
    const string reason =
      pluginOutput.isFailed() ? pluginOutput.failure() : "discarded";

    return Failure(
        "Failed to read stdout from the CNI plugin '" +
        plugin + "' subprocess: " + reason);
  }

  if (exitStatus.get() != 0) {
    return Failure(
        "The CNI plugin '" + plugin + "' failed to attach container " +
        containerId.value() + " to CNI network '" + networkName +
        "': " + pluginOutput.get());
  }

  // Parse the output of CNI plugin.
  Try<spec::NetworkInfo> networkInfo =
    spec::parseNetworkInfo(pluginOutput.get());

  if (networkInfo.isError()) {
    return Failure(
        "Failed to parse the output of the CNI plugin '" +
        plugin + "': " + networkInfo.error());
  }

  if (networkInfo.get().has_ip4()) {
    LOG(INFO) << "Got assigned IPv4 address '"
              << networkInfo.get().ip4().ip()
              << "' from CNI network '" << networkName
              << "' for container " << containerId;
  }

  if (networkInfo.get().has_ip6()) {
    LOG(INFO) << "Got assigned IPv6 address '"
              << networkInfo.get().ip6().ip()
              << "' from CNI network '" << networkName
              << "' for container " << containerId;
  }

  // Checkpoint the output of CNI plugin.
  // The destruction of the container cannot happen in the middle of
  // 'attach()' and '_attach()' because the containerizer will wait
  // for 'isolate()' to finish before destroying the container.
  ContainerNetwork& network =
    infos[containerId]->containerNetworks[networkName];

  const string networkInfoPath = paths::getNetworkInfoPath(
      rootDir.get(),
      containerId.value(),
      networkName,
      network.ifName);

  Try<Nothing> checkpointed = os::write(networkInfoPath, pluginOutput.get());
  if (checkpointed.isError()) {
    return Failure(
        "Failed to checkpoint the output of CNI plugin'" +
        pluginOutput.get() + "': " + checkpointed.error());
  }

  network.cniNetworkInfo = networkInfo.get();

  return Nothing();
}