Exemple #1
0
// Issues an HTTP GET for 'url' (through the streaming API when
// 'isStreaming' is set) and interprets the response status on this
// process:
//   - "200 OK": the response is returned unchanged.
//   - "400 Bad Request": fails with the string produced by
//     'handleHttpBadResponse'.
//   - A status equal to 'lastResponseStatus': fails, so that the
//     same status is not handled twice in a row.
//   - 'resend' not set: fails without any further attempt.
//   - "401 Unauthorized" / "307 Temporary Redirect": delegated to
//     'handleHttpUnauthResponse' / 'handleHttpRedirect'.
//   - Anything else: fails.
Future<http::Response> RegistryClientProcess::doHttpGet(
    const http::URL& url,
    const Option<http::Headers>& headers,
    bool isStreaming,
    bool resend,
    const Option<string>& lastResponseStatus) const
{
  Future<http::Response> pending = isStreaming
    ? process::http::streaming::get(url, headers)
    : process::http::get(url, headers);

  return pending.then(defer(
      self(),
      [=](const http::Response& reply) -> Future<http::Response> {
        const auto& status = reply.status;

        VLOG(1) << "Response status for url '" << url << "': " << status;

        // A successful response needs no further processing.
        if (status == "200 OK") {
          return reply;
        }

        // Surface the details of a bad request as the failure message.
        if (status == "400 Bad Request") {
          return handleHttpBadResponse(reply, isStreaming)
            .then([](const string& details) -> Future<http::Response> {
              return Failure(details);
            });
        }

        // Seeing the same status as last time means that handling it
        // again would loop forever.
        const bool repeated =
          lastResponseStatus.isSome() && lastResponseStatus.get() == status;

        if (repeated) {
          return Failure("Invalid response: " + status);
        }

        // The caller asked us not to try again.
        if (!resend) {
          return Failure("Bad response: " + status);
        }

        if (status == "401 Unauthorized") {
          return handleHttpUnauthResponse(reply, url, isStreaming);
        }

        if (status == "307 Temporary Redirect") {
          return handleHttpRedirect(reply, headers, isStreaming);
        }

        return Failure("Invalid response: " + status);
      }));
}
Exemple #2
0
Future<Nothing> NvidiaGpuIsolatorProcess::_update(
    const ContainerID& containerId,
    const set<Gpu>& allocation)
{
  if (!infos.contains(containerId)) {
    return Failure("Failed to complete GPU allocation: unknown container");
  }

  Info* info = CHECK_NOTNULL(infos.at(containerId));

  foreach (const Gpu& gpu, allocation) {
    cgroups::devices::Entry entry;
    entry.selector.type = Entry::Selector::Type::CHARACTER;
    entry.selector.major = gpu.major;
    entry.selector.minor = gpu.minor;
    entry.access.read = true;
    entry.access.write = true;
    entry.access.mknod = true;

    Try<Nothing> allow = cgroups::devices::allow(
        hierarchy, info->cgroup, entry);

    if (allow.isError()) {
      return Failure("Failed to grant cgroups access to GPU device"
                     " '" + stringify(entry) + "': " + allow.error());
    }
  }
Exemple #3
0
// Wait for a subprocess and test the status code for the following
// conditions of 'expected_status':
//   1. 'None' = Anything but '0'.
//   2. 'Some' = the value of 'expected_status'.
// Returns Nothing if the resulting status code matches the
// expectation otherwise a Failure with the output of the subprocess.
// A Failure is also returned right away if the subprocess has no
// stdout pipe or if that pipe cannot be duplicated.
// TODO(jmlvanre): Turn this into a generally useful abstraction for
// gtest where we can have a more straightforward 'expected_status'.
Future<Nothing> await_subprocess(
    const Subprocess& subprocess,
    const Option<int>& expected_status = None())
{
  // Without a stdout pipe there is nothing we could report on a
  // mismatch, so fail instead of dereferencing an empty option.
  if (subprocess.out().isNone()) {
    return Failure("Subprocess has no stdout pipe to read from");
  }

  // Dup the pipe fd of the subprocess so we can read the output if
  // needed.
  int out = dup(subprocess.out().get());
  if (out < 0) {
    // Do not continue with an invalid descriptor: reading from it and
    // closing it later would both fail.
    return Failure("Failed to dup the stdout pipe of the subprocess");
  }

  // Once we get the status of the process.
  return subprocess.status()
    .then([=](const Option<int>& status) -> Future<Nothing> {
      // If the status is not set, fail out.
      if (status.isNone()) {
        return Failure("Subprocess status is none");
      }

      // If the status is not what we expect then fail out with the
      // output of the subprocess. The failure message will include
      // the assertion failures of the subprocess.
      if ((expected_status.isSome() && status.get() != expected_status.get()) ||
          (expected_status.isNone() && status.get() == 0)) {
        return io::read(out)
          .then([](const string& output) -> Future<Nothing> {
            return Failure("\n[++++++++++] Subprocess output.\n" + output +
                           "[++++++++++]\n");
          });
      }

      // If the subprocess ran successfully then return nothing.
      return Nothing();
    }).onAny([=]() {
      // Release the duplicated descriptor no matter how the chain
      // above completed.
      os::close(out);
    });
}
Exemple #4
0
// Interprets the outcome of the TCP check command. 't' carries the
// exit status future at index 0 and, at index 2, the future whose
// content is reported as the command's stderr. Returns Nothing only
// if the command was reaped with exit status 0; every other outcome
// is reported as a Failure.
Future<Nothing> HealthCheckerProcess::__tcpHealthCheck(
    const tuple<
        Future<Option<int>>,
        Future<string>,
        Future<string>>& t)
{
  const string command = string(TCP_CHECK_COMMAND);

  Future<Option<int>> exitStatus = std::get<0>(t);
  if (!exitStatus.isReady()) {
    return Failure(
        "Failed to get the exit status of the " + command +
        " process: " +
        (exitStatus.isFailed() ? exitStatus.failure() : "discarded"));
  }

  if (exitStatus->isNone()) {
    return Failure("Failed to reap the " + command + " process");
  }

  const int statusCode = exitStatus->get();
  if (statusCode == 0) {
    return Nothing();
  }

  // The command failed: include whatever it reported, if available.
  Future<string> error = std::get<2>(t);
  if (error.isReady()) {
    return Failure(
        command + " returned " + WSTRINGIFY(statusCode) + ": " + error.get());
  }

  return Failure(
      command + " returned " + WSTRINGIFY(statusCode) +
      "; reading stderr failed: " +
      (error.isFailed() ? error.failure() : "discarded"));
}
Exemple #5
0
// Continuation of the container destruction that runs once the
// 'destroys' futures (reported below as the nested containers) have
// completed. If any of them is not ready, the error metric is bumped
// and a Failure listing all reasons is returned. Otherwise every
// rootfs recorded for 'containerId' is destroyed through its backend
// and the result is chained into '__destroy'.
//
// The container must be known and already marked as 'destroying'.
Future<bool> ProvisionerProcess::_destroy(
    const ContainerID& containerId,
    const list<Future<bool>>& destroys)
{
  CHECK(infos.contains(containerId));
  CHECK(infos[containerId]->destroying);

  // Gather the reason of every destroy that did not succeed.
  vector<string> errors;
  foreach (const Future<bool>& future, destroys) {
    if (!future.isReady()) {
      errors.push_back(future.isFailed()
        ? future.failure()
        : "discarded");
    }
  }

  if (!errors.empty()) {
    ++metrics.remove_container_errors;

    return Failure(
        "Failed to destroy nested containers: " +
        strings::join("; ", errors));
  }

  const Owned<Info>& info = infos[containerId];

  list<Future<bool>> futures;
  foreachkey (const string& backend, info->rootfses) {
    if (!backends.contains(backend)) {
      return Failure("Unknown backend '" + backend + "'");
    }

    // The backend directory only depends on the backend, so compute
    // it once instead of once per rootfs.
    const string backendDir = provisioner::paths::getBackendDir(
        rootDir,
        containerId,
        backend);

    foreach (const string& rootfsId, info->rootfses[backend]) {
      const string rootfs = provisioner::paths::getContainerRootfsDir(
          rootDir,
          containerId,
          backend,
          rootfsId);

      LOG(INFO) << "Destroying container rootfs at '" << rootfs
                << "' for container " << containerId;

      futures.push_back(
          backends.get(backend).get()->destroy(rootfs, backendDir));
    }
  }

  // TODO(xujyan): Revisit the usefulness of this return value.
  return collect(futures)
    .then(defer(self(), &ProvisionerProcess::__destroy, containerId));
}
Exemple #6
0
// Handles the result of running the CNI plugin 'plugin' to detach
// the container from network 'networkName'. 't' carries the exit
// status future of the plugin subprocess (index 0) and the future of
// its stdout (index 1). On exit status 0 the interface directory of
// the network is removed; otherwise the plugin's stdout is returned
// as part of the Failure.
Future<Nothing> NetworkCniIsolatorProcess::_detach(
    const ContainerID& containerId,
    const std::string& networkName,
    const string& plugin,
    const tuple<Future<Option<int>>, Future<string>>& t)
{
  CHECK(infos.contains(containerId));
  CHECK(infos[containerId]->containerNetworks.contains(networkName));

  Future<Option<int>> exitStatus = std::get<0>(t);
  if (!exitStatus.isReady()) {
    return Failure(
        "Failed to get the exit status of the CNI plugin '" +
        plugin + "' subprocess: " +
        (exitStatus.isFailed() ? exitStatus.failure() : "discarded"));
  }

  if (exitStatus->isNone()) {
    return Failure(
        "Failed to reap the CNI plugin '" + plugin + "' subprocess");
  }

  if (exitStatus->get() != 0) {
    // CNI plugin will print result (in case of success) or error (in
    // case of failure) to stdout.
    Future<string> output = std::get<1>(t);
    if (!output.isReady()) {
      return Failure(
          "Failed to read stdout from the CNI plugin '" +
          plugin + "' subprocess: " +
          (output.isFailed() ? output.failure() : "discarded"));
    }

    return Failure(
        "The CNI plugin '" + plugin + "' failed to detach container "
        "from network '" + networkName + "': " + output.get());
  }

  // The plugin succeeded: drop the interface directory of this
  // network.
  const string ifDir = paths::getInterfaceDir(
      rootDir.get(),
      containerId.value(),
      networkName,
      infos[containerId]->containerNetworks[networkName].ifName);

  Try<Nothing> removed = os::rmdir(ifDir);
  if (removed.isError()) {
    return Failure(
        "Failed to remove interface directory '" +
        ifDir + "': " + removed.error());
  }

  return Nothing();
}
Exemple #7
0
// Finishes the cleanup of a container once all 'detaches' have
// completed. Fails with the newline-joined reasons if any detach is
// not ready. Otherwise unmounts the network namespace handle (if it
// exists), removes the container directory and forgets the container.
Future<Nothing> NetworkCniIsolatorProcess::_cleanup(
    const ContainerID& containerId,
    const list<Future<Nothing>>& detaches)
{
  CHECK(infos.contains(containerId));

  // Collect one message per detach that did not complete.
  vector<string> failures;
  foreach (const Future<Nothing>& detach, detaches) {
    if (detach.isReady()) {
      continue;
    }

    failures.push_back(detach.isFailed() ? detach.failure() : "discarded");
  }

  if (!failures.empty()) {
    return Failure(strings::join("\n", failures));
  }

  const string containerDir =
      paths::getContainerDir(rootDir.get(), containerId.value());

  const string nsHandle =
      paths::getNamespacePath(rootDir.get(), containerId.value());

  if (os::exists(nsHandle)) {
    Try<Nothing> unmounted = fs::unmount(nsHandle);
    if (unmounted.isError()) {
      return Failure(
          "Failed to unmount the network namespace handle '" +
          nsHandle + "': " + unmounted.error());
    }

    LOG(INFO) << "Unmounted the network namespace handle '"
              << nsHandle << "' for container " << containerId;
  }

  Try<Nothing> removed = os::rmdir(containerDir);
  if (removed.isError()) {
    return Failure(
        "Failed to remove the container directory '" +
        containerDir + "': " + removed.error());
  }

  LOG(INFO) << "Removed the container directory '" << containerDir << "'";

  // Only forget the container after everything on disk is gone.
  infos.erase(containerId);

  return Nothing();
}
Exemple #8
0
// Creates the cgroup for 'containerId' under 'flags.cgroups_root',
// optionally hands its ownership to the container user, records the
// container and then applies the executor's resources via 'update'.
// Fails if the container was already prepared or if the cgroup
// already exists. Resolves to None once 'update' completes.
Future<Option<ContainerLaunchInfo>> CgroupsNetClsIsolatorProcess::prepare(
    const ContainerID& containerId,
    const ContainerConfig& containerConfig)
{
  if (infos.contains(containerId)) {
    return Failure("Container has already been prepared");
  }

  // Use this info to create the cgroup, but do not insert it into
  // infos till the cgroup has been created successfully.
  Info info(path::join(flags.cgroups_root, containerId.value()));

  // Refuse to reuse a cgroup that is already there.
  Try<bool> present = cgroups::exists(hierarchy, info.cgroup);
  if (present.isError()) {
    return Failure("Failed to check if the cgroup already exists: " +
                   present.error());
  }

  if (present.get()) {
    return Failure("The cgroup already exists");
  }

  Try<Nothing> created = cgroups::create(hierarchy, info.cgroup);
  if (created.isError()) {
    return Failure("Failed to create the cgroup: " + created.error());
  }

  // 'chown' the cgroup so the executor can create nested cgroups. Do
  // not recurse so the control files are still owned by the slave
  // user and thus cannot be changed by the executor.
  if (containerConfig.has_user()) {
    const string cgroupPath = path::join(hierarchy, info.cgroup);

    Try<Nothing> chowned =
      os::chown(containerConfig.user(), cgroupPath, false);

    if (chowned.isError()) {
      return Failure("Failed to change ownership of cgroup hierarchy: " +
                     chowned.error());
    }
  }

  infos.emplace(containerId, info);

  const auto done = []() -> Future<Option<ContainerLaunchInfo>> {
    return None();
  };

  return update(containerId, containerConfig.executorinfo().resources())
    .then(done);
}
Exemple #9
0
// Runs the CNI plugin configured for 'networkName' with the "DEL"
// command for the given container and hands the exit status and
// stdout of the plugin to '_detach' on this process. Fails right
// away if the plugin subprocess cannot be created.
Future<Nothing> NetworkCniIsolatorProcess::detach(
    const ContainerID& containerId,
    const std::string& networkName)
{
  CHECK(infos.contains(containerId));
  CHECK(infos[containerId]->containerNetworks.contains(networkName));

  const ContainerNetwork& network =
      infos[containerId]->containerNetworks[networkName];

  // Some CNI plugins need to run "iptables" to set up IP Masquerade, so we
  // need to set the "PATH" environment variable so that the plugin can locate
  // the "iptables" executable file. Fall back to a fixed list of
  // directories when "PATH" is not set for this process.
  const Option<string> inheritedPath = os::getenv("PATH");

  // Prepare environment variables for CNI plugin.
  map<string, string> environment;
  environment["CNI_COMMAND"] = "DEL";
  environment["CNI_CONTAINERID"] = containerId.value();
  environment["CNI_PATH"] = pluginDir.get();
  environment["CNI_IFNAME"] = network.ifName;
  environment["CNI_NETNS"] =
      paths::getNamespacePath(rootDir.get(), containerId.value());
  environment["PATH"] = inheritedPath.isSome()
    ? inheritedPath.get()
    : "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin";

  const NetworkConfigInfo& networkConfig = networkConfigs[networkName];

  // Invoke the CNI plugin.
  const string& plugin = networkConfig.config.type();
  Try<Subprocess> launched = subprocess(
      path::join(pluginDir.get(), plugin),
      {plugin},
      Subprocess::PATH(networkConfig.path),
      Subprocess::PIPE(),
      Subprocess::PATH("/dev/null"),
      NO_SETSID,
      None(),
      environment);

  if (launched.isError()) {
    return Failure(
        "Failed to execute the CNI plugin '" + plugin + "': " +
        launched.error());
  }

  // Wait for both the exit status and the complete stdout before
  // evaluating the result.
  return await(launched->status(), io::read(launched->out().get()))
    .then(defer(
        PID<NetworkCniIsolatorProcess>(this),
        &NetworkCniIsolatorProcess::_detach,
        containerId,
        networkName,
        plugin,
        lambda::_1));
}
Exemple #10
0
// The net_cls handles are labels and hence there are no limitations
// associated with them. For a known container this function therefore
// always returns a pending future, since a limitation is never
// reached. An unknown container results in a Failure.
Future<ContainerLimitation> CgroupsNetClsIsolatorProcess::watch(
    const ContainerID& containerId)
{
  if (infos.contains(containerId)) {
    // Never satisfied on purpose.
    return Future<ContainerLimitation>();
  }

  return Failure("Unknown container");
}
Exemple #11
0
// The net_cls handles aren't treated as resources. Further, they have
// fixed values and hence don't have a notion of usage. For a known
// container we therefore return an empty 'ResourceStatistics' object;
// an unknown container results in a Failure.
Future<ResourceStatistics> CgroupsNetClsIsolatorProcess::usage(
    const ContainerID& containerId)
{
  if (infos.contains(containerId)) {
    return ResourceStatistics();
  }

  return Failure("Unknown container");
}
Exemple #12
0
// Schedules 'operation' to be handed to '_apply' on this process once
// the future held by 'recovered' completes. Fails if 'recovered' has
// not been set yet.
Future<bool> RegistrarProcess::apply(Owned<Operation> operation)
{
  if (recovered.isSome()) {
    return recovered.get()->future()
      .then(defer(self(), &Self::_apply, operation));
  }

  return Failure("Attempted to apply the operation before recovering");
}
Exemple #13
0
// Forwards the fetch of 'uri' into 'directory' to the plugin that is
// registered for the URI's scheme. Fails if there is no plugin for
// that scheme.
Future<Nothing> Fetcher::fetch(
    const URI& uri,
    const string& directory) const
{
  const string scheme = uri.scheme();

  if (plugins.contains(scheme)) {
    return plugins.at(scheme)->fetch(uri, directory);
  }

  return Failure("Scheme '" + scheme + "' is not supported");
}
Exemple #14
0
// Interprets the outcome of the HTTP check command. 't' carries the
// exit status future (index 0), the stdout future (index 1) and the
// future reported as stderr (index 2). Returns Nothing only if the
// command exited with 0 and its stdout parses as an HTTP response
// code in the range ['Status::OK', 'Status::BAD_REQUEST'); every
// other outcome is reported as a Failure.
Future<Nothing> HealthCheckerProcess::__httpHealthCheck(
    const tuple<
        Future<Option<int>>,
        Future<string>,
        Future<string>>& t)
{
  const string command = string(HTTP_CHECK_COMMAND);

  Future<Option<int>> exitStatus = std::get<0>(t);
  if (!exitStatus.isReady()) {
    return Failure(
        "Failed to get the exit status of the " + command +
        " process: " +
        (exitStatus.isFailed() ? exitStatus.failure() : "discarded"));
  }

  if (exitStatus->isNone()) {
    return Failure("Failed to reap the " + command + " process");
  }

  const int statusCode = exitStatus->get();
  if (statusCode != 0) {
    // The command failed: include whatever it reported, if available.
    Future<string> error = std::get<2>(t);
    if (error.isReady()) {
      return Failure(
          command + " returned " + WSTRINGIFY(statusCode) + ": " +
          error.get());
    }

    return Failure(
        command + " returned " + WSTRINGIFY(statusCode) +
        "; reading stderr failed: " +
        (error.isFailed() ? error.failure() : "discarded"));
  }

  Future<string> output = std::get<1>(t);
  if (!output.isReady()) {
    return Failure(
        "Failed to read stdout from " + command + ": " +
        (output.isFailed() ? output.failure() : "discarded"));
  }

  // Parse the output and get the HTTP response code.
  Try<int> code = numify<int>(output.get());
  if (code.isError()) {
    return Failure(
        "Unexpected output from " + command + ": " + output.get());
  }

  const int httpCode = code.get();

  const bool accepted =
    httpCode >= process::http::Status::OK &&
    httpCode < process::http::Status::BAD_REQUEST;

  if (accepted) {
    return Nothing();
  }

  return Failure(
      "Unexpected HTTP response code: " +
      process::http::Status::string(httpCode));
}
Exemple #15
0
// Assigns 'pid' to the cgroup recorded for 'containerId'. Fails if
// the container is unknown or if the assignment fails.
Future<Nothing> CgroupsNetClsIsolatorProcess::isolate(
    const ContainerID& containerId,
    pid_t pid)
{
  if (!infos.contains(containerId)) {
    return Failure("Unknown container");
  }

  const Info& container = infos.at(containerId);

  Try<Nothing> assigned = cgroups::assign(hierarchy, container.cgroup, pid);
  if (assigned.isSome()) {
    return Nothing();
  }

  return Failure("Failed to assign container '" +
                 stringify(containerId) + "' to its own cgroup '" +
                 path::join(hierarchy, container.cgroup) +
                 "': " + assigned.error());
}
Exemple #16
0
// Queues 'operation' and returns its future. Fails right away if an
// error has been recorded. An update is started unless one is
// already in progress.
Future<bool> RegistrarProcess::_apply(Owned<Operation> operation)
{
  if (error.isSome()) {
    return Failure(error.get());
  }

  CHECK_SOME(variable);

  operations.push_back(operation);

  // Grab the future before possibly kicking off the update below.
  Future<bool> applied = operation->future();

  const bool idle = !updating;
  if (idle) {
    update();
  }

  return applied;
}
Exemple #17
0
Future<vector<string>> StoreProcess::fetchDependencies(
    const string& imageId,
    bool cached)
{
  const string imagePath = paths::getImagePath(rootDir, imageId);

  Try<spec::ImageManifest> manifest = spec::getManifest(imagePath);
  if (manifest.isError()) {
    return Failure(
        "Failed to get dependencies for image id '" + imageId +
        "': " + manifest.error());
  }

  vector<Image::Appc> dependencies;
  foreach (const spec::ImageManifest::Dependency& dependency,
           manifest->dependencies()) {
    Image::Appc appc;
    appc.set_name(dependency.imagename());
    if (dependency.has_imageid()) {
      appc.set_id(dependency.imageid());
    }

    // TODO(jojy): Make Image::Appc use appc::spec::Label instead of
    // mesos::Label so that we can avoid this loop here.
    foreach (const spec::ImageManifest::Label& label, dependency.labels()) {
      mesos::Label appcLabel;
      appcLabel.set_key(label.name());
      appcLabel.set_value(label.value());

      appc.mutable_labels()->add_labels()->CopyFrom(appcLabel);
    }

    dependencies.emplace_back(appc);
  }

  if (dependencies.size() == 0) {
    return vector<string>();
  }

  // Do a depth first search.
  vector<Future<vector<string>>> futures;
  futures.reserve(dependencies.size());
  foreach (const Image::Appc& appc, dependencies) {
    futures.emplace_back(fetchImage(appc, cached));
  }
  // In this hook, we check for the presence of a label, and if set
  // we return a failure, effectively failing the container creation.
  // Otherwise we add an environment variable to the executor and task.
  // Additionally, this hook creates a file named "foo" in the container
  // work directory (sandbox).
  Future<Option<DockerTaskExecutorPrepareInfo>>
    slavePreLaunchDockerTaskExecutorDecorator(
        const Option<TaskInfo>& taskInfo,
        const ExecutorInfo& executorInfo,
        const string& containerName,
        const string& containerWorkDirectory,
        const string& mappedSandboxDirectory,
        const Option<map<string, string>>& env) override
  {
    LOG(INFO) << "Executing 'slavePreLaunchDockerTaskExecutorDecorator' hook";

    if (taskInfo.isSome()) {
      foreach (const Label& label, taskInfo->labels().labels()) {
        if (label.key() == testErrorLabelKey) {
          return Failure("Spotted error label");
        }
      }
    }
Exemple #19
0
// Asks the store registered for the image's type for 'image' (using
// the default backend) and then continues with '_provision' on this
// process. Fails if no store is registered for the image type.
Future<ProvisionInfo> ProvisionerProcess::provision(
    const ContainerID& containerId,
    const Image& image)
{
  const auto type = image.type();

  if (stores.contains(type)) {
    // Get and then provision image layers from the store.
    return stores.get(type).get()->get(image, defaultBackend)
      .then(defer(self(),
                  &Self::_provision,
                  containerId,
                  image,
                  defaultBackend,
                  lambda::_1));
  }

  return Failure(
      "Unsupported container image type: " +
      stringify(type));
}
Exemple #20
0
// Loads the persisted images file found under 'flags.docker_store_dir'
// into 'storedImages'. A missing or empty file is not an error and
// leaves 'storedImages' untouched. If the same image reference occurs
// more than once, the first occurrence is kept and a warning is
// logged for the others. Fails only if the file cannot be read.
Future<Nothing> MetadataManagerProcess::recover()
{
  string storedImagesPath = paths::getStoredImagesPath(flags.docker_store_dir);

  if (!os::exists(storedImagesPath)) {
    LOG(INFO) << "No images to load from disk. Docker provisioner image "
              << "storage path '" << storedImagesPath << "' does not exist";
    return Nothing();
  }

  Result<Images> images = state::read<Images>(storedImagesPath);
  if (images.isError()) {
    return Failure("Failed to read images from '" + storedImagesPath + "' " +
                   images.error());
  }

  if (images.isNone()) {
    // This could happen if the slave died after opening the file for
    // writing but before persisted on disk.
    LOG(WARNING) << "The images file '" << storedImagesPath << "' is empty";

    return Nothing();
  }

  foreach (const Image& image, images->images()) {
    const string imageReference = stringify(image.reference());

    if (storedImages.contains(imageReference)) {
      LOG(WARNING) << "Found duplicate image in recovery for image reference '"
                   << imageReference << "'";

      // The duplicate is not stored, so do not report it as loaded.
      continue;
    }

    storedImages[imageReference] = image;

    VLOG(1) << "Successfully loaded image '" << imageReference << "'";
  }

  LOG(INFO) << "Successfully loaded " << storedImages.size()
            << " Docker images";

  return Nothing();
}
Exemple #21
0
// Requests "v2/<repository>/manifests/<tag>" for 'imageName' from the
// registry server and parses the response body into a 'Manifest'.
// Fails if the request fails or the body cannot be parsed.
Future<Manifest> RegistryClientProcess::getManifest(
    const Image::Name& imageName)
{
  const string manifestPath =
    "v2/" + imageName.repository() + "/manifests/" + imageName.tag();

  http::URL manifestURL(registryServer_);
  manifestURL.path = manifestPath;

  return doHttpGet(manifestURL, None(), false, true, None())
    .then(defer(self(), [this] (
        const http::Response& response) -> Future<Manifest> {
      // TODO(jojy): We dont use the digest that is returned in header.
      // This is a good place to validate the manifest.

      Try<Manifest> parsed = Manifest::create(response.body);
      if (parsed.isSome()) {
        return parsed.get();
      }

      return Failure(
          "Failed to parse manifest response: " + parsed.error());
    }));
}
Exemple #22
0
Future<vector<string>> RegistryPullerProcess::__pull(
    const spec::ImageReference& reference,
    const string& directory,
    const spec::v2::ImageManifest& manifest,
    const hashset<string>& blobSums)
{
  vector<string> layerIds;
  list<Future<Nothing>> futures;

  for (int i = 0; i < manifest.fslayers_size(); i++) {
    CHECK(manifest.history(i).has_v1());
    const spec::v1::ImageManifest& v1 = manifest.history(i).v1();
    const string& blobSum = manifest.fslayers(i).blobsum();

    // NOTE: We put parent layer ids in front because that's what the
    // provisioner backends assume.
    layerIds.insert(layerIds.begin(), v1.id());

    // Skip if the layer is already in the store.
    if (os::exists(paths::getImageLayerPath(storeDir, v1.id()))) {
      continue;
    }

    const string layerPath = path::join(directory, v1.id());
    const string tar = path::join(directory, blobSum);
    const string rootfs = paths::getImageLayerRootfsPath(layerPath);
    const string json = paths::getImageLayerManifestPath(layerPath);

    VLOG(1) << "Extracting layer tar ball '" << tar
            << " to rootfs '" << rootfs << "'";

    // NOTE: This will create 'layerPath' as well.
    Try<Nothing> mkdir = os::mkdir(rootfs, true);
    if (mkdir.isError()) {
      return Failure(
          "Failed to create rootfs directory '" + rootfs + "' "
          "for layer '" + v1.id() + "': " + mkdir.error());
    }

    Try<Nothing> write = os::write(json, stringify(JSON::protobuf(v1)));
    if (write.isError()) {
      return Failure(
          "Failed to save the layer manifest for layer '" +
          v1.id() + "': " + write.error());
    }

    futures.push_back(command::untar(Path(tar), Path(rootfs)));
  }

  return collect(futures)
    .then([=]() -> Future<vector<string>> {
      // Remove the tarballs after the extraction.
      foreach (const string& blobSum, blobSums) {
        const string tar = path::join(directory, blobSum);

        Try<Nothing> rm = os::rm(tar);
        if (rm.isError()) {
          return Failure(
              "Failed to remove '" + tar + "' "
              "after extraction: " + rm.error());
        }
      }

      return layerIds;
    });
Exemple #23
0
// Adjusts the GPUs of 'containerId' so that their number matches the
// 'gpus' amount in 'resources'.
//   - More requested than currently held: the missing GPUs are
//     requested from the allocator and handed to '_update'.
//   - Fewer requested: cgroups access to the surplus GPUs is denied
//     and they are returned to the allocator.
//   - Same amount: nothing to do.
// Fails for nested or unknown containers, for a fractional 'gpus'
// amount and if denying access to a device fails. In the last case
// the GPUs already revoked in this call are still returned to the
// allocator before the Failure is reported.
Future<Nothing> NvidiaGpuIsolatorProcess::update(
    const ContainerID& containerId,
    const Resources& resources)
{
  if (containerId.has_parent()) {
    return Failure("Not supported for nested containers");
  }

  if (!infos.contains(containerId)) {
    return Failure("Unknown container");
  }

  Info* info = CHECK_NOTNULL(infos[containerId]);

  // Query the requested amount once; no 'gpus' resource means zero.
  const double gpus = resources.gpus().getOrElse(0.0);

  // Make sure that the `gpus` resource is not fractional.
  // We rely on scalar resources only having 3 digits of precision.
  if (static_cast<long long>(gpus * 1000.0) % 1000 != 0) {
    return Failure("The 'gpus' resource must be an unsigned integer");
  }

  const size_t requested = static_cast<size_t>(gpus);

  // Update the GPU allocation to reflect the new total.
  if (requested > info->allocated.size()) {
    size_t additional = requested - info->allocated.size();

    return allocator.allocate(additional)
      .then(defer(PID<NvidiaGpuIsolatorProcess>(this),
                  &NvidiaGpuIsolatorProcess::_update,
                  containerId,
                  lambda::_1));
  } else if (requested < info->allocated.size()) {
    size_t fewer = info->allocated.size() - requested;

    set<Gpu> deallocated;

    for (size_t i = 0; i < fewer; i++) {
      const auto gpu = info->allocated.begin();

      cgroups::devices::Entry entry;
      entry.selector.type = Entry::Selector::Type::CHARACTER;
      entry.selector.major = gpu->major;
      entry.selector.minor = gpu->minor;
      entry.access.read = true;
      entry.access.write = true;
      entry.access.mknod = true;

      Try<Nothing> deny = cgroups::devices::deny(
          hierarchy, info->cgroup, entry);

      if (deny.isError()) {
        const string message =
          "Failed to deny cgroups access to GPU device"
          " '" + stringify(entry) + "': " + deny.error();

        if (deallocated.empty()) {
          return Failure(message);
        }

        // GPUs revoked in earlier iterations are no longer tracked in
        // 'info->allocated'. Hand them back to the allocator before
        // reporting the failure, otherwise they would be lost.
        return allocator.deallocate(deallocated)
          .then([message]() -> Future<Nothing> {
            return Failure(message);
          });
      }

      deallocated.insert(*gpu);
      info->allocated.erase(gpu);
    }

    return allocator.deallocate(deallocated);
  }

  return Nothing();
}
 // Returns a future that is constructed from a Failure carrying the
 // message "failure".
 Future<double> fail()
 {
   Future<double> failed = Failure("failure");
   return failed;
 }
Exemple #25
0
// Launches the command described by 'cmd' as a subprocess and
// resolves to its exit code. The environment of the subprocess is
// 'os::environment()' overlaid with the variables of the command.
// Depending on 'command.shell()' either the shell or the exec variant
// of 'process::subprocess' is used. If the exit status is not
// available within 'checkTimeout', the process tree of the command
// is killed and the result is a Failure. Fails as well if the
// subprocess cannot be created or cannot be reaped.
Future<int> CheckerProcess::commandCheck(
    const check::Command& cmd,
    const runtime::Plain& plain)
{
  const CommandInfo& command = cmd.info;

  // Variables of the command take precedence over inherited ones.
  map<string, string> environment = os::environment();

  foreach (const Environment::Variable& variable,
           command.environment().variables()) {
    environment[variable.name()] = variable.value();
  }

  // Launch the subprocess.
  Try<Subprocess> launched = Error("Not launched");

  if (!command.shell()) {
    // Use the exec variant.
    vector<string> argv(
        std::begin(command.arguments()), std::end(command.arguments()));

    VLOG(1) << "Launching " << name << " [" << command.value() << ", "
            << strings::join(", ", argv) << "] for task '" << taskId << "'";

    launched = process::subprocess(
        command.value(),
        argv,
        Subprocess::PATH(os::DEV_NULL),
        Subprocess::FD(STDERR_FILENO),
        Subprocess::FD(STDERR_FILENO),
        nullptr,
        environment,
        getCustomCloneFunc(plain));
  } else {
    // Use the shell variant.
    VLOG(1) << "Launching " << name << " '" << command.value() << "'"
            << " for task '" << taskId << "'";

    launched = process::subprocess(
        command.value(),
        Subprocess::PATH(os::DEV_NULL),
        Subprocess::FD(STDERR_FILENO),
        Subprocess::FD(STDERR_FILENO),
        environment,
        getCustomCloneFunc(plain));
  }

  if (launched.isError()) {
    return Failure("Failed to create subprocess: " + launched.error());
  }

  // TODO(alexr): Use lambda named captures for
  // these cached values once it is available.
  const pid_t commandPid = launched->pid();
  const string _name = name;
  const Duration timeout = checkTimeout;
  const TaskID _taskId = taskId;

  return launched->status()
    .after(
        timeout,
        [timeout, commandPid, _name, _taskId](Future<Option<int>> future)
    {
      // The status is no longer of interest once we timed out.
      future.discard();

      if (commandPid != -1) {
        // Cleanup the external command process.
        VLOG(1) << "Killing the " << _name << " process '" << commandPid
                << "' for task '" << _taskId << "'";

        os::killtree(commandPid, SIGKILL);
      }

      return Failure("Command timed out after " + stringify(timeout));
    })
    .then([](const Option<int>& exitCode) -> Future<int> {
      if (exitCode.isSome()) {
        return exitCode.get();
      }

      return Failure("Failed to reap the command process");
    });
}
Exemple #26
0
// Downloads the blob "v2/<path>/blobs/<digest>" from the registry
// server into the file 'filePath' and resolves to the result of
// 'saveBlob'. The parent directory of 'filePath' is created first.
// Fails if the directory cannot be created, if 'path' contains a
// space, if the request fails, if the target file cannot be opened
// or switched to non-blocking mode, or if the response carries no
// streaming reader. The file descriptor is closed on every path
// after it has been opened.
Future<size_t> RegistryClientProcess::getBlob(
    const string& path,
    const Option<string>& digest,
    const Path& filePath)
{
  Try<Nothing> mkdir = os::mkdir(filePath.dirname(), true);
  if (mkdir.isError()) {
    return Failure(
        "Failed to create directory to download blob: " + mkdir.error());
  }

  if (strings::contains(path, " ")) {
    return Failure("Invalid repository path: " + path);
  }

  const string blobURLPath = "v2/" + path + "/blobs/" + digest.getOrElse("");

  http::URL blobURL(registryServer_);
  blobURL.path = blobURLPath;

  return doHttpGet(blobURL, None(), true, true, None())
    .then([this, blobURLPath, filePath](
        const http::Response& response) -> Future<size_t> {
      // Closes 'descriptor'; if that fails, logs 'warning' followed by
      // the file path and the error.
      auto closeOrWarn = [filePath](int descriptor, const char* warning) {
        Try<Nothing> close = os::close(descriptor);
        if (close.isError()) {
          LOG(WARNING) << warning << stringify(filePath) << "': "
                       << close.error();
        }
      };

      Try<int> fd = os::open(
          filePath.value,
          O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC,
          S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);

      if (fd.isError()) {
        return Failure("Failed to open file '" + filePath.value + "': " +
                       fd.error());
      }

      const int descriptor = fd.get();

      Try<Nothing> nonblock = os::nonblock(descriptor);
      if (nonblock.isError()) {
        closeOrWarn(
            descriptor, "Failed to close the file descriptor for file '");

        return Failure(
            "Failed to set non-blocking mode for file: " + filePath.value);
      }

      // TODO(jojy): Add blob validation.
      // TODO(jojy): Add check for max size.

      Option<Pipe::Reader> reader = response.reader;
      if (reader.isNone()) {
        closeOrWarn(
            descriptor, "Failed to close the file descriptor for file '");

        return Failure("Failed to get streaming reader from blob response");
      }

      return saveBlob(descriptor, reader.get())
        .onAny([blobURLPath, filePath, descriptor, closeOrWarn](
            const Future<size_t>& future) {
          // The descriptor is released regardless of the outcome.
          closeOrWarn(
              descriptor,
              "Failed to close the file descriptor for blob '");

          if (future.isFailed()) {
            LOG(WARNING) << "Failed to save blob requested from '"
                         << blobURLPath << "' to path '"
                         << stringify(filePath) << "': " << future.failure();
          }

          if (future.isDiscarded()) {
            LOG(WARNING) << "Failed to save blob requested from '"
                         << blobURLPath << "' to path '" << stringify(filePath)
                         << "': future discarded";
          }
        });
    });
}
Exemple #27
0
// Fetches the Appc image `appc` into a fresh temporary directory created
// under the store's staging directory, moves the single fetched image to
// its final path in the store (unless an image with that id already
// exists there) and adds it to the cache.
//
// Returns the id of the fetched image. The future fails if the temporary
// directory cannot be created, the fetch fails, the fetch does not yield
// exactly one image, or the rename, the cache update or the removal of
// the temporary directory fails.
//
// Removal of the temporary directory is attempted on every outcome: as
// the last step of the success path, and on a best-effort basis (logged,
// never altering the result) when the future fails or is discarded.
Future<string> StoreProcess::_fetchImage(const Image::Appc& appc)
{
  VLOG(1) << "Fetching image '" << appc.name() << "'";

  Try<string> _tmpFetchDir = os::mkdtemp(
      path::join(paths::getStagingDir(rootDir), "XXXXXX"));

  if (_tmpFetchDir.isError()) {
    return Failure(
        "Failed to create temporary fetch directory for image '" +
        appc.name() + "': " + _tmpFetchDir.error());
  }

  const string tmpFetchDir = _tmpFetchDir.get();

  return fetcher->fetch(appc, Path(tmpFetchDir))
    .then(defer(self(), [=]() -> Future<string> {
      // The fetcher is expected to leave exactly one entry, named after
      // the image id, in the temporary directory.
      Try<list<string>> imageIds = os::ls(tmpFetchDir);
      if (imageIds.isError()) {
        return Failure(
            "Failed to list images under '" + tmpFetchDir +
            "': " + imageIds.error());
      }

      if (imageIds->size() != 1) {
        return Failure(
            "Unexpected number of images under '" + tmpFetchDir +
            "': " + stringify(imageIds->size()));
      }

      const string& imageId = imageIds->front();
      const string source = path::join(tmpFetchDir, imageId);
      const string target = paths::getImagePath(rootDir, imageId);

      // An image that is already in the store is kept as is; the fetched
      // copy is only moved into place when no such image exists.
      if (os::exists(target)) {
        LOG(WARNING) << "Image id '" << imageId
                     << "' already exists in the store";
      } else {
        Try<Nothing> rename = os::rename(source, target);
        if (rename.isError()) {
          return Failure(
              "Failed to rename directory '" + source +
              "' to '" + target + "': " + rename.error());
        }
      }

      Try<Nothing> addCache = cache->add(imageId);
      if (addCache.isError()) {
        return Failure(
            "Failed to add image '" + appc.name() + "' with image id '" +
            imageId + "' to the cache: " + addCache.error());
      }

      Try<Nothing> rmdir = os::rmdir(tmpFetchDir);
      if (rmdir.isError()) {
        return Failure(
            "Failed to remove temporary fetch directory '" +
            tmpFetchDir + "' for image '" + appc.name() + "': " +
            rmdir.error());
      }

      return imageId;
    }))
    .onAny([tmpFetchDir](const Future<string>& future) {
      // Every non-ready outcome (fetch failure, any failure above, or a
      // discard) would otherwise leave the temporary directory behind in
      // the staging area. A ready future has already removed it.
      if (!future.isReady() && os::exists(tmpFetchDir)) {
        Try<Nothing> rmdir = os::rmdir(tmpFetchDir);
        if (rmdir.isError()) {
          LOG(WARNING) << "Failed to remove temporary fetch directory '"
                       << tmpFetchDir << "': " << rmdir.error();
        }
      }
    });
}
Exemple #28
0
// Performs one COMMAND health check: launches the configured command as
// a subprocess and translates its exit status into the returned future.
//
// The future is ready when the command exits with status 0. It fails
// when the subprocess cannot be created, when no exit status is
// available, when the status is non-zero, or when the command has not
// finished within `check.timeout_seconds()` (its process tree is then
// sent SIGKILL).
Future<Nothing> HealthCheckerProcess::_commandHealthCheck()
{
  CHECK_EQ(HealthCheck::COMMAND, check.type());
  CHECK(check.has_command());

  const CommandInfo& commandInfo = check.command();

  // Begin with the environment of this process; variables specified by
  // the command take precedence over it.
  map<string, string> env = os::environment();

  foreach (const Environment::Variable& entry,
           commandInfo.environment().variables()) {
    env[entry.name()] = entry.value();
  }

  // Placeholder until one of the two launch variants below has run.
  Try<Subprocess> child = Error("Not launched");

  if (!commandInfo.shell()) {
    // Exec variant: the value is the executable and the arguments are
    // passed through as its argv.
    const vector<string> args(
        commandInfo.arguments().begin(),
        commandInfo.arguments().end());

    VLOG(1) << "Launching command health check [" << commandInfo.value()
            << ", " << strings::join(", ", args) << "]";

    child = subprocess(
        commandInfo.value(),
        args,
        Subprocess::PATH("/dev/null"),
        Subprocess::FD(STDERR_FILENO),
        Subprocess::FD(STDERR_FILENO),
        nullptr,
        env,
        clone);
  } else {
    // Shell variant: the value is handed over as a single command line.
    VLOG(1) << "Launching command health check '" << commandInfo.value()
            << "'";

    child = subprocess(
        commandInfo.value(),
        Subprocess::PATH("/dev/null"),
        Subprocess::FD(STDERR_FILENO),
        Subprocess::FD(STDERR_FILENO),
        env,
        clone);
  }

  if (child.isError()) {
    return Failure("Failed to create subprocess: " + child.error());
  }

  const pid_t childPid = child->pid();
  const Duration limit =
    Seconds(static_cast<int64_t>(check.timeout_seconds()));

  return child->status()
    .after(limit, [limit, childPid](Future<Option<int>> pending) {
      pending.discard();

      // Do not leave the timed out command (or its children) running.
      if (childPid != -1) {
        VLOG(1) << "Killing the command health check process " << childPid;

        os::killtree(childPid, SIGKILL);
      }

      return Failure(
          "Command has not returned after " + stringify(limit) +
          "; aborting");
    })
    .then([](const Option<int>& exitStatus) -> Future<Nothing> {
      if (exitStatus.isNone()) {
        return Failure("Failed to reap the command process");
      }

      const int code = exitStatus.get();
      if (code == 0) {
        return Nothing();
      }

      return Failure("Command returned " + WSTRINGIFY(code));
    });
}
Exemple #29
0
// Performs one TCP health check by running the TCP_CHECK_COMMAND helper
// found in `launcherDir` against DEFAULT_DOMAIN and the configured port.
//
// The helper's exit status, stdout and stderr are collected and handed
// to `__tcpHealthCheck`, which produces the final result. The returned
// future fails when the helper cannot be launched or has not finished
// within `check.timeout_seconds()` (its process tree is then sent
// SIGKILL).
Future<Nothing> HealthCheckerProcess::_tcpHealthCheck()
{
  // Shape of the combined (status, stdout, stderr) result awaited below.
  typedef Future<tuple<Future<Option<int>>,
                       Future<string>,
                       Future<string>>> CheckerResult;

  CHECK_EQ(HealthCheck::TCP, check.type());
  CHECK(check.has_tcp());

  // The directory that holds TCP_CHECK_COMMAND must be present.
  CHECK(os::exists(launcherDir));

  const HealthCheck::TCPCheckInfo& tcpInfo = check.tcp();

  VLOG(1) << "Launching TCP health check at port '" << tcpInfo.port() << "'";

  const string checkerPath = path::join(launcherDir, TCP_CHECK_COMMAND);
  const string ipFlag = "--ip=" + DEFAULT_DOMAIN;
  const string portFlag = "--port=" + stringify(tcpInfo.port());

  const vector<string> checkerArgv = {checkerPath, ipFlag, portFlag};

  // stdin comes from /dev/null; stdout and stderr are piped back so they
  // can be read below.
  Try<Subprocess> checker = subprocess(
      checkerPath,
      checkerArgv,
      Subprocess::PATH("/dev/null"),
      Subprocess::PIPE(),
      Subprocess::PIPE(),
      nullptr,
      None(),
      clone);

  if (checker.isError()) {
    return Failure(
        "Failed to create the " + string(TCP_CHECK_COMMAND) +
        " subprocess: " + checker.error());
  }

  const pid_t checkerPid = checker->pid();
  const Duration limit =
    Seconds(static_cast<int64_t>(check.timeout_seconds()));

  Future<Option<int>> exitStatus = checker->status();
  Future<string> stdoutText = process::io::read(checker->out().get());
  Future<string> stderrText = process::io::read(checker->err().get());

  return await(exitStatus, stdoutText, stderrText)
    .after(limit, [limit, checkerPid](CheckerResult pending) {
      pending.discard();

      // Do not leave the timed out helper running.
      if (checkerPid != -1) {
        VLOG(1) << "Killing the TCP health check process " << checkerPid;

        os::killtree(checkerPid, SIGKILL);
      }

      return Failure(
          string(TCP_CHECK_COMMAND) + " has not returned after " +
          stringify(limit) + "; aborting");
    })
    .then(defer(self(), &Self::__tcpHealthCheck, lambda::_1));
}
Exemple #30
0
// Performs one HTTP health check by running HTTP_CHECK_COMMAND against a
// URL built from the configured scheme (DEFAULT_HTTP_SCHEME if unset),
// DEFAULT_DOMAIN, the configured port and the optional path.
//
// The command's exit status, stdout and stderr are collected and handed
// to `__httpHealthCheck`, which produces the final result. The returned
// future fails when the command cannot be launched or has not finished
// within `check.timeout_seconds()` (its process tree is then sent
// SIGKILL).
Future<Nothing> HealthCheckerProcess::_httpHealthCheck()
{
  // Shape of the combined (status, stdout, stderr) result awaited below.
  typedef Future<tuple<Future<Option<int>>,
                       Future<string>,
                       Future<string>>> CheckerResult;

  CHECK_EQ(HealthCheck::HTTP, check.type());
  CHECK(check.has_http());

  const HealthCheck::HTTPCheckInfo& httpInfo = check.http();

  string scheme = DEFAULT_HTTP_SCHEME;
  if (httpInfo.has_scheme()) {
    scheme = httpInfo.scheme();
  }

  // An unset path contributes nothing to the URL.
  string urlPath;
  if (httpInfo.has_path()) {
    urlPath = httpInfo.path();
  }

  const string url = scheme + "://" + DEFAULT_DOMAIN + ":" +
                     stringify(httpInfo.port()) + urlPath;

  VLOG(1) << "Launching HTTP health check '" << url << "'";

  const vector<string> checkerArgv = {
    HTTP_CHECK_COMMAND,
    "-s",                 // Silent: no progress meter, no error messages.
    "-S",                 // Still print an error message on failure.
    "-L",                 // Follow HTTP 3xx redirects.
    "-k",                 // Skip SSL validation for the https scheme.
    "-w", "%{http_code}", // Write the HTTP response code to stdout.
    "-o", "/dev/null",    // Throw the response output away.
    url
  };

  // stdin comes from /dev/null; stdout and stderr are piped back so they
  // can be read below.
  Try<Subprocess> checker = subprocess(
      HTTP_CHECK_COMMAND,
      checkerArgv,
      Subprocess::PATH("/dev/null"),
      Subprocess::PIPE(),
      Subprocess::PIPE(),
      nullptr,
      None(),
      clone);

  if (checker.isError()) {
    return Failure(
        "Failed to create the " + string(HTTP_CHECK_COMMAND) +
        " subprocess: " + checker.error());
  }

  const pid_t checkerPid = checker->pid();
  const Duration limit =
    Seconds(static_cast<int64_t>(check.timeout_seconds()));

  Future<Option<int>> exitStatus = checker->status();
  Future<string> stdoutText = process::io::read(checker->out().get());
  Future<string> stderrText = process::io::read(checker->err().get());

  return await(exitStatus, stdoutText, stderrText)
    .after(limit, [limit, checkerPid](CheckerResult pending) {
      pending.discard();

      // Do not leave the timed out command running.
      if (checkerPid != -1) {
        VLOG(1) << "Killing the HTTP health check process " << checkerPid;

        os::killtree(checkerPid, SIGKILL);
      }

      return Failure(
          string(HTTP_CHECK_COMMAND) + " has not returned after " +
          stringify(limit) + "; aborting");
    })
    .then(defer(self(), &Self::__httpHealthCheck, lambda::_1));
}