Example #1
TEST_F(DiskResourcesTest, Addition)
{
  Resources r1 = createDiskResource("10", "role", None(), "path");
  Resources r2 = createDiskResource("10", "role", None(), None());
  Resources r3 = createDiskResource("20", "role", None(), "path");

  EXPECT_EQ(r3, r1 + r2);

  Resources r4 = createDiskResource("10", "role", "1", "path");
  Resources r5 = createDiskResource("10", "role", "2", "path");
  Resources r6 = createDiskResource("20", "role", "1", "path");

  Resources sum = r4 + r5;

  EXPECT_TRUE(sum.contains(r4));
  EXPECT_TRUE(sum.contains(r5));
  EXPECT_FALSE(sum.contains(r3));
  EXPECT_FALSE(sum.contains(r6));
}
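For reference, the semantics exercised here: plain disk resources of the same role add numerically (so r1 + r2 == r3), while disk resources carrying distinct persistence IDs are never merged, which is why sum contains both r4 and r5 but neither r3 nor the merged r6. The createDiskResource helper comes from Mesos' test utilities; a minimal sketch of its likely shape (the exact signature and defaulting are assumptions, not the verbatim helper) is:

// Hypothetical sketch of the test helper, assuming the standard Mesos
// protobuf API; the real helper lives in the Mesos test utilities.
static Resource createDiskResource(
    const string& value,          // disk size, e.g. "10" (MB)
    const string& role,           // reservation role
    const Option<string>& id,     // persistence ID; None() for plain disk
    const Option<string>& path)   // container path for the volume
{
  Resource resource = Resources::parse("disk", value, role).get();

  if (id.isSome() || path.isSome()) {
    Resource::DiskInfo disk;
    if (id.isSome()) {
      disk.mutable_persistence()->set_id(id.get());
    }
    if (path.isSome()) {
      disk.mutable_volume()->set_container_path(path.get());
      disk.mutable_volume()->set_mode(Volume::RW);
    }
    resource.mutable_disk()->CopyFrom(disk);
  }

  return resource;
}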
Example #2
 void selfs_free(void* ptr) {
   int32* p = (int32*)ptr;
   TrackCHeapInMonitor::adjust(-true_size_of_malloced_obj(p));
 # if GENERATE_DEBUGGING_AIDS
   if (CheckAssertions) {
     if (resources.contains((char*)p)) 
       fatal("should not delete resource object");
     // cannot use assert because printf uses malloc() ...
     if ( true_size_of_malloced_obj(p) < 0 
     ||   TrackCHeapInMonitor::allocated() < 0 )
       breakpoint();
     if (ptr == (void*)catchThisOne) 
       breakpoint();
   }
 # endif
   if (MallocInProgress) fatal("malloc/free aren't reentrant");
   MallocInProgress = true;              // for hprofiler
   free(ptr);
   MallocInProgress = false;
 }
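The MallocInProgress guard in selfs_free only holds up if the allocation path toggles the same flag. A hypothetical matching wrapper, assuming the same bookkeeping helpers (TrackCHeapInMonitor, true_size_of_malloced_obj, fatal) and not the verbatim Self VM code, might look like:

 // Hypothetical counterpart sketch; helper names are taken from
 // selfs_free above, everything else is an assumption.
 void* selfs_malloc(size_t size) {
   if (MallocInProgress) fatal("malloc/free aren't reentrant");
   MallocInProgress = true;              // for hprofiler
   void* p = malloc(size);
   MallocInProgress = false;
   if (p == NULL) fatal("out of C heap");
   TrackCHeapInMonitor::adjust(true_size_of_malloced_obj((int32*)p));
   return p;
 }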
Example #3
Future<Nothing> PosixFilesystemIsolatorProcess::update(
    const ContainerID& containerId,
    const Resources& resources)
{
  if (!infos.contains(containerId)) {
    return Failure("Unknown container");
  }

  const Owned<Info>& info = infos[containerId];

  // TODO(jieyu): Currently, we only allow non-nested relative
  // container paths for volumes. This is enforced by the master. For
  // those volumes, we create symlinks in the executor directory.
  Resources current = info->resources;

  // We first remove unneeded persistent volumes.
  foreach (const Resource& resource, current.persistentVolumes()) {
    // This is enforced by the master.
    CHECK(resource.disk().has_volume());

    // Ignore absolute and nested paths.
    const string& containerPath = resource.disk().volume().container_path();
    if (strings::contains(containerPath, "/")) {
      LOG(WARNING) << "Skipping updating symlink for persistent volume "
                   << resource << " of container " << containerId
                   << " because the container path '" << containerPath
                   << "' contains slash";
      continue;
    }

    if (resources.contains(resource)) {
      continue;
    }

    string link = path::join(info->directory, containerPath);

    LOG(INFO) << "Removing symlink '" << link << "' for persistent volume "
              << resource << " of container " << containerId;

    Try<Nothing> rm = os::rm(link);
    if (rm.isError()) {
      return Failure(
          "Failed to remove the symlink for the unneeded "
          "persistent volume at '" + link + "': " + rm.error());
    }
  }

  // We then link additional persistent volumes.
  foreach (const Resource& resource, resources.persistentVolumes()) {
    // This is enforced by the master.
    CHECK(resource.disk().has_volume());

    // Ignore absolute and nested paths.
    const string& containerPath = resource.disk().volume().container_path();
    if (strings::contains(containerPath, "/")) {
      LOG(WARNING) << "Skipping updating symlink for persistent volume "
                   << resource << " of container " << containerId
                   << " because the container path '" << containerPath
                   << "' contains slash";
      continue;
    }

    if (current.contains(resource)) {
      continue;
    }

    string original = paths::getPersistentVolumePath(flags.work_dir, resource);

    // Set the ownership of the persistent volume to match that of the
    // sandbox directory.
    //
    // NOTE: Currently, persistent volumes in Mesos are exclusive,
    // meaning that if a persistent volume is used by one task or
    // executor, it cannot be concurrently used by another task or
    // executor. But if we allow multiple executors to use the same
    // persistent volume at the same time in the future, the ownership
    // of the persistent volume may conflict here.
    //
    // TODO(haosdent): Consider letting the frameworks specify the
    // user/group of the persistent volumes.
    struct stat s;
    if (::stat(info->directory.c_str(), &s) < 0) {
      return Failure("Failed to get ownership for '" + info->directory + "': " +
                     os::strerror(errno));
    }

    // TODO(hausdorff): (MESOS-5461) Persistent volumes maintain the invariant
    // that they are used by one task at a time. This is currently enforced by
    // `os::chown`. Windows does not support `os::chown`; we will need to
    // revisit this later.
#ifndef __WINDOWS__
    LOG(INFO) << "Changing the ownership of the persistent volume at '"
              << original << "' with uid " << s.st_uid
              << " and gid " << s.st_gid;

    Try<Nothing> chown = os::chown(s.st_uid, s.st_gid, original, false);
    if (chown.isError()) {
      return Failure(
          "Failed to change the ownership of the persistent volume at '" +
          original + "' with uid " + stringify(s.st_uid) +
          " and gid " + stringify(s.st_gid) + ": " + chown.error());
    }
#endif
    string link = path::join(info->directory, containerPath);

    if (os::exists(link)) {
      // NOTE: This is possible because 'info->resources' will be
      // reset when slave restarts and recovers. When the slave calls
      // 'containerizer->update' after the executor re-registers,
      // we'll try to relink all the already symlinked volumes.
      Result<string> realpath = os::realpath(link);
      if (!realpath.isSome()) {
        return Failure(
            "Failed to get the realpath of symlink '" + link + "': " +
            (realpath.isError() ? realpath.error() : "No such directory"));
      }

      // A sanity check to make sure the target of the symlink does
      // not change. In fact, this is not supposed to happen.
      // NOTE: Here, we compare the realpaths because 'original' might
      // contain symbolic links.
      Result<string> _original = os::realpath(original);
      if (!_original.isSome()) {
        return Failure(
            "Failed to get the realpath of volume '" + original + "': " +
            (_original.isError() ? _original.error() : "No such directory"));
      }

      if (realpath.get() != _original.get()) {
        return Failure(
            "The existing symlink '" + link + "' points to '" +
            _original.get() + "' and the new target is '" +
            realpath.get() + "'");
      }
    } else {
      LOG(INFO) << "Adding symlink from '" << original << "' to '"
                << link << "' for persistent volume " << resource
                << " of container " << containerId;

      Try<Nothing> symlink = ::fs::symlink(original, link);
      if (symlink.isError()) {
        return Failure(
            "Failed to symlink persistent volume from '" +
            original + "' to '" + link + "': " + symlink.error());
      }
    }
  }

  // Store the updated resources.
  info->resources = resources;

  return Nothing();
}
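The idempotency check above (trust an existing link only if its realpath matches the volume's realpath) can be exercised without the Mesos stout wrappers. A minimal standalone sketch using the underlying POSIX calls, with placeholder paths:

#include <limits.h>
#include <stdlib.h>
#include <unistd.h>

#include <cstdio>
#include <string>

// Returns true if 'link' already resolves to the same file as
// 'original'. realpath() resolves both sides, so the comparison is not
// fooled by 'original' itself containing symbolic links.
static bool symlinkMatches(const std::string& link,
                           const std::string& original)
{
  char linkReal[PATH_MAX];
  char originalReal[PATH_MAX];

  if (::realpath(link.c_str(), linkReal) == nullptr ||
      ::realpath(original.c_str(), originalReal) == nullptr) {
    return false;
  }

  return std::string(linkReal) == std::string(originalReal);
}

int main()
{
  const std::string original = "/tmp/volume";      // placeholder
  const std::string link = "/tmp/sandbox/volume";  // placeholder

  // Create the link only if it does not already point at the volume;
  // symlink() fails with EEXIST if a conflicting link is present.
  if (!symlinkMatches(link, original) &&
      ::symlink(original.c_str(), link.c_str()) != 0) {
    std::perror("symlink");
    return 1;
  }

  return 0;
}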
Example #4
Future<Nothing> LinuxFilesystemIsolatorProcess::update(
    const ContainerID& containerId,
    const Resources& resources)
{
  // Mount persistent volumes. We do this in the host namespace and
  // rely on mount propagation for them to be visible inside the
  // container.
  if (!infos.contains(containerId)) {
    return Failure("Unknown container");
  }

  const Owned<Info>& info = infos[containerId];

  Resources current = info->resources;

  // We first remove unneeded persistent volumes.
  foreach (const Resource& resource, current.persistentVolumes()) {
    // This is enforced by the master.
    CHECK(resource.disk().has_volume());

    // Ignore absolute and nested paths.
    const string& containerPath = resource.disk().volume().container_path();
    if (strings::contains(containerPath, "/")) {
      LOG(WARNING) << "Skipping updating mount for persistent volume "
                   << resource << " of container " << containerId
                   << " because the container path '" << containerPath
                   << "' contains slash";
      continue;
    }

    if (resources.contains(resource)) {
      continue;
    }

    // Determine the target of the mount.
    string target = path::join(info->directory, containerPath);

    LOG(INFO) << "Removing mount '" << target << "' for persistent volume "
              << resource << " of container " << containerId;

    // The unmount will fail if the task/executor is still using files
    // or directories under 'target'.
    Try<Nothing> unmount = fs::unmount(target);
    if (unmount.isError()) {
      return Failure(
          "Failed to unmount unneeded persistent volume at '" +
          target + "': " + unmount.error());
    }

    // NOTE: This is a non-recursive rmdir.
    Try<Nothing> rmdir = os::rmdir(target, false);
    if (rmdir.isError()) {
      return Failure(
          "Failed to remove persistent volume mount point at '" +
          target + "': " + rmdir.error());
    }
  }

  // We then mount new persistent volumes.
  foreach (const Resource& resource, resources.persistentVolumes()) {
    // This is enforced by the master.
    CHECK(resource.disk().has_volume());

    // Ignore absolute and nested paths.
    const string& containerPath = resource.disk().volume().container_path();
    if (strings::contains(containerPath, "/")) {
      LOG(WARNING) << "Skipping updating mount for persistent volume "
                   << resource << " of container " << containerId
                   << " because the container path '" << containerPath
                   << "' contains slash";
      continue;
    }

    if (current.contains(resource)) {
      continue;
    }

    // Determine the source of the mount.
    string source = paths::getPersistentVolumePath(flags.work_dir, resource);

    // Set the ownership of the persistent volume to match that of the
    // sandbox directory.
    //
    // NOTE: Currently, persistent volumes in Mesos are exclusive,
    // meaning that if a persistent volume is used by one task or
    // executor, it cannot be concurrently used by another task or
    // executor. But if we allow multiple executors to use the same
    // persistent volume at the same time in the future, the ownership
    // of the persistent volume may conflict here.
    //
    // TODO(haosdent): Consider letting the frameworks specify the
    // user/group of the persistent volumes.
    struct stat s;
    if (::stat(info->directory.c_str(), &s) < 0) {
      return Failure("Failed to get ownership for '" + info->directory + "': " +
                     os::strerror(errno));
    }

    LOG(INFO) << "Changing the ownership of the persistent volume at '"
              << source << "' with uid " << s.st_uid
              << " and gid " << s.st_gid;

    Try<Nothing> chown = os::chown(s.st_uid, s.st_gid, source, true);
    if (chown.isError()) {
      return Failure(
          "Failed to change the ownership of the persistent volume at '" +
          source + "' with uid " + stringify(s.st_uid) +
          " and gid " + stringify(s.st_gid) + ": " + chown.error());
    }

    // Determine the target of the mount.
    string target = path::join(info->directory, containerPath);

    if (os::exists(target)) {
      // NOTE: This is possible because 'info->resources' will be
      // reset when slave restarts and recovers. When the slave calls
      // 'containerizer->update' after the executor re-registers,
      // we'll try to re-mount all the already mounted volumes.

      // TODO(jieyu): Check the source of the mount matches the entry
      // with the same target in the mount table if one can be found.
      // If not, mount the persistent volume as we did below. This is
      // possible because the slave could crash after it unmounts the
      // volume but before it is able to delete the mount point.
    } else {
      Try<Nothing> mkdir = os::mkdir(target);
      if (mkdir.isError()) {
        return Failure(
            "Failed to create persistent volume mount point at '" +
            target + "': " + mkdir.error());
      }

      LOG(INFO) << "Mounting '" << source << "' to '" << target
                << "' for persistent volume " << resource
                << " of container " << containerId;

      Try<Nothing> mount = fs::mount(source, target, None(), MS_BIND, NULL);
      if (mount.isError()) {
        return Failure(
            "Failed to mount persistent volume from '" +
            source + "' to '" + target + "': " + mount.error());
      }
    }
  }

  // Store the new resources.
  info->resources = resources;

  return Nothing();
}
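Stripped of the Mesos plumbing, the mounting step above is a single bind mount. A minimal standalone sketch of the mkdir-then-mount sequence using mount(2) directly, with placeholder paths:

#include <sys/mount.h>
#include <sys/stat.h>

#include <cerrno>
#include <cstdio>

int main()
{
  const char* source = "/var/lib/volumes/vol1";    // placeholder
  const char* target = "/var/sandboxes/s1/vol1";   // placeholder

  // The mount point must exist first (mirrors the os::mkdir() above).
  if (::mkdir(target, 0755) != 0 && errno != EEXIST) {
    std::perror("mkdir");
    return 1;
  }

  // MS_BIND makes 'target' an alias of 'source'; no filesystem type or
  // data argument is needed for a bind mount.
  if (::mount(source, target, nullptr, MS_BIND, nullptr) != 0) {
    std::perror("mount");
    return 1;
  }

  return 0;
}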
Example #5
  virtual void resourceOffers(SchedulerDriver* driver,
                              const vector<Offer>& offers)
  {
    foreach (const Offer& offer, offers) {
      LOG(INFO) << "Received offer " << offer.id() << " with "
                << offer.resources();

      // If the framework sees this agent's offer for the first time, the
      // state is `State::INIT`; the framework will reserve resources
      // (sending a RESERVE operation to the master) in this loop.
      if (!states.contains(offer.slave_id())) {
        // If all tasks were launched, do not reserve more resources; wait
        // for them to finish and unreserve resources.
        if (tasksLaunched == totalTasks) {
          continue;
        }

        states[offer.slave_id()] = State::INIT;
      }

      const State state = states[offer.slave_id()];

      Filters filters;
      filters.set_refuse_seconds(0);

      switch (state) {
        case State::INIT: {
          // The framework reserves resources from this offer for only one
          // task; the task will be dispatched when the reserved resources
          // are re-offered to this framework.
          Resources resources = offer.resources();
          Offer::Operation reserve = RESERVE(taskResources);

          Try<Resources> apply = resources.apply(reserve);
          if (apply.isError()) {
            LOG(INFO) << "Failed to reserve resources for task in offer "
                      << stringify(offer.id()) << ": " << apply.error();
            break;
          }

          driver->acceptOffers({offer.id()}, {reserve}, filters);
          states[offer.slave_id()] = State::RESERVING;
          break;
        }
        case State::RESERVING: {
          Resources resources = offer.resources();
          Resources reserved = resources.reserved(role);
          if (!reserved.contains(taskResources)) {
            break;
          }
          states[offer.slave_id()] = State::RESERVED;

          // We fall through here to save an offer cycle.
        }
        case State::RESERVED: {
          Resources resources = offer.resources();
          Resources reserved = resources.reserved(role);

          CHECK(reserved.contains(taskResources));

          // If all tasks were launched, unreserve those resources.
          if (tasksLaunched == totalTasks) {
            driver->acceptOffers(
                {offer.id()}, {UNRESERVE(taskResources)}, filters);
            states[offer.slave_id()] = State::UNRESERVING;
            break;
          }

          // Framework dispatches task on the reserved resources.
          CHECK(tasksLaunched < totalTasks);

          // Launch tasks on reserved resources.
          const string& taskId = stringify(tasksLaunched++);
          LOG(INFO) << "Launching task " << taskId << " using offer "
                    << offer.id();
          TaskInfo task;
          task.set_name("Task " + taskId + ": " + command);
          task.mutable_task_id()->set_value(taskId);
          task.mutable_slave_id()->MergeFrom(offer.slave_id());
          task.mutable_command()->set_shell(true);
          task.mutable_command()->set_value(command);
          task.mutable_resources()->MergeFrom(taskResources);
          driver->launchTasks(offer.id(), {task}, filters);
          states[offer.slave_id()] = State::TASK_RUNNING;
          break;
        }
        case State::TASK_RUNNING:
          LOG(INFO) << "The task on " << offer.slave_id()
                    << " is running, waiting for task done";
          break;
        case State::UNRESERVING: {
          Resources resources = offer.resources();
          Resources reserved = resources.reserved(role);
          if (!reserved.contains(taskResources)) {
            states[offer.slave_id()] = State::UNRESERVED;
          }
          break;
        }
        case State::UNRESERVED:
          // If the slave's state is UNRESERVED, ignore it. The driver is
          // stopped when all tasks are done and all resources are unreserved.
          break;
      }
    }
  }
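The RESERVE(...) and UNRESERVE(...) helpers are not shown in this excerpt. Given the standard Offer::Operation protobuf, they are presumably shaped like the following sketch (their exact placement and signatures are assumptions):

  // Sketch of the operation builders the scheduler above relies on;
  // Offer::Operation and its reserve/unreserve fields are the standard
  // Mesos protobufs.
  Offer::Operation RESERVE(const Resources& resources)
  {
    Offer::Operation operation;
    operation.set_type(Offer::Operation::RESERVE);
    operation.mutable_reserve()->mutable_resources()->CopyFrom(resources);
    return operation;
  }

  Offer::Operation UNRESERVE(const Resources& resources)
  {
    Offer::Operation operation;
    operation.set_type(Offer::Operation::UNRESERVE);
    operation.mutable_unreserve()->mutable_resources()->CopyFrom(resources);
    return operation;
  }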
Example #6
Future<Nothing> LinuxFilesystemIsolatorProcess::update(
    const ContainerID& containerId,
    const Resources& resources)
{
  if (containerId.has_parent()) {
    return Failure("Not supported for nested containers");
  }

  // Mount persistent volumes. We do this in the host namespace and
  // rely on mount propagation for them to be visible inside the
  // container.
  if (!infos.contains(containerId)) {
    return Failure("Unknown container");
  }

  const Owned<Info>& info = infos[containerId];

  Resources current = info->resources;

  // We first remove unneeded persistent volumes.
  foreach (const Resource& resource, current.persistentVolumes()) {
    // This is enforced by the master.
    CHECK(resource.disk().has_volume());

    // Ignore absolute and nested paths.
    const string& containerPath = resource.disk().volume().container_path();
    if (strings::contains(containerPath, "/")) {
      LOG(WARNING) << "Skipping updating mount for persistent volume "
                   << resource << " of container " << containerId
                   << " because the container path '" << containerPath
                   << "' contains slash";
      continue;
    }

    if (resources.contains(resource)) {
      continue;
    }

    // Determine the target of the mount.
    string target = path::join(info->directory, containerPath);

    LOG(INFO) << "Removing mount '" << target << "' for persistent volume "
              << resource << " of container " << containerId;

    // The unmount will fail if the task/executor is still using files
    // or directories under 'target'.
    Try<Nothing> unmount = fs::unmount(target);
    if (unmount.isError()) {
      return Failure(
          "Failed to unmount unneeded persistent volume at '" +
          target + "': " + unmount.error());
    }

    // NOTE: This is a non-recursive rmdir.
    Try<Nothing> rmdir = os::rmdir(target, false);
    if (rmdir.isError()) {
      return Failure(
          "Failed to remove persistent volume mount point at '" +
          target + "': " + rmdir.error());
    }
  }

  // Get user and group info for this task based on the task's sandbox.
  struct stat s;
  if (::stat(info->directory.c_str(), &s) < 0) {
    return Failure("Failed to get ownership for '" + info->directory +
                   "': " + os::strerror(errno));
  }

  const uid_t uid = s.st_uid;
  const gid_t gid = s.st_gid;

  // We then mount new persistent volumes.
  foreach (const Resource& resource, resources.persistentVolumes()) {
    // This is enforced by the master.
    CHECK(resource.disk().has_volume());

    // Ignore absolute and nested paths.
    const string& containerPath = resource.disk().volume().container_path();
    if (strings::contains(containerPath, "/")) {
      LOG(WARNING) << "Skipping updating mount for persistent volume "
                   << resource << " of container " << containerId
                   << " because the container path '" << containerPath
                   << "' contains slash";
      continue;
    }

    if (current.contains(resource)) {
      continue;
    }

    // Determine the source of the mount.
    string source = paths::getPersistentVolumePath(flags.work_dir, resource);

    // Check whether the volume is already in use by another container.
    bool isVolumeInUse = false;

    foreachvalue (const Owned<Info>& info, infos) {
      if (info->resources.contains(resource)) {
        isVolumeInUse = true;
        break;
      }
    }

    // Set the ownership of the persistent volume to match that of the sandbox
    // directory if the volume is not already in use. If the volume is
    // currently in use by other containers, tasks in this container may fail
    // to read from or write to the persistent volume due to incompatible
    // ownership and file system permissions.
    if (!isVolumeInUse) {
      LOG(INFO) << "Changing the ownership of the persistent volume at '"
                << source << "' with uid " << uid << " and gid " << gid;

      Try<Nothing> chown = os::chown(uid, gid, source, false);

      if (chown.isError()) {
        return Failure(
            "Failed to change the ownership of the persistent volume at '" +
            source + "' with uid " + stringify(uid) +
            " and gid " + stringify(gid) + ": " + chown.error());
      }
    }

    // Determine the target of the mount.
    string target = path::join(info->directory, containerPath);

    if (os::exists(target)) {
      // NOTE: There are two scenarios in which the mount target may
      // already exist:
      // 1. This is possible because 'info->resources' will be reset
      //    when slave restarts and recovers. When the slave calls
      //    'containerizer->update' after the executor re-registers,
      //    we'll try to re-mount all the already mounted volumes.
      // 2. There may be multiple references to the persistent
      //    volume's mount target. E.g., a host volume and a
      //    persistent volume are both specified, and the source
      //    of the host volume is the same as the container path
      //    of the persistent volume.

      // Check the source of the mount matches the entry with the
      // same target in the mount table if one can be found. If
      // not, mount the persistent volume as we do below. This is
      // possible because the slave could crash after it unmounts the
      // volume but before it is able to delete the mount point.
      Try<fs::MountInfoTable> table = fs::MountInfoTable::read();
      if (table.isError()) {
        return Failure("Failed to get mount table: " + table.error());
      }

      // Check whether this particular persistent volume is already mounted.
      bool volumeMounted = false;

      foreach (const fs::MountInfoTable::Entry& entry, table->entries) {
        // TODO(gilbert): Check source of the mount matches the entry's
        // root. Note that the root is relative to the root of its parent
        // mount. See:
        // http://man7.org/linux/man-pages/man5/proc.5.html
        if (target == entry.target) {
          volumeMounted = true;
          break;
        }
      }

      if (volumeMounted) {
        continue;
      }
    }

    Try<Nothing> mkdir = os::mkdir(target);
    if (mkdir.isError()) {
      return Failure(
          "Failed to create persistent volume mount point at '" +
          target + "': " + mkdir.error());
    }

    LOG(INFO) << "Mounting '" << source << "' to '" << target
              << "' for persistent volume " << resource
              << " of container " << containerId;

    Try<Nothing> mount = fs::mount(source, target, None(), MS_BIND, nullptr);
    if (mount.isError()) {
      return Failure(
          "Failed to mount persistent volume from '" +
          source + "' to '" + target + "': " + mount.error());
    }

    // If the mount needs to be read-only, do a remount.
    if (resource.disk().volume().mode() == Volume::RO) {
      mount = fs::mount(
          None(), target, None(), MS_BIND | MS_RDONLY | MS_REMOUNT, nullptr);

      if (mount.isError()) {
        return Failure(
            "Failed to remount persistent volume as read-only from '" +
            source + "' to '" + target + "': " + mount.error());
      }
    }
  }

  // Store the new resources.
  info->resources = resources;

  return Nothing();
}
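The read-only branch works around a kernel quirk: MS_RDONLY is ignored on the initial bind mount, so a second MS_REMOUNT pass is needed. In isolation, with placeholder paths:

#include <sys/mount.h>

#include <cstdio>

// Minimal sketch of a read-only bind mount. The first mount() ignores
// MS_RDONLY for bind mounts; only the MS_REMOUNT pass makes the mount
// actually read-only.
int bindMountReadOnly(const char* source, const char* target)
{
  if (::mount(source, target, nullptr, MS_BIND, nullptr) != 0) {
    std::perror("bind mount");
    return -1;
  }

  if (::mount(nullptr, target, nullptr,
              MS_BIND | MS_RDONLY | MS_REMOUNT, nullptr) != 0) {
    std::perror("read-only remount");
    return -1;
  }

  return 0;
}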