TEST_F(DiskResourcesTest, Addition)
{
  Resources r1 = createDiskResource("10", "role", None(), "path");
  Resources r2 = createDiskResource("10", "role", None(), None());
  Resources r3 = createDiskResource("20", "role", None(), "path");

  EXPECT_EQ(r3, r1 + r2);

  Resources r4 = createDiskResource("10", "role", "1", "path");
  Resources r5 = createDiskResource("10", "role", "2", "path");
  Resources r6 = createDiskResource("20", "role", "1", "path");

  Resources sum = r4 + r5;

  EXPECT_TRUE(sum.contains(r4));
  EXPECT_TRUE(sum.contains(r5));
  EXPECT_FALSE(sum.contains(r3));
  EXPECT_FALSE(sum.contains(r6));
}
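The test above relies on a createDiskResource helper from the Mesos test suite. As a point of reference, here is a minimal sketch, assuming the public Resource/DiskInfo protobufs and Resources::parse; it is not the actual test helper. It also illustrates why r4 and r5 do not merge: their DiskInfo persistence IDs differ, so Resources addition keeps them as separate Resource objects, and r6 (a single 20 MB volume with ID "1") is not contained in the sum.

// A minimal sketch, assuming Mesos-style protobufs; NOT the actual helper
// used by DiskResourcesTest.
static Resource createDiskResource(
    const std::string& megabytes,
    const std::string& role,
    const Option<std::string>& persistenceId,
    const Option<std::string>& containerPath)
{
  // "disk" is a scalar resource; parse it with the given role.
  Resource resource = Resources::parse("disk", megabytes, role).get();

  if (persistenceId.isSome()) {
    // A persistence ID marks the disk as a persistent volume; resources
    // with different IDs are never merged by Resources arithmetic.
    resource.mutable_disk()->mutable_persistence()->set_id(
        persistenceId.get());
  }

  if (containerPath.isSome()) {
    Volume* volume = resource.mutable_disk()->mutable_volume();
    volume->set_container_path(containerPath.get());
    volume->set_mode(Volume::RW);
  }

  return resource;
}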
void selfs_free(void* ptr) {
  int32* p = (int32*)ptr;
  TrackCHeapInMonitor::adjust(-true_size_of_malloced_obj(p));

# if GENERATE_DEBUGGING_AIDS
  if (CheckAssertions) {
    if (resources.contains((char*)p))
      fatal("should not delete resource object");
    // cannot use assert because printf uses malloc() ...
    if (true_size_of_malloced_obj(p) < 0 ||
        TrackCHeapInMonitor::allocated() < 0)
      breakpoint();
    if (ptr == (void*)catchThisOne)
      breakpoint();
  }
# endif

  if (MallocInProgress)
    fatal("malloc/free aren't reentrant");
  MallocInProgress = true;  // for hprofiler
  free(ptr);
  MallocInProgress = false;
}
Future<Nothing> PosixFilesystemIsolatorProcess::update(
    const ContainerID& containerId,
    const Resources& resources)
{
  if (!infos.contains(containerId)) {
    return Failure("Unknown container");
  }

  const Owned<Info>& info = infos[containerId];

  // TODO(jieyu): Currently, we only allow non-nested relative
  // container paths for volumes. This is enforced by the master. For
  // those volumes, we create symlinks in the executor directory.
  Resources current = info->resources;

  // We first remove unneeded persistent volumes.
  foreach (const Resource& resource, current.persistentVolumes()) {
    // This is enforced by the master.
    CHECK(resource.disk().has_volume());

    // Ignore absolute and nested paths.
    const string& containerPath = resource.disk().volume().container_path();
    if (strings::contains(containerPath, "/")) {
      LOG(WARNING) << "Skipping updating symlink for persistent volume "
                   << resource << " of container " << containerId
                   << " because the container path '" << containerPath
                   << "' contains slash";
      continue;
    }

    if (resources.contains(resource)) {
      continue;
    }

    string link = path::join(info->directory, containerPath);

    LOG(INFO) << "Removing symlink '" << link << "' for persistent volume "
              << resource << " of container " << containerId;

    Try<Nothing> rm = os::rm(link);
    if (rm.isError()) {
      return Failure(
          "Failed to remove the symlink for the unneeded "
          "persistent volume at '" + link + "'");
    }
  }

  // We then link additional persistent volumes.
  foreach (const Resource& resource, resources.persistentVolumes()) {
    // This is enforced by the master.
    CHECK(resource.disk().has_volume());

    // Ignore absolute and nested paths.
    const string& containerPath = resource.disk().volume().container_path();
    if (strings::contains(containerPath, "/")) {
      LOG(WARNING) << "Skipping updating symlink for persistent volume "
                   << resource << " of container " << containerId
                   << " because the container path '" << containerPath
                   << "' contains slash";
      continue;
    }

    if (current.contains(resource)) {
      continue;
    }

    string original = paths::getPersistentVolumePath(flags.work_dir, resource);

    // Set the ownership of the persistent volume to match that of the
    // sandbox directory.
    //
    // NOTE: Currently, persistent volumes in Mesos are exclusive,
    // meaning that if a persistent volume is used by one task or
    // executor, it cannot be concurrently used by other task or
    // executor. But if we allow multiple executors to use the same
    // persistent volume at the same time in the future, the ownership
    // of the persistent volume may conflict here.
    //
    // TODO(haosdent): Consider letting the frameworks specify the
    // user/group of the persistent volumes.
    struct stat s;
    if (::stat(info->directory.c_str(), &s) < 0) {
      return Failure("Failed to get ownership for '" + info->directory +
                     "': " + os::strerror(errno));
    }

    // TODO(hausdorff): (MESOS-5461) Persistent volumes maintain the invariant
    // that they are used by one task at a time. This is currently enforced by
    // `os::chown`. Windows does not support `os::chown`, so we will need to
    // revisit this later.
#ifndef __WINDOWS__
    LOG(INFO) << "Changing the ownership of the persistent volume at '"
              << original << "' with uid " << s.st_uid
              << " and gid " << s.st_gid;

    Try<Nothing> chown = os::chown(s.st_uid, s.st_gid, original, false);
    if (chown.isError()) {
      return Failure(
          "Failed to change the ownership of the persistent volume at '" +
          original + "' with uid " + stringify(s.st_uid) +
          " and gid " + stringify(s.st_gid) + ": " + chown.error());
    }
#endif

    string link = path::join(info->directory, containerPath);

    if (os::exists(link)) {
      // NOTE: This is possible because 'info->resources' will be
      // reset when the slave restarts and recovers. When the slave calls
      // 'containerizer->update' after the executor re-registers,
      // we'll try to relink all the already symlinked volumes.
      Result<string> realpath = os::realpath(link);
      if (!realpath.isSome()) {
        return Failure(
            "Failed to get the realpath of symlink '" + link + "': " +
            (realpath.isError() ? realpath.error() : "No such directory"));
      }

      // A sanity check to make sure the target of the symlink does
      // not change. In fact, this is not supposed to happen.
      // NOTE: Here, we compare the realpaths because 'original' might
      // contain symbolic links.
      Result<string> _original = os::realpath(original);
      if (!_original.isSome()) {
        return Failure(
            "Failed to get the realpath of volume '" + original + "': " +
            (_original.isError() ? _original.error() : "No such directory"));
      }

      if (realpath.get() != _original.get()) {
        return Failure(
            "The existing symlink '" + link + "' points to '" +
            realpath.get() + "' and the new target is '" +
            _original.get() + "'");
      }
    } else {
      LOG(INFO) << "Adding symlink from '" << original << "' to '"
                << link << "' for persistent volume " << resource
                << " of container " << containerId;

      Try<Nothing> symlink = ::fs::symlink(original, link);
      if (symlink.isError()) {
        return Failure(
            "Failed to symlink persistent volume from '" +
            original + "' to '" + link + "'");
      }
    }
  }

  // Store the updated resources.
  info->resources = resources;

  return Nothing();
}
Future<Nothing> LinuxFilesystemIsolatorProcess::update(
    const ContainerID& containerId,
    const Resources& resources)
{
  // Mount persistent volumes. We do this in the host namespace and
  // rely on mount propagation for them to be visible inside the
  // container.
  if (!infos.contains(containerId)) {
    return Failure("Unknown container");
  }

  const Owned<Info>& info = infos[containerId];

  Resources current = info->resources;

  // We first remove unneeded persistent volumes.
  foreach (const Resource& resource, current.persistentVolumes()) {
    // This is enforced by the master.
    CHECK(resource.disk().has_volume());

    // Ignore absolute and nested paths.
    const string& containerPath = resource.disk().volume().container_path();
    if (strings::contains(containerPath, "/")) {
      LOG(WARNING) << "Skipping updating mount for persistent volume "
                   << resource << " of container " << containerId
                   << " because the container path '" << containerPath
                   << "' contains slash";
      continue;
    }

    if (resources.contains(resource)) {
      continue;
    }

    // Determine the target of the mount.
    string target = path::join(info->directory, containerPath);

    LOG(INFO) << "Removing mount '" << target << "' for persistent volume "
              << resource << " of container " << containerId;

    // The unmount will fail if the task/executor is still using files
    // or directories under 'target'.
    Try<Nothing> unmount = fs::unmount(target);
    if (unmount.isError()) {
      return Failure(
          "Failed to unmount unneeded persistent volume at '" +
          target + "': " + unmount.error());
    }

    // NOTE: This is a non-recursive rmdir.
    Try<Nothing> rmdir = os::rmdir(target, false);
    if (rmdir.isError()) {
      return Failure(
          "Failed to remove persistent volume mount point at '" +
          target + "': " + rmdir.error());
    }
  }

  // We then mount new persistent volumes.
  foreach (const Resource& resource, resources.persistentVolumes()) {
    // This is enforced by the master.
    CHECK(resource.disk().has_volume());

    // Ignore absolute and nested paths.
    const string& containerPath = resource.disk().volume().container_path();
    if (strings::contains(containerPath, "/")) {
      LOG(WARNING) << "Skipping updating mount for persistent volume "
                   << resource << " of container " << containerId
                   << " because the container path '" << containerPath
                   << "' contains slash";
      continue;
    }

    if (current.contains(resource)) {
      continue;
    }

    // Determine the source of the mount.
    string source = paths::getPersistentVolumePath(flags.work_dir, resource);

    // Set the ownership of the persistent volume to match that of the
    // sandbox directory.
    //
    // NOTE: Currently, persistent volumes in Mesos are exclusive,
    // meaning that if a persistent volume is used by one task or
    // executor, it cannot be concurrently used by other task or
    // executor. But if we allow multiple executors to use the same
    // persistent volume at the same time in the future, the ownership
    // of the persistent volume may conflict here.
    //
    // TODO(haosdent): Consider letting the frameworks specify the
    // user/group of the persistent volumes.
    struct stat s;
    if (::stat(info->directory.c_str(), &s) < 0) {
      return Failure("Failed to get ownership for '" + info->directory +
                     "': " + os::strerror(errno));
    }

    LOG(INFO) << "Changing the ownership of the persistent volume at '"
              << source << "' with uid " << s.st_uid
              << " and gid " << s.st_gid;

    Try<Nothing> chown = os::chown(s.st_uid, s.st_gid, source, true);
    if (chown.isError()) {
      return Failure(
          "Failed to change the ownership of the persistent volume at '" +
          source + "' with uid " + stringify(s.st_uid) +
          " and gid " + stringify(s.st_gid) + ": " + chown.error());
    }

    // Determine the target of the mount.
    string target = path::join(info->directory, containerPath);

    if (os::exists(target)) {
      // NOTE: This is possible because 'info->resources' will be
      // reset when the slave restarts and recovers. When the slave calls
      // 'containerizer->update' after the executor re-registers,
      // we'll try to re-mount all the already mounted volumes.

      // TODO(jieyu): Check that the source of the mount matches the entry
      // with the same target in the mount table if one can be found.
      // If not, mount the persistent volume as we do below. This is
      // possible because the slave could crash after it unmounts the
      // volume but before it is able to delete the mount point.
    } else {
      Try<Nothing> mkdir = os::mkdir(target);
      if (mkdir.isError()) {
        return Failure(
            "Failed to create persistent volume mount point at '" +
            target + "': " + mkdir.error());
      }

      LOG(INFO) << "Mounting '" << source << "' to '" << target
                << "' for persistent volume " << resource
                << " of container " << containerId;

      Try<Nothing> mount = fs::mount(source, target, None(), MS_BIND, NULL);
      if (mount.isError()) {
        return Failure(
            "Failed to mount persistent volume from '" +
            source + "' to '" + target + "': " + mount.error());
      }
    }
  }

  // Store the new resources.
  info->resources = resources;

  return Nothing();
}
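Both update() implementations above resolve a volume's backing directory with paths::getPersistentVolumePath. The following is a rough sketch of what such a lookup could do, assuming volumes live under <work_dir>/volumes/roles/<role>/<persistence id>; this layout is an assumption for illustration, not the verified Mesos implementation.

// A sketch only: the real helper lives in the agent's paths module and may
// differ; the directory layout below is an assumption.
std::string getPersistentVolumePath(
    const std::string& workDir,
    const Resource& resource)
{
  CHECK(resource.disk().has_persistence());

  // Compose: <work_dir>/volumes/roles/<role>/<persistence id>.
  return path::join(
      workDir,
      "volumes",
      "roles",
      resource.role(),
      resource.disk().persistence().id());
}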
virtual void resourceOffers(
    SchedulerDriver* driver,
    const vector<Offer>& offers)
{
  foreach (const Offer& offer, offers) {
    LOG(INFO) << "Received offer " << offer.id() << " with "
              << offer.resources();

    // If the framework got this offer for the first time, the state is
    // `State::INIT`; the framework will reserve it (sending a RESERVE
    // operation to the master) in this loop.
    if (!states.contains(offer.slave_id())) {
      // If all tasks were launched, do not reserve more resources; wait
      // for them to finish and unreserve resources.
      if (tasksLaunched == totalTasks) {
        continue;
      }

      states[offer.slave_id()] = State::INIT;
    }

    const State state = states[offer.slave_id()];

    Filters filters;
    filters.set_refuse_seconds(0);

    switch (state) {
      case State::INIT: {
        // The framework reserves resources from this offer for only one
        // task; the task will be dispatched when the reserved resources
        // are re-offered to this framework.
        Resources resources = offer.resources();
        Offer::Operation reserve = RESERVE(taskResources);

        Try<Resources> apply = resources.apply(reserve);
        if (apply.isError()) {
          LOG(INFO) << "Failed to reserve resources for task in offer "
                    << stringify(offer.id()) << ": " << apply.error();
          break;
        }

        driver->acceptOffers({offer.id()}, {reserve}, filters);
        states[offer.slave_id()] = State::RESERVING;
        break;
      }
      case State::RESERVING: {
        Resources resources = offer.resources();
        Resources reserved = resources.reserved(role);
        if (!reserved.contains(taskResources)) {
          break;
        }
        states[offer.slave_id()] = State::RESERVED;

        // We fall through here to save an offer cycle.
      }
      case State::RESERVED: {
        Resources resources = offer.resources();
        Resources reserved = resources.reserved(role);

        CHECK(reserved.contains(taskResources));

        // If all tasks were launched, unreserve those resources.
        if (tasksLaunched == totalTasks) {
          driver->acceptOffers(
              {offer.id()}, {UNRESERVE(taskResources)}, filters);
          states[offer.slave_id()] = State::UNRESERVING;
          break;
        }

        // The framework dispatches a task on the reserved resources.
        CHECK(tasksLaunched < totalTasks);

        // Launch tasks on reserved resources.
        const string& taskId = stringify(tasksLaunched++);
        LOG(INFO) << "Launching task " << taskId << " using offer "
                  << offer.id();

        TaskInfo task;
        task.set_name("Task " + taskId + ": " + command);
        task.mutable_task_id()->set_value(taskId);
        task.mutable_slave_id()->MergeFrom(offer.slave_id());
        task.mutable_command()->set_shell(true);
        task.mutable_command()->set_value(command);
        task.mutable_resources()->MergeFrom(taskResources);
        driver->launchTasks(offer.id(), {task}, filters);

        states[offer.slave_id()] = State::TASK_RUNNING;
        break;
      }
      case State::TASK_RUNNING:
        LOG(INFO) << "The task on " << offer.slave_id()
                  << " is running, waiting for task done";
        break;
      case State::UNRESERVING: {
        Resources resources = offer.resources();
        Resources reserved = resources.reserved(role);
        if (!reserved.contains(taskResources)) {
          states[offer.slave_id()] = State::UNRESERVED;
        }
        break;
      }
      case State::UNRESERVED:
        // If the state of the slave is UNRESERVED, ignore it. The driver
        // is stopped when all tasks are done and all resources are
        // unreserved.
        break;
    }
  }
}
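The scheduler above calls RESERVE() and UNRESERVE() helpers that are defined elsewhere in the example framework. The sketch below shows what such helpers might look like when built on Offer::Operation; it is an illustration under that assumption, not the framework's actual definitions.

// Illustrative only; the example framework defines its own helpers.
static Offer::Operation RESERVE(const Resources& resources)
{
  // Wrap the (already reserved-role-annotated) resources in a RESERVE
  // operation that can be passed to SchedulerDriver::acceptOffers().
  Offer::Operation operation;
  operation.set_type(Offer::Operation::RESERVE);
  operation.mutable_reserve()->mutable_resources()->CopyFrom(resources);
  return operation;
}

static Offer::Operation UNRESERVE(const Resources& resources)
{
  // The inverse operation: give the reserved resources back to the role's
  // unreserved pool.
  Offer::Operation operation;
  operation.set_type(Offer::Operation::UNRESERVE);
  operation.mutable_unreserve()->mutable_resources()->CopyFrom(resources);
  return operation;
}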
Future<Nothing> LinuxFilesystemIsolatorProcess::update(
    const ContainerID& containerId,
    const Resources& resources)
{
  if (containerId.has_parent()) {
    return Failure("Not supported for nested containers");
  }

  // Mount persistent volumes. We do this in the host namespace and
  // rely on mount propagation for them to be visible inside the
  // container.
  if (!infos.contains(containerId)) {
    return Failure("Unknown container");
  }

  const Owned<Info>& info = infos[containerId];

  Resources current = info->resources;

  // We first remove unneeded persistent volumes.
  foreach (const Resource& resource, current.persistentVolumes()) {
    // This is enforced by the master.
    CHECK(resource.disk().has_volume());

    // Ignore absolute and nested paths.
    const string& containerPath = resource.disk().volume().container_path();
    if (strings::contains(containerPath, "/")) {
      LOG(WARNING) << "Skipping updating mount for persistent volume "
                   << resource << " of container " << containerId
                   << " because the container path '" << containerPath
                   << "' contains slash";
      continue;
    }

    if (resources.contains(resource)) {
      continue;
    }

    // Determine the target of the mount.
    string target = path::join(info->directory, containerPath);

    LOG(INFO) << "Removing mount '" << target << "' for persistent volume "
              << resource << " of container " << containerId;

    // The unmount will fail if the task/executor is still using files
    // or directories under 'target'.
    Try<Nothing> unmount = fs::unmount(target);
    if (unmount.isError()) {
      return Failure(
          "Failed to unmount unneeded persistent volume at '" +
          target + "': " + unmount.error());
    }

    // NOTE: This is a non-recursive rmdir.
    Try<Nothing> rmdir = os::rmdir(target, false);
    if (rmdir.isError()) {
      return Failure(
          "Failed to remove persistent volume mount point at '" +
          target + "': " + rmdir.error());
    }
  }

  // Get user and group info for this task based on the task's sandbox.
  struct stat s;
  if (::stat(info->directory.c_str(), &s) < 0) {
    return Failure("Failed to get ownership for '" + info->directory +
                   "': " + os::strerror(errno));
  }

  const uid_t uid = s.st_uid;
  const gid_t gid = s.st_gid;

  // We then mount new persistent volumes.
  foreach (const Resource& resource, resources.persistentVolumes()) {
    // This is enforced by the master.
    CHECK(resource.disk().has_volume());

    // Ignore absolute and nested paths.
    const string& containerPath = resource.disk().volume().container_path();
    if (strings::contains(containerPath, "/")) {
      LOG(WARNING) << "Skipping updating mount for persistent volume "
                   << resource << " of container " << containerId
                   << " because the container path '" << containerPath
                   << "' contains slash";
      continue;
    }

    if (current.contains(resource)) {
      continue;
    }

    // Determine the source of the mount.
    string source = paths::getPersistentVolumePath(flags.work_dir, resource);

    bool isVolumeInUse = false;

    foreachvalue (const Owned<Info>& info, infos) {
      if (info->resources.contains(resource)) {
        isVolumeInUse = true;
        break;
      }
    }

    // Set the ownership of the persistent volume to match that of the sandbox
    // directory if the volume is not already in use. If the volume is
    // currently in use by other containers, tasks in this container may fail
    // to read from or write to the persistent volume due to incompatible
    // ownership and file system permissions.
    if (!isVolumeInUse) {
      LOG(INFO) << "Changing the ownership of the persistent volume at '"
                << source << "' with uid " << uid << " and gid " << gid;

      Try<Nothing> chown = os::chown(uid, gid, source, false);
      if (chown.isError()) {
        return Failure(
            "Failed to change the ownership of the persistent volume at '" +
            source + "' with uid " + stringify(uid) +
            " and gid " + stringify(gid) + ": " + chown.error());
      }
    }

    // Determine the target of the mount.
    string target = path::join(info->directory, containerPath);

    if (os::exists(target)) {
      // NOTE: There are two scenarios in which the mount target may
      // already exist:
      // 1. 'info->resources' will be reset when the slave restarts and
      //    recovers. When the slave calls 'containerizer->update' after
      //    the executor re-registers, we'll try to re-mount all the
      //    already mounted volumes.
      // 2. There may be multiple references to the persistent
      //    volume's mount target. E.g., a host volume and a
      //    persistent volume are both specified, and the source
      //    of the host volume is the same as the container path
      //    of the persistent volume.

      // Check that the source of the mount matches the entry with the
      // same target in the mount table if one can be found. If not,
      // mount the persistent volume as we do below. This is possible
      // because the slave could crash after it unmounts the volume but
      // before it is able to delete the mount point.
      Try<fs::MountInfoTable> table = fs::MountInfoTable::read();
      if (table.isError()) {
        return Failure("Failed to get mount table: " + table.error());
      }

      // Check whether this particular persistent volume is already mounted.
      bool volumeMounted = false;

      foreach (const fs::MountInfoTable::Entry& entry, table->entries) {
        // TODO(gilbert): Check that the source of the mount matches the
        // entry's root. Note that the root is relative to the root of its
        // parent mount. See:
        // http://man7.org/linux/man-pages/man5/proc.5.html
        if (target == entry.target) {
          volumeMounted = true;
          break;
        }
      }

      if (volumeMounted) {
        continue;
      }
    }

    Try<Nothing> mkdir = os::mkdir(target);
    if (mkdir.isError()) {
      return Failure(
          "Failed to create persistent volume mount point at '" +
          target + "': " + mkdir.error());
    }

    LOG(INFO) << "Mounting '" << source << "' to '" << target
              << "' for persistent volume " << resource
              << " of container " << containerId;

    Try<Nothing> mount = fs::mount(source, target, None(), MS_BIND, nullptr);
    if (mount.isError()) {
      return Failure(
          "Failed to mount persistent volume from '" +
          source + "' to '" + target + "': " + mount.error());
    }

    // If the mount needs to be read-only, do a remount.
    if (resource.disk().volume().mode() == Volume::RO) {
      mount = fs::mount(
          None(), target, None(), MS_BIND | MS_RDONLY | MS_REMOUNT, nullptr);

      if (mount.isError()) {
        return Failure(
            "Failed to remount persistent volume as read-only from '" +
            source + "' to '" + target + "': " + mount.error());
      }
    }
  }

  // Store the new resources.
  info->resources = resources;

  return Nothing();
}