int main(int argc, char** argv) { GOOGLE_PROTOBUF_VERIFY_VERSION; master::Flags flags; // The following flags are executable specific (e.g., since we only // have one instance of libprocess per execution, we only want to // advertise the IP and port option once, here). Option<string> ip; flags.add(&ip, "ip", "IP address to listen on"); uint16_t port; flags.add(&port, "port", "Port to listen on", MasterInfo().port()); Option<string> zk; flags.add(&zk, "zk", "ZooKeeper URL (used for leader election amongst masters)\n" "May be one of:\n" " zk://host1:port1,host2:port2,.../path\n" " zk://username:password@host1:port1,host2:port2,.../path\n" " file:///path/to/file (where file contains one of the above)"); bool help; flags.add(&help, "help", "Prints this help message", false); Try<Nothing> load = flags.load("MESOS_", argc, argv); if (load.isError()) { cerr << load.error() << endl; usage(argv[0], flags); exit(1); } if (flags.version) { version(); exit(0); } if (help) { usage(argv[0], flags); exit(1); } // Initialize modules. Note that since other subsystems may depend // upon modules, we should initialize modules before anything else. if (flags.modules.isSome()) { Try<Nothing> result = ModuleManager::load(flags.modules.get()); if (result.isError()) { cerr << "Error loading modules: " << result.error() << endl; exit(1); } } // Initialize libprocess. if (ip.isSome()) { os::setenv("LIBPROCESS_IP", ip.get()); } os::setenv("LIBPROCESS_PORT", stringify(port)); process::initialize("master"); logging::initialize(argv[0], flags, true); // Catch signals. 
LOG(INFO) << "Build: " << build::DATE << " by " << build::USER; LOG(INFO) << "Version: " << MESOS_VERSION; if (build::GIT_TAG.isSome()) { LOG(INFO) << "Git tag: " << build::GIT_TAG.get(); } if (build::GIT_SHA.isSome()) { LOG(INFO) << "Git SHA: " << build::GIT_SHA.get(); } allocator::AllocatorProcess* allocatorProcess = new allocator::HierarchicalDRFAllocatorProcess(); allocator::Allocator* allocator = new allocator::Allocator(allocatorProcess); state::Storage* storage = NULL; Log* log = NULL; if (flags.registry == "in_memory") { if (flags.registry_strict) { EXIT(1) << "Cannot use '--registry_strict' when using in-memory storage" << " based registry"; } storage = new state::InMemoryStorage(); } else if (flags.registry == "replicated_log" || flags.registry == "log_storage") { // TODO(bmahler): "log_storage" is present for backwards // compatibility, can be removed before 0.19.0. if (flags.work_dir.isNone()) { EXIT(1) << "--work_dir needed for replicated log based registry"; } Try<Nothing> mkdir = os::mkdir(flags.work_dir.get()); if (mkdir.isError()) { EXIT(1) << "Failed to create work directory '" << flags.work_dir.get() << "': " << mkdir.error(); } if (zk.isSome()) { // Use replicated log with ZooKeeper. 
if (flags.quorum.isNone()) { EXIT(1) << "Need to specify --quorum for replicated log based" << " registry when using ZooKeeper"; } string zk_; if (strings::startsWith(zk.get(), "file://")) { const string& path = zk.get().substr(7); const Try<string> read = os::read(path); if (read.isError()) { EXIT(1) << "Failed to read from file at '" + path + "': " << read.error(); } zk_ = read.get(); } else { zk_ = zk.get(); } Try<URL> url = URL::parse(zk_); if (url.isError()) { EXIT(1) << "Error parsing ZooKeeper URL: " << url.error(); } log = new Log( flags.quorum.get(), path::join(flags.work_dir.get(), "replicated_log"), url.get().servers, flags.zk_session_timeout, path::join(url.get().path, "log_replicas"), url.get().authentication, flags.log_auto_initialize); } else { // Use replicated log without ZooKeeper. log = new Log( 1, path::join(flags.work_dir.get(), "replicated_log"), set<UPID>(), flags.log_auto_initialize); } storage = new state::LogStorage(log); } else { EXIT(1) << "'" << flags.registry << "' is not a supported" << " option for registry persistence"; } CHECK_NOTNULL(storage); state::protobuf::State* state = new state::protobuf::State(storage); Registrar* registrar = new Registrar(flags, state); Repairer* repairer = new Repairer(); Files files; MasterContender* contender; MasterDetector* detector; // TODO(vinod): 'MasterContender::create()' should take // Option<string>. Try<MasterContender*> contender_ = MasterContender::create(zk.get("")); if (contender_.isError()) { EXIT(1) << "Failed to create a master contender: " << contender_.error(); } contender = contender_.get(); // TODO(vinod): 'MasterDetector::create()' should take // Option<string>. 
Try<MasterDetector*> detector_ = MasterDetector::create(zk.get("")); if (detector_.isError()) { EXIT(1) << "Failed to create a master detector: " << detector_.error(); } detector = detector_.get(); Option<Authorizer*> authorizer = None(); if (flags.acls.isSome()) { Try<Owned<Authorizer> > authorizer_ = Authorizer::create(flags.acls.get()); if (authorizer_.isError()) { EXIT(1) << "Failed to initialize the authorizer: " << authorizer_.error() << " (see --acls flag)"; } Owned<Authorizer> authorizer__ = authorizer_.get(); authorizer = authorizer__.release(); } LOG(INFO) << "Starting Mesos master"; Master* master = new Master( allocator, registrar, repairer, &files, contender, detector, authorizer, flags); if (zk.isNone()) { // It means we are using the standalone detector so we need to // appoint this Master as the leader. dynamic_cast<StandaloneMasterDetector*>(detector)->appoint(master->info()); } process::spawn(master); process::wait(master->self()); delete master; delete allocator; delete allocatorProcess; delete registrar; delete repairer; delete state; delete storage; delete log; delete contender; delete detector; if (authorizer.isSome()) { delete authorizer.get(); } return 0; }
// TODO(karya): Show library author info for failed library/module. Try<Nothing> ModuleManager::verifyModule( const string& moduleName, const ModuleBase* moduleBase) { CHECK_NOTNULL(moduleBase); if (moduleBase->mesosVersion == NULL || moduleBase->moduleApiVersion == NULL || moduleBase->authorName == NULL || moduleBase->authorEmail == NULL || moduleBase->description == NULL || moduleBase->kind == NULL) { return Error("Error loading module '" + moduleName + "'; missing fields"); } // Verify module API version. if (stringify(moduleBase->moduleApiVersion) != MESOS_MODULE_API_VERSION) { return Error( "Module API version mismatch. Mesos has: " MESOS_MODULE_API_VERSION ", " "library requires: " + stringify(moduleBase->moduleApiVersion)); } if (!kindToVersion.contains(moduleBase->kind)) { return Error("Unknown module kind: " + stringify(moduleBase->kind)); } Try<Version> mesosVersion = Version::parse(MESOS_VERSION); CHECK_SOME(mesosVersion); Try<Version> minimumVersion = Version::parse(kindToVersion[moduleBase->kind]); CHECK_SOME(minimumVersion); Try<Version> moduleMesosVersion = Version::parse(moduleBase->mesosVersion); if (moduleMesosVersion.isError()) { return Error(moduleMesosVersion.error()); } if (moduleMesosVersion.get() < minimumVersion.get()) { return Error( "Minimum supported mesos version for '" + stringify(moduleBase->kind) + "' is " + stringify(minimumVersion.get()) + ", but module is compiled " "with version " + stringify(moduleMesosVersion.get())); } if (moduleBase->compatible == NULL) { if (moduleMesosVersion.get() != mesosVersion.get()) { return Error( "Mesos has version " + stringify(mesosVersion.get()) + ", but module is compiled with version " + stringify(moduleMesosVersion.get())); } return Nothing(); } if (moduleMesosVersion.get() > mesosVersion.get()) { return Error( "Mesos has version " + stringify(mesosVersion.get()) + ", but module is compiled with version " + stringify(moduleMesosVersion.get())); } bool result = moduleBase->compatible(); if 
(!result) { return Error("Module " + moduleName + "has determined to be incompatible"); } return Nothing(); }
// Opens every library listed in 'modules' (at most once per library
// path) and registers each module manifest it declares.
//
// For each module the manifest symbol is looked up in its library,
// checked with verifyModule(), stored in 'moduleBases', and its
// parameters are copied into 'moduleParameters'.
//
// Returns Nothing on success, or an Error for the first library or
// module that could not be processed.
Try<Nothing> ModuleManager::load(const Modules& modules)
{
  Lock lock(&mutex);
  initialize();

  foreach (const Modules::Library& library, modules.libraries()) {
    // A library is identified either by an explicit file path or by a
    // name that is expanded to a file name; the path takes precedence.
    if (!library.has_file() && !library.has_name()) {
      return Error("Library name or path not provided");
    }

    const string libraryPath = library.has_file()
      ? library.file()
      : os::libraries::expandName(library.name());

    // Open the library unless it is already open.
    if (!dynamicLibraries.contains(libraryPath)) {
      Owned<DynamicLibrary> opened(new DynamicLibrary());

      Try<Nothing> open = opened->open(libraryPath);
      if (open.isError()) {
        return Error(
            "Error opening library: '" + libraryPath + "': " + open.error());
      }

      dynamicLibraries[libraryPath] = opened;
    }

    // Load module manifests.
    foreach (const Modules::Library::Module& entry, library.modules()) {
      if (!entry.has_name()) {
        return Error(
            "Error: module name not provided with library '" +
            libraryPath + "'");
      }

      // Module names must be unique across all loaded libraries.
      const std::string moduleName = entry.name();
      if (moduleBases.contains(moduleName)) {
        return Error("Error loading duplicate module '" + moduleName + "'");
      }

      // The manifest is exported under the module's name.
      Try<void*> symbol =
        dynamicLibraries[libraryPath]->loadSymbol(moduleName);
      if (symbol.isError()) {
        return Error(
            "Error loading module '" + moduleName + "': " + symbol.error());
      }

      ModuleBase* moduleBase = static_cast<ModuleBase*>(symbol.get());

      // Verify module compatibility including version, etc.
      Try<Nothing> verified = verifyModule(moduleName, moduleBase);
      if (verified.isError()) {
        return Error(
            "Error verifying module '" + moduleName + "': " +
            verified.error());
      }

      moduleBases[moduleName] = moduleBase;

      // Now copy the supplied module-specific parameters.
      moduleParameters[moduleName].mutable_parameter()->CopyFrom(
          entry.parameters());
    }
  }

  return Nothing();
}
// TODO(josephw): Parse this string with a protobuf. Try<Docker::Container> Docker::Container::create(const string& output) { Try<JSON::Array> parse = JSON::parse<JSON::Array>(output); if (parse.isError()) { return Error("Failed to parse JSON: " + parse.error()); } // TODO(benh): Handle the case where the short container ID was // not sufficiently unique and 'array.values.size() > 1'. JSON::Array array = parse.get(); if (array.values.size() != 1) { return Error("Failed to find container"); } CHECK(array.values.front().is<JSON::Object>()); JSON::Object json = array.values.front().as<JSON::Object>(); Result<JSON::String> idValue = json.find<JSON::String>("Id"); if (idValue.isNone()) { return Error("Unable to find Id in container"); } else if (idValue.isError()) { return Error("Error finding Id in container: " + idValue.error()); } string id = idValue.get().value; Result<JSON::String> nameValue = json.find<JSON::String>("Name"); if (nameValue.isNone()) { return Error("Unable to find Name in container"); } else if (nameValue.isError()) { return Error("Error finding Name in container: " + nameValue.error()); } string name = nameValue.get().value; Result<JSON::Object> stateValue = json.find<JSON::Object>("State"); if (stateValue.isNone()) { return Error("Unable to find State in container"); } else if (stateValue.isError()) { return Error("Error finding State in container: " + stateValue.error()); } Result<JSON::Number> pidValue = stateValue.get().find<JSON::Number>("Pid"); if (pidValue.isNone()) { return Error("Unable to find Pid in State"); } else if (pidValue.isError()) { return Error("Error finding Pid in State: " + pidValue.error()); } pid_t pid = pid_t(pidValue.get().as<int64_t>()); Option<pid_t> optionalPid; if (pid != 0) { optionalPid = pid; } Result<JSON::String> startedAtValue = stateValue.get().find<JSON::String>("StartedAt"); if (startedAtValue.isNone()) { return Error("Unable to find StartedAt in State"); } else if (startedAtValue.isError()) { return 
Error("Error finding StartedAt in State: " + startedAtValue.error()); } bool started = startedAtValue.get().value != "0001-01-01T00:00:00Z"; Result<JSON::String> ipAddressValue = json.find<JSON::String>("NetworkSettings.IPAddress"); if (ipAddressValue.isNone()) { return Error("Unable to find NetworkSettings.IPAddress in container"); } else if (ipAddressValue.isError()) { return Error( "Error finding NetworkSettings.Name in container: " + ipAddressValue.error()); } string ipAddress = ipAddressValue.get().value; return Docker::Container(output, id, name, optionalPid, started, ipAddress); }
// This method will be called when a container running as non-root user tries // to use a shared persistent volume or a PARENT type SANDBOX_PATH volume, the // parameter `path` will be the source path of the volume. Future<gid_t> allocate(const string& path, VolumeGidInfo::Type type) { gid_t gid; // If a gid has already been allocated for the specified path, // just return the gid. if (infos.contains(path)) { gid = infos[path].gid(); LOG(INFO) << "Use the allocated gid " << gid << " of the volume path '" << path << "'"; // If we are already setting ownership for the specified path, skip the // additional setting. if (setting.contains(path)) { return setting[path]->future(); } } else { struct stat s; if (::stat(path.c_str(), &s) < 0) { return Failure("Failed to stat '" + path + "': " + os::strerror(errno)); } // If the gid of the specified path is in the total gid range, just // return the gid. This could happen in the case that nested container // uses persistent volume, in which case we did a workaround in the // default executor to set up a volume mapping (i.e., map the persistent // volume to a PARENT type SANDBOX_PATH volume for the nested container) // so that the nested container can access the persistent volume. // // Please note that in the case of shared persistent volume, operator // should NOT restart agent with a different total gid range, otherwise // the gid of the shared persistent volume may be overwritten if a nested // container tries to use the shared persistent volume after the restart. if (totalGids.contains(s.st_gid)) { gid = s.st_gid; LOG(INFO) << "Use the gid " << gid << " for the volume path '" << path << "' which should be the mount point of another volume " << "which is actually allocated with the gid"; } else { // Allocate a free gid to the specified path and then set the // ownership for it. 
if (freeGids.empty()) { return Failure( "Failed to allocate gid to the volume path '" + path + "' because the free gid range is exhausted"); } gid = freeGids.begin()->lower(); LOG(INFO) << "Allocating gid " << gid << " to the volume path '" << path << "'"; freeGids -= gid; --metrics.volume_gids_free; VolumeGidInfo info; info.set_type(type); info.set_path(path); info.set_gid(gid); infos.put(path, info); Try<Nothing> status = persist(); if (status.isError()) { return Failure( "Failed to save state of volume gid infos: " + status.error()); } Owned<Promise<gid_t>> promise(new Promise<gid_t>()); Future<gid_t> future = async(&setVolumeOwnership, path, gid, true) .then([path, gid](const Try<Nothing>& result) -> Future<gid_t> { if (result.isError()) { return Failure( "Failed to set the owner group of the volume path '" + path + "' to " + stringify(gid) + ": " + result.error()); } return gid; }) .onAny(defer(self(), [=](const Future<gid_t>&) { setting.erase(path); })); promise->associate(future); setting[path] = promise; return promise->future(); } } return gid; }
// Recovers the checkpointed resources found under 'rootDir'.
//
// Reads Resource records from the resources file until the first
// record that cannot be read, then truncates the file to the end of
// the last valid record.
//
// rootDir: directory from which the resources file path is derived.
// strict:  when true, failures to open, read or close the file are
//          returned as errors; when false they are logged, counted in
//          'state.errors' and the state recovered so far is returned.
//
// Returns an empty state if the resources file does not exist. A
// failure to seek in or truncate the file is always returned as an
// error, independent of 'strict'. The file descriptor is closed on
// every return path.
Try<ResourcesState> ResourcesState::recover(
    const std::string& rootDir,
    bool strict)
{
  ResourcesState state;

  const string& path = paths::getResourcesInfoPath(rootDir);
  if (!os::exists(path)) {
    LOG(INFO) << "Failed to find resources file '" << path << "'";
    return state;
  }

  Try<int> fd = os::open(path, O_RDWR | O_CLOEXEC);
  if (fd.isError()) {
    string message =
      "Failed to open resources file '" + path + "': " + fd.error();

    if (strict) {
      return Error(message);
    } else {
      LOG(WARNING) << message;
      state.errors++;
      return state;
    }
  }

  Result<Resource> resource = None();
  while (true) {
    // Ignore errors due to partial protobuf read and enable undoing
    // failed reads by reverting to the previous seek position.
    resource = ::protobuf::read<Resource>(fd.get(), true, true);
    if (!resource.isSome()) {
      break;
    }

    state.resources += resource.get();
  }

  // Determine where the last valid resource ends. The offset is checked
  // so that a failed seek is never passed on as a truncation length.
  const off_t offset = lseek(fd.get(), 0, SEEK_CUR);
  if (offset < 0) {
    // Capture errno before closing, since closing may overwrite it.
    const ErrnoError error("Failed to lseek resources file '" + path + "'");
    os::close(fd.get());
    return error;
  }

  // Always truncate the file to contain only valid resources.
  // NOTE: This is safe even though we ignore partial protobuf read
  // errors above, because the 'fd' is properly set to the end of the
  // last valid resource by 'protobuf::read()'.
  if (ftruncate(fd.get(), offset) != 0) {
    // Capture errno before closing, since closing may overwrite it.
    const ErrnoError error(
        "Failed to truncate resources file '" + path + "'");
    os::close(fd.get());
    return error;
  }

  // After reading a non-corrupted resources file, 'resource' should be
  // 'none'.
  if (resource.isError()) {
    string message =
      "Failed to read resources file '" + path + "': " + resource.error();

    // The read error is what gets reported; the descriptor is released
    // either way.
    os::close(fd.get());

    if (strict) {
      return Error(message);
    } else {
      LOG(WARNING) << message;
      state.errors++;
      return state;
    }
  }

  Try<Nothing> close = os::close(fd.get());
  if (close.isError()) {
    string message =
      "Failed to close resources file '" + path + "': " + close.error();

    if (strict) {
      return Error(message);
    } else {
      LOG(WARNING) << message;
      state.errors++;
      return state;
    }
  }

  return state;
}
int main(int argc, char** argv) { GOOGLE_PROTOBUF_VERIFY_VERSION; using mesos::internal::tests::flags; // Needed to disabmiguate. // Load flags from environment and command line but allow unknown // flags (since we might have gtest/gmock flags as well). Try<Nothing> load = flags.load("MESOS_", argc, argv, true); if (load.isError()) { cerr << flags.usage(load.error()) << endl; return EXIT_FAILURE; } if (flags.help) { cout << flags.usage() << endl; testing::InitGoogleTest(&argc, argv); // Get usage from gtest too. return EXIT_SUCCESS; } // Initialize Modules. Try<Nothing> result = tests::initModules(flags.modules); if (result.isError()) { cerr << "Error initializing modules: " << result.error() << endl; return EXIT_FAILURE; } // Disable /metrics/snapshot rate limiting, but do not // overwrite whatever the user set. os::setenv("LIBPROCESS_METRICS_SNAPSHOT_ENDPOINT_RATE_LIMIT", "", false); // If `process::initialize()` returns `false`, then it was called before this // invocation, meaning the authentication realm for libprocess-level HTTP // endpoints was set incorrectly. This should be the first invocation. if (!process::initialize(None(), DEFAULT_HTTP_AUTHENTICATION_REALM)) { EXIT(EXIT_FAILURE) << "The call to `process::initialize()` in the tests' " << "`main()` was not the function's first invocation"; } // Be quiet by default! if (!flags.verbose) { flags.quiet = true; } // Initialize logging. logging::initialize(argv[0], flags, true); // Initialize gmock/gtest. testing::InitGoogleTest(&argc, argv); testing::FLAGS_gtest_death_test_style = "threadsafe"; cout << "Source directory: " << flags.source_dir << endl; cout << "Build directory: " << flags.build_dir << endl; // Instantiate our environment. Note that it will be managed by // gtest after we add it via testing::AddGlobalTestEnvironment. environment = new Environment(flags); testing::AddGlobalTestEnvironment(environment); return RUN_ALL_TESTS(); }
// Brings the symlinks in the container's directory in line with the
// persistent volumes contained in 'resources'.
//
// Symlinks of volumes that are no longer part of 'resources' are
// removed. For each newly added volume the ownership of the volume is
// changed to that of the container directory (not on Windows) and a
// symlink named after the container path is created. Volumes whose
// container path contains a slash are skipped with a warning.
//
// Returns a Failure for an unknown container or when a filesystem
// operation fails; 'info->resources' is only updated on success.
Future<Nothing> PosixFilesystemIsolatorProcess::update(
    const ContainerID& containerId,
    const Resources& resources)
{
  if (!infos.contains(containerId)) {
    return Failure("Unknown container");
  }

  const Owned<Info>& info = infos[containerId];

  // TODO(jieyu): Currently, we only allow non-nested relative
  // container paths for volumes. This is enforced by the master. For
  // those volumes, we create symlinks in the executor directory.
  Resources current = info->resources;

  // We first remove unneeded persistent volumes.
  foreach (const Resource& resource, current.persistentVolumes()) {
    // This is enforced by the master.
    CHECK(resource.disk().has_volume());

    // Ignore absolute and nested paths.
    const string& containerPath = resource.disk().volume().container_path();
    if (strings::contains(containerPath, "/")) {
      LOG(WARNING) << "Skipping updating symlink for persistent volume "
                   << resource << " of container " << containerId
                   << " because the container path '" << containerPath
                   << "' contains slash";
      continue;
    }

    // The volume is still in use; keep its symlink.
    if (resources.contains(resource)) {
      continue;
    }

    string link = path::join(info->directory, containerPath);

    LOG(INFO) << "Removing symlink '" << link << "' for persistent volume "
              << resource << " of container " << containerId;

    Try<Nothing> rm = os::rm(link);
    if (rm.isError()) {
      return Failure(
          "Failed to remove the symlink for the unneeded "
          "persistent volume at '" + link + "': " + rm.error());
    }
  }

  // We then link additional persistent volumes.
  foreach (const Resource& resource, resources.persistentVolumes()) {
    // This is enforced by the master.
    CHECK(resource.disk().has_volume());

    // Ignore absolute and nested paths.
    const string& containerPath = resource.disk().volume().container_path();
    if (strings::contains(containerPath, "/")) {
      LOG(WARNING) << "Skipping updating symlink for persistent volume "
                   << resource << " of container " << containerId
                   << " because the container path '" << containerPath
                   << "' contains slash";
      continue;
    }

    // The volume has been handled by an earlier update.
    if (current.contains(resource)) {
      continue;
    }

    string original = paths::getPersistentVolumePath(flags.work_dir, resource);

    // Set the ownership of the persistent volume to match that of the
    // sandbox directory.
    //
    // NOTE: Currently, persistent volumes in Mesos are exclusive,
    // meaning that if a persistent volume is used by one task or
    // executor, it cannot be concurrently used by other task or
    // executor. But if we allow multiple executors to use same
    // persistent volume at the same time in the future, the ownership
    // of the persistent volume may conflict here.
    //
    // TODO(haosdent): Consider letting the frameworks specify the
    // user/group of the persistent volumes.
    struct stat s;
    if (::stat(info->directory.c_str(), &s) < 0) {
      return Failure("Failed to get ownership for '" + info->directory +
                     "': " + os::strerror(errno));
    }

    // TODO(hausdorff): (MESOS-5461) Persistent volumes maintain the invariant
    // that they are used by one task at a time. This is currently enforced by
    // `os::chown`. Windows does not support `os::chown`, we will need to
    // revisit this later.
#ifndef __WINDOWS__
    LOG(INFO) << "Changing the ownership of the persistent volume at '"
              << original << "' with uid " << s.st_uid
              << " and gid " << s.st_gid;

    Try<Nothing> chown = os::chown(s.st_uid, s.st_gid, original, true);
    if (chown.isError()) {
      return Failure(
          "Failed to change the ownership of the persistent volume at '" +
          original + "' with uid " + stringify(s.st_uid) +
          " and gid " + stringify(s.st_gid) + ": " + chown.error());
    }
#endif

    string link = path::join(info->directory, containerPath);

    if (os::exists(link)) {
      // NOTE: This is possible because 'info->resources' will be
      // reset when slave restarts and recovers. When the slave calls
      // 'containerizer->update' after the executor re-registers,
      // we'll try to relink all the already symlinked volumes.
      Result<string> realpath = os::realpath(link);
      if (!realpath.isSome()) {
        return Failure(
            "Failed to get the realpath of symlink '" + link + "': " +
            (realpath.isError() ? realpath.error() : "No such directory"));
      }

      // A sanity check to make sure the target of the symlink does
      // not change. In fact, this is not supposed to happen.
      // NOTE: Here, we compare the realpaths because 'original' might
      // contain symbolic links.
      Result<string> _original = os::realpath(original);
      if (!_original.isSome()) {
        return Failure(
            "Failed to get the realpath of volume '" + original + "': " +
            (_original.isError() ? _original.error() : "No such directory"));
      }

      if (realpath.get() != _original.get()) {
        // 'realpath' is where the existing symlink currently points and
        // '_original' is the target being requested now.
        return Failure(
            "The existing symlink '" + link + "' points to '" +
            realpath.get() + "' and the new target is '" +
            _original.get() + "'");
      }
    } else {
      LOG(INFO) << "Adding symlink from '" << original << "' to '"
                << link << "' for persistent volume " << resource
                << " of container " << containerId;

      Try<Nothing> symlink = ::fs::symlink(original, link);
      if (symlink.isError()) {
        return Failure(
            "Failed to symlink persistent volume from '" +
            original + "' to '" + link + "': " + symlink.error());
      }
    }
  }

  // Store the updated resources.
  info->resources = resources;

  return Nothing();
}
// Launches the command of 'task' through "/bin/sh -c" in a child
// process that runs in its own session, starts monitoring that process
// and reports TASK_RUNNING through 'driver'.
//
// Only one task can be launched: any further call reports TASK_FAILED
// for the given task and returns. Failures of the pipe, fork, exec and
// pid hand-over steps abort the process.
void launchTask(ExecutorDriver* driver, const TaskInfo& task)
{
  if (launched) {
    TaskStatus status;
    status.mutable_task_id()->MergeFrom(task.task_id());
    status.set_state(TASK_FAILED);
    status.set_message(
        "Attempted to run multiple tasks using a \"command\" executor");

    driver->sendStatusUpdate(status);
    return;
  }

  CHECK(task.has_command()) << "Expecting task " << task.task_id()
                            << " to have a command!";

  std::cout << "Starting task " << task.task_id() << std::endl;

  // TODO(benh): Clean this up with the new 'Fork' abstraction.
  // Use pipes to determine which child has successfully changed
  // session. This is needed as the setsid call can fail from other
  // processes having the same group id.
  int pipes[2];
  if (pipe(pipes) < 0) {
    perror("Failed to create a pipe");
    abort();
  }

  // Set the FD_CLOEXEC flags on these pipes
  Try<Nothing> cloexec = os::cloexec(pipes[0]);
  if (cloexec.isError()) {
    std::cerr << "Failed to cloexec(pipe[0]): " << cloexec.error()
              << std::endl;
    abort();
  }

  cloexec = os::cloexec(pipes[1]);
  if (cloexec.isError()) {
    std::cerr << "Failed to cloexec(pipe[1]): " << cloexec.error()
              << std::endl;
    abort();
  }

  if ((pid = fork()) == -1) {
    std::cerr << "Failed to fork to run '" << task.command().value() << "': "
              << strerror(errno) << std::endl;
    abort();
  }

  if (pid == 0) {
    // In child process, we make cleanup easier by putting process
    // into it's own session.
    os::close(pipes[0]);

    // NOTE: We setsid() in a loop because setsid() might fail if another
    // process has the same process group id as the calling process.
    while ((pid = setsid()) == -1) {
      perror("Could not put command in its own session, setsid");

      std::cout << "Forking another process and retrying" << std::endl;

      if ((pid = fork()) == -1) {
        perror("Failed to fork to launch command");
        abort();
      }

      if (pid > 0) {
        // In parent process. It is ok to suicide here, because
        // we're not watching this process.
        exit(0);
      }
    }

    // Report the value returned by the successful setsid() to the
    // executor through the pipe.
    if (write(pipes[1], &pid, sizeof(pid)) != sizeof(pid)) {
      perror("Failed to write PID on pipe");
      abort();
    }

    os::close(pipes[1]);

    // The child has successfully setsid, now run the command.
    std::cout << "sh -c '" << task.command().value() << "'" << std::endl;
    execl("/bin/sh", "sh", "-c",
          task.command().value().c_str(), (char*) NULL);
    perror("Failed to exec");
    abort();
  }

  // In parent process.
  os::close(pipes[1]);

  // Get the child's pid via the pipe. Anything other than a complete
  // pid is a failure: a return of 0 (end-of-file, e.g. the child exited
  // before writing) or a short read would otherwise leave 'pid'
  // holding a stale or partial value.
  const ssize_t length = read(pipes[0], &pid, sizeof(pid));
  if (length != static_cast<ssize_t>(sizeof(pid))) {
    std::cerr << "Failed to get child PID from pipe, read: "
              << (length == -1 ? strerror(errno) : "Incomplete read")
              << std::endl;
    abort();
  }

  os::close(pipes[0]);

  std::cout << "Forked command at " << pid << std::endl;

  // Monitor this process.
  process::reap(pid)
    .onAny(defer(self(),
                 &Self::reaped,
                 driver,
                 task.task_id(),
                 pid,
                 lambda::_1));

  TaskStatus status;
  status.mutable_task_id()->MergeFrom(task.task_id());
  status.set_state(TASK_RUNNING);
  driver->sendStatusUpdate(status);

  launched = true;
}
// Fetch URI into directory. Try<string> fetch( const string& uri, const string& directory) { LOG(INFO) << "Fetching URI '" << uri << "'"; // Some checks to make sure using the URI value in shell commands // is safe. TODO(benh): These should be pushed into the scheduler // driver and reported to the user. if (uri.find_first_of('\\') != string::npos || uri.find_first_of('\'') != string::npos || uri.find_first_of('\0') != string::npos) { LOG(ERROR) << "URI contains illegal characters, refusing to fetch"; return Error("Illegal characters in URI"); } // Grab the resource using the hadoop client if it's one of the known schemes // TODO(tarnfeld): This isn't very scalable with hadoop's pluggable // filesystem implementations. // TODO(matei): Enforce some size limits on files we get from HDFS if (strings::startsWith(uri, "hdfs://") || strings::startsWith(uri, "hftp://") || strings::startsWith(uri, "s3://") || strings::startsWith(uri, "s3n://")) { Try<string> base = os::basename(uri); if (base.isError()) { LOG(ERROR) << "Invalid basename for URI: " << base.error(); return Error("Invalid basename for URI"); } string path = path::join(directory, base.get()); HDFS hdfs; LOG(INFO) << "Downloading resource from '" << uri << "' to '" << path << "'"; Try<Nothing> result = hdfs.copyToLocal(uri, path); if (result.isError()) { LOG(ERROR) << "HDFS copyToLocal failed: " << result.error(); return Error(result.error()); } return path; } else if (strings::startsWith(uri, "http://") || strings::startsWith(uri, "https://") || strings::startsWith(uri, "ftp://") || strings::startsWith(uri, "ftps://")) { string path = uri.substr(uri.find("://") + 3); if (path.find("/") == string::npos || path.size() <= path.find("/") + 1) { LOG(ERROR) << "Malformed URL (missing path)"; return Error("Malformed URI"); } path = path::join(directory, path.substr(path.find_last_of("/") + 1)); LOG(INFO) << "Downloading '" << uri << "' to '" << path << "'"; Try<int> code = net::download(uri, path); if 
(code.isError()) { LOG(ERROR) << "Error downloading resource: " << code.error().c_str(); return Error("Fetch of URI failed (" + code.error() + ")"); } else if (code.get() != 200) { LOG(ERROR) << "Error downloading resource, received HTTP/FTP return code " << code.get(); return Error("HTTP/FTP error (" + stringify(code.get()) + ")"); } return path; } else { // Copy the local resource. string local = uri; bool fileUri = false; if (strings::startsWith(local, string(FILE_URI_LOCALHOST))) { local = local.substr(sizeof(FILE_URI_LOCALHOST) - 1); fileUri = true; } else if (strings::startsWith(local, string(FILE_URI_PREFIX))) { local = local.substr(sizeof(FILE_URI_PREFIX) - 1); fileUri = true; } if(fileUri && !strings::startsWith(local, "/")) { return Error("File URI only supports absolute paths"); } if (local.find_first_of("/") != 0) { // We got a non-Hadoop and non-absolute path. if (os::hasenv("MESOS_FRAMEWORKS_HOME")) { local = path::join(os::getenv("MESOS_FRAMEWORKS_HOME"), local); LOG(INFO) << "Prepended environment variable " << "MESOS_FRAMEWORKS_HOME to relative path, " << "making it: '" << local << "'"; } else { LOG(ERROR) << "A relative path was passed for the resource but the " << "environment variable MESOS_FRAMEWORKS_HOME is not set. " << "Please either specify this config option " << "or avoid using a relative path"; return Error("Could not resolve relative URI"); } } Try<string> base = os::basename(local); if (base.isError()) { LOG(ERROR) << base.error(); return Error("Fetch of URI failed"); } // Copy the resource to the directory. string path = path::join(directory, base.get()); std::ostringstream command; command << "cp '" << local << "' '" << path << "'"; LOG(INFO) << "Copying resource from '" << local << "' to '" << directory << "'"; int status = os::system(command.str()); if (status != 0) { LOG(ERROR) << "Failed to copy '" << local << "' : Exit status " << status; return Error("Local copy failed"); } return path; } }
Result(const Try<T>& _t) : data(_t.isSome() ? Try<Option<T>>(Some(_t.get())) : Try<Option<T>>(Error(_t.error()))) {}
int main(int argc, char* argv[]) { GOOGLE_PROTOBUF_VERIFY_VERSION; CommandInfo commandInfo; // Construct URIs from the encoded environment string. const std::string& uris = os::getenv("MESOS_EXECUTOR_URIS"); foreach (const std::string& token, strings::tokenize(uris, " ")) { // Delimiter between URI, execute permission and extract options // Expected format: {URI}+[01][XN] // {URI} - The actual URI for the asset to fetch // [01] - 1 if the execute permission should be set else 0 // [XN] - X if we should extract the URI (if it's compressed) else N size_t pos = token.rfind("+"); CHECK(pos != std::string::npos) << "Invalid executor uri token in env " << token; CommandInfo::URI uri; uri.set_value(token.substr(0, pos)); uri.set_executable(token.substr(pos + 1, 1) == "1"); uri.set_extract(token.substr(pos + 2, 1) == "X"); commandInfo.add_uris()->MergeFrom(uri); } CHECK(os::hasenv("MESOS_WORK_DIRECTORY")) << "Missing MESOS_WORK_DIRECTORY environment variable"; std::string directory = os::getenv("MESOS_WORK_DIRECTORY"); // We cannot use Some in the ternary expression because the compiler needs to // be able to infer the type, thus the explicit Option<string>. // TODO(idownes): Add an os::hasenv that returns an Option<string>. Option<std::string> user = os::hasenv("MESOS_USER") ? Option<std::string>(os::getenv("MESOS_USER")) // Explicit so it compiles. : None(); // Fetch each URI to a local file, chmod, then chown if a user is provided. foreach (const CommandInfo::URI& uri, commandInfo.uris()) { // Fetch the URI to a local file. Try<string> fetched = fetch(uri.value(), directory); if (fetched.isError()) { EXIT(1) << "Failed to fetch: " << uri.value(); } // Chmod the fetched URI if it's executable, else assume it's an archive // that should be extracted. 
if (uri.executable()) { Try<Nothing> chmod = os::chmod( fetched.get(), S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH); if (chmod.isError()) { EXIT(1) << "Failed to chmod " << fetched.get() << ": " << chmod.error(); } } else if (uri.extract()) { //TODO(idownes): Consider removing the archive once extracted. // Try to extract the file if it's recognized as an archive. Try<bool> extracted = extract(fetched.get(), directory); if (extracted.isError()) { EXIT(1) << "Failed to extract " << fetched.get() << ":" << extracted.error(); } } else { LOG(INFO) << "Skipped extracting path '" << fetched.get() << "'"; } // Recursively chown the directory if a user is provided. if (user.isSome()) { Try<Nothing> chowned = os::chown(user.get(), directory); if (chowned.isError()) { EXIT(1) << "Failed to chown " << directory << ": " << chowned.error(); } } } return 0; }
// Runs `path` with the arguments `argv` as a subprocess, with its
// stdout and stderr captured through pipes (the first I/O argument,
// presumably stdin, is redirected from /dev/null).
//
// Returns the captured stdout if the subprocess exits with status 0.
// Returns a Failure if the subprocess cannot be started, its exit
// status cannot be obtained, it exits with a non-zero status (the
// failure message then carries its stderr), or its stdout cannot be
// read.
static Future<string> launch(
    const string& path,
    const vector<string>& argv)
{
  Try<Subprocess> s = subprocess(
      path,
      argv,
      Subprocess::PATH("/dev/null"),
      Subprocess::PIPE(),
      Subprocess::PIPE());

  // Human readable form of the invocation, used in failure messages.
  string command = strings::join(
      ", ",
      path,
      strings::join(", ", argv));

  if (s.isError()) {
    return Failure(
        "Failed to execute the subprocess '" + command + "': " + s.error());
  }

  return await(
      s.get().status(),
      process::io::read(s.get().out().get()),
      process::io::read(s.get().err().get()))
    .then([command](const tuple<
        Future<Option<int>>,
        Future<string>,
        Future<string>>& t) -> Future<string> {
      Future<Option<int>> status = std::get<0>(t);
      if (!status.isReady()) {
        return Failure(
            "Failed to get the exit status of the subprocess: " +
            (status.isFailed() ? status.failure() : "discarded"));
      }

      if (status->isNone()) {
        return Failure("Failed to reap the subprocess");
      }

      if (status->get() != 0) {
        Future<string> error = std::get<2>(t);
        if (!error.isReady()) {
          // The stderr read itself did not succeed, so there is no
          // value to fetch from 'error'; report why the read failed
          // instead of calling 'get()' on a non-ready future.
          return Failure(
              "Unexpected result from the subprocess: " +
              WSTRINGIFY(status->get()) +
              ", stderr='" +
              (error.isFailed() ? error.failure() : "discarded") + "'");
        }

        return Failure("Subprocess '" + command + "' failed: " + error.get());
      }

      Future<string> output = std::get<1>(t);
      if (!output.isReady()) {
        return Failure(
            "Failed to read stdout from '" + command + "': " +
            (output.isFailed() ? output.failure() : "discarded"));
      }

      return output;
    });
}
// This method will be called in two cases: // 1. When a shared persistent volume is destroyed by agent, the parameter // `path` will be the shared persistent volume's path. // 2. When a container is destroyed by containerizer, the parameter `path` // will be the container's sandbox path. // We search if the given path is contained in `infos` (for the case 1) or is // the parent directory of any volume paths in `infos` (for the case 2, i.e., // the PARENT type SANDBOX_PATH volume must be a subdirectory in the parent // container's sandbox) and then free the allocated gid for the found path(s). Future<Nothing> deallocate(const string& path) { vector<string> sandboxPathVolumes; bool changed = false; for (auto it = infos.begin(); it != infos.end(); ) { const VolumeGidInfo& info = it->second; const string& volumePath = info.path(); if (strings::startsWith(volumePath, path)) { if (volumePath != path) { // This is the case of the PARENT type SANDBOX_PATH volume. sandboxPathVolumes.push_back(volumePath); } gid_t gid = info.gid(); LOG(INFO) << "Deallocated gid " << gid << " for the volume path '" << volumePath << "'"; // Only return the gid to the free range if it is in the total // range. The gid may not be in the total range in the case that // Mesos agent is restarted with a different total range and we // deallocate gid for a previous volume path from the old range. if (totalGids.contains(gid)) { freeGids += gid; ++metrics.volume_gids_free; } it = infos.erase(it); changed = true; } else { ++it; } } // For the PARENT type SANDBOX_PATH volume, it will exist for a while // (depending on GC policy) after the container is destroyed. So to // avoid leaking it to other containers in the case that its gid is // allocated to another volume, we need to change its owner group back // to the original one (i.e., the primary group of its owner). 
vector<Future<Try<Nothing>>> futures; vector<pair<string, gid_t>> volumeGids; foreach (const string& volume, sandboxPathVolumes) { // Get the uid of the volume's owner. struct stat s; if (::stat(volume.c_str(), &s) < 0) { LOG(WARNING) << "Failed to stat '" << volume << "': " << os::strerror(errno); continue; } Result<string> user = os::user(s.st_uid); if (!user.isSome()) { LOG(WARNING) << "Failed to get username for the uid " << s.st_uid << ": " << (user.isError() ? user.error() : "not found"); continue; } // Get the primary group ID of the user. Result<gid_t> gid = os::getgid(user.get()); if (!gid.isSome()) { LOG(WARNING) << "Failed to get gid for the user '" << user.get() << "': " << (gid.isError() ? gid.error() : "not found"); continue; } futures.push_back(async(&setVolumeOwnership, volume, gid.get(), false)); volumeGids.push_back({volume, gid.get()}); } return await(futures) .then(defer( self(), [=](const vector<Future<Try<Nothing>>>& results) -> Future<Nothing> { for (size_t i = 0; i < results.size(); ++i) { const Future<Try<Nothing>>& result = results[i]; const string& path = volumeGids[i].first; const gid_t gid = volumeGids[i].second; if (!result.isReady()) { LOG(WARNING) << "Failed to set the owner group of the volume " << "path '" << path << "' back to " << gid << ": " << (result.isFailed() ? result.failure() : "discarded"); } else if (result->isError()) { LOG(WARNING) << "Failed to set the owner group of the volume " << "path '" << path << "' back to " << gid << ": " << result->error(); } } if (changed) { Try<Nothing> status = persist(); if (status.isError()) { return Failure( "Failed to save state of volume gid infos: " + status.error()); } } return Nothing(); })); }
// Creates a Socket of the requested `kind`.
//
// If `s` is None a new stream socket is created (non-blocking and
// close-on-exec) and owned by this call, i.e., it is closed again if
// building the implementation fails. If `s` holds a file descriptor it
// is used as is and never closed here.
//
// Returns the Socket, or an Error if the descriptor or the
// implementation for `kind` could not be created.
Try<Socket> Socket::create(Kind kind, Option<int> s)
{
  // If the caller passed in a file descriptor, we do
  // not own its life cycle and must not close it.
  bool owned = s.isNone();

  if (owned) {
    // Supported in Linux >= 2.6.27.
#if defined(SOCK_NONBLOCK) && defined(SOCK_CLOEXEC)
    Try<int> fd =
      network::socket(AF_INET, SOCK_STREAM | SOCK_NONBLOCK | SOCK_CLOEXEC, 0);

    if (fd.isError()) {
      return Error("Failed to create socket: " + fd.error());
    }
#else
    Try<int> fd = network::socket(AF_INET, SOCK_STREAM, 0);
    if (fd.isError()) {
      return Error("Failed to create socket: " + fd.error());
    }

    Try<Nothing> nonblock = os::nonblock(fd.get());
    if (nonblock.isError()) {
      os::close(fd.get());
      return Error("Failed to create socket, nonblock: " + nonblock.error());
    }

    Try<Nothing> cloexec = os::cloexec(fd.get());
    if (cloexec.isError()) {
      os::close(fd.get());
      return Error("Failed to create socket, cloexec: " + cloexec.error());
    }
#endif

    s = fd.get();
  }

  switch (kind) {
    case POLL: {
      Try<std::shared_ptr<Socket::Impl>> socket =
        PollSocketImpl::create(s.get());
      if (socket.isError()) {
        if (owned) {
          os::close(s.get());
        }
        return Error(socket.error());
      }
      return Socket(socket.get());
    }
#ifdef USE_SSL_SOCKET
    case SSL: {
      Try<std::shared_ptr<Socket::Impl>> socket =
        LibeventSSLSocketImpl::create(s.get());
      if (socket.isError()) {
        if (owned) {
          os::close(s.get());
        }
        return Error(socket.error());
      }
      return Socket(socket.get());
    }
#endif
    // By not setting a default we leverage the compiler errors when
    // the enumeration is augmented to find all the cases we need to
    // provide.
  }

  // Only reached if `kind` matched none of the cases above. Without
  // an explicit return here control would flow off the end of a
  // non-void function; also release the descriptor if we created it.
  if (owned) {
    os::close(s.get());
  }

  return Error("Unsupported socket kind");
}
// This "fetcher program" is invoked by the slave's fetcher actor // (Fetcher, FetcherProcess) to "fetch" URIs into the sandbox directory // of a given task. Its parameters are provided in the form of the env // var MESOS_FETCHER_INFO which contains a FetcherInfo (see // fetcher.proto) object formatted in JSON. These are set by the actor // to indicate what set of URIs to process and how to proceed with // each one. A URI can be downloaded directly to the task's sandbox // directory or it can be copied to a cache first or it can be reused // from the cache, avoiding downloading. All cache management and // bookkeeping is centralized in the slave's fetcher actor, which can // have multiple instances of this fetcher program running at any // given time. Exit code: 0 if entirely successful, otherwise 1. int main(int argc, char* argv[]) { GOOGLE_PROTOBUF_VERIFY_VERSION; mesos::internal::logging::Flags flags; Try<Nothing> load = flags.load("MESOS_", argc, argv); CHECK_SOME(load) << "Could not load flags: " << load.error(); logging::initialize(argv[0], flags, true); // Catch signals. const Option<std::string> jsonFetcherInfo = os::getenv("MESOS_FETCHER_INFO"); CHECK_SOME(jsonFetcherInfo) << "Missing MESOS_FETCHER_INFO environment variable"; LOG(INFO) << "Fetcher Info: " << jsonFetcherInfo.get(); Try<JSON::Object> parse = JSON::parse<JSON::Object>(jsonFetcherInfo.get()); CHECK_SOME(parse) << "Failed to parse MESOS_FETCHER_INFO: " << parse.error(); Try<FetcherInfo> fetcherInfo = ::protobuf::parse<FetcherInfo>(parse.get()); CHECK_SOME(fetcherInfo) << "Failed to parse FetcherInfo: " << fetcherInfo.error(); CHECK(!fetcherInfo.get().sandbox_directory().empty()) << "Missing sandbox directory"; const string sandboxDirectory = fetcherInfo.get().sandbox_directory(); const Option<string> cacheDirectory = fetcherInfo.get().has_cache_directory() ? 
Option<string>::some(fetcherInfo.get().cache_directory()) : Option<string>::none(); const Option<string> frameworksHome = fetcherInfo.get().has_frameworks_home() ? Option<string>::some(fetcherInfo.get().frameworks_home()) : Option<string>::none(); // Fetch each URI to a local file, chmod, then chown if a user is provided. foreach (const FetcherInfo::Item& item, fetcherInfo.get().items()) { Try<string> fetched = fetch(item, cacheDirectory, sandboxDirectory, frameworksHome); if (fetched.isError()) { EXIT(1) << "Failed to fetch '" << item.uri().value() << "': " + fetched.error(); } else { LOG(INFO) << "Fetched '" << item.uri().value() << "' to '" << fetched.get() << "'"; } } // Recursively chown the sandbox directory if a user is provided. if (fetcherInfo.get().has_user()) { Try<Nothing> chowned = os::chown( fetcherInfo.get().user(), sandboxDirectory); if (chowned.isError()) { EXIT(1) << "Failed to chown " << sandboxDirectory << ": " << chowned.error(); } } return 0; }
// Recovers the checkpointed state of a single task: its task info and
// the status updates / acknowledgements recorded for it.
//
// Missing or empty checkpoint files are not treated as errors; the
// state recovered so far is returned with a warning. Read and open
// failures return an Error when `strict` is true; otherwise they are
// logged, counted in `state.errors`, and the partial state is
// returned. A failure to position in or truncate the updates file is
// always returned as an error.
//
// The status updates file descriptor opened here is closed on every
// return path.
Try<TaskState> TaskState::recover(
    const string& rootDir,
    const SlaveID& slaveId,
    const FrameworkID& frameworkId,
    const ExecutorID& executorId,
    const ContainerID& containerId,
    const TaskID& taskId,
    bool strict)
{
  TaskState state;
  state.id = taskId;
  string message;

  // Read the task info.
  string path = paths::getTaskInfoPath(
      rootDir, slaveId, frameworkId, executorId, containerId, taskId);
  if (!os::exists(path)) {
    // This could happen if the slave died after creating the task
    // directory but before it checkpointed the task info.
    LOG(WARNING) << "Failed to find task info file '" << path << "'";
    return state;
  }

  const Result<Task>& task = ::protobuf::read<Task>(path);

  if (task.isError()) {
    message = "Failed to read task info from '" + path + "': " + task.error();

    if (strict) {
      return Error(message);
    } else {
      LOG(WARNING) << message;
      state.errors++;
      return state;
    }
  }

  if (task.isNone()) {
    // This could happen if the slave died after opening the file for
    // writing but before it checkpointed anything.
    LOG(WARNING) << "Found empty task info file '" << path << "'";
    return state;
  }

  state.info = task.get();

  // Read the status updates.
  path = paths::getTaskUpdatesPath(
      rootDir, slaveId, frameworkId, executorId, containerId, taskId);
  if (!os::exists(path)) {
    // This could happen if the slave died before it checkpointed any
    // status updates for this task.
    LOG(WARNING) << "Failed to find status updates file '" << path << "'";
    return state;
  }

  // Open the status updates file for reading and writing (for
  // truncating).
  Try<int> fd = os::open(path, O_RDWR | O_CLOEXEC);

  if (fd.isError()) {
    message = "Failed to open status updates file '" + path +
              "': " + fd.error();

    if (strict) {
      return Error(message);
    } else {
      LOG(WARNING) << message;
      state.errors++;
      return state;
    }
  }

  // Now, read the updates.
  Result<StatusUpdateRecord> record = None();
  while (true) {
    // Ignore errors due to partial protobuf read and enable undoing
    // failed reads by reverting to the previous seek position.
    record = ::protobuf::read<StatusUpdateRecord>(fd.get(), true, true);

    if (!record.isSome()) {
      break;
    }

    if (record.get().type() == StatusUpdateRecord::UPDATE) {
      state.updates.push_back(record.get().update());
    } else {
      state.acks.insert(UUID::fromBytes(record.get().uuid()));
    }
  }

  // Determine where the last valid update ends. The error object is
  // built before closing the descriptor so that the captured errno is
  // the one from the failing call, not from 'os::close'.
  off_t offset = lseek(fd.get(), 0, SEEK_CUR);
  if (offset < 0) {
    ErrnoError error(
        "Failed to lseek status updates file '" + path + "'");
    os::close(fd.get());
    return error;
  }

  // Always truncate the file to contain only valid updates.
  // NOTE: This is safe even though we ignore partial protobuf read
  // errors above, because the 'fd' is properly set to the end of the
  // last valid update by 'protobuf::read()'.
  if (ftruncate(fd.get(), offset) != 0) {
    ErrnoError error(
        "Failed to truncate status updates file '" + path + "'");
    os::close(fd.get());
    return error;
  }

  // After reading a non-corrupted updates file, 'record' should be
  // 'none'.
  if (record.isError()) {
    message = "Failed to read status updates file  '" + path +
              "': " + record.error();

    // Best-effort close; the read error is what gets reported.
    os::close(fd.get());

    if (strict) {
      return Error(message);
    } else {
      LOG(WARNING) << message;
      state.errors++;
      return state;
    }
  }

  // Close the updates file.
  Try<Nothing> close = os::close(fd.get());

  if (close.isError()) {
    message = "Failed to close status updates file '" + path +
              "': " + close.error();

    if (strict) {
      return Error(message);
    } else {
      LOG(WARNING) << message;
      state.errors++;
      return state;
    }
  }

  return state;
}
int main(int argc, char** argv) { GOOGLE_PROTOBUF_VERIFY_VERSION; master::Flags flags; // The following flags are executable specific (e.g., since we only // have one instance of libprocess per execution, we only want to // advertise the IP and port option once, here). Option<string> ip; flags.add(&ip, "ip", "IP address to listen on"); uint16_t port; flags.add(&port, "port", "Port to listen on", MasterInfo().port()); Option<string> zk; flags.add(&zk, "zk", "ZooKeeper URL (used for leader election amongst masters)\n" "May be one of:\n" " zk://host1:port1,host2:port2,.../path\n" " zk://username:password@host1:port1,host2:port2,.../path\n" " file:///path/to/file (where file contains one of the above)"); Try<Nothing> load = flags.load("MESOS_", argc, argv); if (load.isError()) { cerr << flags.usage(load.error()) << endl; return EXIT_FAILURE; } if (flags.version) { version(); return EXIT_SUCCESS; } if (flags.help) { cout << flags.usage() << endl; return EXIT_SUCCESS; } // Initialize modules. Note that since other subsystems may depend // upon modules, we should initialize modules before anything else. if (flags.modules.isSome()) { Try<Nothing> result = ModuleManager::load(flags.modules.get()); if (result.isError()) { EXIT(EXIT_FAILURE) << "Error loading modules: " << result.error(); } } // Initialize hooks. if (flags.hooks.isSome()) { Try<Nothing> result = HookManager::initialize(flags.hooks.get()); if (result.isError()) { EXIT(EXIT_FAILURE) << "Error installing hooks: " << result.error(); } } // Initialize libprocess. if (ip.isSome()) { os::setenv("LIBPROCESS_IP", ip.get()); } os::setenv("LIBPROCESS_PORT", stringify(port)); process::initialize("master"); logging::initialize(argv[0], flags, true); // Catch signals. 
LOG(INFO) << "Build: " << build::DATE << " by " << build::USER; LOG(INFO) << "Version: " << MESOS_VERSION; if (build::GIT_TAG.isSome()) { LOG(INFO) << "Git tag: " << build::GIT_TAG.get(); } if (build::GIT_SHA.isSome()) { LOG(INFO) << "Git SHA: " << build::GIT_SHA.get(); } // Create an instance of allocator. const std::string allocatorName = flags.allocator; Try<Allocator*> allocator = Allocator::create(allocatorName); if (allocator.isError()) { EXIT(EXIT_FAILURE) << "Failed to create '" << allocatorName << "' allocator: " << allocator.error(); } CHECK_NOTNULL(allocator.get()); LOG(INFO) << "Using '" << allocatorName << "' allocator"; state::Storage* storage = NULL; Log* log = NULL; if (flags.registry == "in_memory") { if (flags.registry_strict) { EXIT(EXIT_FAILURE) << "Cannot use '--registry_strict' when using in-memory storage" << " based registry"; } storage = new state::InMemoryStorage(); } else if (flags.registry == "replicated_log" || flags.registry == "log_storage") { // TODO(bmahler): "log_storage" is present for backwards // compatibility, can be removed before 0.19.0. if (flags.work_dir.isNone()) { EXIT(EXIT_FAILURE) << "--work_dir needed for replicated log based registry"; } Try<Nothing> mkdir = os::mkdir(flags.work_dir.get()); if (mkdir.isError()) { EXIT(EXIT_FAILURE) << "Failed to create work directory '" << flags.work_dir.get() << "': " << mkdir.error(); } if (zk.isSome()) { // Use replicated log with ZooKeeper. 
if (flags.quorum.isNone()) { EXIT(EXIT_FAILURE) << "Need to specify --quorum for replicated log based" << " registry when using ZooKeeper"; } Try<zookeeper::URL> url = zookeeper::URL::parse(zk.get()); if (url.isError()) { EXIT(EXIT_FAILURE) << "Error parsing ZooKeeper URL: " << url.error(); } log = new Log( flags.quorum.get(), path::join(flags.work_dir.get(), "replicated_log"), url.get().servers, flags.zk_session_timeout, path::join(url.get().path, "log_replicas"), url.get().authentication, flags.log_auto_initialize); } else { // Use replicated log without ZooKeeper. log = new Log( 1, path::join(flags.work_dir.get(), "replicated_log"), set<UPID>(), flags.log_auto_initialize); } storage = new state::LogStorage(log); } else { EXIT(EXIT_FAILURE) << "'" << flags.registry << "' is not a supported" << " option for registry persistence"; } CHECK_NOTNULL(storage); state::protobuf::State* state = new state::protobuf::State(storage); Registrar* registrar = new Registrar(flags, state); Repairer* repairer = new Repairer(); Files files; MasterContender* contender; MasterDetector* detector; // TODO(vinod): 'MasterContender::create()' should take // Option<string>. Try<MasterContender*> contender_ = MasterContender::create(zk.getOrElse("")); if (contender_.isError()) { EXIT(EXIT_FAILURE) << "Failed to create a master contender: " << contender_.error(); } contender = contender_.get(); // TODO(vinod): 'MasterDetector::create()' should take // Option<string>. 
Try<MasterDetector*> detector_ = MasterDetector::create(zk.getOrElse("")); if (detector_.isError()) { EXIT(EXIT_FAILURE) << "Failed to create a master detector: " << detector_.error(); } detector = detector_.get(); Option<Authorizer*> authorizer = None(); if (flags.acls.isSome()) { Try<Owned<Authorizer>> create = Authorizer::create(flags.acls.get()); if (create.isError()) { EXIT(EXIT_FAILURE) << "Failed to initialize the authorizer: " << create.error() << " (see --acls flag)"; } // Now pull out the authorizer but need to make a copy since we // get a 'const &' from 'Try::get'. authorizer = Owned<Authorizer>(create.get()).release(); } Option<shared_ptr<RateLimiter>> slaveRemovalLimiter = None(); if (flags.slave_removal_rate_limit.isSome()) { // Parse the flag value. // TODO(vinod): Move this parsing logic to flags once we have a // 'Rate' abstraction in stout. vector<string> tokens = strings::tokenize(flags.slave_removal_rate_limit.get(), "/"); if (tokens.size() != 2) { EXIT(EXIT_FAILURE) << "Invalid slave_removal_rate_limit: " << flags.slave_removal_rate_limit.get() << ". Format is <Number of slaves>/<Duration>"; } Try<int> permits = numify<int>(tokens[0]); if (permits.isError()) { EXIT(EXIT_FAILURE) << "Invalid slave_removal_rate_limit: " << flags.slave_removal_rate_limit.get() << ". Format is <Number of slaves>/<Duration>" << ": " << permits.error(); } Try<Duration> duration = Duration::parse(tokens[1]); if (duration.isError()) { EXIT(EXIT_FAILURE) << "Invalid slave_removal_rate_limit: " << flags.slave_removal_rate_limit.get() << ". 
Format is <Number of slaves>/<Duration>" << ": " << duration.error(); } slaveRemovalLimiter = new RateLimiter(permits.get(), duration.get()); } if (flags.firewall_rules.isSome()) { vector<Owned<FirewallRule>> rules; const Firewall firewall = flags.firewall_rules.get(); if (firewall.has_disabled_endpoints()) { hashset<string> paths; foreach (const string& path, firewall.disabled_endpoints().paths()) { paths.insert(path); } rules.emplace_back(new DisabledEndpointsFirewallRule(paths)); }
int main(int argc, char** argv) { GOOGLE_PROTOBUF_VERIFY_VERSION; master::Flags flags; // The following flags are executable specific (e.g., since we only // have one instance of libprocess per execution, we only want to // advertise the IP and port option once, here). Option<string> ip; flags.add(&ip, "ip", "IP address to listen on"); uint16_t port; flags.add(&port, "port", "Port to listen on", MasterInfo().port()); string zk; flags.add(&zk, "zk", "ZooKeeper URL (used for leader election amongst masters)\n" "May be one of:\n" " zk://host1:port1,host2:port2,.../path\n" " zk://username:password@host1:port1,host2:port2,.../path\n" " file://path/to/file (where file contains one of the above)", ""); bool help; flags.add(&help, "help", "Prints this help message", false); Try<Nothing> load = flags.load("MESOS_", argc, argv); if (load.isError()) { cerr << load.error() << endl; usage(argv[0], flags); exit(1); } if (help) { usage(argv[0], flags); exit(1); } // Initialize libprocess. if (ip.isSome()) { os::setenv("LIBPROCESS_IP", ip.get()); } os::setenv("LIBPROCESS_PORT", stringify(port)); process::initialize("master"); logging::initialize(argv[0], flags, true); // Catch signals. 
LOG(INFO) << "Build: " << build::DATE << " by " << build::USER; LOG(INFO) << "Version: " << MESOS_VERSION; if (build::GIT_TAG.isSome()) { LOG(INFO) << "Git tag: " << build::GIT_TAG.get(); } if (build::GIT_SHA.isSome()) { LOG(INFO) << "Git SHA: " << build::GIT_SHA.get(); } allocator::AllocatorProcess* allocatorProcess = new allocator::HierarchicalDRFAllocatorProcess(); allocator::Allocator* allocator = new allocator::Allocator(allocatorProcess); state::Storage* storage = NULL; if (strings::startsWith(flags.registry, "zk://")) { // TODO(benh): EXIT(1) << "ZooKeeper based registry unimplemented"; } else if (flags.registry == "local") { storage = new state::LevelDBStorage(path::join(flags.work_dir, "registry")); } else { EXIT(1) << "'" << flags.registry << "' is not a supported" << " option for registry persistence"; } CHECK_NOTNULL(storage); state::protobuf::State* state = new state::protobuf::State(storage); Registrar* registrar = new Registrar(state); Repairer* repairer = new Repairer(); Files files; MasterContender* contender; MasterDetector* detector; Try<MasterContender*> contender_ = MasterContender::create(zk); if (contender_.isError()) { EXIT(1) << "Failed to create a master contender: " << contender_.error(); } contender = contender_.get(); Try<MasterDetector*> detector_ = MasterDetector::create(zk); if (detector_.isError()) { EXIT(1) << "Failed to create a master detector: " << detector_.error(); } detector = detector_.get(); LOG(INFO) << "Starting Mesos master"; Master* master = new Master( allocator, registrar, repairer, &files, contender, detector, flags); if (zk == "") { // It means we are using the standalone detector so we need to // appoint this Master as the leader. 
dynamic_cast<StandaloneMasterDetector*>(detector)->appoint(master->info()); } process::spawn(master); process::wait(master->self()); delete master; delete allocator; delete allocatorProcess; delete registrar; delete repairer; delete state; delete storage; delete contender; delete detector; return 0; }
// Recursive version of `RemoveDirectory`. Two things are notable about this // implementation: // // 1. Unlike `rmdir`, this requires Windows-formatted paths, and therefore // should be in the `internal` namespace. // 2. To match the semantics of the POSIX implementation, this function // implements the semantics of `rm -r`, rather than `rmdir`. In particular, // if `path` points at a file, this function will delete it, while a call to // `rmdir` will not. inline Try<Nothing> recursive_remove_directory( const std::string& path, bool removeRoot, bool continueOnError) { // NOTE: Special case required to match the semantics of POSIX. See comment // above. As below, this also handles symlinks correctly, i.e., given a path // to a symlink, we delete the symlink rather than the target. if (os::stat::isfile(path)) { return os::rm(path); } // Appending a slash here if the path doesn't already have one simplifies // path join logic later, because (unlike Unix) Windows doesn't like double // slashes in paths. std::string current_path; if (!strings::endsWith(path, "\\")) { current_path = path + "\\"; } else { current_path = path; } // Get first file matching pattern `X:\path\to\wherever\*`. WIN32_FIND_DATA found; const std::string search_pattern = current_path + "*"; const SharedHandle search_handle( FindFirstFile(search_pattern.c_str(), &found), FindClose); if (search_handle.get() == INVALID_HANDLE_VALUE) { return WindowsError( "`os::internal::recursive_remove_directory` failed when searching " "for files with pattern '" + search_pattern + "'"); } do { // NOTE: do-while is appropriate here because folder is guaranteed to have // at least a file called `.` (and probably also one called `..`). const std::string current_file(found.cFileName); const bool is_current_directory = current_file.compare(".") == 0; const bool is_parent_directory = current_file.compare("..") == 0; // Don't try to delete `.` and `..` files in directory. 
if (is_current_directory || is_parent_directory) { continue; } // Path to remove. const std::string current_absolute_path = current_path + current_file; const bool is_directory = os::stat::isdir(current_absolute_path); // Delete current path, whether it's a directory, file, or symlink. if (is_directory) { Try<Nothing> removed = recursive_remove_directory( current_absolute_path, true, continueOnError); if (removed.isError()) { if (continueOnError) { LOG(WARNING) << "Failed to delete directory " << current_absolute_path << " with error " << removed.error(); } else { return Error(removed.error()); } } } else { // NOTE: this also handles symbolic links. if (::remove(current_absolute_path.c_str()) != 0) { if (continueOnError) { LOG(WARNING) << "`os::internal::recursive_remove_directory`" << " attempted to delete file '" << current_absolute_path << "', but failed"; } else { return WindowsError( "`os::internal::recursive_remove_directory` attempted to delete " "file '" + current_absolute_path + "', but failed"); } } } } while (FindNextFile(search_handle.get(), &found)); // Finally, remove current directory unless `removeRoot` is disabled. if (removeRoot && ::_rmdir(current_path.c_str()) == -1) { if (continueOnError) { LOG(WARNING) << "`os::internal::recursive_remove_directory`" << " attempted to delete directory '" << current_path << "', but failed"; return ErrnoError("rmdir failed in 'continueOnError' mode"); } else { return ErrnoError( "`os::internal::recursive_remove_directory` attempted to delete " "directory '" + current_path + "', but failed"); } } return Nothing(); }
int main(int argc, char** argv) { flags::FlagsBase flags; flags.setUsageMessage("Usage: " + Path(argv[0]).basename() + " <master>"); Duration timeout; flags.add(&timeout, "timeout", "How long to wait to resolve master", Seconds(5)); // TODO(marco): `verbose` is also a great candidate for FlagsBase. bool verbose; flags.add(&verbose, "verbose", "Be verbose", false); // Load flags from environment and command line, and remove // them from argv. Try<flags::Warnings> load = flags.load(None(), &argc, &argv); if (load.isError()) { cerr << flags.usage(load.error()) << endl; return EXIT_FAILURE; } if (flags.help) { cout << flags.usage() << endl; return EXIT_SUCCESS; } // Log any flag warnings. foreach (const flags::Warning& warning, load->warnings) { LOG(WARNING) << warning.message; } // 'master' argument must be the only argument left after parsing. if (argc != 2) { cerr << flags.usage("There must be only one argument: <master>") << endl; return EXIT_FAILURE; } string master = argv[1]; Try<mesos::master::detector::MasterDetector*> detector = mesos::master::detector::MasterDetector::create(master); if (detector.isError()) { cerr << "Failed to create a master detector: " << detector.error() << endl; return EXIT_FAILURE; } Future<Option<MasterInfo> > masterInfo = detector.get()->detect(); if (!masterInfo.await(timeout)) { cerr << "Failed to detect master from '" << master << "' within " << timeout << endl; return -1; } else { CHECK(!masterInfo.isDiscarded()); if (masterInfo.isFailed()) { cerr << "Failed to detect master from '" << master << "': " << masterInfo.failure() << endl; return EXIT_FAILURE; } } // The future is not satisfied unless the result is Some. CHECK_SOME(masterInfo.get()); cout << strings::remove(masterInfo.get().get().pid(), "master@") << endl; return EXIT_SUCCESS; }
// Clones a child process for `containerId` that runs `childFunction`.
//
// The child is created with `::clone` using the `namespaces` flags and is
// held back on a pipe until the parent has assigned it to the container's
// freezer cgroup (created here if it does not exist yet). The pid is
// recorded in `pids` if this is the first process for the container.
//
// Returns the child's pid, or an error if the cgroup could not be
// checked/created, the clone failed, the child could not be assigned to
// the cgroup, or the child could not be signalled to continue. In the
// latter two cases the child is sent SIGKILL. Both pipe ends are closed on
// every return path after the pipe has been created.
Try<pid_t> LinuxLauncher::fork(
    const ContainerID& containerId,
    const lambda::function<int()>& childFunction)
{
  // Create a freezer cgroup for this container if necessary.
  Try<bool> exists = cgroups::exists(hierarchy, cgroup(containerId));
  if (exists.isError()) {
    return Error("Failed to check existence of freezer cgroup: " +
                 exists.error());
  }

  if (!exists.get()) {
    Try<Nothing> created = cgroups::create(hierarchy, cgroup(containerId));

    if (created.isError()) {
      return Error("Failed to create freezer cgroup: " + created.error());
    }
  }

  // Use a pipe to block the child until it's been moved into the freezer
  // cgroup.
  int pipes[2];
  // We assume this should not fail under reasonable conditions so we use CHECK.
  CHECK(pipe(pipes) == 0);

  // Use the _childMain helper which moves the child into a new session and
  // blocks on the pipe until we're ready for it to run.
  lambda::function<int()> func =
    lambda::bind(&_childMain, childFunction, pipes);

  // Stack for the child.
  // - unsigned long long used for best alignment.
  // - static is ok because each child gets their own copy after the clone.
  // - 8 MiB appears to be the default for "ulimit -s" on OSX and Linux.
  static unsigned long long stack[(8*1024*1024)/sizeof(unsigned long long)];

  LOG(INFO) << "Cloning child process with flags = " << namespaces;

  pid_t pid;
  if ((pid = ::clone(
      childMain,
      &stack[sizeof(stack)/sizeof(stack[0]) - 1],  // stack grows down
      namespaces | SIGCHLD,   // Specify SIGCHLD as child termination signal
      static_cast<void*>(&func))) == -1) {
    // Capture errno before closing the pipe, since `close` may overwrite it.
    ErrnoError error("Failed to clone child process");

    // No child exists, so neither end of the pipe is needed any more.
    os::close(pipes[0]);
    os::close(pipes[1]);

    return error;
  }

  // Parent.
  os::close(pipes[0]);

  // Move the child into the freezer cgroup. Any grandchildren will also be
  // contained in the cgroup.
  Try<Nothing> assign = cgroups::assign(hierarchy, cgroup(containerId), pid);

  if (assign.isError()) {
    LOG(ERROR) << "Failed to assign process " << pid
                << " of container '" << containerId << "'"
                << " to its freezer cgroup: " << assign.error();

    kill(pid, SIGKILL);

    // Don't leak the write end of the pipe on this error path.
    os::close(pipes[1]);

    return Error("Failed to contain process");
  }

  // Now that we've contained the child we can signal it to continue by
  // writing to the pipe. The value written is irrelevant, but it is
  // initialized so we never write indeterminate bytes.
  int buf = 0;
  ssize_t len;
  while ((len = write(pipes[1], &buf, sizeof(buf))) == -1 && errno == EINTR);

  if (len != sizeof(buf)) {
    // Ensure the child is killed.
    kill(pid, SIGKILL);
    os::close(pipes[1]);
    return Error("Failed to synchronize child process");
  }
  os::close(pipes[1]);

  // Store the pid (session id and process group id) if this is the first
  // process forked for this container.
  if (!pids.contains(containerId)) {
    pids.put(containerId, pid);
  }

  return pid;
}
// Recovers the checkpointed state of a single executor run (container).
//
// The recovery proceeds in order: the executor sentinel file, the tasks of
// the run, the forked pid, and finally either the libprocess pid or the
// HTTP marker. A partially populated state is returned when a later piece
// is missing or empty. Read failures of the pid files are returned as
// errors when `strict` is set; otherwise they are logged, counted in
// `state.errors`, and the partial state is returned.
Try<RunState> RunState::recover(
    const string& rootDir,
    const SlaveID& slaveId,
    const FrameworkID& frameworkId,
    const ExecutorID& executorId,
    const ContainerID& containerId,
    bool strict)
{
  RunState state;
  state.id = containerId;

  // Check for the sentinel file first so that completion is known even
  // when only partial state is returned below (e.g., if the libprocess
  // pid file is not recovered). It indicates the slave removed the
  // executor.
  const string sentinelPath = paths::getExecutorSentinelPath(
      rootDir, slaveId, frameworkId, executorId, containerId);

  state.completed = os::exists(sentinelPath);

  // Locate the task directories of this run.
  Try<list<string> > taskPaths = paths::getTaskPaths(
      rootDir, slaveId, frameworkId, executorId, containerId);

  if (taskPaths.isError()) {
    return Error(
        "Failed to find tasks for executor run " + containerId.value() +
        ": " + taskPaths.error());
  }

  // Recover each task; any task failure aborts the whole recovery.
  foreach (const string& taskPath, taskPaths.get()) {
    TaskID taskId;
    taskId.set_value(Path(taskPath).basename());

    Try<TaskState> recovered = TaskState::recover(
        rootDir,
        slaveId,
        frameworkId,
        executorId,
        containerId,
        taskId,
        strict);

    if (recovered.isError()) {
      return Error(
          "Failed to recover task " + taskId.value() + ": " +
          recovered.error());
    }

    state.tasks[taskId] = recovered.get();
    state.errors += recovered.get().errors;
  }

  // Forked pid.
  const string forkedPidPath = paths::getForkedPidPath(
      rootDir, slaveId, frameworkId, executorId, containerId);

  if (!os::exists(forkedPidPath)) {
    // This could happen if the slave died before the isolator
    // checkpointed the forked pid.
    LOG(WARNING) << "Failed to find executor forked pid file '"
                 << forkedPidPath << "'";
    return state;
  }

  Try<string> forkedPidData = os::read(forkedPidPath);

  if (forkedPidData.isError()) {
    const string message =
      "Failed to read executor forked pid from '" + forkedPidPath +
      "': " + forkedPidData.error();

    if (strict) {
      return Error(message);
    }

    LOG(WARNING) << message;
    state.errors++;
    return state;
  }

  if (forkedPidData.get().empty()) {
    // This could happen if the slave died after opening the file for
    // writing but before it checkpointed anything.
    LOG(WARNING) << "Found empty executor forked pid file '"
                 << forkedPidPath << "'";
    return state;
  }

  Try<pid_t> forkedPid = numify<pid_t>(forkedPidData.get());
  if (forkedPid.isError()) {
    return Error("Failed to parse forked pid " + forkedPidData.get() +
                 ": " + forkedPid.error());
  }

  state.forkedPid = forkedPid.get();

  // Libprocess pid, falling back to the HTTP marker when it is absent.
  const string libprocessPidPath = paths::getLibprocessPidPath(
      rootDir, slaveId, frameworkId, executorId, containerId);

  if (!os::exists(libprocessPidPath)) {
    const string httpMarkerPath = paths::getExecutorHttpMarkerPath(
        rootDir, slaveId, frameworkId, executorId, containerId);

    if (!os::exists(httpMarkerPath)) {
      // This could happen if the slave died before the executor
      // registered with the slave.
      LOG(WARNING)
        << "Failed to find executor libprocess pid/http marker file";
      return state;
    }

    state.http = true;
    return state;
  }

  Try<string> libprocessPidData = os::read(libprocessPidPath);

  if (libprocessPidData.isError()) {
    const string message =
      "Failed to read executor libprocess pid from '" + libprocessPidPath +
      "': " + libprocessPidData.error();

    if (strict) {
      return Error(message);
    }

    LOG(WARNING) << message;
    state.errors++;
    return state;
  }

  if (libprocessPidData.get().empty()) {
    // This could happen if the slave died after opening the file for
    // writing but before it checkpointed anything.
    LOG(WARNING) << "Found empty executor libprocess pid file '"
                 << libprocessPidPath << "'";
    return state;
  }

  state.libprocessPid = process::UPID(libprocessPidData.get());
  state.http = false;

  return state;
}
// Launches `task` as a docker container.
//
// Only one task may be run by this executor: if a run is already in
// progress the task is reported as TASK_FAILED. Otherwise the docker run
// options are built from the task, the container is started, and a
// TASK_RUNNING update is sent once the container has been inspected
// (unless the task was killed in the meantime). Checks and health checks
// are launched after that update.
void launchTask(ExecutorDriver* driver, const TaskInfo& task)
{
  // Reports `task` as TASK_FAILED with the given message.
  // TODO(alexr): Use `protobuf::createTaskStatus()`
  // instead of manually setting fields.
  auto sendFailure = [driver, &task](const string& message) {
    TaskStatus failure;
    failure.mutable_task_id()->CopyFrom(task.task_id());
    failure.set_state(TASK_FAILED);
    failure.set_message(message);

    driver->sendStatusUpdate(failure);
  };

  if (run.isSome()) {
    sendFailure(
        "Attempted to run multiple tasks using a \"docker\" executor");
    return;
  }

  // Remember which task we are running.
  taskId = task.task_id();

  // Remember the kill policy, if the task carries one.
  if (task.has_kill_policy()) {
    killPolicy = task.kill_policy();
  }

  LOG(INFO) << "Starting task " << taskId.get();

  CHECK(task.has_container());
  CHECK(task.has_command());

  CHECK(task.container().type() == ContainerInfo::DOCKER);

  // We're adding task and executor resources to launch docker since
  // the DockerContainerizer updates the container cgroup limits
  // directly and it expects it to be the sum of both task and
  // executor resources. This does lead to a bit of unaccounted
  // resources for running this executor, but we are assuming
  // this is just a very small amount of overcommit.
  Try<Docker::RunOptions> options = Docker::RunOptions::create(
      task.container(),
      task.command(),
      containerName,
      sandboxDirectory,
      mappedDirectory,
      task.resources() + task.executor().resources(),
      cgroupsEnableCfs,
      taskEnvironment,
      None(), // No extra devices.
      defaultContainerDNS);

  if (options.isError()) {
    sendFailure("Failed to create docker run options: " + options.error());

    _stop();
    return;
  }

  run = docker->run(
      options.get(),
      Subprocess::FD(STDOUT_FILENO),
      Subprocess::FD(STDERR_FILENO));

  run->onAny(defer(self(), &Self::reaped, lambda::_1));

  // The TASK_RUNNING status update is held back until the inspect
  // output arrives. The stored future completes only after the running
  // update has been sent, which lets the terminal update be ordered
  // after the running update (see `reaped()`).
  inspect = docker->inspect(containerName, DOCKER_INSPECT_DELAY)
    .then(defer(self(), [=](const Docker::Container& container) {
      if (!killed) {
        containerPid = container.pid;

        // TODO(alexr): Use `protobuf::createTaskStatus()`
        // instead of manually setting fields.
        TaskStatus running;
        running.mutable_task_id()->CopyFrom(taskId.get());
        running.set_state(TASK_RUNNING);
        running.set_data(container.output);

        if (container.ipAddress.isSome()) {
          // TODO(karya): Deprecated -- Remove after 0.25.0 has shipped.
          Label* ipLabel = running.mutable_labels()->add_labels();
          ipLabel->set_key("Docker.NetworkSettings.IPAddress");
          ipLabel->set_value(container.ipAddress.get());

          NetworkInfo* info =
            running.mutable_container_status()->add_network_infos();

          // If the ContainerInfo specifies a NetworkInfo, start from a
          // copy of it. A Docker container has at most one NetworkInfo,
          // which is validated in containerizer.
          if (task.container().network_infos_size() > 0) {
            info->CopyFrom(task.container().network_infos(0));
            info->clear_ip_addresses();
          }

          NetworkInfo::IPAddress* address = info->add_ip_addresses();
          address->set_ip_address(container.ipAddress.get());

          containerNetworkInfo = *info;
        }

        driver->sendStatusUpdate(running);
      }

      return Nothing();
    }));

  inspect.onFailed(defer(self(), [=](const string& failure) {
    LOG(ERROR) << "Failed to inspect container '" << containerName << "'"
               << ": " << failure;

    // TODO(bmahler): This is fatal, try to shut down cleanly.
    // Since we don't have a container id, we can only discard
    // the run future.
  }));

  inspect.onReady(defer(self(), &Self::launchCheck, task));

  inspect.onReady(
      defer(self(), &Self::launchHealthCheck, containerName, task));
}