// Attempt to get the uri using the hadoop client. Try<string> fetchWithHadoopClient( const string& uri, const string& directory) { HDFS hdfs; Try<bool> available = hdfs.available(); if (available.isError() || !available.get()) { LOG(INFO) << "Hadoop Client not available, " << "skipping fetch with Hadoop Client"; return Error("Hadoop Client unavailable"); } LOG(INFO) << "Fetching URI '" << uri << "' using Hadoop Client"; Try<string> base = os::basename(uri); if (base.isError()) { LOG(ERROR) << "Invalid basename for URI: " << base.error(); return Error("Invalid basename for URI"); } string path = path::join(directory, base.get()); LOG(INFO) << "Downloading resource from '" << uri << "' to '" << path << "'"; Try<Nothing> result = hdfs.copyToLocal(uri, path); if (result.isError()) { LOG(ERROR) << "HDFS copyToLocal failed: " << result.error(); return Error(result.error()); } return path; }
// Attempt to get the uri using the hadoop client. static Try<string> downloadWithHadoopClient( const string& sourceUri, const string& destinationPath) { HDFS hdfs; Try<bool> available = hdfs.available(); if (available.isError() || !available.get()) { return Error("Skipping fetch with Hadoop Client as" " Hadoop Client not available: " + available.error()); } LOG(INFO) << "Downloading resource with Hadoop client from '" << sourceUri << "' to '" << destinationPath << "'"; Try<Nothing> result = hdfs.copyToLocal(sourceUri, destinationPath); if (result.isError()) { return Error("HDFS copyToLocal failed: " + result.error()); } return destinationPath; }
// Fetch URI into directory. Try<string> fetch( const string& uri, const string& directory) { LOG(INFO) << "Fetching URI '" << uri << "'"; // Some checks to make sure using the URI value in shell commands // is safe. TODO(benh): These should be pushed into the scheduler // driver and reported to the user. if (uri.find_first_of('\\') != string::npos || uri.find_first_of('\'') != string::npos || uri.find_first_of('\0') != string::npos) { LOG(ERROR) << "URI contains illegal characters, refusing to fetch"; return Error("Illegal characters in URI"); } // Grab the resource using the hadoop client if it's one of the known schemes // TODO(tarnfeld): This isn't very scalable with hadoop's pluggable // filesystem implementations. // TODO(matei): Enforce some size limits on files we get from HDFS if (strings::startsWith(uri, "hdfs://") || strings::startsWith(uri, "hftp://") || strings::startsWith(uri, "s3://") || strings::startsWith(uri, "s3n://")) { Try<string> base = os::basename(uri); if (base.isError()) { LOG(ERROR) << "Invalid basename for URI: " << base.error(); return Error("Invalid basename for URI"); } string path = path::join(directory, base.get()); HDFS hdfs; LOG(INFO) << "Downloading resource from '" << uri << "' to '" << path << "'"; Try<Nothing> result = hdfs.copyToLocal(uri, path); if (result.isError()) { LOG(ERROR) << "HDFS copyToLocal failed: " << result.error(); return Error(result.error()); } return path; } else if (strings::startsWith(uri, "http://") || strings::startsWith(uri, "https://") || strings::startsWith(uri, "ftp://") || strings::startsWith(uri, "ftps://")) { string path = uri.substr(uri.find("://") + 3); if (path.find("/") == string::npos || path.size() <= path.find("/") + 1) { LOG(ERROR) << "Malformed URL (missing path)"; return Error("Malformed URI"); } path = path::join(directory, path.substr(path.find_last_of("/") + 1)); LOG(INFO) << "Downloading '" << uri << "' to '" << path << "'"; Try<int> code = net::download(uri, path); if (code.isError()) { LOG(ERROR) << "Error downloading resource: " << code.error().c_str(); return Error("Fetch of URI failed (" + code.error() + ")"); } else if (code.get() != 200) { LOG(ERROR) << "Error downloading resource, received HTTP/FTP return code " << code.get(); return Error("HTTP/FTP error (" + stringify(code.get()) + ")"); } return path; } else { // Copy the local resource. string local = uri; bool fileUri = false; if (strings::startsWith(local, string(FILE_URI_LOCALHOST))) { local = local.substr(sizeof(FILE_URI_LOCALHOST) - 1); fileUri = true; } else if (strings::startsWith(local, string(FILE_URI_PREFIX))) { local = local.substr(sizeof(FILE_URI_PREFIX) - 1); fileUri = true; } if(fileUri && !strings::startsWith(local, "/")) { return Error("File URI only supports absolute paths"); } if (local.find_first_of("/") != 0) { // We got a non-Hadoop and non-absolute path. if (os::hasenv("MESOS_FRAMEWORKS_HOME")) { local = path::join(os::getenv("MESOS_FRAMEWORKS_HOME"), local); LOG(INFO) << "Prepended environment variable " << "MESOS_FRAMEWORKS_HOME to relative path, " << "making it: '" << local << "'"; } else { LOG(ERROR) << "A relative path was passed for the resource but the " << "environment variable MESOS_FRAMEWORKS_HOME is not set. " << "Please either specify this config option " << "or avoid using a relative path"; return Error("Could not resolve relative URI"); } } Try<string> base = os::basename(local); if (base.isError()) { LOG(ERROR) << base.error(); return Error("Fetch of URI failed"); } // Copy the resource to the directory. string path = path::join(directory, base.get()); std::ostringstream command; command << "cp '" << local << "' '" << path << "'"; LOG(INFO) << "Copying resource from '" << local << "' to '" << directory << "'"; int status = os::system(command.str()); if (status != 0) { LOG(ERROR) << "Failed to copy '" << local << "' : Exit status " << status; return Error("Local copy failed"); } return path; } }