// Attempt to get the uri using the hadoop client. Try<string> fetchWithHadoopClient( const string& uri, const string& directory) { HDFS hdfs; Try<bool> available = hdfs.available(); if (available.isError() || !available.get()) { LOG(INFO) << "Hadoop Client not available, " << "skipping fetch with Hadoop Client"; return Error("Hadoop Client unavailable"); } LOG(INFO) << "Fetching URI '" << uri << "' using Hadoop Client"; Try<string> base = os::basename(uri); if (base.isError()) { LOG(ERROR) << "Invalid basename for URI: " << base.error(); return Error("Invalid basename for URI"); } string path = path::join(directory, base.get()); LOG(INFO) << "Downloading resource from '" << uri << "' to '" << path << "'"; Try<Nothing> result = hdfs.copyToLocal(uri, path); if (result.isError()) { LOG(ERROR) << "HDFS copyToLocal failed: " << result.error(); return Error(result.error()); } return path; }
// Find out how large a potential download from the given URI is. static Try<Bytes> fetchSize( const string& uri, const Option<string>& frameworksHome) { VLOG(1) << "Fetching size for URI: " << uri; Result<string> path = Fetcher::uriToLocalPath(uri, frameworksHome); if (path.isError()) { return Error(path.error()); } if (path.isSome()) { Try<Bytes> size = os::stat::size(path.get(), os::stat::FOLLOW_SYMLINK); if (size.isError()) { return Error("Could not determine file size for: '" + path.get() + "', error: " + size.error()); } return size.get(); } if (Fetcher::isNetUri(uri)) { Try<Bytes> size = net::contentLength(uri); if (size.isError()) { return Error(size.error()); } if (size.get() == 0) { return Error("URI reported content-length 0: " + uri); } return size.get(); } HDFS hdfs; Try<bool> available = hdfs.available(); if (available.isError() || !available.get()) { return Error("Hadoop client not available: " + available.error()); } Try<Bytes> size = hdfs.du(uri); if (size.isError()) { return Error("Hadoop client could not determine size: " + size.error()); } return size.get(); }
// Attempt to get the uri using the hadoop client. static Try<string> downloadWithHadoopClient( const string& sourceUri, const string& destinationPath) { HDFS hdfs; Try<bool> available = hdfs.available(); if (available.isError() || !available.get()) { return Error("Skipping fetch with Hadoop Client as" " Hadoop Client not available: " + available.error()); } LOG(INFO) << "Downloading resource with Hadoop client from '" << sourceUri << "' to '" << destinationPath << "'"; Try<Nothing> result = hdfs.copyToLocal(sourceUri, destinationPath); if (result.isError()) { return Error("HDFS copyToLocal failed: " + result.error()); } return destinationPath; }