Future<Nothing> HadoopFetcherPlugin::fetch( const URI& uri, const string& directory) { // TODO(jieyu): Validate the given URI. if (!uri.has_path()) { return Failure("URI path is not specified"); } Try<Nothing> mkdir = os::mkdir(directory); if (mkdir.isError()) { return Failure( "Failed to create directory '" + directory + "': " + mkdir.error()); } // NOTE: We ignore the scheme prefix if the host in URI is not // specified. This is the case when the host is set using the hadoop // configuration file. // // TODO(jieyu): Allow user to specify the name of the output file. return hdfs.get()->copyToLocal( (uri.has_host() ? stringify(uri) : uri.path()), path::join(directory, Path(uri.path()).basename())); }
Future<Nothing> CurlFetcherPlugin::fetch( const URI& uri, const string& directory) { // TODO(jieyu): Validate the given URI. if (!uri.has_path()) { return Failure("URI path is not specified"); } if (!os::exists(directory)) { Try<Nothing> mkdir = os::mkdir(directory); if (mkdir.isError()) { return Failure( "Failed to create directory '" + directory + "': " + mkdir.error()); } } // TODO(jieyu): Allow user to specify the name of the output file. const string output = path::join(directory, Path(uri.path()).basename()); const vector<string> argv = { "curl", "-s", // Don’t show progress meter or error messages. "-S", // Makes curl show an error message if it fails. "-L", // Follow HTTP 3xx redirects. "-w", "%{http_code}", // Display HTTP response code on stdout. "-o", output, // Write output to the file. strings::trim(stringify(uri)) }; Try<Subprocess> s = subprocess( "curl", argv, Subprocess::PATH("/dev/null"), Subprocess::PIPE(), Subprocess::PIPE()); if (s.isError()) { return Failure("Failed to exec the curl subprocess: " + s.error()); } return await( s.get().status(), io::read(s.get().out().get()), io::read(s.get().err().get())) .then(_fetch); }
Future<Nothing> CopyFetcherPlugin::fetch( const URI& uri, const string& directory) const { // TODO(jojy): Validate the given URI. if (!uri.has_path()) { return Failure("URI path is not specified"); } // TODO(jojy): Verify that the path is a file. Try<Nothing> mkdir = os::mkdir(directory); if (mkdir.isError()) { return Failure( "Failed to create directory '" + directory + "': " + mkdir.error()); } VLOG(1) << "Copying '" << uri.path() << "' to '" << directory << "'"; const vector<string> argv = {"cp", "-a", uri.path(), directory}; Try<Subprocess> s = subprocess( "cp", argv, Subprocess::PATH(os::DEV_NULL), Subprocess::PIPE(), Subprocess::PIPE()); if (s.isError()) { return Failure("Failed to exec the copy subprocess: " + s.error()); } return await( s.get().status(), io::read(s.get().out().get()), io::read(s.get().err().get())) .then([](const tuple< Future<Option<int>>, Future<string>, Future<string>>& t) -> Future<Nothing> { Future<Option<int>> status = std::get<0>(t); if (!status.isReady()) { return Failure( "Failed to get the exit status of the copy subprocess: " + (status.isFailed() ? status.failure() : "discarded")); } if (status->isNone()) { return Failure("Failed to reap the copy subprocess"); } if (status->get() != 0) { Future<string> error = std::get<2>(t); if (!error.isReady()) { return Failure( "Failed to perform 'copy'. Reading stderr failed: " + (error.isFailed() ? error.failure() : "discarded")); } return Failure("Failed to perform 'copy': " + error.get()); } return Nothing(); }); }
Future<Nothing> CurlFetcherPlugin::fetch( const URI& uri, const string& directory) { // TODO(jieyu): Validate the given URI. if (!uri.has_path()) { return Failure("URI path is not specified"); } Try<Nothing> mkdir = os::mkdir(directory); if (mkdir.isError()) { return Failure( "Failed to create directory '" + directory + "': " + mkdir.error()); } // TODO(jieyu): Allow user to specify the name of the output file. const string output = path::join(directory, Path(uri.path()).basename()); const vector<string> argv = { "curl", "-s", // Don’t show progress meter or error messages. "-S", // Makes curl show an error message if it fails. "-L", // Follow HTTP 3xx redirects. "-w", "%{http_code}", // Display HTTP response code on stdout. "-o", output, // Write output to the file. strings::trim(stringify(uri)) }; Try<Subprocess> s = subprocess( "curl", argv, Subprocess::PATH("/dev/null"), Subprocess::PIPE(), Subprocess::PIPE()); if (s.isError()) { return Failure("Failed to exec the curl subprocess: " + s.error()); } return await( s.get().status(), io::read(s.get().out().get()), io::read(s.get().err().get())) .then([](const tuple< Future<Option<int>>, Future<string>, Future<string>>& t) -> Future<Nothing> { Future<Option<int>> status = std::get<0>(t); if (!status.isReady()) { return Failure( "Failed to get the exit status of the curl subprocess: " + (status.isFailed() ? status.failure() : "discarded")); } if (status->isNone()) { return Failure("Failed to reap the curl subprocess"); } if (status->get() != 0) { Future<string> error = std::get<2>(t); if (!error.isReady()) { return Failure( "Failed to perform 'curl'. Reading stderr failed: " + (error.isFailed() ? error.failure() : "discarded")); } return Failure("Failed to perform 'curl': " + error.get()); } Future<string> output = std::get<1>(t); if (!output.isReady()) { return Failure( "Failed to read stdout from 'curl': " + (output.isFailed() ? output.failure() : "discarded")); } // Parse the output and get the HTTP response code. Try<int> code = numify<int>(output.get()); if (code.isError()) { return Failure("Unexpected output from 'curl': " + output.get()); } if (code.get() != http::Status::OK) { return Failure( "Unexpected HTTP response code: " + http::Status::string(code.get())); } return Nothing(); }); }