Ejemplo n.º 1
0
Future<Nothing> HadoopFetcherPlugin::fetch(
    const URI& uri,
    const string& directory)
{
  // TODO(jieyu): Validate the given URI.

  if (!uri.has_path()) {
    return Failure("URI path is not specified");
  }

  Try<Nothing> mkdir = os::mkdir(directory);
  if (mkdir.isError()) {
    return Failure(
        "Failed to create directory '" +
        directory + "': " + mkdir.error());
  }

  // NOTE: We ignore the scheme prefix if the host in URI is not
  // specified. This is the case when the host is set using the hadoop
  // configuration file.
  //
  // TODO(jieyu): Allow user to specify the name of the output file.
  return hdfs.get()->copyToLocal(
      (uri.has_host() ? stringify(uri) : uri.path()),
      path::join(directory, Path(uri.path()).basename()));
}
Ejemplo n.º 2
0
Future<Nothing> CurlFetcherPlugin::fetch(
    const URI& uri,
    const string& directory)
{
  // TODO(jieyu): Validate the given URI.

  if (!uri.has_path()) {
    return Failure("URI path is not specified");
  }

  if (!os::exists(directory)) {
    Try<Nothing> mkdir = os::mkdir(directory);
    if (mkdir.isError()) {
      return Failure(
          "Failed to create directory '" +
          directory + "': " + mkdir.error());
    }
  }

  // TODO(jieyu): Allow user to specify the name of the output file.
  const string output = path::join(directory, Path(uri.path()).basename());

  const vector<string> argv = {
    "curl",
    "-s",                 // Don’t show progress meter or error messages.
    "-S",                 // Makes curl show an error message if it fails.
    "-L",                 // Follow HTTP 3xx redirects.
    "-w", "%{http_code}", // Display HTTP response code on stdout.
    "-o", output,         // Write output to the file.
    strings::trim(stringify(uri))
  };

  Try<Subprocess> s = subprocess(
      "curl",
      argv,
      Subprocess::PATH("/dev/null"),
      Subprocess::PIPE(),
      Subprocess::PIPE());

  if (s.isError()) {
    return Failure("Failed to exec the curl subprocess: " + s.error());
  }

  return await(
      s.get().status(),
      io::read(s.get().out().get()),
      io::read(s.get().err().get()))
    .then(_fetch);
}
Ejemplo n.º 3
0
Future<Nothing> CopyFetcherPlugin::fetch(
    const URI& uri,
    const string& directory) const
{
  // TODO(jojy): Validate the given URI.

  if (!uri.has_path()) {
    return Failure("URI path is not specified");
  }

  // TODO(jojy): Verify that the path is a file.

  Try<Nothing> mkdir = os::mkdir(directory);
  if (mkdir.isError()) {
    return Failure(
        "Failed to create directory '" +
        directory + "': " + mkdir.error());
  }

  VLOG(1) << "Copying '" << uri.path() << "' to '" << directory << "'";

  const vector<string> argv = {"cp", "-a", uri.path(), directory};

  Try<Subprocess> s = subprocess(
      "cp",
      argv,
      Subprocess::PATH(os::DEV_NULL),
      Subprocess::PIPE(),
      Subprocess::PIPE());

  if (s.isError()) {
    return Failure("Failed to exec the copy subprocess: " + s.error());
  }

  return await(
      s.get().status(),
      io::read(s.get().out().get()),
      io::read(s.get().err().get()))
    .then([](const tuple<
        Future<Option<int>>,
        Future<string>,
        Future<string>>& t) -> Future<Nothing> {
      Future<Option<int>> status = std::get<0>(t);
      if (!status.isReady()) {
        return Failure(
            "Failed to get the exit status of the copy subprocess: " +
            (status.isFailed() ? status.failure() : "discarded"));
      }

      if (status->isNone()) {
        return Failure("Failed to reap the copy subprocess");
      }

      if (status->get() != 0) {
        Future<string> error = std::get<2>(t);
        if (!error.isReady()) {
          return Failure(
              "Failed to perform 'copy'. Reading stderr failed: " +
              (error.isFailed() ? error.failure() : "discarded"));
        }

        return Failure("Failed to perform 'copy': " + error.get());
      }

      return Nothing();
    });
}
Ejemplo n.º 4
0
Future<Nothing> CurlFetcherPlugin::fetch(
    const URI& uri,
    const string& directory)
{
  // TODO(jieyu): Validate the given URI.

  if (!uri.has_path()) {
    return Failure("URI path is not specified");
  }

  Try<Nothing> mkdir = os::mkdir(directory);
  if (mkdir.isError()) {
    return Failure(
        "Failed to create directory '" +
        directory + "': " + mkdir.error());
  }

  // TODO(jieyu): Allow user to specify the name of the output file.
  const string output = path::join(directory, Path(uri.path()).basename());

  const vector<string> argv = {
    "curl",
    "-s",                 // Don’t show progress meter or error messages.
    "-S",                 // Makes curl show an error message if it fails.
    "-L",                 // Follow HTTP 3xx redirects.
    "-w", "%{http_code}", // Display HTTP response code on stdout.
    "-o", output,         // Write output to the file.
    strings::trim(stringify(uri))
  };

  Try<Subprocess> s = subprocess(
      "curl",
      argv,
      Subprocess::PATH("/dev/null"),
      Subprocess::PIPE(),
      Subprocess::PIPE());

  if (s.isError()) {
    return Failure("Failed to exec the curl subprocess: " + s.error());
  }

  return await(
      s.get().status(),
      io::read(s.get().out().get()),
      io::read(s.get().err().get()))
    .then([](const tuple<
        Future<Option<int>>,
        Future<string>,
        Future<string>>& t) -> Future<Nothing> {
      Future<Option<int>> status = std::get<0>(t);
      if (!status.isReady()) {
        return Failure(
            "Failed to get the exit status of the curl subprocess: " +
            (status.isFailed() ? status.failure() : "discarded"));
      }

      if (status->isNone()) {
        return Failure("Failed to reap the curl subprocess");
      }

      if (status->get() != 0) {
        Future<string> error = std::get<2>(t);
        if (!error.isReady()) {
          return Failure(
              "Failed to perform 'curl'. Reading stderr failed: " +
              (error.isFailed() ? error.failure() : "discarded"));
        }

        return Failure("Failed to perform 'curl': " + error.get());
      }

      Future<string> output = std::get<1>(t);
      if (!output.isReady()) {
        return Failure(
            "Failed to read stdout from 'curl': " +
            (output.isFailed() ? output.failure() : "discarded"));
      }

      // Parse the output and get the HTTP response code.
      Try<int> code = numify<int>(output.get());
      if (code.isError()) {
        return Failure("Unexpected output from 'curl': " + output.get());
      }

      if (code.get() != http::Status::OK) {
        return Failure(
            "Unexpected HTTP response code: " +
            http::Status::string(code.get()));
      }

      return Nothing();
    });
}