Exemplo n.º 1
0
// Copies the resource named by `uri` into `directory` by way of the
// Hadoop client. Yields the local path of the copy (`directory` joined
// with the basename of `uri`) on success, and an Error when the client
// is not available, the basename cannot be determined, or the copy
// itself fails. Each failure is also written to the log.
Try<string> fetchWithHadoopClient(
    const string& uri,
    const string& directory)
{
  HDFS client;

  // Nothing can be fetched unless the client reports itself available.
  Try<bool> probe = client.available();
  const bool clientReady = !probe.isError() && probe.get();
  if (!clientReady) {
    LOG(INFO) << "Hadoop Client not available, "
                 "skipping fetch with Hadoop Client";
    return Error("Hadoop Client unavailable");
  }

  LOG(INFO) << "Fetching URI '" << uri << "' using Hadoop Client";

  // The local copy keeps the file name of the remote resource.
  Try<string> fileName = os::basename(uri);
  if (fileName.isError()) {
    LOG(ERROR) << "Invalid basename for URI: " << fileName.error();
    return Error("Invalid basename for URI");
  }

  string destination = path::join(directory, fileName.get());

  LOG(INFO) << "Downloading resource from '" << uri
            << "' to '" << destination << "'";

  Try<Nothing> copied = client.copyToLocal(uri, destination);
  if (copied.isError()) {
    LOG(ERROR) << "HDFS copyToLocal failed: " << copied.error();
    return Error(copied.error());
  }

  return destination;
}
Exemplo n.º 2
0
// Find out how large a potential download from the given URI is.
//
// The size is determined by the first applicable strategy:
//   1. If Fetcher::uriToLocalPath() maps the URI to a local path, the
//      size comes from os::stat::size() (called with FOLLOW_SYMLINK).
//   2. If Fetcher::isNetUri() accepts the URI, the size comes from
//      net::contentLength(); a reported length of 0 is treated as an
//      error.
//   3. Otherwise the Hadoop client is asked via hdfs.du().
//
// Returns the size on success, or an Error describing which step
// failed.
static Try<Bytes> fetchSize(
    const string& uri,
    const Option<string>& frameworksHome)
{
  VLOG(1) << "Fetching size for URI: " << uri;

  Result<string> path = Fetcher::uriToLocalPath(uri, frameworksHome);
  if (path.isError()) {
    return Error(path.error());
  }
  if (path.isSome()) {
    Try<Bytes> size = os::stat::size(path.get(), os::stat::FOLLOW_SYMLINK);
    if (size.isError()) {
      return Error("Could not determine file size for: '" + path.get() +
                     "', error: " + size.error());
    }
    return size.get();
  }

  if (Fetcher::isNetUri(uri)) {
    Try<Bytes> size = net::contentLength(uri);
    if (size.isError()) {
      return Error(size.error());
    }
    if (size.get() == 0) {
      return Error("URI reported content-length 0: " + uri);
    }

    return size.get();
  }

  HDFS hdfs;

  // The availability probe has two distinct failure modes: the probe
  // itself failed (an error message exists), or it succeeded and
  // reported 'false' (no error message exists). 'error()' must only be
  // read in the former case.
  Try<bool> available = hdfs.available();
  if (available.isError()) {
    return Error("Hadoop client not available: " + available.error());
  }
  if (!available.get()) {
    return Error("Hadoop client not available");
  }

  Try<Bytes> size = hdfs.du(uri);
  if (size.isError()) {
    return Error("Hadoop client could not determine size: " + size.error());
  }

  return size.get();
}
Exemplo n.º 3
0
// Attempt to get the uri using the hadoop client.
//
// Copies `sourceUri` to `destinationPath` with hdfs.copyToLocal().
// Returns `destinationPath` on success, or an Error when the Hadoop
// client is not available or the copy fails.
static Try<string> downloadWithHadoopClient(
    const string& sourceUri,
    const string& destinationPath)
{
  HDFS hdfs;
  Try<bool> available = hdfs.available();

  // Distinguish "the availability probe failed" (an error message
  // exists) from "the probe succeeded and reported 'false'" (no error
  // message exists); 'error()' must only be read in the former case.
  if (available.isError()) {
    return Error("Skipping fetch with Hadoop Client as"
                 " Hadoop Client not available: " + available.error());
  }
  if (!available.get()) {
    return Error("Skipping fetch with Hadoop Client as"
                 " Hadoop Client not available");
  }

  LOG(INFO) << "Downloading resource with Hadoop client from '" << sourceUri
            << "' to '" << destinationPath << "'";

  Try<Nothing> result = hdfs.copyToLocal(sourceUri, destinationPath);
  if (result.isError()) {
    return Error("HDFS copyToLocal failed: " + result.error());
  }

  return destinationPath;
}
Exemplo n.º 4
0
// Fetch URI into directory.
//
// Dispatches on the URI scheme:
//   - hdfs://, hftp://, s3://, s3n://: copied with the Hadoop client.
//   - http://, https://, ftp://, ftps://: downloaded with
//     net::download().
//   - anything else: treated as a local file (optionally prefixed with
//     FILE_URI_LOCALHOST or FILE_URI_PREFIX) and copied with `cp`.
//     Relative paths are resolved against the MESOS_FRAMEWORKS_HOME
//     environment variable.
//
// Returns the path of the fetched file inside `directory`, or an Error
// describing why the fetch failed. Failures are also logged.
Try<string> fetch(
    const string& uri,
    const string& directory)
{
  LOG(INFO) << "Fetching URI '" << uri << "'";

  // Some checks to make sure using the URI value in shell commands
  // is safe. TODO(benh): These should be pushed into the scheduler
  // driver and reported to the user.
  if (uri.find_first_of('\\') != string::npos ||
      uri.find_first_of('\'') != string::npos ||
      uri.find_first_of('\0') != string::npos) {
    LOG(ERROR) << "URI contains illegal characters, refusing to fetch";
    return Error("Illegal characters in URI");
  }

  // Grab the resource using the hadoop client if it's one of the known schemes
  // TODO(tarnfeld): This isn't very scalable with hadoop's pluggable
  // filesystem implementations.
  // TODO(matei): Enforce some size limits on files we get from HDFS
  if (strings::startsWith(uri, "hdfs://") ||
      strings::startsWith(uri, "hftp://") ||
      strings::startsWith(uri, "s3://") ||
      strings::startsWith(uri, "s3n://")) {
    Try<string> base = os::basename(uri);
    if (base.isError()) {
      LOG(ERROR) << "Invalid basename for URI: " << base.error();
      return Error("Invalid basename for URI");
    }
    string path = path::join(directory, base.get());

    HDFS hdfs;

    LOG(INFO) << "Downloading resource from '" << uri
              << "' to '" << path << "'";
    Try<Nothing> result = hdfs.copyToLocal(uri, path);
    if (result.isError()) {
      LOG(ERROR) << "HDFS copyToLocal failed: " << result.error();
      return Error(result.error());
    }

    return path;
  } else if (strings::startsWith(uri, "http://") ||
             strings::startsWith(uri, "https://") ||
             strings::startsWith(uri, "ftp://") ||
             strings::startsWith(uri, "ftps://")) {
    // Strip the scheme and require a non-empty path after the host.
    string path = uri.substr(uri.find("://") + 3);
    if (path.find('/') == string::npos ||
        path.size() <= path.find('/') + 1) {
      LOG(ERROR) << "Malformed URL (missing path)";
      return Error("Malformed URI");
    }

    // The download is stored under the last path component of the URL.
    path = path::join(directory, path.substr(path.find_last_of('/') + 1));
    LOG(INFO) << "Downloading '" << uri << "' to '" << path << "'";
    Try<int> code = net::download(uri, path);
    if (code.isError()) {
      LOG(ERROR) << "Error downloading resource: " << code.error();
      return Error("Fetch of URI failed (" + code.error() + ")");
    }

    // A successful HTTP request reports status 200, whereas a completed
    // FTP transfer reports reply code 226, so the expected code depends
    // on the scheme.
    // NOTE(review): this assumes net::download() returns the protocol's
    // last response code (as libcurl's CURLINFO_RESPONSE_CODE does) --
    // confirm against its implementation.
    const bool isFtp = strings::startsWith(uri, "ftp://") ||
                       strings::startsWith(uri, "ftps://");
    const int expected = isFtp ? 226 : 200;
    if (code.get() != expected) {
      LOG(ERROR) << "Error downloading resource, received HTTP/FTP return code "
                 << code.get();
      return Error("HTTP/FTP error (" + stringify(code.get()) + ")");
    }

    return path;
  } else { // Copy the local resource.
    string local = uri;
    bool fileUri = false;
    if (strings::startsWith(local, string(FILE_URI_LOCALHOST))) {
      local = local.substr(sizeof(FILE_URI_LOCALHOST) - 1);
      fileUri = true;
    } else if (strings::startsWith(local, string(FILE_URI_PREFIX))) {
      local = local.substr(sizeof(FILE_URI_PREFIX) - 1);
      fileUri = true;
    }

    if (fileUri && !strings::startsWith(local, "/")) {
      return Error("File URI only supports absolute paths");
    }

    if (local.find_first_of('/') != 0) {
      // We got a non-Hadoop and non-absolute path.
      if (os::hasenv("MESOS_FRAMEWORKS_HOME")) {
        local = path::join(os::getenv("MESOS_FRAMEWORKS_HOME"), local);
        LOG(INFO) << "Prepended environment variable "
                  << "MESOS_FRAMEWORKS_HOME to relative path, "
                  << "making it: '" << local << "'";
      } else {
        LOG(ERROR) << "A relative path was passed for the resource but the "
                   << "environment variable MESOS_FRAMEWORKS_HOME is not set. "
                   << "Please either specify this config option "
                   << "or avoid using a relative path";
        return Error("Could not resolve relative URI");
      }
    }

    Try<string> base = os::basename(local);
    if (base.isError()) {
      LOG(ERROR) << base.error();
      return Error("Fetch of URI failed");
    }

    // Copy the resource to the directory.
    // NOTE(review): only `uri` was screened for quote/backslash
    // characters above; `directory` and MESOS_FRAMEWORKS_HOME reach the
    // shell command unchecked -- confirm callers never pass untrusted
    // values for them.
    string path = path::join(directory, base.get());
    std::ostringstream command;
    command << "cp '" << local << "' '" << path << "'";
    LOG(INFO) << "Copying resource from '" << local
              << "' to '" << directory << "'";

    int status = os::system(command.str());
    if (status != 0) {
      LOG(ERROR) << "Failed to copy '" << local
                 << "' : Exit status " << status;
      return Error("Local copy failed");
    }

    return path;
  }
}