예제 #1
0
void HIPToolChain::addClangTargetOptions(
    const llvm::opt::ArgList &DriverArgs,
    llvm::opt::ArgStringList &CC1Args,
    Action::OffloadKind DeviceOffloadingKind) const {
  HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind);

  StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
  assert(!GpuArch.empty() && "Must have an explicit GPU arch.");
  (void) GpuArch;
  assert(DeviceOffloadingKind == Action::OFK_HIP &&
         "Only HIP offloading kinds are supported for GPUs.");

  CC1Args.push_back("-fcuda-is-device");

  if (DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero,
                         options::OPT_fno_cuda_flush_denormals_to_zero, false))
    CC1Args.push_back("-fcuda-flush-denormals-to-zero");

  if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals,
                         options::OPT_fno_cuda_approx_transcendentals, false))
    CC1Args.push_back("-fcuda-approx-transcendentals");

  if (DriverArgs.hasFlag(options::OPT_fcuda_rdc, options::OPT_fno_cuda_rdc,
                         false))
    CC1Args.push_back("-fcuda-rdc");
}
예제 #2
0
파일: Cuda.cpp 프로젝트: Bekenn/clang
void CudaToolChain::addClangTargetOptions(
    const llvm::opt::ArgList &DriverArgs,
    llvm::opt::ArgStringList &CC1Args,
    Action::OffloadKind DeviceOffloadingKind) const {
  HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind);

  StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
  assert(!GpuArch.empty() && "Must have an explicit GPU arch.");
  assert((DeviceOffloadingKind == Action::OFK_OpenMP ||
          DeviceOffloadingKind == Action::OFK_Cuda) &&
         "Only OpenMP or CUDA offloading kinds are supported for NVIDIA GPUs.");

  if (DeviceOffloadingKind == Action::OFK_Cuda) {
    CC1Args.push_back("-fcuda-is-device");

    if (DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero,
                           options::OPT_fno_cuda_flush_denormals_to_zero, false))
      CC1Args.push_back("-fcuda-flush-denormals-to-zero");

    if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals,
                           options::OPT_fno_cuda_approx_transcendentals, false))
      CC1Args.push_back("-fcuda-approx-transcendentals");
  }

  if (DriverArgs.hasArg(options::OPT_nocudalib))
    return;

  std::string LibDeviceFile = CudaInstallation.getLibDeviceFile(GpuArch);

  if (LibDeviceFile.empty()) {
    if (DeviceOffloadingKind == Action::OFK_OpenMP &&
        DriverArgs.hasArg(options::OPT_S))
      return;

    getDriver().Diag(diag::err_drv_no_cuda_libdevice) << GpuArch;
    return;
  }

  CC1Args.push_back("-mlink-cuda-bitcode");
  CC1Args.push_back(DriverArgs.MakeArgString(LibDeviceFile));

  if (CudaInstallation.version() >= CudaVersion::CUDA_90) {
    // CUDA-9 uses new instructions that are only available in PTX6.0
    CC1Args.push_back("-target-feature");
    CC1Args.push_back("+ptx60");
  } else {
    // Libdevice in CUDA-7.0 requires PTX version that's more recent
    // than LLVM defaults to. Use PTX4.2 which is the PTX version that
    // came with CUDA-7.0.
    CC1Args.push_back("-target-feature");
    CC1Args.push_back("+ptx42");
  }
}
예제 #3
0
파일: Cuda.cpp 프로젝트: Bekenn/clang
CudaInstallationDetector::CudaInstallationDetector(
    const Driver &D, const llvm::Triple &HostTriple,
    const llvm::opt::ArgList &Args)
    : D(D) {
  struct Candidate {
    std::string Path;
    bool StrictChecking;

    Candidate(std::string Path, bool StrictChecking = false)
        : Path(Path), StrictChecking(StrictChecking) {}
  };
  SmallVector<Candidate, 4> Candidates;

  // In decreasing order so we prefer newer versions to older versions.
  std::initializer_list<const char *> Versions = {"8.0", "7.5", "7.0"};

  if (Args.hasArg(clang::driver::options::OPT_cuda_path_EQ)) {
    Candidates.emplace_back(
        Args.getLastArgValue(clang::driver::options::OPT_cuda_path_EQ).str());
  } else if (HostTriple.isOSWindows()) {
    for (const char *Ver : Versions)
      Candidates.emplace_back(
          D.SysRoot + "/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v" +
          Ver);
  } else {
    if (!Args.hasArg(clang::driver::options::OPT_cuda_path_ignore_env)) {
      // Try to find ptxas binary. If the executable is located in a directory
      // called 'bin/', its parent directory might be a good guess for a valid
      // CUDA installation.
      // However, some distributions might installs 'ptxas' to /usr/bin. In that
      // case the candidate would be '/usr' which passes the following checks
      // because '/usr/include' exists as well. To avoid this case, we always
      // check for the directory potentially containing files for libdevice,
      // even if the user passes -nocudalib.
      if (llvm::ErrorOr<std::string> ptxas =
              llvm::sys::findProgramByName("ptxas")) {
        SmallString<256> ptxasAbsolutePath;
        llvm::sys::fs::real_path(*ptxas, ptxasAbsolutePath);

        StringRef ptxasDir = llvm::sys::path::parent_path(ptxasAbsolutePath);
        if (llvm::sys::path::filename(ptxasDir) == "bin")
          Candidates.emplace_back(llvm::sys::path::parent_path(ptxasDir),
                                  /*StrictChecking=*/true);
      }
    }

    Candidates.emplace_back(D.SysRoot + "/usr/local/cuda");
    for (const char *Ver : Versions)
      Candidates.emplace_back(D.SysRoot + "/usr/local/cuda-" + Ver);

    if (Distro(D.getVFS()).IsDebian())
      // Special case for Debian to have nvidia-cuda-toolkit work
      // out of the box. More info on http://bugs.debian.org/882505
      Candidates.emplace_back(D.SysRoot + "/usr/lib/cuda");
  }

  bool NoCudaLib = Args.hasArg(options::OPT_nocudalib);

  for (const auto &Candidate : Candidates) {
    InstallPath = Candidate.Path;
    if (InstallPath.empty() || !D.getVFS().exists(InstallPath))
      continue;

    BinPath = InstallPath + "/bin";
    IncludePath = InstallPath + "/include";
    LibDevicePath = InstallPath + "/nvvm/libdevice";

    auto &FS = D.getVFS();
    if (!(FS.exists(IncludePath) && FS.exists(BinPath)))
      continue;
    bool CheckLibDevice = (!NoCudaLib || Candidate.StrictChecking);
    if (CheckLibDevice && !FS.exists(LibDevicePath))
      continue;

    // On Linux, we have both lib and lib64 directories, and we need to choose
    // based on our triple.  On MacOS, we have only a lib directory.
    //
    // It's sufficient for our purposes to be flexible: If both lib and lib64
    // exist, we choose whichever one matches our triple.  Otherwise, if only
    // lib exists, we use it.
    if (HostTriple.isArch64Bit() && FS.exists(InstallPath + "/lib64"))
      LibPath = InstallPath + "/lib64";
    else if (FS.exists(InstallPath + "/lib"))
      LibPath = InstallPath + "/lib";
    else
      continue;

    llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> VersionFile =
        FS.getBufferForFile(InstallPath + "/version.txt");
    if (!VersionFile) {
      // CUDA 7.0 doesn't have a version.txt, so guess that's our version if
      // version.txt isn't present.
      Version = CudaVersion::CUDA_70;
    } else {
      Version = ParseCudaVersionFile((*VersionFile)->getBuffer());
    }

    if (Version >= CudaVersion::CUDA_90) {
      // CUDA-9+ uses single libdevice file for all GPU variants.
      std::string FilePath = LibDevicePath + "/libdevice.10.bc";
      if (FS.exists(FilePath)) {
        for (const char *GpuArchName :
             {"sm_20", "sm_30", "sm_32", "sm_35", "sm_50", "sm_52", "sm_53",
                   "sm_60", "sm_61", "sm_62", "sm_70", "sm_72"}) {
          const CudaArch GpuArch = StringToCudaArch(GpuArchName);
          if (Version >= MinVersionForCudaArch(GpuArch) &&
              Version <= MaxVersionForCudaArch(GpuArch))
            LibDeviceMap[GpuArchName] = FilePath;
        }
      }
    } else {
      std::error_code EC;
      for (llvm::sys::fs::directory_iterator LI(LibDevicePath, EC), LE;
           !EC && LI != LE; LI = LI.increment(EC)) {
        StringRef FilePath = LI->path();
        StringRef FileName = llvm::sys::path::filename(FilePath);
        // Process all bitcode filenames that look like
        // libdevice.compute_XX.YY.bc
        const StringRef LibDeviceName = "libdevice.";
        if (!(FileName.startswith(LibDeviceName) && FileName.endswith(".bc")))
          continue;
        StringRef GpuArch = FileName.slice(
            LibDeviceName.size(), FileName.find('.', LibDeviceName.size()));
        LibDeviceMap[GpuArch] = FilePath.str();
        // Insert map entries for specifc devices with this compute
        // capability. NVCC's choice of the libdevice library version is
        // rather peculiar and depends on the CUDA version.
        if (GpuArch == "compute_20") {
          LibDeviceMap["sm_20"] = FilePath;
          LibDeviceMap["sm_21"] = FilePath;
          LibDeviceMap["sm_32"] = FilePath;
        } else if (GpuArch == "compute_30") {
          LibDeviceMap["sm_30"] = FilePath;
          if (Version < CudaVersion::CUDA_80) {
            LibDeviceMap["sm_50"] = FilePath;
            LibDeviceMap["sm_52"] = FilePath;
            LibDeviceMap["sm_53"] = FilePath;
          }
          LibDeviceMap["sm_60"] = FilePath;
          LibDeviceMap["sm_61"] = FilePath;
          LibDeviceMap["sm_62"] = FilePath;
        } else if (GpuArch == "compute_35") {
          LibDeviceMap["sm_35"] = FilePath;
          LibDeviceMap["sm_37"] = FilePath;
        } else if (GpuArch == "compute_50") {
          if (Version >= CudaVersion::CUDA_80) {
            LibDeviceMap["sm_50"] = FilePath;
            LibDeviceMap["sm_52"] = FilePath;
            LibDeviceMap["sm_53"] = FilePath;
          }
        }
      }
    }

    // Check that we have found at least one libdevice that we can link in if
    // -nocudalib hasn't been specified.
    if (LibDeviceMap.empty() && !NoCudaLib)
      continue;

    IsValid = true;
    break;
  }
}
예제 #4
0
CudaInstallationDetector::CudaInstallationDetector(
    const Driver &D, const llvm::Triple &HostTriple,
    const llvm::opt::ArgList &Args)
    : D(D) {
  SmallVector<std::string, 4> CudaPathCandidates;

  // In decreasing order so we prefer newer versions to older versions.
  std::initializer_list<const char *> Versions = {"8.0", "7.5", "7.0"};

  if (Args.hasArg(clang::driver::options::OPT_cuda_path_EQ)) {
    CudaPathCandidates.push_back(
        Args.getLastArgValue(clang::driver::options::OPT_cuda_path_EQ));
  } else if (HostTriple.isOSWindows()) {
    for (const char *Ver : Versions)
      CudaPathCandidates.push_back(
          D.SysRoot + "/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v" +
          Ver);
  } else {
    CudaPathCandidates.push_back(D.SysRoot + "/usr/local/cuda");
    for (const char *Ver : Versions)
      CudaPathCandidates.push_back(D.SysRoot + "/usr/local/cuda-" + Ver);
  }

  for (const auto &CudaPath : CudaPathCandidates) {
    if (CudaPath.empty() || !D.getVFS().exists(CudaPath))
      continue;

    InstallPath = CudaPath;
    BinPath = CudaPath + "/bin";
    IncludePath = InstallPath + "/include";
    LibDevicePath = InstallPath + "/nvvm/libdevice";

    auto &FS = D.getVFS();
    if (!(FS.exists(IncludePath) && FS.exists(BinPath) &&
          FS.exists(LibDevicePath)))
      continue;

    // On Linux, we have both lib and lib64 directories, and we need to choose
    // based on our triple.  On MacOS, we have only a lib directory.
    //
    // It's sufficient for our purposes to be flexible: If both lib and lib64
    // exist, we choose whichever one matches our triple.  Otherwise, if only
    // lib exists, we use it.
    if (HostTriple.isArch64Bit() && FS.exists(InstallPath + "/lib64"))
      LibPath = InstallPath + "/lib64";
    else if (FS.exists(InstallPath + "/lib"))
      LibPath = InstallPath + "/lib";
    else
      continue;

    llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> VersionFile =
        FS.getBufferForFile(InstallPath + "/version.txt");
    if (!VersionFile) {
      // CUDA 7.0 doesn't have a version.txt, so guess that's our version if
      // version.txt isn't present.
      Version = CudaVersion::CUDA_70;
    } else {
      Version = ParseCudaVersionFile((*VersionFile)->getBuffer());
    }

    std::error_code EC;
    for (llvm::sys::fs::directory_iterator LI(LibDevicePath, EC), LE;
         !EC && LI != LE; LI = LI.increment(EC)) {
      StringRef FilePath = LI->path();
      StringRef FileName = llvm::sys::path::filename(FilePath);
      // Process all bitcode filenames that look like libdevice.compute_XX.YY.bc
      const StringRef LibDeviceName = "libdevice.";
      if (!(FileName.startswith(LibDeviceName) && FileName.endswith(".bc")))
        continue;
      StringRef GpuArch = FileName.slice(
          LibDeviceName.size(), FileName.find('.', LibDeviceName.size()));
      LibDeviceMap[GpuArch] = FilePath.str();
      // Insert map entries for specifc devices with this compute
      // capability. NVCC's choice of the libdevice library version is
      // rather peculiar and depends on the CUDA version.
      if (GpuArch == "compute_20") {
        LibDeviceMap["sm_20"] = FilePath;
        LibDeviceMap["sm_21"] = FilePath;
        LibDeviceMap["sm_32"] = FilePath;
      } else if (GpuArch == "compute_30") {
        LibDeviceMap["sm_30"] = FilePath;
        if (Version < CudaVersion::CUDA_80) {
          LibDeviceMap["sm_50"] = FilePath;
          LibDeviceMap["sm_52"] = FilePath;
          LibDeviceMap["sm_53"] = FilePath;
        }
        LibDeviceMap["sm_60"] = FilePath;
        LibDeviceMap["sm_61"] = FilePath;
        LibDeviceMap["sm_62"] = FilePath;
      } else if (GpuArch == "compute_35") {
        LibDeviceMap["sm_35"] = FilePath;
        LibDeviceMap["sm_37"] = FilePath;
      } else if (GpuArch == "compute_50") {
        if (Version >= CudaVersion::CUDA_80) {
          LibDeviceMap["sm_50"] = FilePath;
          LibDeviceMap["sm_52"] = FilePath;
          LibDeviceMap["sm_53"] = FilePath;
        }
      }
    }

    IsValid = true;
    break;
  }
}
예제 #5
0
void CudaToolChain::addClangTargetOptions(
    const llvm::opt::ArgList &DriverArgs,
    llvm::opt::ArgStringList &CC1Args,
    Action::OffloadKind DeviceOffloadingKind) const {
  HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind);

  StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
  assert(!GpuArch.empty() && "Must have an explicit GPU arch.");
  assert((DeviceOffloadingKind == Action::OFK_OpenMP ||
          DeviceOffloadingKind == Action::OFK_Cuda) &&
         "Only OpenMP or CUDA offloading kinds are supported for NVIDIA GPUs.");

  if (DeviceOffloadingKind == Action::OFK_Cuda) {
    CC1Args.push_back("-fcuda-is-device");

    if (DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero,
                           options::OPT_fno_cuda_flush_denormals_to_zero, false))
      CC1Args.push_back("-fcuda-flush-denormals-to-zero");

    if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals,
                           options::OPT_fno_cuda_approx_transcendentals, false))
      CC1Args.push_back("-fcuda-approx-transcendentals");

    if (DriverArgs.hasFlag(options::OPT_fcuda_rdc, options::OPT_fno_cuda_rdc,
                           false))
      CC1Args.push_back("-fcuda-rdc");
  }

  if (DriverArgs.hasArg(options::OPT_nocudalib))
    return;

  std::string LibDeviceFile = CudaInstallation.getLibDeviceFile(GpuArch);

  if (LibDeviceFile.empty()) {
    if (DeviceOffloadingKind == Action::OFK_OpenMP &&
        DriverArgs.hasArg(options::OPT_S))
      return;

    getDriver().Diag(diag::err_drv_no_cuda_libdevice) << GpuArch;
    return;
  }

  CC1Args.push_back("-mlink-cuda-bitcode");
  CC1Args.push_back(DriverArgs.MakeArgString(LibDeviceFile));

  // Libdevice in CUDA-7.0 requires PTX version that's more recent than LLVM
  // defaults to. Use PTX4.2 by default, which is the PTX version that came with
  // CUDA-7.0.
  const char *PtxFeature = "+ptx42";
  if (CudaInstallation.version() >= CudaVersion::CUDA_91) {
    // CUDA-9.1 uses new instructions that are only available in PTX6.1+
    PtxFeature = "+ptx61";
  } else if (CudaInstallation.version() >= CudaVersion::CUDA_90) {
    // CUDA-9.0 uses new instructions that are only available in PTX6.0+
    PtxFeature = "+ptx60";
  }
  CC1Args.append({"-target-feature", PtxFeature});
  if (DriverArgs.hasFlag(options::OPT_fcuda_short_ptr,
                         options::OPT_fno_cuda_short_ptr, false))
    CC1Args.append({"-mllvm", "--nvptx-short-ptr"});

  if (DeviceOffloadingKind == Action::OFK_OpenMP) {
    SmallVector<StringRef, 8> LibraryPaths;
    // Add path to lib and/or lib64 folders.
    SmallString<256> DefaultLibPath =
      llvm::sys::path::parent_path(getDriver().Dir);
    llvm::sys::path::append(DefaultLibPath,
        Twine("lib") + CLANG_LIBDIR_SUFFIX);
    LibraryPaths.emplace_back(DefaultLibPath.c_str());

    // Add user defined library paths from LIBRARY_PATH.
    llvm::Optional<std::string> LibPath =
        llvm::sys::Process::GetEnv("LIBRARY_PATH");
    if (LibPath) {
      SmallVector<StringRef, 8> Frags;
      const char EnvPathSeparatorStr[] = {llvm::sys::EnvPathSeparator, '\0'};
      llvm::SplitString(*LibPath, Frags, EnvPathSeparatorStr);
      for (StringRef Path : Frags)
        LibraryPaths.emplace_back(Path.trim());
    }

    std::string LibOmpTargetName =
      "libomptarget-nvptx-" + GpuArch.str() + ".bc";
    bool FoundBCLibrary = false;
    for (StringRef LibraryPath : LibraryPaths) {
      SmallString<128> LibOmpTargetFile(LibraryPath);
      llvm::sys::path::append(LibOmpTargetFile, LibOmpTargetName);
      if (llvm::sys::fs::exists(LibOmpTargetFile)) {
        CC1Args.push_back("-mlink-cuda-bitcode");
        CC1Args.push_back(DriverArgs.MakeArgString(LibOmpTargetFile));
        FoundBCLibrary = true;
        break;
      }
    }
    if (!FoundBCLibrary)
      getDriver().Diag(diag::warn_drv_omp_offload_target_missingbcruntime)
          << LibOmpTargetName;
  }
}