void HIPToolChain::addClangTargetOptions( const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, Action::OffloadKind DeviceOffloadingKind) const { HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind); StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_march_EQ); assert(!GpuArch.empty() && "Must have an explicit GPU arch."); (void) GpuArch; assert(DeviceOffloadingKind == Action::OFK_HIP && "Only HIP offloading kinds are supported for GPUs."); CC1Args.push_back("-fcuda-is-device"); if (DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero, options::OPT_fno_cuda_flush_denormals_to_zero, false)) CC1Args.push_back("-fcuda-flush-denormals-to-zero"); if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals, options::OPT_fno_cuda_approx_transcendentals, false)) CC1Args.push_back("-fcuda-approx-transcendentals"); if (DriverArgs.hasFlag(options::OPT_fcuda_rdc, options::OPT_fno_cuda_rdc, false)) CC1Args.push_back("-fcuda-rdc"); }
void CudaToolChain::addClangTargetOptions( const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, Action::OffloadKind DeviceOffloadingKind) const { HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind); StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_march_EQ); assert(!GpuArch.empty() && "Must have an explicit GPU arch."); assert((DeviceOffloadingKind == Action::OFK_OpenMP || DeviceOffloadingKind == Action::OFK_Cuda) && "Only OpenMP or CUDA offloading kinds are supported for NVIDIA GPUs."); if (DeviceOffloadingKind == Action::OFK_Cuda) { CC1Args.push_back("-fcuda-is-device"); if (DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero, options::OPT_fno_cuda_flush_denormals_to_zero, false)) CC1Args.push_back("-fcuda-flush-denormals-to-zero"); if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals, options::OPT_fno_cuda_approx_transcendentals, false)) CC1Args.push_back("-fcuda-approx-transcendentals"); } if (DriverArgs.hasArg(options::OPT_nocudalib)) return; std::string LibDeviceFile = CudaInstallation.getLibDeviceFile(GpuArch); if (LibDeviceFile.empty()) { if (DeviceOffloadingKind == Action::OFK_OpenMP && DriverArgs.hasArg(options::OPT_S)) return; getDriver().Diag(diag::err_drv_no_cuda_libdevice) << GpuArch; return; } CC1Args.push_back("-mlink-cuda-bitcode"); CC1Args.push_back(DriverArgs.MakeArgString(LibDeviceFile)); if (CudaInstallation.version() >= CudaVersion::CUDA_90) { // CUDA-9 uses new instructions that are only available in PTX6.0 CC1Args.push_back("-target-feature"); CC1Args.push_back("+ptx60"); } else { // Libdevice in CUDA-7.0 requires PTX version that's more recent // than LLVM defaults to. Use PTX4.2 which is the PTX version that // came with CUDA-7.0. CC1Args.push_back("-target-feature"); CC1Args.push_back("+ptx42"); } }
CudaInstallationDetector::CudaInstallationDetector( const Driver &D, const llvm::Triple &HostTriple, const llvm::opt::ArgList &Args) : D(D) { struct Candidate { std::string Path; bool StrictChecking; Candidate(std::string Path, bool StrictChecking = false) : Path(Path), StrictChecking(StrictChecking) {} }; SmallVector<Candidate, 4> Candidates; // In decreasing order so we prefer newer versions to older versions. std::initializer_list<const char *> Versions = {"8.0", "7.5", "7.0"}; if (Args.hasArg(clang::driver::options::OPT_cuda_path_EQ)) { Candidates.emplace_back( Args.getLastArgValue(clang::driver::options::OPT_cuda_path_EQ).str()); } else if (HostTriple.isOSWindows()) { for (const char *Ver : Versions) Candidates.emplace_back( D.SysRoot + "/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v" + Ver); } else { if (!Args.hasArg(clang::driver::options::OPT_cuda_path_ignore_env)) { // Try to find ptxas binary. If the executable is located in a directory // called 'bin/', its parent directory might be a good guess for a valid // CUDA installation. // However, some distributions might installs 'ptxas' to /usr/bin. In that // case the candidate would be '/usr' which passes the following checks // because '/usr/include' exists as well. To avoid this case, we always // check for the directory potentially containing files for libdevice, // even if the user passes -nocudalib. if (llvm::ErrorOr<std::string> ptxas = llvm::sys::findProgramByName("ptxas")) { SmallString<256> ptxasAbsolutePath; llvm::sys::fs::real_path(*ptxas, ptxasAbsolutePath); StringRef ptxasDir = llvm::sys::path::parent_path(ptxasAbsolutePath); if (llvm::sys::path::filename(ptxasDir) == "bin") Candidates.emplace_back(llvm::sys::path::parent_path(ptxasDir), /*StrictChecking=*/true); } } Candidates.emplace_back(D.SysRoot + "/usr/local/cuda"); for (const char *Ver : Versions) Candidates.emplace_back(D.SysRoot + "/usr/local/cuda-" + Ver); if (Distro(D.getVFS()).IsDebian()) // Special case for Debian to have nvidia-cuda-toolkit work // out of the box. More info on http://bugs.debian.org/882505 Candidates.emplace_back(D.SysRoot + "/usr/lib/cuda"); } bool NoCudaLib = Args.hasArg(options::OPT_nocudalib); for (const auto &Candidate : Candidates) { InstallPath = Candidate.Path; if (InstallPath.empty() || !D.getVFS().exists(InstallPath)) continue; BinPath = InstallPath + "/bin"; IncludePath = InstallPath + "/include"; LibDevicePath = InstallPath + "/nvvm/libdevice"; auto &FS = D.getVFS(); if (!(FS.exists(IncludePath) && FS.exists(BinPath))) continue; bool CheckLibDevice = (!NoCudaLib || Candidate.StrictChecking); if (CheckLibDevice && !FS.exists(LibDevicePath)) continue; // On Linux, we have both lib and lib64 directories, and we need to choose // based on our triple. On MacOS, we have only a lib directory. // // It's sufficient for our purposes to be flexible: If both lib and lib64 // exist, we choose whichever one matches our triple. Otherwise, if only // lib exists, we use it. if (HostTriple.isArch64Bit() && FS.exists(InstallPath + "/lib64")) LibPath = InstallPath + "/lib64"; else if (FS.exists(InstallPath + "/lib")) LibPath = InstallPath + "/lib"; else continue; llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> VersionFile = FS.getBufferForFile(InstallPath + "/version.txt"); if (!VersionFile) { // CUDA 7.0 doesn't have a version.txt, so guess that's our version if // version.txt isn't present. Version = CudaVersion::CUDA_70; } else { Version = ParseCudaVersionFile((*VersionFile)->getBuffer()); } if (Version >= CudaVersion::CUDA_90) { // CUDA-9+ uses single libdevice file for all GPU variants. std::string FilePath = LibDevicePath + "/libdevice.10.bc"; if (FS.exists(FilePath)) { for (const char *GpuArchName : {"sm_20", "sm_30", "sm_32", "sm_35", "sm_50", "sm_52", "sm_53", "sm_60", "sm_61", "sm_62", "sm_70", "sm_72"}) { const CudaArch GpuArch = StringToCudaArch(GpuArchName); if (Version >= MinVersionForCudaArch(GpuArch) && Version <= MaxVersionForCudaArch(GpuArch)) LibDeviceMap[GpuArchName] = FilePath; } } } else { std::error_code EC; for (llvm::sys::fs::directory_iterator LI(LibDevicePath, EC), LE; !EC && LI != LE; LI = LI.increment(EC)) { StringRef FilePath = LI->path(); StringRef FileName = llvm::sys::path::filename(FilePath); // Process all bitcode filenames that look like // libdevice.compute_XX.YY.bc const StringRef LibDeviceName = "libdevice."; if (!(FileName.startswith(LibDeviceName) && FileName.endswith(".bc"))) continue; StringRef GpuArch = FileName.slice( LibDeviceName.size(), FileName.find('.', LibDeviceName.size())); LibDeviceMap[GpuArch] = FilePath.str(); // Insert map entries for specifc devices with this compute // capability. NVCC's choice of the libdevice library version is // rather peculiar and depends on the CUDA version. if (GpuArch == "compute_20") { LibDeviceMap["sm_20"] = FilePath; LibDeviceMap["sm_21"] = FilePath; LibDeviceMap["sm_32"] = FilePath; } else if (GpuArch == "compute_30") { LibDeviceMap["sm_30"] = FilePath; if (Version < CudaVersion::CUDA_80) { LibDeviceMap["sm_50"] = FilePath; LibDeviceMap["sm_52"] = FilePath; LibDeviceMap["sm_53"] = FilePath; } LibDeviceMap["sm_60"] = FilePath; LibDeviceMap["sm_61"] = FilePath; LibDeviceMap["sm_62"] = FilePath; } else if (GpuArch == "compute_35") { LibDeviceMap["sm_35"] = FilePath; LibDeviceMap["sm_37"] = FilePath; } else if (GpuArch == "compute_50") { if (Version >= CudaVersion::CUDA_80) { LibDeviceMap["sm_50"] = FilePath; LibDeviceMap["sm_52"] = FilePath; LibDeviceMap["sm_53"] = FilePath; } } } } // Check that we have found at least one libdevice that we can link in if // -nocudalib hasn't been specified. if (LibDeviceMap.empty() && !NoCudaLib) continue; IsValid = true; break; } }
CudaInstallationDetector::CudaInstallationDetector( const Driver &D, const llvm::Triple &HostTriple, const llvm::opt::ArgList &Args) : D(D) { SmallVector<std::string, 4> CudaPathCandidates; // In decreasing order so we prefer newer versions to older versions. std::initializer_list<const char *> Versions = {"8.0", "7.5", "7.0"}; if (Args.hasArg(clang::driver::options::OPT_cuda_path_EQ)) { CudaPathCandidates.push_back( Args.getLastArgValue(clang::driver::options::OPT_cuda_path_EQ)); } else if (HostTriple.isOSWindows()) { for (const char *Ver : Versions) CudaPathCandidates.push_back( D.SysRoot + "/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v" + Ver); } else { CudaPathCandidates.push_back(D.SysRoot + "/usr/local/cuda"); for (const char *Ver : Versions) CudaPathCandidates.push_back(D.SysRoot + "/usr/local/cuda-" + Ver); } for (const auto &CudaPath : CudaPathCandidates) { if (CudaPath.empty() || !D.getVFS().exists(CudaPath)) continue; InstallPath = CudaPath; BinPath = CudaPath + "/bin"; IncludePath = InstallPath + "/include"; LibDevicePath = InstallPath + "/nvvm/libdevice"; auto &FS = D.getVFS(); if (!(FS.exists(IncludePath) && FS.exists(BinPath) && FS.exists(LibDevicePath))) continue; // On Linux, we have both lib and lib64 directories, and we need to choose // based on our triple. On MacOS, we have only a lib directory. // // It's sufficient for our purposes to be flexible: If both lib and lib64 // exist, we choose whichever one matches our triple. Otherwise, if only // lib exists, we use it. if (HostTriple.isArch64Bit() && FS.exists(InstallPath + "/lib64")) LibPath = InstallPath + "/lib64"; else if (FS.exists(InstallPath + "/lib")) LibPath = InstallPath + "/lib"; else continue; llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> VersionFile = FS.getBufferForFile(InstallPath + "/version.txt"); if (!VersionFile) { // CUDA 7.0 doesn't have a version.txt, so guess that's our version if // version.txt isn't present. Version = CudaVersion::CUDA_70; } else { Version = ParseCudaVersionFile((*VersionFile)->getBuffer()); } std::error_code EC; for (llvm::sys::fs::directory_iterator LI(LibDevicePath, EC), LE; !EC && LI != LE; LI = LI.increment(EC)) { StringRef FilePath = LI->path(); StringRef FileName = llvm::sys::path::filename(FilePath); // Process all bitcode filenames that look like libdevice.compute_XX.YY.bc const StringRef LibDeviceName = "libdevice."; if (!(FileName.startswith(LibDeviceName) && FileName.endswith(".bc"))) continue; StringRef GpuArch = FileName.slice( LibDeviceName.size(), FileName.find('.', LibDeviceName.size())); LibDeviceMap[GpuArch] = FilePath.str(); // Insert map entries for specifc devices with this compute // capability. NVCC's choice of the libdevice library version is // rather peculiar and depends on the CUDA version. if (GpuArch == "compute_20") { LibDeviceMap["sm_20"] = FilePath; LibDeviceMap["sm_21"] = FilePath; LibDeviceMap["sm_32"] = FilePath; } else if (GpuArch == "compute_30") { LibDeviceMap["sm_30"] = FilePath; if (Version < CudaVersion::CUDA_80) { LibDeviceMap["sm_50"] = FilePath; LibDeviceMap["sm_52"] = FilePath; LibDeviceMap["sm_53"] = FilePath; } LibDeviceMap["sm_60"] = FilePath; LibDeviceMap["sm_61"] = FilePath; LibDeviceMap["sm_62"] = FilePath; } else if (GpuArch == "compute_35") { LibDeviceMap["sm_35"] = FilePath; LibDeviceMap["sm_37"] = FilePath; } else if (GpuArch == "compute_50") { if (Version >= CudaVersion::CUDA_80) { LibDeviceMap["sm_50"] = FilePath; LibDeviceMap["sm_52"] = FilePath; LibDeviceMap["sm_53"] = FilePath; } } } IsValid = true; break; } }
void CudaToolChain::addClangTargetOptions( const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, Action::OffloadKind DeviceOffloadingKind) const { HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind); StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_march_EQ); assert(!GpuArch.empty() && "Must have an explicit GPU arch."); assert((DeviceOffloadingKind == Action::OFK_OpenMP || DeviceOffloadingKind == Action::OFK_Cuda) && "Only OpenMP or CUDA offloading kinds are supported for NVIDIA GPUs."); if (DeviceOffloadingKind == Action::OFK_Cuda) { CC1Args.push_back("-fcuda-is-device"); if (DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero, options::OPT_fno_cuda_flush_denormals_to_zero, false)) CC1Args.push_back("-fcuda-flush-denormals-to-zero"); if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals, options::OPT_fno_cuda_approx_transcendentals, false)) CC1Args.push_back("-fcuda-approx-transcendentals"); if (DriverArgs.hasFlag(options::OPT_fcuda_rdc, options::OPT_fno_cuda_rdc, false)) CC1Args.push_back("-fcuda-rdc"); } if (DriverArgs.hasArg(options::OPT_nocudalib)) return; std::string LibDeviceFile = CudaInstallation.getLibDeviceFile(GpuArch); if (LibDeviceFile.empty()) { if (DeviceOffloadingKind == Action::OFK_OpenMP && DriverArgs.hasArg(options::OPT_S)) return; getDriver().Diag(diag::err_drv_no_cuda_libdevice) << GpuArch; return; } CC1Args.push_back("-mlink-cuda-bitcode"); CC1Args.push_back(DriverArgs.MakeArgString(LibDeviceFile)); // Libdevice in CUDA-7.0 requires PTX version that's more recent than LLVM // defaults to. Use PTX4.2 by default, which is the PTX version that came with // CUDA-7.0. const char *PtxFeature = "+ptx42"; if (CudaInstallation.version() >= CudaVersion::CUDA_91) { // CUDA-9.1 uses new instructions that are only available in PTX6.1+ PtxFeature = "+ptx61"; } else if (CudaInstallation.version() >= CudaVersion::CUDA_90) { // CUDA-9.0 uses new instructions that are only available in PTX6.0+ PtxFeature = "+ptx60"; } CC1Args.append({"-target-feature", PtxFeature}); if (DriverArgs.hasFlag(options::OPT_fcuda_short_ptr, options::OPT_fno_cuda_short_ptr, false)) CC1Args.append({"-mllvm", "--nvptx-short-ptr"}); if (DeviceOffloadingKind == Action::OFK_OpenMP) { SmallVector<StringRef, 8> LibraryPaths; // Add path to lib and/or lib64 folders. SmallString<256> DefaultLibPath = llvm::sys::path::parent_path(getDriver().Dir); llvm::sys::path::append(DefaultLibPath, Twine("lib") + CLANG_LIBDIR_SUFFIX); LibraryPaths.emplace_back(DefaultLibPath.c_str()); // Add user defined library paths from LIBRARY_PATH. llvm::Optional<std::string> LibPath = llvm::sys::Process::GetEnv("LIBRARY_PATH"); if (LibPath) { SmallVector<StringRef, 8> Frags; const char EnvPathSeparatorStr[] = {llvm::sys::EnvPathSeparator, '\0'}; llvm::SplitString(*LibPath, Frags, EnvPathSeparatorStr); for (StringRef Path : Frags) LibraryPaths.emplace_back(Path.trim()); } std::string LibOmpTargetName = "libomptarget-nvptx-" + GpuArch.str() + ".bc"; bool FoundBCLibrary = false; for (StringRef LibraryPath : LibraryPaths) { SmallString<128> LibOmpTargetFile(LibraryPath); llvm::sys::path::append(LibOmpTargetFile, LibOmpTargetName); if (llvm::sys::fs::exists(LibOmpTargetFile)) { CC1Args.push_back("-mlink-cuda-bitcode"); CC1Args.push_back(DriverArgs.MakeArgString(LibOmpTargetFile)); FoundBCLibrary = true; break; } } if (!FoundBCLibrary) getDriver().Diag(diag::warn_drv_omp_offload_target_missingbcruntime) << LibOmpTargetName; } }