// For amdgcn the inputs of the linker job are device bitcode and output is // object file. It calls llvm-link, opt, llc, then lld steps. void AMDGCN::Linker::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { assert(getToolChain().getTriple().getArch() == llvm::Triple::amdgcn && "Unsupported target"); std::string SubArchName = JA.getOffloadingArch(); assert(StringRef(SubArchName).startswith("gfx") && "Unsupported sub arch"); // Prefix for temporary file name. std::string Prefix = llvm::sys::path::stem(Inputs[0].getFilename()).str() + "-" + SubArchName; // Each command outputs different files. const char *LLVMLinkCommand = constructLLVMLinkCommand(C, JA, Inputs, Args, SubArchName, Prefix); const char *OptCommand = constructOptCommand(C, JA, Inputs, Args, SubArchName, Prefix, LLVMLinkCommand); const char *LlcCommand = constructLlcCommand(C, JA, Inputs, Args, SubArchName, Prefix, OptCommand); constructLldCommand(C, JA, Inputs, Output, Args, LlcCommand); }
void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { const auto &TC = static_cast<const toolchains::CudaToolChain &>(getToolChain()); assert(TC.getTriple().isNVPTX() && "Wrong platform"); StringRef GPUArchName; // If this is an OpenMP action we need to extract the device architecture // from the -march=arch option. This option may come from -Xopenmp-target // flag or the default value. if (JA.isDeviceOffloading(Action::OFK_OpenMP)) { GPUArchName = Args.getLastArgValue(options::OPT_march_EQ); assert(!GPUArchName.empty() && "Must have an architecture passed in."); } else GPUArchName = JA.getOffloadingArch(); // Obtain architecture from the action. CudaArch gpu_arch = StringToCudaArch(GPUArchName); assert(gpu_arch != CudaArch::UNKNOWN && "Device action expected to have an architecture."); // Check that our installation's ptxas supports gpu_arch. if (!Args.hasArg(options::OPT_no_cuda_version_check)) { TC.CudaInstallation.CheckCudaVersionSupportsArch(gpu_arch); } ArgStringList CmdArgs; CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-m64" : "-m32"); if (Args.hasFlag(options::OPT_cuda_noopt_device_debug, options::OPT_no_cuda_noopt_device_debug, false)) { // ptxas does not accept -g option if optimization is enabled, so // we ignore the compiler's -O* options if we want debug info. CmdArgs.push_back("-g"); CmdArgs.push_back("--dont-merge-basicblocks"); CmdArgs.push_back("--return-at-end"); } else if (Arg *A = Args.getLastArg(options::OPT_O_Group)) { // Map the -O we received to -O{0,1,2,3}. // // TODO: Perhaps we should map host -O2 to ptxas -O3. -O3 is ptxas's // default, so it may correspond more closely to the spirit of clang -O2. // -O3 seems like the least-bad option when -Osomething is specified to // clang but it isn't handled below. StringRef OOpt = "3"; if (A->getOption().matches(options::OPT_O4) || A->getOption().matches(options::OPT_Ofast)) OOpt = "3"; else if (A->getOption().matches(options::OPT_O0)) OOpt = "0"; else if (A->getOption().matches(options::OPT_O)) { // -Os, -Oz, and -O(anything else) map to -O2, for lack of better options. OOpt = llvm::StringSwitch<const char *>(A->getValue()) .Case("1", "1") .Case("2", "2") .Case("3", "3") .Case("s", "2") .Case("z", "2") .Default("2"); } CmdArgs.push_back(Args.MakeArgString(llvm::Twine("-O") + OOpt)); } else { // If no -O was passed, pass -O0 to ptxas -- no opt flag should correspond // to no optimizations, but ptxas's default is -O3. CmdArgs.push_back("-O0"); } // Pass -v to ptxas if it was passed to the driver. if (Args.hasArg(options::OPT_v)) CmdArgs.push_back("-v"); CmdArgs.push_back("--gpu-name"); CmdArgs.push_back(Args.MakeArgString(CudaArchToString(gpu_arch))); CmdArgs.push_back("--output-file"); CmdArgs.push_back(Args.MakeArgString(TC.getInputFilename(Output))); for (const auto& II : Inputs) CmdArgs.push_back(Args.MakeArgString(II.getFilename())); for (const auto& A : Args.getAllArgValues(options::OPT_Xcuda_ptxas)) CmdArgs.push_back(Args.MakeArgString(A)); // In OpenMP we need to generate relocatable code. if (JA.isOffloading(Action::OFK_OpenMP) && Args.hasFlag(options::OPT_fopenmp_relocatable_target, options::OPT_fnoopenmp_relocatable_target, /*Default=*/ true)) CmdArgs.push_back("-c"); const char *Exec; if (Arg *A = Args.getLastArg(options::OPT_ptxas_path_EQ)) Exec = A->getValue(); else Exec = Args.MakeArgString(TC.GetProgramPath("ptxas")); C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs)); }