/// Add OpenMP linker script arguments at the end of the argument list so that /// the fat binary is built by embedding each of the device images into the /// host. The linker script also defines a few symbols required by the code /// generation so that the images can be easily retrieved at runtime by the /// offloading library. This should be used only in tool chains that support /// linker scripts. void tools::AddOpenMPLinkerScript(const ToolChain &TC, Compilation &C, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, ArgStringList &CmdArgs, const JobAction &JA) { // If this is not an OpenMP host toolchain, we don't need to do anything. if (!JA.isHostOffloading(Action::OFK_OpenMP)) return; // Create temporary linker script. Keep it if save-temps is enabled. const char *LKS; SmallString<256> Name = llvm::sys::path::filename(Output.getFilename()); if (C.getDriver().isSaveTempsEnabled()) { llvm::sys::path::replace_extension(Name, "lk"); LKS = C.getArgs().MakeArgString(Name.c_str()); } else { llvm::sys::path::replace_extension(Name, ""); Name = C.getDriver().GetTemporaryPath(Name, "lk"); LKS = C.addTempFile(C.getArgs().MakeArgString(Name.c_str())); } // Add linker script option to the command. CmdArgs.push_back("-T"); CmdArgs.push_back(LKS); // Create a buffer to write the contents of the linker script. std::string LksBuffer; llvm::raw_string_ostream LksStream(LksBuffer); // Get the OpenMP offload tool chains so that we can extract the triple // associated with each device input. auto OpenMPToolChains = C.getOffloadToolChains<Action::OFK_OpenMP>(); assert(OpenMPToolChains.first != OpenMPToolChains.second && "No OpenMP toolchains??"); // Track the input file name and device triple in order to build the script, // inserting binaries in the designated sections. SmallVector<std::pair<std::string, const char *>, 8> InputBinaryInfo; // Add commands to embed target binaries. We ensure that each section and // image is 16-byte aligned. 
This is not mandatory, but increases the // likelihood of data to be aligned with a cache block in several main host // machines. LksStream << "/*\n"; LksStream << " OpenMP Offload Linker Script\n"; LksStream << " *** Automatically generated by Clang ***\n"; LksStream << "*/\n"; LksStream << "TARGET(binary)\n"; auto DTC = OpenMPToolChains.first; for (auto &II : Inputs) { const Action *A = II.getAction(); // Is this a device linking action? if (A && isa<LinkJobAction>(A) && A->isDeviceOffloading(Action::OFK_OpenMP)) { assert(DTC != OpenMPToolChains.second && "More device inputs than device toolchains??"); InputBinaryInfo.push_back(std::make_pair( DTC->second->getTriple().normalize(), II.getFilename())); ++DTC; LksStream << "INPUT(" << II.getFilename() << ")\n"; } } assert(DTC == OpenMPToolChains.second && "Less device inputs than device toolchains??"); LksStream << "SECTIONS\n"; LksStream << "{\n"; // Put each target binary into a separate section. for (const auto &BI : InputBinaryInfo) { LksStream << " .omp_offloading." << BI.first << " :\n"; LksStream << " ALIGN(0x10)\n"; LksStream << " {\n"; LksStream << " PROVIDE_HIDDEN(.omp_offloading.img_start." << BI.first << " = .);\n"; LksStream << " " << BI.second << "\n"; LksStream << " PROVIDE_HIDDEN(.omp_offloading.img_end." << BI.first << " = .);\n"; LksStream << " }\n"; } // Add commands to define host entries begin and end. We use 1-byte subalign // so that the linker does not add any padding and the elements in this // section form an array. LksStream << " .omp_offloading.entries :\n"; LksStream << " ALIGN(0x10)\n"; LksStream << " SUBALIGN(0x01)\n"; LksStream << " {\n"; LksStream << " PROVIDE_HIDDEN(.omp_offloading.entries_begin = .);\n"; LksStream << " *(.omp_offloading.entries)\n"; LksStream << " PROVIDE_HIDDEN(.omp_offloading.entries_end = .);\n"; LksStream << " }\n"; LksStream << "}\n"; LksStream << "INSERT BEFORE .data\n"; LksStream.flush(); // Dump the contents of the linker script if the user requested that. 
We // support this option to enable testing of behavior with -###. if (C.getArgs().hasArg(options::OPT_fopenmp_dump_offload_linker_script)) llvm::errs() << LksBuffer; // If this is a dry run, do not create the linker script file. if (C.getArgs().hasArg(options::OPT__HASH_HASH_HASH)) return; // Open script file and write the contents. std::error_code EC; llvm::raw_fd_ostream Lksf(LKS, EC, llvm::sys::fs::F_None); if (EC) { C.getDriver().Diag(clang::diag::err_unable_to_make_temp) << EC.message(); return; } Lksf << LksBuffer; }
/// Build the "nvlink" command that links the device inputs of an OpenMP
/// offloading compilation for an NVPTX target.
///
/// \param C             Compilation that receives the command and owns the
///                      temporary files.
/// \param JA            Job action being constructed; must not be an OpenMP
///                      host offloading action.
/// \param Output        Link output; passed with "-o" when it is a file name.
/// \param Inputs        Link inputs; only file-name inputs are forwarded.
/// \param Args          Job arguments (-g, -v and -march= are consulted).
/// \param LinkingOutput Not used by this function.
void NVPTX::OpenMPLinker::ConstructJob(Compilation &C, const JobAction &JA,
                                       const InputInfo &Output,
                                       const InputInfoList &Inputs,
                                       const ArgList &Args,
                                       const char *LinkingOutput) const {
  const auto &BaseTC = getToolChain();
  const auto &CudaTC = static_cast<const toolchains::CudaToolChain &>(BaseTC);
  assert(CudaTC.getTriple().isNVPTX() && "Wrong platform");

  ArgStringList NVLinkArgs;

  // OpenMP uses nvlink to link cubin files. The result will be embedded in the
  // host binary by the host linker.
  assert(!JA.isHostOffloading(Action::OFK_OpenMP) &&
         "CUDA toolchain not expected for an OpenMP host device.");

  // Output file, if there is one.
  if (!Output.isFilename()) {
    assert(Output.isNothing() && "Invalid output.");
  } else {
    NVLinkArgs.push_back("-o");
    NVLinkArgs.push_back(Output.getFilename());
  }

  // Forward debug-info and verbosity requests.
  if (Args.hasArg(options::OPT_g_Flag))
    NVLinkArgs.push_back("-g");
  if (Args.hasArg(options::OPT_v))
    NVLinkArgs.push_back("-v");

  // Target GPU architecture, taken from the last -march= value.
  StringRef Arch = Args.getLastArgValue(options::OPT_march_EQ);
  assert(!Arch.empty() && "At least one GPU Arch required for ptxas.");
  NVLinkArgs.push_back("-arch");
  NVLinkArgs.push_back(Args.MakeArgString(Arch));

  // Library search paths: first the LIBRARY_PATH environment variable ...
  addDirectoryList(Args, NVLinkArgs, "-L", "LIBRARY_PATH");

  // ... then the default clang library directory next to the driver.
  SmallString<256> ClangLibDir =
      llvm::sys::path::parent_path(CudaTC.getDriver().Dir);
  llvm::sys::path::append(ClangLibDir, "lib" CLANG_LIBDIR_SUFFIX);
  NVLinkArgs.push_back(Args.MakeArgString(Twine("-L") + ClangLibDir));

  // Link against the library implementing OpenMP calls on the NVPTX target.
  NVLinkArgs.push_back("-lomptarget-nvptx");

  for (const auto &Input : Inputs) {
    // LLVM IR / bitcode inputs cannot be linked here; diagnose and skip them.
    const auto InputTy = Input.getType();
    const bool IsLLVMInput =
        InputTy == types::TY_LLVM_IR || InputTy == types::TY_LTO_IR ||
        InputTy == types::TY_LTO_BC || InputTy == types::TY_LLVM_BC;
    if (IsLLVMInput) {
      C.getDriver().Diag(diag::err_drv_no_linker_llvm_support)
          << BaseTC.getTripleString();
      continue;
    }

    // Currently, we only pass the input files to the linker, we do not pass
    // any libraries that may be valid only for the host.
    if (Input.isFilename()) {
      const char *Cubin = C.addTempFile(
          C.getArgs().MakeArgString(BaseTC.getInputFilename(Input)));
      NVLinkArgs.push_back(Cubin);
    }
  }

  AddOpenMPLinkerScript(BaseTC, C, Output, Inputs, Args, NVLinkArgs, JA);

  const char *NVLinkPath = Args.MakeArgString(BaseTC.GetProgramPath("nvlink"));
  C.addCommand(
      llvm::make_unique<Command>(JA, *this, NVLinkPath, NVLinkArgs, Inputs));
}
static void ExecuteTemplightJobs(Driver &TheDriver, DiagnosticsEngine &Diags, Compilation &C, Job &J, const char* Argv0, SmallVector<std::pair<int, const Command *>, 4>& FailingCommands) { if (JobList *jobs = dyn_cast<JobList>(&J)) { for (JobList::iterator it = jobs->begin(), it_end = jobs->end(); it != it_end; ++it) ExecuteTemplightJobs(TheDriver, Diags, C, *it, Argv0, FailingCommands); return; } Command *command = dyn_cast<Command>(&J); // Since argumentsFitWithinSystemLimits() may underestimate system's capacity // if the tool does not support response files, there is a chance/ that things // will just work without a response file, so we silently just skip it. if ( command && command->getCreator().getResponseFilesSupport() != Tool::RF_None && llvm::sys::argumentsFitWithinSystemLimits(command->getArguments()) ) { std::string TmpName = TheDriver.GetTemporaryPath("response", "txt"); command->setResponseFile(C.addTempFile(C.getArgs().MakeArgString( TmpName.c_str()))); } if ( command && (StringRef(command->getCreator().getName()) == "clang") ) { // Initialize a compiler invocation object from the clang (-cc1) arguments. const ArgStringList &cc_arguments = command->getArguments(); const char** args_start = const_cast<const char**>(cc_arguments.data()); const char** args_end = args_start + cc_arguments.size(); std::unique_ptr<CompilerInstance> Clang(new CompilerInstance()); int Res = !CompilerInvocation::CreateFromArgs( Clang->getInvocation(), args_start, args_end, Diags); if(Res) FailingCommands.push_back(std::make_pair(Res, command)); Clang->getFrontendOpts().DisableFree = false; // Infer the builtin include path if unspecified. void *GetExecutablePathVP = (void *)(intptr_t) GetExecutablePath; if (Clang->getHeaderSearchOpts().UseBuiltinIncludes && Clang->getHeaderSearchOpts().ResourceDir.empty()) Clang->getHeaderSearchOpts().ResourceDir = CompilerInvocation::GetResourcesPath(Argv0, GetExecutablePathVP); // Create the compilers actual diagnostics engine. 
Clang->createDiagnostics(); if (!Clang->hasDiagnostics()) { FailingCommands.push_back(std::make_pair(1, command)); return; } LocalOutputFilename = ""; // Let the filename be created from options or output file name. std::string TemplightOutFile = TemplightAction::CreateOutputFilename( Clang.get(), "", InstProfiler, OutputToStdOut, MemoryProfile); // Check if templight filename is in a temporary path: llvm::SmallString<128> TDir; llvm::sys::path::system_temp_directory(true, TDir); if ( TDir.equals(llvm::sys::path::parent_path(llvm::StringRef(TemplightOutFile))) ) { C.addTempFile(TemplightOutFile.c_str()); TempOutputFiles.push_back(TemplightOutFile); } // Execute the frontend actions. Res = ExecuteTemplightInvocation(Clang.get()); if(Res) FailingCommands.push_back(std::make_pair(Res, command)); } else { C.ExecuteJob(J, FailingCommands); } }