/// \brief Assign DWARF discriminators. /// /// To assign discriminators, we examine the boundaries of every /// basic block and its successors. Suppose there is a basic block B1 /// with successor B2. The last instruction I1 in B1 and the first /// instruction I2 in B2 are located at the same file and line number. /// This situation is illustrated in the following code snippet: /// /// if (i < 10) x = i; /// /// entry: /// br i1 %cmp, label %if.then, label %if.end, !dbg !10 /// if.then: /// %1 = load i32* %i.addr, align 4, !dbg !10 /// store i32 %1, i32* %x, align 4, !dbg !10 /// br label %if.end, !dbg !10 /// if.end: /// ret void, !dbg !12 /// /// Notice how the branch instruction in block 'entry' and all the /// instructions in block 'if.then' have the exact same debug location /// information (!dbg !10). /// /// To distinguish instructions in block 'entry' from instructions in /// block 'if.then', we generate a new lexical block for all the /// instruction in block 'if.then' that share the same file and line /// location with the last instruction of block 'entry'. /// /// This new lexical block will have the same location information as /// the previous one, but with a new DWARF discriminator value. /// /// One of the main uses of this discriminator value is in runtime /// sample profilers. It allows the profiler to distinguish instructions /// at location !dbg !10 that execute on different basic blocks. This is /// important because while the predicate 'if (x < 10)' may have been /// executed millions of times, the assignment 'x = i' may have only /// executed a handful of times (meaning that the entry->if.then edge is /// seldom taken). /// /// If we did not have discriminator information, the profiler would /// assign the same weight to both blocks 'entry' and 'if.then', which /// in turn will make it conclude that the entry->if.then edge is very /// hot. 
/// /// To decide where to create new discriminator values, this function /// traverses the CFG and examines instruction at basic block boundaries. /// If the last instruction I1 of a block B1 is at the same file and line /// location as instruction I2 of successor B2, then it creates a new /// lexical block for I2 and all the instruction in B2 that share the same /// file and line location as I2. This new lexical block will have a /// different discriminator number than I1. bool AddDiscriminators::runOnFunction(Function &F) { // If the function has debug information, but the user has disabled // discriminators, do nothing. // Simlarly, if the function has no debug info, do nothing. // Finally, if this module is built with dwarf versions earlier than 4, // do nothing (discriminator support is a DWARF 4 feature). if (NoDiscriminators || !hasDebugInfo(F) || F.getParent()->getDwarfVersion() < 4) return false; bool Changed = false; Module *M = F.getParent(); LLVMContext &Ctx = M->getContext(); DIBuilder Builder(*M, /*AllowUnresolved*/ false); typedef std::pair<StringRef, unsigned> Location; typedef DenseMap<const BasicBlock *, Metadata *> BBScopeMap; typedef DenseMap<Location, BBScopeMap> LocationBBMap; typedef DenseMap<Location, unsigned> LocationDiscriminatorMap; LocationBBMap LBM; LocationDiscriminatorMap LDM; // Traverse all instructions in the function. If the source line location // of the instruction appears in other basic block, assign a new // discriminator for this instruction. 
for (BasicBlock &B : F) { for (auto &I : B.getInstList()) { if (isa<DbgInfoIntrinsic>(&I)) continue; const DILocation *DIL = I.getDebugLoc(); if (!DIL) continue; Location L = std::make_pair(DIL->getFilename(), DIL->getLine()); auto &BBMap = LBM[L]; auto R = BBMap.insert(std::make_pair(&B, (Metadata *)nullptr)); if (BBMap.size() == 1) continue; bool InsertSuccess = R.second; Metadata *&NewScope = R.first->second; // If we could insert a different block in the same location, a // discriminator is needed to distinguish both instructions. if (InsertSuccess) { auto *Scope = DIL->getScope(); auto *File = Builder.createFile(DIL->getFilename(), Scope->getDirectory()); NewScope = Builder.createLexicalBlockFile(Scope, File, ++LDM[L]); } I.setDebugLoc(DILocation::get(Ctx, DIL->getLine(), DIL->getColumn(), NewScope, DIL->getInlinedAt())); DEBUG(dbgs() << DIL->getFilename() << ":" << DIL->getLine() << ":" << DIL->getColumn() << ":" << dyn_cast<DILexicalBlockFile>(NewScope)->getDiscriminator() << I << "\n"); Changed = true; } } // Traverse all instructions and assign new discriminators to call // instructions with the same lineno that are in the same basic block. // Sample base profile needs to distinguish different function calls within // a same source line for correct profile annotation. 
for (BasicBlock &B : F) { const DILocation *FirstDIL = nullptr; for (auto &I : B.getInstList()) { CallInst *Current = dyn_cast<CallInst>(&I); if (!Current || isa<DbgInfoIntrinsic>(&I)) continue; DILocation *CurrentDIL = Current->getDebugLoc(); if (FirstDIL) { if (CurrentDIL && CurrentDIL->getLine() == FirstDIL->getLine() && CurrentDIL->getFilename() == FirstDIL->getFilename()) { auto *Scope = FirstDIL->getScope(); auto *File = Builder.createFile(FirstDIL->getFilename(), Scope->getDirectory()); Location L = std::make_pair(FirstDIL->getFilename(), FirstDIL->getLine()); auto *NewScope = Builder.createLexicalBlockFile(Scope, File, ++LDM[L]); Current->setDebugLoc(DILocation::get( Ctx, CurrentDIL->getLine(), CurrentDIL->getColumn(), NewScope, CurrentDIL->getInlinedAt())); Changed = true; } else { FirstDIL = CurrentDIL; } } else { FirstDIL = CurrentDIL; } } } return Changed; }
/// Mark the call instructions of \p F that are tail call candidates.
///
/// A call is marked when it cannot reach the caller's local stack (allocas
/// and byval arguments), either because it is a readnone call whose
/// arguments are all constants or non-byval function arguments, or because
/// it neither uses an alloca-derived value nor sits in a block reachable
/// after an alloca has escaped.
///
/// \param F                     function whose calls are examined.
/// \param AllCallsAreTailCalls  set to true once the returns-twice check has
///        passed, and reset to false as soon as a call that is not already a
///        tail call is found that cannot be marked. Left untouched when the
///        function calls something that returns twice.
/// \returns true if at least one call was newly marked as a tail call.
static bool markTails(Function &F, bool &AllCallsAreTailCalls) {
  if (F.callsFunctionThatReturnsTwice())
    return false;
  AllCallsAreTailCalls = true;

  // The local stack holds all alloca instructions and all byval arguments.
  AllocaDerivedValueTracker Tracker;
  for (Argument &Arg : F.args()) {
    if (Arg.hasByValAttr())
      Tracker.walk(&Arg);
  }
  for (auto &BB : F) {
    for (auto &I : BB)
      if (AllocaInst *AI = dyn_cast<AllocaInst>(&I))
        Tracker.walk(AI);
  }

  bool Modified = false;

  // Track whether a block is reachable after an alloca has escaped. Blocks that
  // contain the escaping instruction will be marked as being visited without an
  // escaped alloca, since that is how the block began.
  enum VisitType { UNVISITED, UNESCAPED, ESCAPED };
  DenseMap<BasicBlock *, VisitType> Visited;

  // We propagate the fact that an alloca has escaped from block to successor.
  // Visit the blocks that are propagating the escapedness first. To do this, we
  // maintain two worklists.
  SmallVector<BasicBlock *, 32> WorklistUnescaped, WorklistEscaped;

  // We may enter a block and visit it thinking that no alloca has escaped yet,
  // then see an escape point and go back around a loop edge and come back to
  // the same block twice. Because of this, we defer setting tail on calls when
  // we first encounter them in a block. Every entry in this list does not
  // statically use an alloca via use-def chain analysis, but may find an alloca
  // through other means if the block turns out to be reachable after an escape
  // point.
  SmallVector<CallInst *, 32> DeferredTails;

  BasicBlock *BB = &F.getEntryBlock();
  VisitType Escaped = UNESCAPED;
  do {
    for (auto &I : *BB) {
      if (Tracker.EscapePoints.count(&I))
        Escaped = ESCAPED;

      CallInst *CI = dyn_cast<CallInst>(&I);
      if (!CI || CI->isTailCall())
        continue;

      bool IsNoTail = CI->isNoTailCall() || CI->hasOperandBundles();

      if (!IsNoTail && CI->doesNotAccessMemory()) {
        // A call to a readnone function whose arguments are all things computed
        // outside this function can be marked tail. Even if you stored the
        // alloca address into a global, a readnone function can't load the
        // global anyhow.
        //
        // Note that this runs whether we know an alloca has escaped or not. If
        // it has, then we can't trust Tracker.AllocaUsers to be accurate.
        bool SafeToTail = true;
        for (auto &Arg : CI->arg_operands()) {
          // Inspect the operand value itself. Use::getUser() would return
          // the call instruction, which is never a Constant or an Argument,
          // so the check could not succeed for any call with arguments.
          Value *ArgVal = Arg.get();
          if (isa<Constant>(ArgVal))
            continue;
          if (Argument *A = dyn_cast<Argument>(ArgVal))
            if (!A->hasByValAttr())
              continue;
          SafeToTail = false;
          break;
        }
        if (SafeToTail) {
          emitOptimizationRemark(
              F.getContext(), "tailcallelim", F, CI->getDebugLoc(),
              "marked this readnone call a tail call candidate");
          CI->setTailCall();
          Modified = true;
          continue;
        }
      }

      if (!IsNoTail && Escaped == UNESCAPED && !Tracker.AllocaUsers.count(CI)) {
        DeferredTails.push_back(CI);
      } else {
        AllCallsAreTailCalls = false;
      }
    }

    // Propagate the current state to the successors; a block is re-queued
    // only when its recorded state gets strictly "worse".
    for (auto *SuccBB : make_range(succ_begin(BB), succ_end(BB))) {
      auto &State = Visited[SuccBB];
      if (State < Escaped) {
        State = Escaped;
        if (State == ESCAPED)
          WorklistEscaped.push_back(SuccBB);
        else
          WorklistUnescaped.push_back(SuccBB);
      }
    }

    if (!WorklistEscaped.empty()) {
      BB = WorklistEscaped.pop_back_val();
      Escaped = ESCAPED;
    } else {
      BB = nullptr;
      while (!WorklistUnescaped.empty()) {
        auto *NextBB = WorklistUnescaped.pop_back_val();
        // Skip entries that were upgraded to ESCAPED after being queued;
        // those are (or were) handled through the escaped worklist.
        if (Visited[NextBB] == UNESCAPED) {
          BB = NextBB;
          Escaped = UNESCAPED;
          break;
        }
      }
    }
  } while (BB);

  for (CallInst *CI : DeferredTails) {
    if (Visited[CI->getParent()] != ESCAPED) {
      // If the escape point was part way through the block, calls after the
      // escape point wouldn't have been put into DeferredTails.
      emitOptimizationRemark(F.getContext(), "tailcallelim", F,
                             CI->getDebugLoc(),
                             "marked this call a tail call candidate");
      CI->setTailCall();
      Modified = true;
    } else {
      AllCallsAreTailCalls = false;
    }
  }

  return Modified;
}
int compile(list<string> args, list<string> kgen_args, string merge, list<string> merge_args, string input, string output, int arch, string host_compiler, string fileprefix) { // // The LLVM compiler to emit IR. // const char* llvm_compiler = "kernelgen-gfortran"; // // Interpret kernelgen compile options. // for (list<string>::iterator iarg = kgen_args.begin(), iearg = kgen_args.end(); iarg != iearg; iarg++) { const char* arg = (*iarg).c_str(); if (!strncmp(arg, "-Wk,--llvm-compiler=", 20)) llvm_compiler = arg + 20; } // // Generate temporary output file. // Check if output file is specified in the command line. // Replace or add output to the temporary file. // cfiledesc tmp_output = cfiledesc::mktemp(fileprefix); bool output_specified = false; for (list<string>::iterator iarg = args.begin(), iearg = args.end(); iarg != iearg; iarg++) { const char* arg = (*iarg).c_str(); if (!strcmp(arg, "-o")) { iarg++; *iarg = tmp_output.getFilename(); output_specified = true; break; } } if (!output_specified) { args.push_back("-o"); args.push_back(tmp_output.getFilename()); } // // 1) Compile source code using regular host compiler. // { if (verbose) { cout << host_compiler; for (list<string>::iterator iarg = args.begin(), iearg = args.end(); iarg != iearg; iarg++) cout << " " << *iarg; cout << endl; } int status = execute(host_compiler, args, "", NULL, NULL); if (status) return status; } // // 2) Emit LLVM IR. 
// string out = ""; { list<string> emit_ir_args; for (list<string>::iterator iarg = args.begin(), iearg = args.end(); iarg != iearg; iarg++) { const char* arg = (*iarg).c_str(); if (!strcmp(arg, "-c") || !strcmp(arg, "-o")) { iarg++; continue; } if (!strcmp(arg, "-g")) { continue; } emit_ir_args.push_back(*iarg); } emit_ir_args.push_back("-fplugin=/opt/kernelgen/lib/dragonegg.so"); emit_ir_args.push_back("-fplugin-arg-dragonegg-emit-ir"); emit_ir_args.push_back("-S"); emit_ir_args.push_back(input); emit_ir_args.push_back("-o"); emit_ir_args.push_back("-"); if (verbose) { cout << llvm_compiler; for (list<string>::iterator iarg = emit_ir_args.begin(), iearg = emit_ir_args.end(); iarg != iearg; iarg++) cout << " " << *iarg; cout << endl; } int status = execute(llvm_compiler, emit_ir_args, "", &out, NULL); if (status) return status; } // // 3) Record existing module functions. // LLVMContext &context = getGlobalContext(); SMDiagnostic diag; MemoryBuffer* buffer1 = MemoryBuffer::getMemBuffer(out); auto_ptr<Module> m1; m1.reset(ParseIR(buffer1, diag, context)); //m1.get()->dump(); // // 4) Inline calls and extract loops into new functions. // MemoryBuffer* buffer2 = MemoryBuffer::getMemBuffer(out); auto_ptr<Module> m2; m2.reset(ParseIR(buffer2, diag, context)); { PassManager manager; manager.add(createInstructionCombiningPass()); manager.run(*m2.get()); } std::vector<CallInst *> LoopFuctionCalls; { PassManager manager; manager.add(createBranchedLoopExtractorPass(LoopFuctionCalls)); manager.run(*m2.get()); } //m2.get()->dump(); // // 5) Replace call to loop functions with call to launcher. // Append "always inline" attribute to all other functions. 
// Type* int32Ty = Type::getInt32Ty(context); Function* launch = Function::Create( TypeBuilder<types::i<32>(types::i<8>*, types::i<64>, types::i<32>*), true>::get(context), GlobalValue::ExternalLinkage, "kernelgen_launch", m2.get()); for (Module::iterator f1 = m2.get()->begin(), fe1 = m2.get()->end(); f1 != fe1; f1++) { Function* func = f1; if (func->isDeclaration()) continue; // Search for the current function in original module // functions list. // If function is not in list of original module, then // it is generated by the loop extractor. // Append "always inline" attribute to all other functions. if (m1.get()->getFunction(func->getName())) { const AttrListPtr attr = func->getAttributes(); const AttrListPtr attr_new = attr.addAttr(~0U, Attribute::AlwaysInline); func->setAttributes(attr_new); continue; } // Each such function must be extracted to the // standalone module and packed into resulting // object file data section. if (verbose) cout << "Preparing loop function " << func->getName().data() << " ..." << endl; // Reset to default visibility. func->setVisibility(GlobalValue::DefaultVisibility); // Reset to default linkage. func->setLinkage(GlobalValue::ExternalLinkage); // Replace call to this function in module with call to launcher. bool found = false; for (Module::iterator f2 = m2->begin(), fe2 = m2->end(); (f2 != fe2) && !found; f2++) for (Function::iterator bb = f2->begin(); (bb != f2->end()) && !found; bb++) for (BasicBlock::iterator i = bb->begin(); i != bb->end(); i++) { // Check if instruction in focus is a call. CallInst* call = dyn_cast<CallInst>(cast<Value>(i)); if (!call) continue; // Check if function is called (needs -instcombine pass). Function* callee = call->getCalledFunction(); if (!callee) continue; if (callee->isDeclaration()) continue; if (callee->getName() != func->getName()) continue; // Create a constant array holding original called // function name. 
Constant* name = ConstantArray::get( context, callee->getName(), true); // Create and initialize the memory buffer for name. ArrayType* nameTy = cast<ArrayType>(name->getType()); AllocaInst* nameAlloc = new AllocaInst(nameTy, "", call); StoreInst* nameInit = new StoreInst(name, nameAlloc, "", call); Value* Idx[2]; Idx[0] = Constant::getNullValue(Type::getInt32Ty(context)); Idx[1] = ConstantInt::get(Type::getInt32Ty(context), 0); GetElementPtrInst* namePtr = GetElementPtrInst::Create(nameAlloc, Idx, "", call); // Add pointer to the original function string name. SmallVector<Value*, 16> call_args; call_args.push_back(namePtr); // Add size of the aggregated arguments structure. { BitCastInst* BC = new BitCastInst( call->getArgOperand(0), Type::getInt64PtrTy(context), "", call); LoadInst* LI = new LoadInst(BC, "", call); call_args.push_back(LI); } // Add original aggregated structure argument. call_args.push_back(call->getArgOperand(0)); // Create new function call with new call arguments // and copy old call properties. CallInst* newcall = CallInst::Create(launch, call_args, "", call); //newcall->takeName(call); newcall->setCallingConv(call->getCallingConv()); newcall->setAttributes(call->getAttributes()); newcall->setDebugLoc(call->getDebugLoc()); // Replace old call with new one. call->replaceAllUsesWith(newcall); call->eraseFromParent(); found = true; break; } } //m2.get()->dump(); // // 6) Apply optimization passes to the resulting common // module. // { PassManager manager; manager.add(createLowerSetJmpPass()); PassManagerBuilder builder; builder.Inliner = createFunctionInliningPass(); builder.OptLevel = 3; builder.DisableSimplifyLibCalls = true; builder.populateModulePassManager(manager); manager.run(*m2.get()); } //m2.get()->dump(); // // 7) Embed the resulting module into object file. 
// { string ir_string; raw_string_ostream ir(ir_string); ir << (*m2.get()); celf e(tmp_output.getFilename(), output); e.getSection(".data")->addSymbol( "__kernelgen_" + string(input), ir_string.c_str(), ir_string.size() + 1); } return 0; }