MachineBasicBlock *
MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
  // Splitting the critical edge to a landing pad block is non-trivial. Don't do
  // it in this generic function.
  if (Succ->isEHPad())
    return nullptr;

  MachineFunction *MF = getParent();
  DebugLoc DL;  // FIXME: this is nowhere

  // Performance might be harmed on HW that implements branching using exec mask
  // where both sides of the branches are always executed.
  if (MF->getTarget().requiresStructuredCFG())
    return nullptr;

  // We may need to update this's terminator, but we can't do that if
  // AnalyzeBranch fails. If this uses a jump table, we won't touch it.
  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
  SmallVector<MachineOperand, 4> Cond;
  if (TII->AnalyzeBranch(*this, TBB, FBB, Cond))
    return nullptr;

  // Avoid bugpoint weirdness: A block may end with a conditional branch but
  // jump to the same MBB in either case. We have duplicate CFG edges in that
  // case that we can't handle. Since this never happens in properly optimized
  // code, just skip those edges.
  if (TBB && TBB == FBB) {
    DEBUG(dbgs() << "Won't split critical edge after degenerate BB#"
                 << getNumber() << '\n');
    return nullptr;
  }

  MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock();
  MF->insert(std::next(MachineFunction::iterator(this)), NMBB);
  DEBUG(dbgs() << "Splitting critical edge:"
                  " BB#" << getNumber()
               << " -- BB#" << NMBB->getNumber()
               << " -- BB#" << Succ->getNumber() << '\n');

  LiveIntervals *LIS = P->getAnalysisIfAvailable<LiveIntervals>();
  SlotIndexes *Indexes = P->getAnalysisIfAvailable<SlotIndexes>();
  if (LIS)
    LIS->insertMBBInMaps(NMBB);
  else if (Indexes)
    Indexes->insertMBBInMaps(NMBB);

  // On some targets like Mips, branches may kill virtual registers. Make sure
  // that LiveVariables is properly updated after updateTerminator replaces the
  // terminators.
  LiveVariables *LV = P->getAnalysisIfAvailable<LiveVariables>();

  // Collect a list of virtual registers killed by the terminators.
  SmallVector<unsigned, 4> KilledRegs;
  if (LV)
    for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
         I != E; ++I) {
      MachineInstr *MI = &*I;
      for (MachineInstr::mop_iterator OI = MI->operands_begin(),
           OE = MI->operands_end(); OI != OE; ++OI) {
        if (!OI->isReg() || OI->getReg() == 0 ||
            !OI->isUse() || !OI->isKill() || OI->isUndef())
          continue;
        unsigned Reg = OI->getReg();
        if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
            LV->getVarInfo(Reg).removeKill(MI)) {
          KilledRegs.push_back(Reg);
          DEBUG(dbgs() << "Removing terminator kill: " << *MI);
          OI->setIsKill(false);
        }
      }
    }

  SmallVector<unsigned, 4> UsedRegs;
  if (LIS) {
    for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
         I != E; ++I) {
      MachineInstr *MI = &*I;

      for (MachineInstr::mop_iterator OI = MI->operands_begin(),
           OE = MI->operands_end(); OI != OE; ++OI) {
        if (!OI->isReg() || OI->getReg() == 0)
          continue;

        unsigned Reg = OI->getReg();
        if (std::find(UsedRegs.begin(), UsedRegs.end(), Reg) == UsedRegs.end())
          UsedRegs.push_back(Reg);
      }
    }
  }

  ReplaceUsesOfBlockWith(Succ, NMBB);

  // If updateTerminator() removes instructions, we need to remove them from
  // SlotIndexes.
  SmallVector<MachineInstr*, 4> Terminators;
  if (Indexes) {
    for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
         I != E; ++I)
      Terminators.push_back(&*I);
  }

  updateTerminator();

  if (Indexes) {
    SmallVector<MachineInstr*, 4> NewTerminators;
    for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
         I != E; ++I)
      NewTerminators.push_back(&*I);

    for (SmallVectorImpl<MachineInstr*>::iterator I = Terminators.begin(),
         E = Terminators.end(); I != E; ++I) {
      if (std::find(NewTerminators.begin(), NewTerminators.end(), *I) ==
          NewTerminators.end())
        Indexes->removeMachineInstrFromMaps(*I);
    }
  }

  // Insert unconditional "jump Succ" instruction in NMBB if necessary.
  NMBB->addSuccessor(Succ);
  if (!NMBB->isLayoutSuccessor(Succ)) {
    Cond.clear();
    TII->InsertBranch(*NMBB, Succ, nullptr, Cond, DL);

    if (Indexes) {
      for (instr_iterator I = NMBB->instr_begin(), E = NMBB->instr_end();
           I != E; ++I) {
        // Some instructions may have been moved to NMBB by updateTerminator(),
        // so we first remove any instruction that already has an index.
        if (Indexes->hasIndex(&*I))
          Indexes->removeMachineInstrFromMaps(&*I);
        Indexes->insertMachineInstrInMaps(&*I);
      }
    }
  }

  // Fix PHI nodes in Succ so they refer to NMBB instead of this.
  for (MachineBasicBlock::instr_iterator
         i = Succ->instr_begin(), e = Succ->instr_end();
       i != e && i->isPHI(); ++i)
    for (unsigned ni = 1, ne = i->getNumOperands(); ni != ne; ni += 2)
      if (i->getOperand(ni+1).getMBB() == this)
        i->getOperand(ni+1).setMBB(NMBB);

  // Inherit live-ins from the successor.
  for (const auto &LI : Succ->liveins())
    NMBB->addLiveIn(LI);

  // Update LiveVariables.
  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
  if (LV) {
    // Restore kills of virtual registers that were killed by the terminators.
    while (!KilledRegs.empty()) {
      unsigned Reg = KilledRegs.pop_back_val();
      for (instr_iterator I = instr_end(), E = instr_begin(); I != E;) {
        if (!(--I)->addRegisterKilled(Reg, TRI, /* addIfNotFound= */ false))
          continue;
        if (TargetRegisterInfo::isVirtualRegister(Reg))
          LV->getVarInfo(Reg).Kills.push_back(&*I);
        DEBUG(dbgs() << "Restored terminator kill: " << *I);
        break;
      }
    }
    // Update relevant live-through information.
    LV->addNewBlock(NMBB, this, Succ);
  }

  if (LIS) {
    // After splitting the edge and updating SlotIndexes, live intervals may be
    // in one of two situations, depending on whether this block was the last
    // in the function. If the original block was the last in the function, all
    // live intervals will end prior to the beginning of the new split block.
    // If the original block was not at the end of the function, all live
    // intervals will extend to the end of the new split block.

    bool isLastMBB =
      std::next(MachineFunction::iterator(NMBB)) == getParent()->end();

    SlotIndex StartIndex = Indexes->getMBBEndIdx(this);
    SlotIndex PrevIndex = StartIndex.getPrevSlot();
    SlotIndex EndIndex = Indexes->getMBBEndIdx(NMBB);

    // Find the registers used from NMBB in PHIs in Succ.
    SmallSet<unsigned, 8> PHISrcRegs;
    for (MachineBasicBlock::instr_iterator
         I = Succ->instr_begin(), E = Succ->instr_end();
         I != E && I->isPHI(); ++I) {
      for (unsigned ni = 1, ne = I->getNumOperands(); ni != ne; ni += 2) {
        if (I->getOperand(ni+1).getMBB() == NMBB) {
          MachineOperand &MO = I->getOperand(ni);
          unsigned Reg = MO.getReg();
          PHISrcRegs.insert(Reg);
          if (MO.isUndef())
            continue;

          LiveInterval &LI = LIS->getInterval(Reg);
          VNInfo *VNI = LI.getVNInfoAt(PrevIndex);
          assert(VNI &&
                 "PHI sources should be live out of their predecessors.");
          LI.addSegment(LiveInterval::Segment(StartIndex, EndIndex, VNI));
        }
      }
    }

    MachineRegisterInfo *MRI = &getParent()->getRegInfo();
    for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
      unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
      if (PHISrcRegs.count(Reg) || !LIS->hasInterval(Reg))
        continue;

      LiveInterval &LI = LIS->getInterval(Reg);
      if (!LI.liveAt(PrevIndex))
        continue;

      bool isLiveOut = LI.liveAt(LIS->getMBBStartIdx(Succ));
      if (isLiveOut && isLastMBB) {
        VNInfo *VNI = LI.getVNInfoAt(PrevIndex);
        assert(VNI && "LiveInterval should have VNInfo where it is live.");
        LI.addSegment(LiveInterval::Segment(StartIndex, EndIndex, VNI));
      } else if (!isLiveOut && !isLastMBB) {
        LI.removeSegment(StartIndex, EndIndex);
      }
    }

    // Update all intervals for registers whose uses may have been modified by
    // updateTerminator().
    LIS->repairIntervalsInRange(this, getFirstTerminator(), end(), UsedRegs);
  }

  if (MachineDominatorTree *MDT =
      P->getAnalysisIfAvailable<MachineDominatorTree>())
    MDT->recordSplitCriticalEdge(this, Succ, NMBB);

  if (MachineLoopInfo *MLI = P->getAnalysisIfAvailable<MachineLoopInfo>())
    if (MachineLoop *TIL = MLI->getLoopFor(this)) {
      // If one or the other blocks were not in a loop, the new block is not
      // either, and thus LI doesn't need to be updated.
      if (MachineLoop *DestLoop = MLI->getLoopFor(Succ)) {
        if (TIL == DestLoop) {
          // Both in the same loop, the NMBB joins loop.
          DestLoop->addBasicBlockToLoop(NMBB, MLI->getBase());
        } else if (TIL->contains(DestLoop)) {
          // Edge from an outer loop to an inner loop. Add to the outer loop.
          TIL->addBasicBlockToLoop(NMBB, MLI->getBase());
        } else if (DestLoop->contains(TIL)) {
          // Edge from an inner loop to an outer loop. Add to the outer loop.
          DestLoop->addBasicBlockToLoop(NMBB, MLI->getBase());
        } else {
          // Edge from two loops with no containment relation. Because these
          // are natural loops, we know that the destination block must be the
          // header of its loop (adding a branch into a loop elsewhere would
          // create an irreducible loop).
          assert(DestLoop->getHeader() == Succ &&
                 "Should not create irreducible loops!");
          if (MachineLoop *P = DestLoop->getParentLoop())
            P->addBasicBlockToLoop(NMBB, MLI->getBase());
        }
      }
    }

  return NMBB;
}
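// Editorial note: a minimal usage sketch for SplitCriticalEdge (hypothetical
// caller, not part of this file). A pass must copy the successor list before
// splitting, since a successful split rewrites it via
// ReplaceUsesOfBlockWith(), and must tolerate the nullptr "cannot split"
// result returned for EH pads, structured-CFG targets, AnalyzeBranch
// failures, and degenerate duplicate edges:
//
// \code
//   SmallVector<MachineBasicBlock *, 4> Succs(MBB->succ_begin(),
//                                             MBB->succ_end());
//   for (MachineBasicBlock *Succ : Succs)
//     if (MachineBasicBlock *NewMBB = MBB->SplitCriticalEdge(Succ, P))
//       ; // NewMBB now sits on the former critical edge MBB -> Succ.
// \endcode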
PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
                                   CGSCCAnalysisManager &AM, LazyCallGraph &CG,
                                   CGSCCUpdateResult &UR) {
  const ModuleAnalysisManager &MAM =
      AM.getResult<ModuleAnalysisManagerCGSCCProxy>(InitialC, CG).getManager();
  bool Changed = false;

  assert(InitialC.size() > 0 && "Cannot handle an empty SCC!");
  Module &M = *InitialC.begin()->getFunction().getParent();
  ProfileSummaryInfo *PSI = MAM.getCachedResult<ProfileSummaryAnalysis>(M);

  if (!ImportedFunctionsStats &&
      InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No) {
    ImportedFunctionsStats =
        llvm::make_unique<ImportedFunctionsInliningStatistics>();
    ImportedFunctionsStats->setModuleInfo(M);
  }

  // We use a single common worklist for calls across the entire SCC. We
  // process these in-order and append new calls introduced during inlining to
  // the end.
  //
  // Note that this particular order of processing is actually critical to
  // avoid very bad behaviors. Consider *highly connected* call graphs where
  // each function contains a small amount of code and a couple of calls to
  // other functions. Because the LLVM inliner is fundamentally a bottom-up
  // inliner, it can handle gracefully the fact that these all appear to be
  // reasonable inlining candidates as it will flatten things until they become
  // too big to inline, and then move on and flatten another batch.
  //
  // However, when processing call edges *within* an SCC we cannot rely on this
  // bottom-up behavior. As a consequence, with heavily connected *SCCs* of
  // functions we can end up incrementally inlining N calls into each of
  // N functions because each incremental inlining decision looks good and we
  // don't have a topological ordering to prevent explosions.
  //
  // To compensate for this, we don't process transitive edges made immediate
  // by inlining until we've done one pass of inlining across the entire SCC.
  // Large, highly connected SCCs still lead to some amount of code bloat in
  // this model, but it is uniformly spread across all the functions in the SCC
  // and eventually they all become too large to inline, rather than
  // incrementally making a single function grow in a super linear fashion.
  SmallVector<std::pair<CallSite, int>, 16> Calls;

  FunctionAnalysisManager &FAM =
      AM.getResult<FunctionAnalysisManagerCGSCCProxy>(InitialC, CG)
          .getManager();

  // Populate the initial list of calls in this SCC.
  for (auto &N : InitialC) {
    auto &ORE =
        FAM.getResult<OptimizationRemarkEmitterAnalysis>(N.getFunction());
    // We want to generally process call sites top-down in order for
    // simplifications stemming from replacing the call with the returned value
    // after inlining to be visible to subsequent inlining decisions.
    // FIXME: Using instructions sequence is a really bad way to do this.
    // Instead we should do an actual RPO walk of the function body.
    for (Instruction &I : instructions(N.getFunction()))
      if (auto CS = CallSite(&I))
        if (Function *Callee = CS.getCalledFunction()) {
          if (!Callee->isDeclaration())
            Calls.push_back({CS, -1});
          else if (!isa<IntrinsicInst>(I)) {
            using namespace ore;
            ORE.emit([&]() {
              return OptimizationRemarkMissed(DEBUG_TYPE, "NoDefinition", &I)
                     << NV("Callee", Callee) << " will not be inlined into "
                     << NV("Caller", CS.getCaller())
                     << " because its definition is unavailable"
                     << setIsVerbose();
            });
          }
        }
  }
  if (Calls.empty())
    return PreservedAnalyses::all();

  // Capture updatable variables for the current SCC and RefSCC.
  auto *C = &InitialC;
  auto *RC = &C->getOuterRefSCC();

  // When inlining a callee produces new call sites, we want to keep track of
  // the fact that they were inlined from the callee. This allows us to avoid
  // infinite inlining in some obscure cases. To represent this, we use an
  // index into the InlineHistory vector.
  SmallVector<std::pair<Function *, int>, 16> InlineHistory;

  // Track a set vector of inlined callees so that we can augment the caller
  // with all of their edges in the call graph before pruning out the ones that
  // got simplified away.
  SmallSetVector<Function *, 4> InlinedCallees;

  // Track the dead functions to delete once finished with inlining calls. We
  // defer deleting these to make it easier to handle the call graph updates.
  SmallVector<Function *, 4> DeadFunctions;

  // Loop forward over all of the calls. Note that we cannot cache the size as
  // inlining can introduce new calls that need to be processed.
  for (int i = 0; i < (int)Calls.size(); ++i) {
    // We expect the calls to typically be batched with sequences of calls that
    // have the same caller, so we first set up some shared infrastructure for
    // this caller. We also do any pruning we can at this layer on the caller
    // alone.
    Function &F = *Calls[i].first.getCaller();
    LazyCallGraph::Node &N = *CG.lookup(F);
    if (CG.lookupSCC(N) != C)
      continue;
    if (F.hasFnAttribute(Attribute::OptimizeNone))
      continue;

    LLVM_DEBUG(dbgs() << "Inlining calls in: " << F.getName() << "\n");

    // Get a FunctionAnalysisManager via a proxy for this particular node. We
    // do this each time we visit a node as the SCC may have changed and as
    // we're going to mutate this particular function we want to make sure the
    // proxy is in place to forward any invalidation events. We can use the
    // manager we get here for looking up results for functions other than this
    // node however because those functions aren't going to be mutated by this
    // pass.
    FunctionAnalysisManager &FAM =
        AM.getResult<FunctionAnalysisManagerCGSCCProxy>(*C, CG).getManager();

    // Get the remarks emission analysis for the caller.
    auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);

    std::function<AssumptionCache &(Function &)> GetAssumptionCache =
        [&](Function &F) -> AssumptionCache & {
      return FAM.getResult<AssumptionAnalysis>(F);
    };
    auto GetBFI = [&](Function &F) -> BlockFrequencyInfo & {
      return FAM.getResult<BlockFrequencyAnalysis>(F);
    };

    auto GetInlineCost = [&](CallSite CS) {
      Function &Callee = *CS.getCalledFunction();
      auto &CalleeTTI = FAM.getResult<TargetIRAnalysis>(Callee);
      return getInlineCost(CS, Params, CalleeTTI, GetAssumptionCache, {GetBFI},
                           PSI, &ORE);
    };

    // Now process as many calls as we have within this caller in the sequence.
    // We bail out as soon as the caller has to change so we can update the
    // call graph and prepare the context of that new caller.
    bool DidInline = false;
    for (; i < (int)Calls.size() && Calls[i].first.getCaller() == &F; ++i) {
      int InlineHistoryID;
      CallSite CS;
      std::tie(CS, InlineHistoryID) = Calls[i];
      Function &Callee = *CS.getCalledFunction();

      if (InlineHistoryID != -1 &&
          InlineHistoryIncludes(&Callee, InlineHistoryID, InlineHistory))
        continue;

      // Check if this inlining may repeat breaking an SCC apart that has
      // already been split once before. In that case, inlining here may
      // trigger infinite inlining, much like is prevented within the inliner
      // itself by the InlineHistory above, but spread across CGSCC iterations
      // and thus hidden from the full inline history.
      if (CG.lookupSCC(*CG.lookup(Callee)) == C &&
          UR.InlinedInternalEdges.count({&N, C})) {
        LLVM_DEBUG(dbgs() << "Skipping inlining internal SCC edge from a node "
                             "previously split out of this SCC by inlining: "
                          << F.getName() << " -> " << Callee.getName()
                          << "\n");
        continue;
      }

      Optional<InlineCost> OIC = shouldInline(CS, GetInlineCost, ORE);
      // Check whether we want to inline this callsite.
      if (!OIC)
        continue;

      // Set up the data structure used to plumb customization into the
      // `InlineFunction` routine.
      InlineFunctionInfo IFI(
          /*cg=*/nullptr, &GetAssumptionCache, PSI,
          &FAM.getResult<BlockFrequencyAnalysis>(*(CS.getCaller())),
          &FAM.getResult<BlockFrequencyAnalysis>(Callee));

      // Get DebugLoc to report. CS will be invalid after Inliner.
      DebugLoc DLoc = CS->getDebugLoc();
      BasicBlock *Block = CS.getParent();

      using namespace ore;
      if (!InlineFunction(CS, IFI)) {
        ORE.emit([&]() {
          return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc,
                                          Block)
                 << NV("Callee", &Callee) << " will not be inlined into "
                 << NV("Caller", &F);
        });
        continue;
      }
      DidInline = true;
      InlinedCallees.insert(&Callee);

      ORE.emit([&]() {
        bool AlwaysInline = OIC->isAlways();
        StringRef RemarkName = AlwaysInline ? "AlwaysInline" : "Inlined";
        OptimizationRemark R(DEBUG_TYPE, RemarkName, DLoc, Block);
        R << NV("Callee", &Callee) << " inlined into ";
        R << NV("Caller", &F);
        if (AlwaysInline)
          R << " with cost=always";
        else {
          R << " with cost=" << NV("Cost", OIC->getCost());
          R << " (threshold=" << NV("Threshold", OIC->getThreshold());
          R << ")";
        }
        return R;
      });

      // Add any new callsites to defined functions to the worklist.
      if (!IFI.InlinedCallSites.empty()) {
        int NewHistoryID = InlineHistory.size();
        InlineHistory.push_back({&Callee, InlineHistoryID});
        for (CallSite &CS : reverse(IFI.InlinedCallSites))
          if (Function *NewCallee = CS.getCalledFunction())
            if (!NewCallee->isDeclaration())
              Calls.push_back({CS, NewHistoryID});
      }

      if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No)
        ImportedFunctionsStats->recordInline(F, Callee);

      // Merge the attributes based on the inlining.
      AttributeFuncs::mergeAttributesForInlining(F, Callee);

      // For local functions, check whether this makes the callee trivially
      // dead. In that case, we can drop the body of the function eagerly
      // which may reduce the number of callers of other functions to one,
      // changing inline cost thresholds.
      if (Callee.hasLocalLinkage()) {
        // To check this we also need to nuke any dead constant uses (perhaps
        // made dead by this operation on other functions).
        Callee.removeDeadConstantUsers();
        if (Callee.use_empty() && !CG.isLibFunction(Callee)) {
          Calls.erase(
              std::remove_if(Calls.begin() + i + 1, Calls.end(),
                             [&Callee](const std::pair<CallSite, int> &Call) {
                               return Call.first.getCaller() == &Callee;
                             }),
              Calls.end());
          // Clear the body and queue the function itself for deletion when we
          // finish inlining and call graph updates.
          // Note that after this point, it is an error to do anything other
          // than use the callee's address or delete it.
          Callee.dropAllReferences();
          assert(find(DeadFunctions, &Callee) == DeadFunctions.end() &&
                 "Cannot cause a function to become dead twice!");
          DeadFunctions.push_back(&Callee);
        }
      }
    }

    // Back the call index up by one to put us in a good position to go around
    // the outer loop.
    --i;

    if (!DidInline)
      continue;
    Changed = true;

    // Add all the inlined callees' edges as ref edges to the caller. These are
    // by definition trivial edges as we always have *some* transitive ref edge
    // chain. While in some cases these edges are direct calls inside the
    // callee, they have to be modeled in the inliner as reference edges as
    // there may be a reference edge anywhere along the chain from the current
    // caller to the callee that causes the whole thing to appear like
    // a (transitive) reference edge that will require promotion to a call edge
    // below.
    for (Function *InlinedCallee : InlinedCallees) {
      LazyCallGraph::Node &CalleeN = *CG.lookup(*InlinedCallee);
      for (LazyCallGraph::Edge &E : *CalleeN)
        RC->insertTrivialRefEdge(N, E.getNode());
    }

    // At this point, since we have made changes we have at least removed
    // a call instruction. However, in the process we do some incremental
    // simplification of the surrounding code. This simplification can
    // essentially do all of the same things as a function pass and we can
    // re-use the exact same logic for updating the call graph to reflect the
    // change.
    LazyCallGraph::SCC *OldC = C;
    C = &updateCGAndAnalysisManagerForFunctionPass(CG, *C, N, AM, UR);
    LLVM_DEBUG(dbgs() << "Updated inlining SCC: " << *C << "\n");
    RC = &C->getOuterRefSCC();

    // If this causes an SCC to split apart into multiple smaller SCCs, there
    // is a subtle risk we need to prepare for. Other transformations may
    // expose an "infinite inlining" opportunity later, and because of the SCC
    // mutation, we will revisit this function and potentially re-inline. If we
    // do, and that re-inlining also has the potential to mutate the SCC
    // structure, the infinite inlining problem can manifest through infinite
    // SCC splits and merges. To avoid this, we capture the originating caller
    // node and the SCC containing the call edge. This is a slight
    // over-approximation of the possible inlining decisions that must be
    // avoided, but is relatively efficient to store.
    // FIXME: This seems like a very heavyweight way of retaining the inline
    // history, we should look for a more efficient way of tracking it.
    if (C != OldC && llvm::any_of(InlinedCallees, [&](Function *Callee) {
          return CG.lookupSCC(*CG.lookup(*Callee)) == OldC;
        })) {
      LLVM_DEBUG(dbgs() << "Inlined an internal call edge and split an SCC, "
                           "retaining this to avoid infinite inlining.\n");
      UR.InlinedInternalEdges.insert({&N, OldC});
    }
    InlinedCallees.clear();
  }

  // Now that we've finished inlining all of the calls across this SCC, delete
  // all of the trivially dead functions, updating the call graph and the CGSCC
  // pass manager in the process.
  //
  // Note that this walks a pointer set which has non-deterministic order but
  // that is OK as all we do is delete things and add pointers to unordered
  // sets.
  for (Function *DeadF : DeadFunctions) {
    // Get the necessary information out of the call graph and nuke the
    // function there. Also, clear out any cached analyses.
    auto &DeadC = *CG.lookupSCC(*CG.lookup(*DeadF));
    FunctionAnalysisManager &FAM =
        AM.getResult<FunctionAnalysisManagerCGSCCProxy>(DeadC, CG)
            .getManager();
    FAM.clear(*DeadF, DeadF->getName());
    AM.clear(DeadC, DeadC.getName());
    auto &DeadRC = DeadC.getOuterRefSCC();
    CG.removeDeadFunction(*DeadF);

    // Mark the relevant parts of the call graph as invalid so we don't visit
    // them.
    UR.InvalidatedSCCs.insert(&DeadC);
    UR.InvalidatedRefSCCs.insert(&DeadRC);

    // And delete the actual function from the module.
    M.getFunctionList().erase(DeadF);
  }

  if (!Changed)
    return PreservedAnalyses::all();

  // Even if we change the IR, we update the core CGSCC data structures and so
  // can preserve the proxy to the function analysis manager.
  PreservedAnalyses PA;
  PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
  return PA;
}
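// Editorial note: InlineHistoryIncludes() is used above but defined elsewhere
// in this file; for the reader, a sketch of its logic under the same
// representation (each history entry is a (callee, parent-index) pair, and -1
// terminates the chain):
//
// \code
//   static bool historyIncludes(Function *F, int ID,
//                               ArrayRef<std::pair<Function *, int>> H) {
//     while (ID != -1) {
//       if (H[ID].first == F)
//         return true;      // F was already inlined along this chain.
//       ID = H[ID].second;  // Walk up to the enclosing inlining decision.
//     }
//     return false;
//   }
// \endcode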
/// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the
/// abstract stack objects.
///
void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
  const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering();
  StackProtector *SP = &getAnalysis<StackProtector>();

  bool StackGrowsDown =
    TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;

  // Loop over all of the stack objects, assigning sequential addresses...
  MachineFrameInfo *MFI = Fn.getFrameInfo();

  // Start at the beginning of the local area.
  // The Offset is the distance from the stack top in the direction
  // of stack growth -- so it's always nonnegative.
  int LocalAreaOffset = TFI.getOffsetOfLocalArea();
  if (StackGrowsDown)
    LocalAreaOffset = -LocalAreaOffset;
  assert(LocalAreaOffset >= 0 &&
         "Local area offset should be in direction of stack growth");
  int64_t Offset = LocalAreaOffset;

  // Skew to be applied to alignment.
  unsigned Skew = TFI.getStackAlignmentSkew(Fn);

  // If there are fixed sized objects that are preallocated in the local area,
  // non-fixed objects can't be allocated right at the start of local area.
  // We currently don't support filling in holes in between fixed sized
  // objects, so we adjust 'Offset' to point to the end of last fixed sized
  // preallocated object.
  for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) {
    int64_t FixedOff;
    if (StackGrowsDown) {
      // The maximum distance from the stack pointer is at lower address of
      // the object -- which is given by offset. For down growing stack
      // the offset is negative, so we negate the offset to get the distance.
      FixedOff = -MFI->getObjectOffset(i);
    } else {
      // The maximum distance from the start pointer is at the upper
      // address of the object.
      FixedOff = MFI->getObjectOffset(i) + MFI->getObjectSize(i);
    }
    if (FixedOff > Offset) Offset = FixedOff;
  }

  // First assign frame offsets to stack objects that are used to spill
  // callee saved registers.
  if (StackGrowsDown) {
    for (unsigned i = MinCSFrameIndex; i <= MaxCSFrameIndex; ++i) {
      // If the stack grows down, we need to add the size to find the lowest
      // address of the object.
      Offset += MFI->getObjectSize(i);

      unsigned Align = MFI->getObjectAlignment(i);
      // Adjust to alignment boundary
      Offset = RoundUpToAlignment(Offset, Align, Skew);

      MFI->setObjectOffset(i, -Offset);        // Set the computed offset
    }
  } else {
    int MaxCSFI = MaxCSFrameIndex, MinCSFI = MinCSFrameIndex;
    for (int i = MaxCSFI; i >= MinCSFI; --i) {
      unsigned Align = MFI->getObjectAlignment(i);
      // Adjust to alignment boundary
      Offset = RoundUpToAlignment(Offset, Align, Skew);

      MFI->setObjectOffset(i, Offset);
      Offset += MFI->getObjectSize(i);
    }
  }

  unsigned MaxAlign = MFI->getMaxAlignment();

  // Make sure the special register scavenging spill slot is closest to the
  // incoming stack pointer if a frame pointer is required and is closer
  // to the incoming rather than the final stack pointer.
  const TargetRegisterInfo *RegInfo = Fn.getSubtarget().getRegisterInfo();
  bool EarlyScavengingSlots = (TFI.hasFP(Fn) &&
                               TFI.isFPCloseToIncomingSP() &&
                               RegInfo->useFPForScavengingIndex(Fn) &&
                               !RegInfo->needsStackRealignment(Fn));
  if (RS && EarlyScavengingSlots) {
    SmallVector<int, 2> SFIs;
    RS->getScavengingFrameIndices(SFIs);
    for (SmallVectorImpl<int>::iterator I = SFIs.begin(),
           IE = SFIs.end(); I != IE; ++I)
      AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign, Skew);
  }

  // FIXME: Once this is working, then enable flag will change to a target
  // check for whether the frame is large enough to want to use virtual
  // frame index registers. Functions which don't want/need this optimization
  // will continue to use the existing code path.
  if (MFI->getUseLocalStackAllocationBlock()) {
    unsigned Align = MFI->getLocalFrameMaxAlign();

    // Adjust to alignment boundary.
    Offset = RoundUpToAlignment(Offset, Align, Skew);

    DEBUG(dbgs() << "Local frame base offset: " << Offset << "\n");

    // Resolve offsets for objects in the local block.
    for (unsigned i = 0, e = MFI->getLocalFrameObjectCount(); i != e; ++i) {
      std::pair<int, int64_t> Entry = MFI->getLocalFrameObjectMap(i);
      int64_t FIOffset = (StackGrowsDown ? -Offset : Offset) + Entry.second;
      DEBUG(dbgs() << "alloc FI(" << Entry.first << ") at SP[" <<
            FIOffset << "]\n");
      MFI->setObjectOffset(Entry.first, FIOffset);
    }
    // Allocate the local block
    Offset += MFI->getLocalFrameSize();

    MaxAlign = std::max(Align, MaxAlign);
  }

  // Make sure that the stack protector comes before the local variables on the
  // stack.
  SmallSet<int, 16> ProtectedObjs;
  if (MFI->getStackProtectorIndex() >= 0) {
    StackObjSet LargeArrayObjs;
    StackObjSet SmallArrayObjs;
    StackObjSet AddrOfObjs;

    AdjustStackOffset(MFI, MFI->getStackProtectorIndex(), StackGrowsDown,
                      Offset, MaxAlign, Skew);

    // Assign large stack objects first.
    for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
      if (MFI->isObjectPreAllocated(i) &&
          MFI->getUseLocalStackAllocationBlock())
        continue;
      if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex)
        continue;
      if (RS && RS->isScavengingFrameIndex((int)i))
        continue;
      if (MFI->isDeadObjectIndex(i))
        continue;
      if (MFI->getStackProtectorIndex() == (int)i)
        continue;

      switch (SP->getSSPLayout(MFI->getObjectAllocation(i))) {
      case StackProtector::SSPLK_None:
        continue;
      case StackProtector::SSPLK_SmallArray:
        SmallArrayObjs.insert(i);
        continue;
      case StackProtector::SSPLK_AddrOf:
        AddrOfObjs.insert(i);
        continue;
      case StackProtector::SSPLK_LargeArray:
        LargeArrayObjs.insert(i);
        continue;
      }
      llvm_unreachable("Unexpected SSPLayoutKind.");
    }

    AssignProtectedObjSet(LargeArrayObjs, ProtectedObjs, MFI, StackGrowsDown,
                          Offset, MaxAlign, Skew);
    AssignProtectedObjSet(SmallArrayObjs, ProtectedObjs, MFI, StackGrowsDown,
                          Offset, MaxAlign, Skew);
    AssignProtectedObjSet(AddrOfObjs, ProtectedObjs, MFI, StackGrowsDown,
                          Offset, MaxAlign, Skew);
  }

  // Then assign frame offsets to stack objects that are not used to spill
  // callee saved registers.
  for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
    if (MFI->isObjectPreAllocated(i) &&
        MFI->getUseLocalStackAllocationBlock())
      continue;
    if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex)
      continue;
    if (RS && RS->isScavengingFrameIndex((int)i))
      continue;
    if (MFI->isDeadObjectIndex(i))
      continue;
    if (MFI->getStackProtectorIndex() == (int)i)
      continue;
    if (ProtectedObjs.count(i))
      continue;

    AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign, Skew);
  }

  // Make sure the special register scavenging spill slot is closest to the
  // stack pointer.
  if (RS && !EarlyScavengingSlots) {
    SmallVector<int, 2> SFIs;
    RS->getScavengingFrameIndices(SFIs);
    for (SmallVectorImpl<int>::iterator I = SFIs.begin(),
           IE = SFIs.end(); I != IE; ++I)
      AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign, Skew);
  }

  if (!TFI.targetHandlesStackFrameRounding()) {
    // If we have reserved argument space for call sites in the function
    // immediately on entry to the current function, count it as part of the
    // overall stack size.
    if (MFI->adjustsStack() && TFI.hasReservedCallFrame(Fn))
      Offset += MFI->getMaxCallFrameSize();

    // Round up the size to a multiple of the alignment. If the function has
    // any calls or alloca's, align to the target's StackAlignment value to
    // ensure that the callee's frame or the alloca data is suitably aligned;
    // otherwise, for leaf functions, align to the TransientStackAlignment
    // value.
    unsigned StackAlign;
    if (MFI->adjustsStack() || MFI->hasVarSizedObjects() ||
        (RegInfo->needsStackRealignment(Fn) && MFI->getObjectIndexEnd() != 0))
      StackAlign = TFI.getStackAlignment();
    else
      StackAlign = TFI.getTransientStackAlignment();

    // If the frame pointer is eliminated, all frame offsets will be relative
    // to SP not FP. Align to MaxAlign so this works.
    StackAlign = std::max(StackAlign, MaxAlign);
    Offset = RoundUpToAlignment(Offset, StackAlign, Skew);
  }

  // Update frame info to pretend that this is part of the stack...
  int64_t StackSize = Offset - LocalAreaOffset;
  MFI->setStackSize(StackSize);
  NumBytesStackSpace += StackSize;
}
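// Editorial note: a sketch of the round-up-with-skew arithmetic assumed by
// the RoundUpToAlignment(Offset, Align, Skew) calls above: the result is the
// smallest value >= Value that is congruent to Skew modulo Align.
//
// \code
//   static int64_t roundUpWithSkew(int64_t Value, unsigned Align,
//                                  unsigned Skew) {
//     Skew %= Align;
//     return (Value + Align - 1 - Skew) / Align * Align + Skew;
//   }
// \endcode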
/// findLoopBackEdges - Do a DFS walk to find loop back edges.
///
void CodeGenPrepare::findLoopBackEdges(const Function &F) {
  SmallVector<std::pair<const BasicBlock*, const BasicBlock*>, 32> Edges;
  FindFunctionBackedges(F, Edges);

  BackEdges.insert(Edges.begin(), Edges.end());
}
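// Editorial note: a "back edge" here is an edge whose destination is already
// on the DFS stack when the edge is walked -- e.g. the latch-to-header edge
// produced by any ordinary loop:
//
// \code
//   for (int i = 0; i != n; ++i) { /* body */ }  // body/latch -> header
// \endcode
//
// FindFunctionBackedges (LLVM's CFG analysis utilities) computes these
// (From, To) pairs with a plain DFS, so no dominator or loop analysis is
// required.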
/// If there were any appending global variables, link them together now.
Expected<Constant *>
IRLinker::linkAppendingVarProto(GlobalVariable *DstGV,
                                const GlobalVariable *SrcGV) {
  Type *EltTy = cast<ArrayType>(TypeMap.get(SrcGV->getValueType()))
                    ->getElementType();

  // FIXME: This upgrade is done during linking to support the C API. Once the
  // old form is deprecated, we should move this upgrade to
  // llvm::UpgradeGlobalVariable() and simplify the logic here and in
  // Mapper::mapAppendingVariable() in ValueMapper.cpp.
  StringRef Name = SrcGV->getName();
  bool IsNewStructor = false;
  bool IsOldStructor = false;
  if (Name == "llvm.global_ctors" || Name == "llvm.global_dtors") {
    if (cast<StructType>(EltTy)->getNumElements() == 3)
      IsNewStructor = true;
    else
      IsOldStructor = true;
  }

  PointerType *VoidPtrTy = Type::getInt8Ty(SrcGV->getContext())->getPointerTo();
  if (IsOldStructor) {
    auto &ST = *cast<StructType>(EltTy);
    Type *Tys[3] = {ST.getElementType(0), ST.getElementType(1), VoidPtrTy};
    EltTy = StructType::get(SrcGV->getContext(), Tys, false);
  }

  uint64_t DstNumElements = 0;
  if (DstGV) {
    ArrayType *DstTy = cast<ArrayType>(DstGV->getValueType());
    DstNumElements = DstTy->getNumElements();

    if (!SrcGV->hasAppendingLinkage() || !DstGV->hasAppendingLinkage())
      return stringErr(
          "Linking globals named '" + SrcGV->getName() +
          "': can only link appending global with another appending "
          "global!");

    // Check to see that the two arrays agree on type.
    if (EltTy != DstTy->getElementType())
      return stringErr("Appending variables with different element types!");
    if (DstGV->isConstant() != SrcGV->isConstant())
      return stringErr(
          "Appending variables linked with different const'ness!");

    if (DstGV->getAlignment() != SrcGV->getAlignment())
      return stringErr(
          "Appending variables with different alignment need to be linked!");

    if (DstGV->getVisibility() != SrcGV->getVisibility())
      return stringErr(
          "Appending variables with different visibility need to be linked!");

    if (DstGV->hasGlobalUnnamedAddr() != SrcGV->hasGlobalUnnamedAddr())
      return stringErr(
          "Appending variables with different unnamed_addr need to be "
          "linked!");

    if (DstGV->getSection() != SrcGV->getSection())
      return stringErr(
          "Appending variables with different section name need to be "
          "linked!");
  }

  SmallVector<Constant *, 16> SrcElements;
  getArrayElements(SrcGV->getInitializer(), SrcElements);

  if (IsNewStructor) {
    auto It = remove_if(SrcElements, [this](Constant *E) {
      auto *Key =
          dyn_cast<GlobalValue>(E->getAggregateElement(2)->stripPointerCasts());
      if (!Key)
        return false;
      GlobalValue *DGV = getLinkedToGlobal(Key);
      return !shouldLink(DGV, *Key);
    });
    SrcElements.erase(It, SrcElements.end());
  }
  uint64_t NewSize = DstNumElements + SrcElements.size();
  ArrayType *NewType = ArrayType::get(EltTy, NewSize);

  // Create the new global variable.
  GlobalVariable *NG = new GlobalVariable(
      DstM, NewType, SrcGV->isConstant(), SrcGV->getLinkage(),
      /*init*/ nullptr, /*name*/ "", DstGV, SrcGV->getThreadLocalMode(),
      SrcGV->getType()->getAddressSpace());

  NG->copyAttributesFrom(SrcGV);
  forceRenaming(NG, SrcGV->getName());

  Constant *Ret = ConstantExpr::getBitCast(NG, TypeMap.get(SrcGV->getType()));

  Mapper.scheduleMapAppendingVariable(*NG,
                                      DstGV ? DstGV->getInitializer() : nullptr,
                                      IsOldStructor, SrcElements);

  // Replace any uses of the two global variables with uses of the new
  // global.
  if (DstGV) {
    DstGV->replaceAllUsesWith(ConstantExpr::getBitCast(NG, DstGV->getType()));
    DstGV->eraseFromParent();
  }

  return Ret;
}
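// Editorial note: for illustration, the two llvm.global_ctors/global_dtors
// element shapes that the IsNewStructor/IsOldStructor logic above
// distinguishes (IR-level view):
//
//   old form: { i32, void ()* }       ; priority, function
//   new form: { i32, void ()*, i8* }  ; priority, function, associated data
//
// The old form is upgraded by appending an i8* field, which is what the
// VoidPtrTy substitution implements.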
/// Expand the arguments of a function-like macro so that we can quickly
/// return preexpanded tokens from Tokens.
void TokenLexer::ExpandFunctionArguments() {
  SmallVector<Token, 128> ResultToks;

  // Loop through 'Tokens', expanding them into ResultToks. Keep
  // track of whether we change anything. If not, no need to keep them. If so,
  // we install the newly expanded sequence as the new 'Tokens' list.
  bool MadeChange = false;

  const bool CalledWithVariadicArguments =
      ActualArgs->invokedWithVariadicArgument(Macro);

  VAOptExpansionContext VCtx(PP);

  for (unsigned I = 0, E = NumTokens; I != E; ++I) {
    const Token &CurTok = Tokens[I];
    // We don't want a space for the next token after a paste
    // operator. In valid code, the token will get smooshed onto the
    // preceding one anyway. In assembler-with-cpp mode, invalid
    // pastes are allowed through: in this case, we do not want the
    // extra whitespace to be added. For example, we want ". ## foo"
    // -> ".foo" not ". foo".
    if (I != 0 && !Tokens[I-1].is(tok::hashhash) && CurTok.hasLeadingSpace())
      NextTokGetsSpace = true;

    if (VCtx.isVAOptToken(CurTok)) {
      MadeChange = true;
      assert(Tokens[I + 1].is(tok::l_paren) &&
             "__VA_OPT__ must be followed by '('");

      ++I;  // Skip the l_paren
      VCtx.sawVAOptFollowedByOpeningParens(CurTok.getLocation(),
                                           ResultToks.size());
      continue;
    }

    // We have entered into the __VA_OPT__ context, so handle tokens
    // appropriately.
    if (VCtx.isInVAOpt()) {
      // If we are about to process a token that is either an argument to
      // __VA_OPT__ or its closing rparen, then:
      //  1) If the token is the closing rparen that exits us out of
      //     __VA_OPT__, perform any necessary stringification or placemarker
      //     processing, and/or skip to the next token.
      //  2) else if the macro was invoked without variadic arguments, skip
      //     this token.
      //  3) else (the macro was invoked with variadic arguments) process the
      //     token normally.

      if (Tokens[I].is(tok::l_paren))
        VCtx.sawOpeningParen(Tokens[I].getLocation());
      // Continue skipping tokens within __VA_OPT__ if the macro was not
      // called with variadic arguments, else let the rest of the loop handle
      // this token. Note sawClosingParen() returns true only if the r_paren
      // matches the closing r_paren of the __VA_OPT__.
      if (!Tokens[I].is(tok::r_paren) || !VCtx.sawClosingParen()) {
        if (!CalledWithVariadicArguments) {
          // Skip this token.
          continue;
        }
        // ... else the macro was called with variadic arguments, and we do
        // not have a closing rparen - so process this token normally.
      } else {
        // Current token is the closing r_paren which marks the end of the
        // __VA_OPT__ invocation, so handle any place-marker pasting (if
        // empty) by removing hashhash either before (if exists) or after.
        // And also stringify the entire contents if VAOPT was preceded by
        // a hash, but do so only after any token concatenation that needs to
        // occur within the contents of VAOPT.
        if (VCtx.hasStringifyOrCharifyBefore()) {
          // Replace all the tokens just added from within VAOPT into a single
          // stringified token. This requires token-pasting to eagerly occur
          // within these tokens. If either the contents of VAOPT were empty
          // or the macro wasn't called with any variadic arguments, the
          // result is a token that represents an empty string.
          stringifyVAOPTContents(ResultToks, VCtx,
                                 /*ClosingParenLoc*/ Tokens[I].getLocation());
        } else if (/*No tokens within VAOPT*/ !(
                       ResultToks.size() -
                       VCtx.getNumberOfTokensPriorToVAOpt())) {
          // Treat VAOPT as a placemarker token. Eat either the '##' before
          // the RHS/VAOPT (if one exists, suggesting that the LHS (if any)
          // to that hashhash was not a placemarker) or the '##' after VAOPT,
          // but not both.
          if (ResultToks.size() && ResultToks.back().is(tok::hashhash)) {
            ResultToks.pop_back();
          } else if ((I + 1 != E) && Tokens[I + 1].is(tok::hashhash)) {
            ++I;  // Skip the following hashhash.
          }
        }
        VCtx.reset();
        // We processed __VA_OPT__'s closing paren (and the exit out of
        // __VA_OPT__), so skip to the next token.
        continue;
      }
    }

    // If we found the stringify operator, get the argument stringified. The
    // preprocessor already verified that the following token is a macro
    // parameter or __VA_OPT__ when the #define was lexed.
    if (CurTok.isOneOf(tok::hash, tok::hashat)) {
      int ArgNo = Macro->getParameterNum(Tokens[I+1].getIdentifierInfo());
      assert((ArgNo != -1 || VCtx.isVAOptToken(Tokens[I + 1])) &&
             "Token following # is not an argument or __VA_OPT__!");
      if (ArgNo == -1) {
        // Handle the __VA_OPT__ case.
        VCtx.sawHashOrHashAtBefore(NextTokGetsSpace,
                                   CurTok.is(tok::hashat));
        continue;
      }
      // Else handle the simple argument case.
      SourceLocation ExpansionLocStart =
          getExpansionLocForMacroDefLoc(CurTok.getLocation());
      SourceLocation ExpansionLocEnd =
          getExpansionLocForMacroDefLoc(Tokens[I+1].getLocation());

      Token Res;
      if (CurTok.is(tok::hash))  // Stringify
        Res = ActualArgs->getStringifiedArgument(ArgNo, PP,
                                                 ExpansionLocStart,
                                                 ExpansionLocEnd);
      else {
        // 'charify': don't bother caching these.
        Res = MacroArgs::StringifyArgument(ActualArgs->getUnexpArgument(ArgNo),
                                           PP, true,
                                           ExpansionLocStart,
                                           ExpansionLocEnd);
      }
      Res.setFlag(Token::StringifiedInMacro);

      // The stringified/charified string leading space flag gets set to match
      // the #/#@ operator.
      if (NextTokGetsSpace)
        Res.setFlag(Token::LeadingSpace);

      ResultToks.push_back(Res);
      MadeChange = true;
      ++I;  // Skip arg name.
      NextTokGetsSpace = false;
      continue;
    }

    // Find out if there is a paste (##) operator before or after the token.
    bool NonEmptyPasteBefore =
        !ResultToks.empty() && ResultToks.back().is(tok::hashhash);
    bool PasteBefore = I != 0 && Tokens[I-1].is(tok::hashhash);
    bool PasteAfter = I+1 != E && Tokens[I+1].is(tok::hashhash);

    assert((!NonEmptyPasteBefore || PasteBefore || VCtx.isInVAOpt()) &&
           "unexpected ## in ResultToks");

    // Otherwise, if this is not an argument token, just add the token to the
    // output buffer.
    IdentifierInfo *II = CurTok.getIdentifierInfo();
    int ArgNo = II ? Macro->getParameterNum(II) : -1;
    if (ArgNo == -1) {
      // This isn't an argument, just add it.
      ResultToks.push_back(CurTok);

      if (NextTokGetsSpace) {
        ResultToks.back().setFlag(Token::LeadingSpace);
        NextTokGetsSpace = false;
      } else if (PasteBefore && !NonEmptyPasteBefore)
        ResultToks.back().clearFlag(Token::LeadingSpace);

      continue;
    }

    // An argument is expanded somehow, the result is different than the
    // input.
    MadeChange = true;

    // Otherwise, this is a use of the argument.

    // In Microsoft mode, remove the comma before __VA_ARGS__ to ensure there
    // are no trailing commas if __VA_ARGS__ is empty.
    if (!PasteBefore && ActualArgs->isVarargsElidedUse() &&
        MaybeRemoveCommaBeforeVaArgs(ResultToks,
                                     /*HasPasteOperator=*/false,
                                     Macro, ArgNo, PP))
      continue;

    // If it is not the LHS/RHS of a ## operator, we must pre-expand the
    // argument and substitute the expanded tokens into the result. This is
    // C99 6.10.3.1p1.
    if (!PasteBefore && !PasteAfter) {
      const Token *ResultArgToks;

      // Only preexpand the argument if it could possibly need it. This
      // avoids some work in common cases.
      const Token *ArgTok = ActualArgs->getUnexpArgument(ArgNo);
      if (ActualArgs->ArgNeedsPreexpansion(ArgTok, PP))
        ResultArgToks = &ActualArgs->getPreExpArgument(ArgNo, PP)[0];
      else
        ResultArgToks = ArgTok;  // Use non-preexpanded tokens.

      // If the arg token expanded into anything, append it.
      if (ResultArgToks->isNot(tok::eof)) {
        size_t FirstResult = ResultToks.size();
        unsigned NumToks = MacroArgs::getArgLength(ResultArgToks);
        ResultToks.append(ResultArgToks, ResultArgToks+NumToks);

        // In Microsoft-compatibility mode, we follow MSVC's preprocessing
        // behavior by not considering single commas from nested macro
        // expansions as argument separators. Set a flag on the token so we
        // can test for this later when the macro expansion is processed.
        if (PP.getLangOpts().MSVCCompat && NumToks == 1 &&
            ResultToks.back().is(tok::comma))
          ResultToks.back().setFlag(Token::IgnoredComma);

        // If the '##' came from expanding an argument, turn it into 'unknown'
        // to avoid pasting.
        for (Token &Tok : llvm::make_range(ResultToks.begin() + FirstResult,
                                           ResultToks.end())) {
          if (Tok.is(tok::hashhash))
            Tok.setKind(tok::unknown);
        }

        if (ExpandLocStart.isValid()) {
          updateLocForMacroArgTokens(CurTok.getLocation(),
                                     ResultToks.begin()+FirstResult,
                                     ResultToks.end());
        }

        // If any tokens were substituted from the argument, the whitespace
        // before the first token should match the whitespace of the arg
        // identifier.
        ResultToks[FirstResult].setFlagValue(Token::LeadingSpace,
                                             NextTokGetsSpace);
        ResultToks[FirstResult].setFlagValue(Token::StartOfLine, false);
        NextTokGetsSpace = false;
      }
      continue;
    }

    // Okay, we have a token that is either the LHS or RHS of a paste (##)
    // argument. It gets substituted as its non-pre-expanded tokens.
    const Token *ArgToks = ActualArgs->getUnexpArgument(ArgNo);
    unsigned NumToks = MacroArgs::getArgLength(ArgToks);
    if (NumToks) {  // Not an empty argument?
      bool VaArgsPseudoPaste = false;
      // If this is the GNU ", ## __VA_ARGS__" extension, and we just learned
      // that __VA_ARGS__ expands to multiple tokens, avoid a pasting error
      // when the expander tries to paste ',' with the first token of the
      // __VA_ARGS__ expansion.
      if (NonEmptyPasteBefore && ResultToks.size() >= 2 &&
          ResultToks[ResultToks.size()-2].is(tok::comma) &&
          (unsigned)ArgNo == Macro->getNumParams()-1 &&
          Macro->isVariadic()) {
        VaArgsPseudoPaste = true;
        // Remove the paste operator, report use of the extension.
        PP.Diag(ResultToks.pop_back_val().getLocation(),
                diag::ext_paste_comma);
      }

      ResultToks.append(ArgToks, ArgToks+NumToks);

      // If the '##' came from expanding an argument, turn it into 'unknown'
      // to avoid pasting.
      for (Token &Tok : llvm::make_range(ResultToks.end() - NumToks,
                                         ResultToks.end())) {
        if (Tok.is(tok::hashhash))
          Tok.setKind(tok::unknown);
      }

      if (ExpandLocStart.isValid()) {
        updateLocForMacroArgTokens(CurTok.getLocation(),
                                   ResultToks.end()-NumToks,
                                   ResultToks.end());
      }

      // Transfer the leading whitespace information from the token
      // (the macro argument) onto the first token of the
      // expansion. Note that we don't do this for the GNU
      // pseudo-paste extension ", ## __VA_ARGS__".
      if (!VaArgsPseudoPaste) {
        ResultToks[ResultToks.size() - NumToks].setFlagValue(
            Token::StartOfLine, false);
        ResultToks[ResultToks.size() - NumToks].setFlagValue(
            Token::LeadingSpace, NextTokGetsSpace);
      }

      NextTokGetsSpace = false;
      continue;
    }

    // If an empty argument is on the LHS or RHS of a paste, the standard (C99
    // 6.10.3.3p2,3) calls for a bunch of placemarker stuff to occur. We
    // implement this by eating ## operators when a LHS or RHS expands to
    // empty.
    if (PasteAfter) {
      // Discard the argument token and skip (don't copy to the expansion
      // buffer) the paste operator after it.
      ++I;
      continue;
    }

    // If this is on the RHS of a paste operator, we've already copied the
    // paste operator to the ResultToks list, unless the LHS was empty too.
    // Remove it.
    assert(PasteBefore);
    if (NonEmptyPasteBefore) {
      assert(ResultToks.back().is(tok::hashhash));
      // Do not remove the paste operator if it is the one before __VA_OPT__
      // (and we are still processing tokens within VA_OPT). We handle the
      // case of removing the paste operator if __VA_OPT__ reduces to the
      // notional placemarker above when we encounter the closing paren of
      // VA_OPT.
      if (!VCtx.isInVAOpt() ||
          ResultToks.size() > VCtx.getNumberOfTokensPriorToVAOpt())
        ResultToks.pop_back();
    }

    // If this is the __VA_ARGS__ token, and if the argument wasn't provided,
    // and if the macro had at least one real argument, and if the token
    // before the ## was a comma, remove the comma. This is a GCC extension
    // which is disabled when using -std=c99.
    if (ActualArgs->isVarargsElidedUse())
      MaybeRemoveCommaBeforeVaArgs(ResultToks,
                                   /*HasPasteOperator=*/true,
                                   Macro, ArgNo, PP);
  }

  // If anything changed, install this as the new Tokens list.
  if (MadeChange) {
    assert(!OwnsTokens && "This would leak if we already own the token list");
    // This is deleted in the dtor.
    NumTokens = ResultToks.size();
    // The tokens will be added to Preprocessor's cache and will be removed
    // when this TokenLexer finishes lexing them.
    Tokens = PP.cacheMacroExpandedTokens(this, ResultToks);

    // The preprocessor cache of macro expanded tokens owns these tokens, not
    // us.
    OwnsTokens = false;
  }
}
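// Editorial note: concrete inputs for the two '##' behaviors handled above
// (illustrative preprocessor source, shown in a comment):
//
//   #define JOIN(a, b) a ## b
//   JOIN(foo, )   // empty RHS: the '##' is eaten, yielding 'foo'
//   JOIN(, bar)   // empty LHS: likewise yields 'bar'
//
//   #define LOG(fmt, ...) printf(fmt, ## __VA_ARGS__)
//   LOG("hi")     // GNU comma-paste extension: the ',' before
//                 // ## __VA_ARGS__ is removed when no variadic arguments
//                 // are supplied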
/// RemoveBlockIfDead - If the specified block is dead, remove it, update loop
/// information, and remove any dead successors it has.
///
void LoopUnswitch::RemoveBlockIfDead(BasicBlock *BB,
                                     std::vector<Instruction*> &Worklist,
                                     Loop *L) {
  if (pred_begin(BB) != pred_end(BB)) {
    // This block isn't dead, since an edge to BB was just removed, see if
    // there are any easy simplifications we can do now.
    if (BasicBlock *Pred = BB->getSinglePredecessor()) {
      // If it has one pred, fold phi nodes in BB.
      while (isa<PHINode>(BB->begin()))
        ReplaceUsesOfWith(BB->begin(),
                          cast<PHINode>(BB->begin())->getIncomingValue(0),
                          Worklist, L, LPM);

      // If this is the header of a loop and the only pred is the latch, we
      // now have an unreachable loop.
      if (Loop *L = LI->getLoopFor(BB))
        if (loopHeader == BB && L->contains(Pred)) {
          // Remove the branch from the latch to the header block, this makes
          // the header dead, which will make the latch dead (because the
          // header dominates the latch).
          LPM->deleteSimpleAnalysisValue(Pred->getTerminator(), L);
          Pred->getTerminator()->eraseFromParent();
          new UnreachableInst(BB->getContext(), Pred);

          // The loop is now broken, remove it from LI.
          RemoveLoopFromHierarchy(L);

          // Reprocess the header, which now IS dead.
          RemoveBlockIfDead(BB, Worklist, L);
          return;
        }

      // If pred ends in an uncond branch, add the uncond branch to the
      // worklist so that the two blocks will get merged.
      if (BranchInst *BI = dyn_cast<BranchInst>(Pred->getTerminator()))
        if (BI->isUnconditional())
          Worklist.push_back(BI);
    }
    return;
  }

  DEBUG(dbgs() << "Nuking dead block: " << *BB);

  // Remove the instructions in the basic block from the worklist.
  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
    RemoveFromWorklist(I, Worklist);

    // Anything that uses the instructions in this basic block should have
    // their uses replaced with undefs.
    // If I is not void type then replaceAllUsesWith undef.
    // This allows ValueHandlers and custom metadata to adjust themselves.
    if (!I->getType()->isVoidTy())
      I->replaceAllUsesWith(UndefValue::get(I->getType()));
  }

  // If this is the edge to the header block for a loop, remove the loop and
  // promote all subloops.
  if (Loop *BBLoop = LI->getLoopFor(BB)) {
    if (BBLoop->getLoopLatch() == BB) {
      RemoveLoopFromHierarchy(BBLoop);
      if (currentLoop == BBLoop) {
        currentLoop = 0;
        redoLoop = false;
      }
    }
  }

  // Remove the block from the loop info, which removes it from any loops it
  // was in.
  LI->removeBlock(BB);

  // Remove phi node entries in successors for this block.
  TerminatorInst *TI = BB->getTerminator();
  SmallVector<BasicBlock*, 4> Succs;
  for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) {
    Succs.push_back(TI->getSuccessor(i));
    TI->getSuccessor(i)->removePredecessor(BB);
  }

  // Unique the successors, remove anything with multiple uses.
  array_pod_sort(Succs.begin(), Succs.end());
  Succs.erase(std::unique(Succs.begin(), Succs.end()), Succs.end());

  // Remove the basic block, including all of the instructions contained in
  // it.
  LPM->deleteSimpleAnalysisValue(BB, L);
  BB->eraseFromParent();

  // Remove successor blocks here that are not dead, so that we know we only
  // have dead blocks in this list. Nondead blocks have a way of becoming
  // dead, then getting removed before we revisit them, which is badness.
  //
  for (unsigned i = 0; i != Succs.size(); ++i)
    if (pred_begin(Succs[i]) != pred_end(Succs[i])) {
      // One exception is loop headers. If this block was the preheader for a
      // loop, then we DO want to visit the loop so the loop gets deleted.
      // We know that if the successor is a loop header, that this loop had to
      // be the preheader: the case where this was the latch block was handled
      // above and headers can only have two predecessors.
      if (!LI->isLoopHeader(Succs[i])) {
        Succs.erase(Succs.begin()+i);
        --i;
      }
    }

  for (unsigned i = 0, e = Succs.size(); i != e; ++i)
    RemoveBlockIfDead(Succs[i], Worklist, L);
}
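// Editorial note: the "headers can only have two predecessors" reasoning
// above relies on the loops being in loop-simplify form, where a header's
// only predecessors are its unique preheader and its unique latch. A sketch
// of that invariant (hypothetical check, not from this file):
//
// \code
//   assert(L->getLoopPreheader() && L->getLoopLatch() &&
//          "expected loop-simplify form");
// \endcode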
void SelectionDAGBuilder::LowerStatepoint(
    ImmutableStatepoint ISP, MachineBasicBlock *LandingPad /*=nullptr*/) {
  // The basic scheme here is that information about both the original call and
  // the safepoint is encoded in the CallInst. We create a temporary call and
  // lower it, then reverse engineer the calling sequence.

  NumOfStatepoints++;
  // Clear state
  StatepointLowering.startNewStatepoint(*this);

  ImmutableCallSite CS(ISP.getCallSite());

#ifndef NDEBUG
  // Consistency check
  for (const User *U : CS->users()) {
    const CallInst *Call = cast<CallInst>(U);
    if (isGCRelocate(Call))
      StatepointLowering.scheduleRelocCall(*Call);
  }
#endif

#ifndef NDEBUG
  // If this is a malformed statepoint, report it early to simplify debugging.
  // This should catch any IR level mistake that's made when constructing or
  // transforming statepoints.
  ISP.verify();

  // Check that the associated GCStrategy expects to encounter statepoints.
  assert(GFI->getStrategy().useStatepoints() &&
         "GCStrategy does not expect to encounter statepoints");
#endif

  // Lower statepoint vmstate and gcstate arguments
  SmallVector<SDValue, 10> LoweredMetaArgs;
  lowerStatepointMetaArgs(LoweredMetaArgs, ISP, *this);

  // Get call node, we will replace it later with statepoint
  SDNode *CallNode =
      lowerCallFromStatepoint(ISP, LandingPad, *this, PendingExports);

  // Construct the actual GC_TRANSITION_START, STATEPOINT, and
  // GC_TRANSITION_END nodes with all the appropriate arguments and return
  // values.

  // Call Node: Chain, Target, {Args}, RegMask, [Glue]
  SDValue Chain = CallNode->getOperand(0);

  SDValue Glue;
  bool CallHasIncomingGlue = CallNode->getGluedNode();
  if (CallHasIncomingGlue) {
    // Glue is always last operand
    Glue = CallNode->getOperand(CallNode->getNumOperands() - 1);
  }

  // Build the GC_TRANSITION_START node if necessary.
  //
  // The operands to the GC_TRANSITION_{START,END} nodes are laid out in the
  // order in which they appear in the call to the statepoint intrinsic. If
  // any of the operands is pointer-typed, that operand is immediately
  // followed by a SRCVALUE for the pointer that may be used during lowering
  // (e.g. to form MachinePointerInfo values for loads/stores).
  const bool IsGCTransition =
      (ISP.getFlags() & (uint64_t)StatepointFlags::GCTransition) ==
      (uint64_t)StatepointFlags::GCTransition;
  if (IsGCTransition) {
    SmallVector<SDValue, 8> TSOps;

    // Add chain
    TSOps.push_back(Chain);

    // Add GC transition arguments
    for (const Value *V : ISP.gc_transition_args()) {
      TSOps.push_back(getValue(V));
      if (V->getType()->isPointerTy())
        TSOps.push_back(DAG.getSrcValue(V));
    }

    // Add glue if necessary
    if (CallHasIncomingGlue)
      TSOps.push_back(Glue);

    SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);

    SDValue GCTransitionStart =
        DAG.getNode(ISD::GC_TRANSITION_START, getCurSDLoc(), NodeTys, TSOps);

    Chain = GCTransitionStart.getValue(0);
    Glue = GCTransitionStart.getValue(1);
  }

  // TODO: Currently, all of these operands are being marked as read/write in
  // PrologEpilogInserter.cpp, we should special case the VMState arguments
  // and flags to be read-only.
  SmallVector<SDValue, 40> Ops;

  // Add the <id> and <numBytes> constants.
  Ops.push_back(DAG.getTargetConstant(ISP.getID(), getCurSDLoc(), MVT::i64));
  Ops.push_back(
      DAG.getTargetConstant(ISP.getNumPatchBytes(), getCurSDLoc(), MVT::i32));

  // Calculate and push starting position of vmstate arguments
  // Get number of arguments incoming directly into call node
  unsigned NumCallRegArgs =
      CallNode->getNumOperands() - (CallHasIncomingGlue ? 4 : 3);
  Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, getCurSDLoc(),
                                      MVT::i32));

  // Add call target
  SDValue CallTarget = SDValue(CallNode->getOperand(1).getNode(), 0);
  Ops.push_back(CallTarget);

  // Add call arguments
  // Get position of register mask in the call
  SDNode::op_iterator RegMaskIt;
  if (CallHasIncomingGlue)
    RegMaskIt = CallNode->op_end() - 2;
  else
    RegMaskIt = CallNode->op_end() - 1;
  Ops.insert(Ops.end(), CallNode->op_begin() + 2, RegMaskIt);

  // Add a constant argument for the calling convention
  pushStackMapConstant(Ops, *this, CS.getCallingConv());

  // Add a constant argument for the flags
  uint64_t Flags = ISP.getFlags();
  assert(((Flags & ~(uint64_t)StatepointFlags::MaskAll) == 0) &&
         "unknown flag used");
  pushStackMapConstant(Ops, *this, Flags);

  // Insert all vmstate and gcstate arguments
  Ops.insert(Ops.end(), LoweredMetaArgs.begin(), LoweredMetaArgs.end());

  // Add register mask from call node
  Ops.push_back(*RegMaskIt);

  // Add chain
  Ops.push_back(Chain);

  // Same for the glue, but we add it only if original call had it
  if (Glue.getNode())
    Ops.push_back(Glue);

  // Compute return values. Provide a glue output since we consume one as
  // input. This allows someone else to chain off us as needed.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);

  SDNode *StatepointMCNode =
      DAG.getMachineNode(TargetOpcode::STATEPOINT, getCurSDLoc(), NodeTys,
                         Ops);

  SDNode *SinkNode = StatepointMCNode;

  // Build the GC_TRANSITION_END node if necessary.
  //
  // See the comment above regarding GC_TRANSITION_START for the layout of
  // the operands to the GC_TRANSITION_END node.
  if (IsGCTransition) {
    SmallVector<SDValue, 8> TEOps;

    // Add chain
    TEOps.push_back(SDValue(StatepointMCNode, 0));

    // Add GC transition arguments
    for (const Value *V : ISP.gc_transition_args()) {
      TEOps.push_back(getValue(V));
      if (V->getType()->isPointerTy())
        TEOps.push_back(DAG.getSrcValue(V));
    }

    // Add glue
    TEOps.push_back(SDValue(StatepointMCNode, 1));

    SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);

    SDValue GCTransitionStart =
        DAG.getNode(ISD::GC_TRANSITION_END, getCurSDLoc(), NodeTys, TEOps);

    SinkNode = GCTransitionStart.getNode();
  }

  // Replace original call
  DAG.ReplaceAllUsesWith(CallNode, SinkNode); // This may update Root
  // Remove the original call node
  DAG.DeleteNode(CallNode);

  // DON'T set the root - under the assumption that it's already set past the
  // inserted node we created.

  // TODO: A better future implementation would be to emit a single variable
  // argument, variable return value STATEPOINT node here and then hookup the
  // return value of each gc.relocate to the respective output of the
  // previously emitted STATEPOINT value. Unfortunately, this doesn't appear
  // to actually be possible today.
}
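// Editorial note: reconstructed from the pushes above, the finished
// STATEPOINT machine node carries its operands in this order:
//
//   <id>, <numBytes>, <numCallRegArgs>, <call target>, [call arguments],
//   <calling convention>, <flags>, [vmstate/gcstate meta arguments],
//   <register mask>, <chain>, [glue]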
CXCursor cxcursor::MakeCXCursor(Stmt *S, Decl *Parent, CXTranslationUnit TU, SourceRange RegionOfInterest) { assert(S && TU && "Invalid arguments!"); CXCursorKind K = CXCursor_NotImplemented; switch (S->getStmtClass()) { case Stmt::NoStmtClass: break; case Stmt::CaseStmtClass: K = CXCursor_CaseStmt; break; case Stmt::DefaultStmtClass: K = CXCursor_DefaultStmt; break; case Stmt::IfStmtClass: K = CXCursor_IfStmt; break; case Stmt::SwitchStmtClass: K = CXCursor_SwitchStmt; break; case Stmt::WhileStmtClass: K = CXCursor_WhileStmt; break; case Stmt::DoStmtClass: K = CXCursor_DoStmt; break; case Stmt::ForStmtClass: K = CXCursor_ForStmt; break; case Stmt::GotoStmtClass: K = CXCursor_GotoStmt; break; case Stmt::IndirectGotoStmtClass: K = CXCursor_IndirectGotoStmt; break; case Stmt::ContinueStmtClass: K = CXCursor_ContinueStmt; break; case Stmt::BreakStmtClass: K = CXCursor_BreakStmt; break; case Stmt::ReturnStmtClass: K = CXCursor_ReturnStmt; break; case Stmt::AsmStmtClass: K = CXCursor_AsmStmt; break; case Stmt::ObjCAtTryStmtClass: K = CXCursor_ObjCAtTryStmt; break; case Stmt::ObjCAtCatchStmtClass: K = CXCursor_ObjCAtCatchStmt; break; case Stmt::ObjCAtFinallyStmtClass: K = CXCursor_ObjCAtFinallyStmt; break; case Stmt::ObjCAtThrowStmtClass: K = CXCursor_ObjCAtThrowStmt; break; case Stmt::ObjCAtSynchronizedStmtClass: K = CXCursor_ObjCAtSynchronizedStmt; break; case Stmt::ObjCAutoreleasePoolStmtClass: K = CXCursor_ObjCAutoreleasePoolStmt; break; case Stmt::ObjCForCollectionStmtClass: K = CXCursor_ObjCForCollectionStmt; break; case Stmt::CXXCatchStmtClass: K = CXCursor_CXXCatchStmt; break; case Stmt::CXXTryStmtClass: K = CXCursor_CXXTryStmt; break; case Stmt::CXXForRangeStmtClass: K = CXCursor_CXXForRangeStmt; break; case Stmt::SEHTryStmtClass: K = CXCursor_SEHTryStmt; break; case Stmt::SEHExceptStmtClass: K = CXCursor_SEHExceptStmt; break; case Stmt::SEHFinallyStmtClass: K = CXCursor_SEHFinallyStmt; break; case Stmt::ArrayTypeTraitExprClass: case Stmt::AsTypeExprClass: case Stmt::AtomicExprClass: case Stmt::BinaryConditionalOperatorClass: case Stmt::BinaryTypeTraitExprClass: case Stmt::CXXBindTemporaryExprClass: case Stmt::CXXDefaultArgExprClass: case Stmt::CXXScalarValueInitExprClass: case Stmt::CXXUuidofExprClass: case Stmt::ChooseExprClass: case Stmt::DesignatedInitExprClass: case Stmt::ExprWithCleanupsClass: case Stmt::ExpressionTraitExprClass: case Stmt::ExtVectorElementExprClass: case Stmt::ImplicitCastExprClass: case Stmt::ImplicitValueInitExprClass: case Stmt::MaterializeTemporaryExprClass: case Stmt::ObjCIndirectCopyRestoreExprClass: case Stmt::OffsetOfExprClass: case Stmt::ParenListExprClass: case Stmt::PredefinedExprClass: case Stmt::ShuffleVectorExprClass: case Stmt::UnaryExprOrTypeTraitExprClass: case Stmt::UnaryTypeTraitExprClass: case Stmt::VAArgExprClass: K = CXCursor_UnexposedExpr; break; case Stmt::OpaqueValueExprClass: if (Expr *Src = cast<OpaqueValueExpr>(S)->getSourceExpr()) return MakeCXCursor(Src, Parent, TU, RegionOfInterest); K = CXCursor_UnexposedExpr; break; case Stmt::PseudoObjectExprClass: return MakeCXCursor(cast<PseudoObjectExpr>(S)->getSyntacticForm(), Parent, TU, RegionOfInterest); case Stmt::CompoundStmtClass: K = CXCursor_CompoundStmt; break; case Stmt::NullStmtClass: K = CXCursor_NullStmt; break; case Stmt::LabelStmtClass: K = CXCursor_LabelStmt; break; case Stmt::DeclStmtClass: K = CXCursor_DeclStmt; break; case Stmt::IntegerLiteralClass: K = CXCursor_IntegerLiteral; break; case Stmt::FloatingLiteralClass: K = CXCursor_FloatingLiteral; break; case 
Stmt::ImaginaryLiteralClass: K = CXCursor_ImaginaryLiteral; break; case Stmt::StringLiteralClass: K = CXCursor_StringLiteral; break; case Stmt::CharacterLiteralClass: K = CXCursor_CharacterLiteral; break; case Stmt::ParenExprClass: K = CXCursor_ParenExpr; break; case Stmt::UnaryOperatorClass: K = CXCursor_UnaryOperator; break; case Stmt::CXXNoexceptExprClass: K = CXCursor_UnaryExpr; break; case Stmt::ArraySubscriptExprClass: K = CXCursor_ArraySubscriptExpr; break; case Stmt::BinaryOperatorClass: K = CXCursor_BinaryOperator; break; case Stmt::CompoundAssignOperatorClass: K = CXCursor_CompoundAssignOperator; break; case Stmt::ConditionalOperatorClass: K = CXCursor_ConditionalOperator; break; case Stmt::CStyleCastExprClass: K = CXCursor_CStyleCastExpr; break; case Stmt::CompoundLiteralExprClass: K = CXCursor_CompoundLiteralExpr; break; case Stmt::InitListExprClass: K = CXCursor_InitListExpr; break; case Stmt::AddrLabelExprClass: K = CXCursor_AddrLabelExpr; break; case Stmt::StmtExprClass: K = CXCursor_StmtExpr; break; case Stmt::GenericSelectionExprClass: K = CXCursor_GenericSelectionExpr; break; case Stmt::GNUNullExprClass: K = CXCursor_GNUNullExpr; break; case Stmt::CXXStaticCastExprClass: K = CXCursor_CXXStaticCastExpr; break; case Stmt::CXXDynamicCastExprClass: K = CXCursor_CXXDynamicCastExpr; break; case Stmt::CXXReinterpretCastExprClass: K = CXCursor_CXXReinterpretCastExpr; break; case Stmt::CXXConstCastExprClass: K = CXCursor_CXXConstCastExpr; break; case Stmt::CXXFunctionalCastExprClass: K = CXCursor_CXXFunctionalCastExpr; break; case Stmt::CXXTypeidExprClass: K = CXCursor_CXXTypeidExpr; break; case Stmt::CXXBoolLiteralExprClass: K = CXCursor_CXXBoolLiteralExpr; break; case Stmt::CXXNullPtrLiteralExprClass: K = CXCursor_CXXNullPtrLiteralExpr; break; case Stmt::CXXThisExprClass: K = CXCursor_CXXThisExpr; break; case Stmt::CXXThrowExprClass: K = CXCursor_CXXThrowExpr; break; case Stmt::CXXNewExprClass: K = CXCursor_CXXNewExpr; break; case Stmt::CXXDeleteExprClass: K = CXCursor_CXXDeleteExpr; break; case Stmt::ObjCStringLiteralClass: K = CXCursor_ObjCStringLiteral; break; case Stmt::ObjCEncodeExprClass: K = CXCursor_ObjCEncodeExpr; break; case Stmt::ObjCSelectorExprClass: K = CXCursor_ObjCSelectorExpr; break; case Stmt::ObjCProtocolExprClass: K = CXCursor_ObjCProtocolExpr; break; case Stmt::ObjCBridgedCastExprClass: K = CXCursor_ObjCBridgedCastExpr; break; case Stmt::BlockExprClass: K = CXCursor_BlockExpr; break; case Stmt::PackExpansionExprClass: K = CXCursor_PackExpansionExpr; break; case Stmt::SizeOfPackExprClass: K = CXCursor_SizeOfPackExpr; break; case Stmt::BlockDeclRefExprClass: case Stmt::DeclRefExprClass: case Stmt::DependentScopeDeclRefExprClass: case Stmt::SubstNonTypeTemplateParmExprClass: case Stmt::SubstNonTypeTemplateParmPackExprClass: case Stmt::UnresolvedLookupExprClass: K = CXCursor_DeclRefExpr; break; case Stmt::CXXDependentScopeMemberExprClass: case Stmt::CXXPseudoDestructorExprClass: case Stmt::MemberExprClass: case Stmt::ObjCIsaExprClass: case Stmt::ObjCIvarRefExprClass: case Stmt::ObjCPropertyRefExprClass: case Stmt::UnresolvedMemberExprClass: K = CXCursor_MemberRefExpr; break; case Stmt::CallExprClass: case Stmt::CXXOperatorCallExprClass: case Stmt::CXXMemberCallExprClass: case Stmt::CUDAKernelCallExprClass: case Stmt::CXXConstructExprClass: case Stmt::CXXTemporaryObjectExprClass: case Stmt::CXXUnresolvedConstructExprClass: K = CXCursor_CallExpr; break; case Stmt::ObjCMessageExprClass: { K = CXCursor_ObjCMessageExpr; int SelectorIdIndex = -1; // Check if cursor 
points to a selector id. if (RegionOfInterest.isValid() && RegionOfInterest.getBegin() == RegionOfInterest.getEnd()) { SmallVector<SourceLocation, 16> SelLocs; cast<ObjCMessageExpr>(S)->getSelectorLocs(SelLocs); SmallVector<SourceLocation, 16>::iterator I=std::find(SelLocs.begin(), SelLocs.end(),RegionOfInterest.getBegin()); if (I != SelLocs.end()) SelectorIdIndex = I - SelLocs.begin(); } CXCursor C = { K, 0, { Parent, S, TU } }; return getSelectorIdentifierCursor(SelectorIdIndex, C); } case Stmt::MSDependentExistsStmtClass: K = CXCursor_UnexposedStmt; break; } CXCursor C = { K, 0, { Parent, S, TU } }; return C; }
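// A minimal standalone sketch (not libclang code) of the selector-index
// lookup above: locate a position in a list of locations and convert the
// iterator into an index, or -1 if absent. Types and names are illustrative.
#include <algorithm>
#include <cassert>
#include <vector>

using SourceLoc = unsigned; // Stand-in for clang::SourceLocation.

static int selectorIdIndex(const std::vector<SourceLoc> &SelLocs,
                           SourceLoc Point) {
  auto I = std::find(SelLocs.begin(), SelLocs.end(), Point);
  return I == SelLocs.end() ? -1 : static_cast<int>(I - SelLocs.begin());
}

int main() {
  std::vector<SourceLoc> Locs = {10, 42, 99};
  assert(selectorIdIndex(Locs, 42) == 1);
  assert(selectorIdIndex(Locs, 7) == -1);
  return 0;
}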
// recurseBasicBlock() - This calculates the ProfileInfo estimation for a // single block and then recurses into the successors. // The algorithm preserves the flow condition, meaning that the sum of the // weights of the incoming edges must equal the block weight, which must in // turn equal the sum of the weights of the outgoing edges. // Since the flow of a block is determined from the current state of the // flow, once an edge has a flow assigned this flow is never changed again, // otherwise it would be possible to violate the flow condition in another // block. void ProfileEstimatorPass::recurseBasicBlock(BasicBlock *BB) { // Break the recursion if this BasicBlock was already visited. if (BBToVisit.find(BB) == BBToVisit.end()) return; // Read the LoopInfo for this block. bool BBisHeader = LI->isLoopHeader(BB); Loop* BBLoop = LI->getLoopFor(BB); // To get the block weight, read all incoming edges. double BBWeight = 0; std::set<BasicBlock*> ProcessedPreds; for ( pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB); bbi != bbe; ++bbi ) { // If this block was not considered already, add weight. Edge edge = getEdge(*bbi,BB); double w = getEdgeWeight(edge); if (ProcessedPreds.insert(*bbi).second) { BBWeight += ignoreMissing(w); } // If this block is a loop header and the predecessor is contained in this // loop, the edge is a backedge; continue and do not check whether the // value is valid. if (BBisHeader && BBLoop->contains(*bbi)) { printEdgeError(edge, "but is backedge, continuing"); continue; } // If the edge's value is missing (and this is no loop header, and this is // no backedge), return; this block is currently not estimable. if (w == MissingValue) { printEdgeError(edge, "returning"); return; } } if (getExecutionCount(BB) != MissingValue) { BBWeight = getExecutionCount(BB); } // Fetch all necessary information for current block. SmallVector<Edge, 8> ExitEdges; SmallVector<Edge, 8> Edges; if (BBLoop) { BBLoop->getExitEdges(ExitEdges); } // If this is a loop header, consider the following: // Exactly the flow that is entering this block must exit this block too. So // do the following: // *) get all the exit edges, read the flow that is already leaving this // loop, remember the edges that do not have any flow on them right now. // (The edges that already have flow on them are most likely exiting edges of // other loops; do not touch those flows because the previously calculated // loop headers would not be exact anymore.) // *) In case there is not a single exiting edge left, create one at the loop // latch to prevent the flow from building up in the loop. // *) Take the flow that is not leaving the loop already and distribute it on // the remaining exiting edges. // (This ensures that all flow that enters the loop also leaves it.) // *) Increase the flow into the loop by increasing the weight of this block. // There is at least one incoming backedge that will bring us this flow later // on. (So that the flow condition in this node is valid again.) if (BBisHeader) { double incoming = BBWeight; // Subtract the flow leaving the loop. std::set<Edge> ProcessedExits; for (SmallVector<Edge, 8>::iterator ei = ExitEdges.begin(), ee = ExitEdges.end(); ei != ee; ++ei) { if (ProcessedExits.insert(*ei).second) { double w = getEdgeWeight(*ei); if (w == MissingValue) { Edges.push_back(*ei); // Check if there is a necessary minimal weight; if yes, subtract it // from weight.
if (MinimalWeight.find(*ei) != MinimalWeight.end()) { incoming -= MinimalWeight[*ei]; DEBUG(dbgs() << "Reserving " << format("%.20g",MinimalWeight[*ei]) << " at " << (*ei) << "\n"); } } else { incoming -= w; } } } // If no exit edges, create one: if (Edges.size() == 0) { BasicBlock *Latch = BBLoop->getLoopLatch(); if (Latch) { Edge edge = getEdge(Latch,0); EdgeInformation[BB->getParent()][edge] = BBWeight; printEdgeWeight(edge); edge = getEdge(Latch, BB); EdgeInformation[BB->getParent()][edge] = BBWeight * ExecCount; printEdgeWeight(edge); } } // Distribute remaining weight to the exiting edges. To prevent fractions // from building up and provoking precision problems, the weight which is to // be distributed is split and then rounded; the last edge gets a somewhat // bigger value, but we are close enough for an estimation. double fraction = floor(incoming/Edges.size()); for (SmallVector<Edge, 8>::iterator ei = Edges.begin(), ee = Edges.end(); ei != ee; ++ei) { double w = 0; if (ei != (ee-1)) { w = fraction; incoming -= fraction; } else { w = incoming; } EdgeInformation[BB->getParent()][*ei] += w; // Read necessary minimal weight. if (MinimalWeight.find(*ei) != MinimalWeight.end()) { EdgeInformation[BB->getParent()][*ei] += MinimalWeight[*ei]; DEBUG(dbgs() << "Additionally " << format("%.20g",MinimalWeight[*ei]) << " at " << (*ei) << "\n"); } printEdgeWeight(*ei); // Add minimal weight to paths to all exit edges; this is used to ensure // that enough flow reaches these edges. Path p; const BasicBlock *Dest = GetPath(BB, (*ei).first, p, GetPathToDest); while (Dest != BB) { const BasicBlock *Parent = p.find(Dest)->second; Edge e = getEdge(Parent, Dest); if (MinimalWeight.find(e) == MinimalWeight.end()) { MinimalWeight[e] = 0; } MinimalWeight[e] += w; DEBUG(dbgs() << "Minimal Weight for " << e << ": " << format("%.20g",MinimalWeight[e]) << "\n"); Dest = Parent; } } // Increase flow into the loop. BBWeight *= (ExecCount+1); } BlockInformation[BB->getParent()][BB] = BBWeight; // Up until now we considered only the loop exiting edges; now we have a // definite block weight and must distribute this onto the outgoing edges. // Since there may already be flow attached to some of the edges, read this // flow first and remember the edges that still have no flow attached. Edges.clear(); std::set<BasicBlock*> ProcessedSuccs; succ_iterator bbi = succ_begin(BB), bbe = succ_end(BB); // Also check for (BB,0) edges that may already contain some flow. (But only // in case there are no successors.) if (bbi == bbe) { Edge edge = getEdge(BB,0); EdgeInformation[BB->getParent()][edge] = BBWeight; printEdgeWeight(edge); } for ( ; bbi != bbe; ++bbi ) { if (ProcessedSuccs.insert(*bbi).second) { Edge edge = getEdge(BB,*bbi); double w = getEdgeWeight(edge); if (w != MissingValue) { BBWeight -= getEdgeWeight(edge); } else { Edges.push_back(edge); // If minimal weight is necessary, reserve weight by subtracting it // from the block weight; it is re-added later on. if (MinimalWeight.find(edge) != MinimalWeight.end()) { BBWeight -= MinimalWeight[edge]; DEBUG(dbgs() << "Reserving " << format("%.20g",MinimalWeight[edge]) << " at " << edge << "\n"); } } } } double fraction = Edges.size() ? floor(BBWeight/Edges.size()) : 0.0; // Finally we know what flow is still not leaving the block; distribute this // flow onto the empty edges.
for (SmallVector<Edge, 8>::iterator ei = Edges.begin(), ee = Edges.end(); ei != ee; ++ei) { if (ei != (ee-1)) { EdgeInformation[BB->getParent()][*ei] += fraction; BBWeight -= fraction; } else { EdgeInformation[BB->getParent()][*ei] += BBWeight; } // Re-add the minimal necessary weight. if (MinimalWeight.find(*ei) != MinimalWeight.end()) { EdgeInformation[BB->getParent()][*ei] += MinimalWeight[*ei]; DEBUG(dbgs() << "Additionally " << format("%.20g",MinimalWeight[*ei]) << " at " << (*ei) << "\n"); } printEdgeWeight(*ei); } // This block is visited, mark this before the recursion. BBToVisit.erase(BB); // Recurse into successors. for (succ_iterator bbi = succ_begin(BB), bbe = succ_end(BB); bbi != bbe; ++bbi) { recurseBasicBlock(*bbi); } }
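// A minimal standalone sketch (not the pass itself) of the distribution
// scheme above: split a weight across N edges using floor() for all but the
// last edge, which absorbs the rounding remainder so the flow condition
// still holds exactly. NumEdges is assumed nonzero; names are illustrative.
#include <cassert>
#include <cmath>
#include <vector>

static std::vector<double> distribute(double Weight, unsigned NumEdges) {
  std::vector<double> Out;
  double Fraction = std::floor(Weight / NumEdges);
  for (unsigned i = 0; i + 1 < NumEdges; ++i) {
    Out.push_back(Fraction);
    Weight -= Fraction;
  }
  Out.push_back(Weight); // The last edge gets the (slightly bigger) remainder.
  return Out;
}

int main() {
  std::vector<double> W = distribute(10.0, 3);
  assert(W[0] == 3 && W[1] == 3 && W[2] == 4); // Sum is exactly 10 again.
  return 0;
}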
/// IsProfitableToFoldIntoAddressingMode - It is possible for the addressing /// mode of the machine to fold the specified instruction into a load or store /// that ultimately uses it. However, the specified instruction has multiple /// uses. Given this, it may actually increase register pressure to fold it /// into the load. For example, consider this code: /// /// X = ... /// Y = X+1 /// use(Y) -> nonload/store /// Z = Y+1 /// load Z /// /// In this case, Y has multiple uses, and can be folded into the load of Z /// (yielding load [X+2]). However, doing this will cause both "X" and "X+1" to /// be live at the use(Y) line. If we don't fold Y into load Z, we use one /// fewer register. Since Y can't be folded into "use(Y)" we don't increase the /// number of computations either. /// /// Note that this (like most of CodeGenPrepare) is just a rough heuristic. If /// X was live across 'load Z' for other reasons, we actually *would* want to /// fold the addressing mode in the Z case. This would make Y die earlier. bool AddressingModeMatcher:: IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore, ExtAddrMode &AMAfter) { if (IgnoreProfitability) return true; // AMBefore is the addressing mode before this instruction was folded into it, // and AMAfter is the addressing mode after the instruction was folded. Get // the set of registers referenced by AMAfter and subtract out those // referenced by AMBefore: this is the set of values which folding in this // address extends the lifetime of. // // Note that there are only two potential values being referenced here, // BaseReg and ScaleReg (global addresses are always available, as are any // folded immediates). Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg; // If the BaseReg or ScaledReg was referenced by the previous addrmode, their // lifetime wasn't extended by adding this instruction. if (ValueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg)) BaseReg = 0; if (ValueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg)) ScaledReg = 0; // If folding this instruction (and its subexprs) didn't extend any live // ranges, we're ok with it. if (BaseReg == 0 && ScaledReg == 0) return true; // If all uses of this instruction are ultimately load/store/inlineasm's, // check to see if their addressing modes will include this instruction. If // so, we can fold it into all uses, so it doesn't matter if it has multiple // uses. SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses; SmallPtrSet<Instruction*, 16> ConsideredInsts; if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI)) return false; // Has a non-memory, non-foldable use! // Now that we know that all uses of this instruction are part of a chain of // computation involving only operations that could theoretically be folded // into a memory use, loop over each of these uses and see if they could // *actually* fold the instruction. SmallVector<Instruction*, 32> MatchedAddrModeInsts; for (unsigned i = 0, e = MemoryUses.size(); i != e; ++i) { Instruction *User = MemoryUses[i].first; unsigned OpNo = MemoryUses[i].second; // Get the access type of this use. If the use isn't a pointer, we don't // know what it accesses. Value *Address = User->getOperand(OpNo); if (!Address->getType()->isPointerTy()) return false; Type *AddressAccessTy = cast<PointerType>(Address->getType())->getElementType(); // Do a match against the root of this address, ignoring profitability.
This // will tell us if the addressing mode for the memory operation will // *actually* cover the shared instruction. ExtAddrMode Result; AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, AddressAccessTy, MemoryInst, Result); Matcher.IgnoreProfitability = true; bool Success = Matcher.MatchAddr(Address, 0); (void)Success; assert(Success && "Couldn't select *anything*?"); // If the match didn't cover I, then it won't be shared by it. if (std::find(MatchedAddrModeInsts.begin(), MatchedAddrModeInsts.end(), I) == MatchedAddrModeInsts.end()) return false; MatchedAddrModeInsts.clear(); } return true; }
/// optimizeExtInstr - If the instruction is a copy-like instruction, i.e. it reads /// a single register and writes a single register and it does not modify the /// source, and if the source value is preserved as a sub-register of the /// result, then replace all reachable uses of the source with the subreg of the /// result. /// /// Do not generate an EXTRACT that is used only in a debug use, as this changes /// the code. Since this code does not currently share EXTRACTs, just ignore all /// debug uses. bool PeepholeOptimizer:: optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, SmallPtrSet<MachineInstr*, 8> &LocalMIs) { unsigned SrcReg, DstReg, SubIdx; if (!TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx)) return false; if (TargetRegisterInfo::isPhysicalRegister(DstReg) || TargetRegisterInfo::isPhysicalRegister(SrcReg)) return false; if (MRI->hasOneNonDBGUse(SrcReg)) // No other uses. return false; // Ensure DstReg can get a register class that actually supports // sub-registers. Don't change the class until we commit. const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg); DstRC = TM->getRegisterInfo()->getSubClassWithSubReg(DstRC, SubIdx); if (!DstRC) return false; // The ext instr may be operating on a sub-register of SrcReg as well. // PPC::EXTSW is a 32 -> 64-bit sign extension, but it reads a 64-bit // register. // If UseSrcSubIdx is set, SubIdx also applies to SrcReg, and only uses of // SrcReg:SubIdx should be replaced. bool UseSrcSubIdx = TM->getRegisterInfo()-> getSubClassWithSubReg(MRI->getRegClass(SrcReg), SubIdx) != 0; // The source has other uses. See if we can replace the other uses with use of // the result of the extension. SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs; for (MachineRegisterInfo::use_nodbg_iterator UI = MRI->use_nodbg_begin(DstReg), UE = MRI->use_nodbg_end(); UI != UE; ++UI) ReachedBBs.insert(UI->getParent()); // Uses that are in the same BBs as uses of the result of the instruction. SmallVector<MachineOperand*, 8> Uses; // Uses that the result of the instruction can reach. SmallVector<MachineOperand*, 8> ExtendedUses; bool ExtendLife = true; for (MachineRegisterInfo::use_nodbg_iterator UI = MRI->use_nodbg_begin(SrcReg), UE = MRI->use_nodbg_end(); UI != UE; ++UI) { MachineOperand &UseMO = UI.getOperand(); MachineInstr *UseMI = &*UI; if (UseMI == MI) continue; if (UseMI->isPHI()) { ExtendLife = false; continue; } // Only accept uses of SrcReg:SubIdx. if (UseSrcSubIdx && UseMO.getSubReg() != SubIdx) continue; // It's an error to translate this: // // %reg1025 = <sext> %reg1024 // ... // %reg1026 = SUBREG_TO_REG 0, %reg1024, 4 // // into this: // // %reg1025 = <sext> %reg1024 // ... // %reg1027 = COPY %reg1025:4 // %reg1026 = SUBREG_TO_REG 0, %reg1027, 4 // // The problem here is that SUBREG_TO_REG is there to assert that an // implicit zext occurs. It doesn't insert a zext instruction. If we allow // the COPY here, it will give us the value after the <sext>, not the // original value of %reg1024 before <sext>. if (UseMI->getOpcode() == TargetOpcode::SUBREG_TO_REG) continue; MachineBasicBlock *UseMBB = UseMI->getParent(); if (UseMBB == MBB) { // Local uses that come after the extension. if (!LocalMIs.count(UseMI)) Uses.push_back(&UseMO); } else if (ReachedBBs.count(UseMBB)) { // Non-local uses where the result of the extension is used. Always // replace these unless it's a PHI.
Uses.push_back(&UseMO); } else if (Aggressive && DT->dominates(MBB, UseMBB)) { // We may want to extend the live range of the extension result in order // to replace these uses. ExtendedUses.push_back(&UseMO); } else { // Both will be live out of the def MBB anyway. Don't extend live range of // the extension result. ExtendLife = false; break; } } if (ExtendLife && !ExtendedUses.empty()) // Extend the liveness of the extension result. std::copy(ExtendedUses.begin(), ExtendedUses.end(), std::back_inserter(Uses)); // Now replace all uses. bool Changed = false; if (!Uses.empty()) { SmallPtrSet<MachineBasicBlock*, 4> PHIBBs; // Look for PHI uses of the extended result; we don't want to extend the // liveness of a PHI input. It breaks all kinds of assumptions downstream. // A PHI use is expected to be the kill of its source values. for (MachineRegisterInfo::use_nodbg_iterator UI = MRI->use_nodbg_begin(DstReg), UE = MRI->use_nodbg_end(); UI != UE; ++UI) if (UI->isPHI()) PHIBBs.insert(UI->getParent()); const TargetRegisterClass *RC = MRI->getRegClass(SrcReg); for (unsigned i = 0, e = Uses.size(); i != e; ++i) { MachineOperand *UseMO = Uses[i]; MachineInstr *UseMI = UseMO->getParent(); MachineBasicBlock *UseMBB = UseMI->getParent(); if (PHIBBs.count(UseMBB)) continue; // About to add uses of DstReg, clear DstReg's kill flags. if (!Changed) { MRI->clearKillFlags(DstReg); MRI->constrainRegClass(DstReg, DstRC); } unsigned NewVR = MRI->createVirtualRegister(RC); MachineInstr *Copy = BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(), TII->get(TargetOpcode::COPY), NewVR) .addReg(DstReg, 0, SubIdx); // SubIdx applies to both SrcReg and DstReg when UseSrcSubIdx is set. if (UseSrcSubIdx) { Copy->getOperand(0).setSubReg(SubIdx); Copy->getOperand(0).setIsUndef(); } UseMO->setReg(NewVR); ++NumReuse; Changed = true; } } return Changed; }
bool AndroidBitcodeLinker::LinkInArchive(AndroidBitcodeItem &Item) { const StringRef &Filename = Item.getFile(); verbose("Linking archive file '" + Filename.str() + "'"); std::set<std::string> UndefinedSymbols; std::set<std::string> DefinedSymbols; GetAllSymbols(linker->getModule(), UndefinedSymbols, DefinedSymbols); // Update lists with the global symbol sets. set_union(UndefinedSymbols, GlobalUndefinedSymbols); set_union(DefinedSymbols, GlobalDefinedSymbols); set_subtract(UndefinedSymbols, DefinedSymbols); if (UndefinedSymbols.empty()) { verbose("No symbols undefined, skipping library '" + Filename.str() + "'"); return false; // No need to link anything in! } std::string ErrMsg; std::unique_ptr<Archive> AutoArch( Archive::OpenAndLoadSymbols(Filename, Config.getContext(), &ErrMsg)); Archive* arch = AutoArch.get(); // possibly an empty archive? if (!arch) { return false; } if (!arch->isBitcodeArchive()) { Item.setNative(true); if (Config.isLinkNativeBinary()) { return false; } else { return error("Cannot link native binaries with bitcode: " + Filename.str()); } } std::set<std::string> NotDefinedByArchive; std::set<std::string> CurrentlyUndefinedSymbols; do { CurrentlyUndefinedSymbols = UndefinedSymbols; SmallVector<Module*, 16> Modules; if (!arch->findModulesDefiningSymbols(UndefinedSymbols, Modules, &ErrMsg)) return error("Cannot find symbols in '" + Filename.str() + "': " + ErrMsg); if (Modules.empty()) break; NotDefinedByArchive.insert(UndefinedSymbols.begin(), UndefinedSymbols.end()); for (SmallVectorImpl<Module*>::iterator I=Modules.begin(), E=Modules.end(); I != E; ++I) { Module* aModule = *I; if (aModule != NULL) { if (std::error_code ec = aModule->materializeAll()) return error("Could not load a module: " + ec.message()); verbose(" Linking in module: " + aModule->getModuleIdentifier()); // Link it in if (linker->linkInModule(aModule)) return error("Cannot link in module '" + aModule->getModuleIdentifier() + "'"); } } GetAllSymbols(linker->getModule(), UndefinedSymbols, DefinedSymbols); set_subtract(UndefinedSymbols, NotDefinedByArchive); if (UndefinedSymbols.empty()) break; } while (CurrentlyUndefinedSymbols != UndefinedSymbols); return false; }
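// A minimal standalone sketch (not the linker itself) of the fixpoint loop
// above: repeatedly subtract the defined set from the undefined set until
// the undefined set stops changing. Uses std::set in place of the helpers
// above; names are illustrative.
#include <algorithm>
#include <iterator>
#include <set>
#include <string>

using SymSet = std::set<std::string>;

static void setSubtract(SymSet &A, const SymSet &B) {
  SymSet Result;
  std::set_difference(A.begin(), A.end(), B.begin(), B.end(),
                      std::inserter(Result, Result.end()));
  A.swap(Result);
}

int main() {
  SymSet Undefined = {"foo", "bar"};
  SymSet Defined = {"bar"}; // A real link step would keep growing this set.
  SymSet Current;
  do {
    Current = Undefined;
    setSubtract(Undefined, Defined);
  } while (!Undefined.empty() && Current != Undefined);
  return Undefined.size() == 1 ? 0 : 1; // Only "foo" remains undefined.
}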
// This is the marker algorithm from "Simple and Efficient Construction of // Static Single Assignment Form" // The simple, non-marker algorithm places phi nodes at any join // Here, we place markers, and only place phi nodes if they end up necessary. // They are only necessary if they break a cycle (IE we recursively visit // ourselves again), or we discover, while getting the value of the operands, // that there are two or more definitions needing to be merged. // This still will leave non-minimal form in the case of irreducible control // flow, where phi nodes may be in cycles with themselves, but unnecessary. MemoryAccess *MemorySSAUpdater::getPreviousDefRecursive( BasicBlock *BB, DenseMap<BasicBlock *, TrackingVH<MemoryAccess>> &CachedPreviousDef) { // First, do a cache lookup. Without this cache, certain CFG structures // (like a series of if statements) take exponential time to visit. auto Cached = CachedPreviousDef.find(BB); if (Cached != CachedPreviousDef.end()) { return Cached->second; } if (BasicBlock *Pred = BB->getSinglePredecessor()) { // Single predecessor case, just recurse, we can only have one definition. MemoryAccess *Result = getPreviousDefFromEnd(Pred, CachedPreviousDef); CachedPreviousDef.insert({BB, Result}); return Result; } if (VisitedBlocks.count(BB)) { // We hit our node again, meaning we had a cycle, we must insert a phi // node to break it so we have an operand. The only case this will // insert useless phis is if we have irreducible control flow. MemoryAccess *Result = MSSA->createMemoryPhi(BB); CachedPreviousDef.insert({BB, Result}); return Result; } if (VisitedBlocks.insert(BB).second) { // Mark us visited so we can detect a cycle SmallVector<TrackingVH<MemoryAccess>, 8> PhiOps; // Recurse to get the values in our predecessors for placement of a // potential phi node. This will insert phi nodes if we cycle in order to // break the cycle and have an operand. for (auto *Pred : predecessors(BB)) PhiOps.push_back(getPreviousDefFromEnd(Pred, CachedPreviousDef)); // Now try to simplify the ops to avoid placing a phi. // This may return null if we never created a phi yet, that's okay MemoryPhi *Phi = dyn_cast_or_null<MemoryPhi>(MSSA->getMemoryAccess(BB)); // See if we can avoid the phi by simplifying it. auto *Result = tryRemoveTrivialPhi(Phi, PhiOps); // If we couldn't simplify, we may have to create a phi if (Result == Phi) { if (!Phi) Phi = MSSA->createMemoryPhi(BB); // See if the existing phi operands match what we need. // Unlike normal SSA, we only allow one phi node per block, so we can't just // create a new one. if (Phi->getNumOperands() != 0) { // FIXME: Figure out whether this is dead code and if so remove it. if (!std::equal(Phi->op_begin(), Phi->op_end(), PhiOps.begin())) { // These will have been filled in by the recursive read we did above. std::copy(PhiOps.begin(), PhiOps.end(), Phi->op_begin()); std::copy(pred_begin(BB), pred_end(BB), Phi->block_begin()); } } else { unsigned i = 0; for (auto *Pred : predecessors(BB)) Phi->addIncoming(&*PhiOps[i++], Pred); InsertedPHIs.push_back(Phi); } Result = Phi; } // Set ourselves up for the next variable by resetting visited state. VisitedBlocks.erase(BB); CachedPreviousDef.insert({BB, Result}); return Result; } llvm_unreachable("Should have hit one of the three cases above"); }
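// A minimal standalone sketch (not MemorySSAUpdater) of the marker idea
// above: recurse through predecessors looking for a definition, mark blocks
// on the current path, and when the walk re-enters a marked block, return a
// phi-like placeholder to break the cycle. Graph shape and names are
// illustrative assumptions.
#include <cassert>
#include <map>
#include <set>
#include <vector>

using Block = int;

static std::map<Block, std::vector<Block>> Preds; // CFG predecessors.
static std::set<Block> OnPath;                    // Marker set.

// Returns a positive block id for a normal "definition", or a negative id
// standing in for a phi placeholder inserted to break a cycle.
static int getPreviousDef(Block B) {
  if (OnPath.count(B))
    return -B;        // Cycle detected: break it with a placeholder.
  OnPath.insert(B);   // Mark so we can detect a cycle.
  int Def = B;        // Pretend a block with no predecessors defines a value.
  for (Block P : Preds[B])
    Def = getPreviousDef(P); // Recurse into predecessors.
  OnPath.erase(B);    // Reset visited state for the next query.
  return Def;
}

int main() {
  // Blocks 2 and 3 form a cycle; block 1 is a plain entry block.
  Preds = {{1, {}}, {2, {1, 3}}, {3, {2}}};
  assert(getPreviousDef(2) == -2); // Terminates despite the 2 <-> 3 cycle.
  return 0;
}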
void SuperBlock::constructSuperBlocks(Function& F) { superBlocks.clear(); // key: execution count, value: unvisited BBs map<double, list<BasicBlock*> >* execBBs = new map<double, list<BasicBlock*> >(); // stores the set of unvisited BBs set<BasicBlock*>* unvisitedBBs = new set<BasicBlock*>(); // stores the set of backedges described as (from, to) pairs set<pair<const BasicBlock*, const BasicBlock*> >* backEdges = new set<pair<const BasicBlock*, const BasicBlock*> >(); // retrieve all backedges contained in this current function SmallVector<pair<const BasicBlock*, const BasicBlock*>, 32> backEdgesVector; FindFunctionBackedges(F, backEdgesVector); // transfer everything from backEdgesVector to backEdges backEdges->insert(backEdgesVector.begin(), backEdgesVector.end()); // insert all the BBs in this function into the set of unvisitedBBs for (Function::iterator funcIter = F.begin(), funcIterEnd = F.end(); funcIter != funcIterEnd; ++funcIter) { // gets number of instructions in this BB originalCodeSize += funcIter->size(); currCodeSize += funcIter->size(); double execCount = PI->getExecutionCount(funcIter); map<double, list<BasicBlock*> >::iterator it = execBBs->find(execCount); if (it == execBBs->end()) { list<BasicBlock*> arg(1, funcIter); execBBs->insert(pair<double, list<BasicBlock*> >(execCount, arg)); } else { it->second.push_back(funcIter); } unvisitedBBs->insert(funcIter); } // traverse each BB in descending order of their execution counts for (map<double, list<BasicBlock*> >::reverse_iterator arg_b = execBBs->rbegin(), arg_e = execBBs->rend(); arg_b != arg_e; ++arg_b) { for (list<BasicBlock*>::iterator cur_b = arg_b->second.begin(), cur_e = arg_b->second.end(); cur_b != cur_e; ++cur_b) { if (unvisitedBBs->find(*cur_b) == unvisitedBBs->end()) { continue; } // use this current BB as seed and remove it from the set unvisitedBBs->erase(*cur_b); list<BasicBlock*> trace; list<BasicBlock*>::iterator traceIter; trace.push_back(*cur_b); // run trace selection algorithm BasicBlock* currBB = *cur_b; BasicBlock* nextBB; double cum_prob = 1; // grow trace forward while (1) { nextBB = bestSuccessor(currBB, cum_prob, unvisitedBBs, backEdges); if (nextBB == NULL) { break; } trace.push_back(nextBB); unvisitedBBs->erase(nextBB); currBB = nextBB; } // start growing the trace backwards from the seed BB again currBB = *cur_b; // grow trace backwards while (1) { nextBB = bestPredecessor(currBB, cum_prob, unvisitedBBs, backEdges); if (nextBB == NULL) { break; } trace.push_front(nextBB); unvisitedBBs->erase(nextBB); currBB = nextBB; } // record our newly found superblock // we don't record superblocks of size 1 if (trace.front() != trace.back()) { BasicBlock* head = trace.front(); trace.pop_front(); superBlocks[head] = trace; // populate partOfSuperBlock map partOfSuperBlock[head] = head; for (traceIter = trace.begin(); traceIter != trace.end(); ++traceIter) { partOfSuperBlock[*traceIter] = head; } } } } delete execBBs; delete unvisitedBBs; delete backEdges; }
/// handleEndBlock - Remove dead stores to stack-allocated locations in the /// function end block. Ex: /// %A = alloca i32 /// ... /// store i32 1, i32* %A /// ret void bool DSE::handleEndBlock(BasicBlock &BB) { bool MadeChange = false; // Keep track of all of the stack objects that are dead at the end of the // function. SmallSetVector<Value*, 16> DeadStackObjects; // Find all of the alloca'd pointers in the entry block. BasicBlock *Entry = BB.getParent()->begin(); for (BasicBlock::iterator I = Entry->begin(), E = Entry->end(); I != E; ++I) { if (isa<AllocaInst>(I)) DeadStackObjects.insert(I); // Okay, so these are dead heap objects, but if the pointer never escapes // then it's leaked by this function anyways. else if (isAllocLikeFn(I, TLI) && !PointerMayBeCaptured(I, true, true)) DeadStackObjects.insert(I); } // Treat byval or inalloca arguments the same, stores to them are dead at the // end of the function. for (Function::arg_iterator AI = BB.getParent()->arg_begin(), AE = BB.getParent()->arg_end(); AI != AE; ++AI) if (AI->hasByValOrInAllocaAttr()) DeadStackObjects.insert(AI); // Scan the basic block backwards for (BasicBlock::iterator BBI = BB.end(); BBI != BB.begin(); ){ --BBI; // If we find a store, check to see if it points into a dead stack value. if (hasMemoryWrite(BBI, TLI) && isRemovable(BBI)) { // See through pointer-to-pointer bitcasts SmallVector<Value *, 4> Pointers; GetUnderlyingObjects(getStoredPointerOperand(BBI), Pointers); // Stores to stack values are valid candidates for removal. bool AllDead = true; for (SmallVectorImpl<Value *>::iterator I = Pointers.begin(), E = Pointers.end(); I != E; ++I) if (!DeadStackObjects.count(*I)) { AllDead = false; break; } if (AllDead) { Instruction *Dead = BBI++; DEBUG(dbgs() << "DSE: Dead Store at End of Block:\n DEAD: " << *Dead << "\n Objects: "; for (SmallVectorImpl<Value *>::iterator I = Pointers.begin(), E = Pointers.end(); I != E; ++I) { dbgs() << **I; if (llvm::next(I) != E) dbgs() << ", "; } dbgs() << '\n'); // DCE instructions only used to calculate that store. DeleteDeadInstruction(Dead, *MD, TLI, &DeadStackObjects); ++NumFastStores; MadeChange = true; continue; } } // Remove any dead non-memory-mutating instructions. if (isInstructionTriviallyDead(BBI, TLI)) { Instruction *Inst = BBI++; DeleteDeadInstruction(Inst, *MD, TLI, &DeadStackObjects); ++NumFastOther; MadeChange = true; continue; } if (isa<AllocaInst>(BBI)) { // Remove allocas from the list of dead stack objects; there can't be // any references before the definition. DeadStackObjects.remove(BBI); continue; } if (CallSite CS = cast<Value>(BBI)) { // Remove allocation function calls from the list of dead stack objects; // there can't be any references before the definition. if (isAllocLikeFn(BBI, TLI)) DeadStackObjects.remove(BBI); // If this call does not access memory, it can't be loading any of our // pointers. if (AA->doesNotAccessMemory(CS)) continue; // If the call might load from any of our allocas, then any store above // the call is live. CouldRef Pred = { CS, AA }; DeadStackObjects.remove_if(Pred); // If all of the allocas were clobbered by the call then we're not going // to find anything else to process. 
if (DeadStackObjects.empty()) break; continue; } AliasAnalysis::Location LoadedLoc; // If we encounter a use of the pointer, it is no longer considered dead if (LoadInst *L = dyn_cast<LoadInst>(BBI)) { if (!L->isUnordered()) // Be conservative with atomic/volatile load break; LoadedLoc = AA->getLocation(L); } else if (VAArgInst *V = dyn_cast<VAArgInst>(BBI)) { LoadedLoc = AA->getLocation(V); } else if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(BBI)) { LoadedLoc = AA->getLocationForSource(MTI); } else if (!BBI->mayReadFromMemory()) { // Instruction doesn't read memory. Note that stores that weren't removed // above will hit this case. continue; } else { // Unknown inst; assume it clobbers everything. break; } // Remove any allocas from the DeadPointer set that are loaded, as this // makes any stores above the access live. RemoveAccessedObjects(LoadedLoc, DeadStackObjects); // If all of the allocas were clobbered by the access then we're not going // to find anything else to process. if (DeadStackObjects.empty()) break; }
return MadeChange; }
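// A minimal standalone sketch (not DSE itself) of the call handling above:
// when a call might read from some of the tracked dead objects, drop
// exactly those objects from the dead set via a predicate. Types and names
// are illustrative.
#include <algorithm>
#include <cassert>
#include <string>
#include <vector>

struct FakeCall { std::vector<std::string> MayRead; };

static bool couldRef(const FakeCall &CS, const std::string &Obj) {
  return std::find(CS.MayRead.begin(), CS.MayRead.end(), Obj) !=
         CS.MayRead.end();
}

int main() {
  std::vector<std::string> DeadStackObjects = {"A", "B", "C"};
  FakeCall CS{{"B"}};
  DeadStackObjects.erase(
      std::remove_if(DeadStackObjects.begin(), DeadStackObjects.end(),
                     [&](const std::string &O) { return couldRef(CS, O); }),
      DeadStackObjects.end());
  assert(DeadStackObjects.size() == 2); // "B" is no longer considered dead.
  return 0;
}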
int main(int argc_, const char **argv_) { llvm::sys::PrintStackTraceOnErrorSignal(); llvm::PrettyStackTraceProgram X(argc_, argv_); std::set<std::string> SavedStrings; SmallVector<const char*, 256> argv; ExpandArgv(argc_, argv_, argv, SavedStrings); // Handle -cc1 integrated tools. if (argv.size() > 1 && StringRef(argv[1]).startswith("-cc1")) { StringRef Tool = argv[1] + 4; if (Tool == "") return cc1_main(argv.data()+2, argv.data()+argv.size(), argv[0], (void*) (intptr_t) GetExecutablePath); if (Tool == "as") return cc1as_main(argv.data()+2, argv.data()+argv.size(), argv[0], (void*) (intptr_t) GetExecutablePath); // Reject unknown tools. llvm::errs() << "error: unknown integrated tool '" << Tool << "'\n"; return 1; } bool CanonicalPrefixes = true; for (int i = 1, size = argv.size(); i < size; ++i) { if (StringRef(argv[i]) == "-no-canonical-prefixes") { CanonicalPrefixes = false; break; } } // Handle QA_OVERRIDE_GCC3_OPTIONS and CCC_ADD_ARGS, used for editing a // command line behind the scenes. if (const char *OverrideStr = ::getenv("QA_OVERRIDE_GCC3_OPTIONS")) { // FIXME: Driver shouldn't take extra initial argument. ApplyQAOverride(argv, OverrideStr, SavedStrings); } else if (const char *Cur = ::getenv("CCC_ADD_ARGS")) { // FIXME: Driver shouldn't take extra initial argument. std::vector<const char*> ExtraArgs; for (;;) { const char *Next = strchr(Cur, ','); if (Next) { ExtraArgs.push_back(SaveStringInSet(SavedStrings, std::string(Cur, Next))); Cur = Next + 1; } else { if (*Cur != '\0') ExtraArgs.push_back(SaveStringInSet(SavedStrings, Cur)); break; } } argv.insert(&argv[1], ExtraArgs.begin(), ExtraArgs.end()); } llvm::sys::Path Path = GetExecutablePath(argv[0], CanonicalPrefixes); IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts = new DiagnosticOptions; { // Note that ParseDiagnosticArgs() uses the cc1 option table. OwningPtr<OptTable> CC1Opts(createDriverOptTable()); unsigned MissingArgIndex, MissingArgCount; OwningPtr<InputArgList> Args(CC1Opts->ParseArgs(argv.begin()+1, argv.end(), MissingArgIndex, MissingArgCount)); // We ignore MissingArgCount and the return value of ParseDiagnosticArgs. // Any errors that would be diagnosed here will also be diagnosed later, // when the DiagnosticsEngine actually exists. (void) ParseDiagnosticArgs(*DiagOpts, *Args); } // Now we can create the DiagnosticsEngine with a properly-filled-out // DiagnosticOptions instance. TextDiagnosticPrinter *DiagClient = new TextDiagnosticPrinter(llvm::errs(), &*DiagOpts); DiagClient->setPrefix(llvm::sys::path::filename(Path.str())); IntrusiveRefCntPtr<DiagnosticIDs> DiagID(new DiagnosticIDs()); DiagnosticsEngine Diags(DiagID, &*DiagOpts, DiagClient); ProcessWarningOptions(Diags, *DiagOpts, /*ReportDiags=*/false); Driver TheDriver(Path.str(), llvm::sys::getDefaultTargetTriple(), "a.out", Diags); // Attempt to find the original path used to invoke the driver, to determine // the installed path. We do this manually, because we want to support that // path being a symlink. { SmallString<128> InstalledPath(argv[0]); // Do a PATH lookup, if there are no directory components. 
if (llvm::sys::path::filename(InstalledPath) == InstalledPath) { llvm::sys::Path Tmp = llvm::sys::Program::FindProgramByName( llvm::sys::path::filename(InstalledPath.str())); if (!Tmp.empty()) InstalledPath = Tmp.str(); } llvm::sys::fs::make_absolute(InstalledPath); InstalledPath = llvm::sys::path::parent_path(InstalledPath); bool exists; if (!llvm::sys::fs::exists(InstalledPath.str(), exists) && exists) TheDriver.setInstalledDir(InstalledPath); } llvm::InitializeAllTargets(); ParseProgName(argv, SavedStrings, TheDriver); // Handle CC_PRINT_OPTIONS and CC_PRINT_OPTIONS_FILE. TheDriver.CCPrintOptions = !!::getenv("CC_PRINT_OPTIONS"); if (TheDriver.CCPrintOptions) TheDriver.CCPrintOptionsFilename = ::getenv("CC_PRINT_OPTIONS_FILE"); // Handle CC_PRINT_HEADERS and CC_PRINT_HEADERS_FILE. TheDriver.CCPrintHeaders = !!::getenv("CC_PRINT_HEADERS"); if (TheDriver.CCPrintHeaders) TheDriver.CCPrintHeadersFilename = ::getenv("CC_PRINT_HEADERS_FILE"); // Handle CC_LOG_DIAGNOSTICS and CC_LOG_DIAGNOSTICS_FILE. TheDriver.CCLogDiagnostics = !!::getenv("CC_LOG_DIAGNOSTICS"); if (TheDriver.CCLogDiagnostics) TheDriver.CCLogDiagnosticsFilename = ::getenv("CC_LOG_DIAGNOSTICS_FILE"); OwningPtr<Compilation> C(TheDriver.BuildCompilation(argv)); int Res = 0; SmallVector<std::pair<int, const Command *>, 4> FailingCommands; if (C.get()) Res = TheDriver.ExecuteCompilation(*C, FailingCommands); // Force a crash to test the diagnostics. if (::getenv("FORCE_CLANG_DIAGNOSTICS_CRASH")) { Diags.Report(diag::err_drv_force_crash) << "FORCE_CLANG_DIAGNOSTICS_CRASH"; const Command *FailingCommand = 0; FailingCommands.push_back(std::make_pair(-1, FailingCommand)); } for (SmallVectorImpl< std::pair<int, const Command *> >::iterator it = FailingCommands.begin(), ie = FailingCommands.end(); it != ie; ++it) { int CommandRes = it->first; const Command *FailingCommand = it->second; if (!Res) Res = CommandRes; // If result status is < 0, then the driver command signalled an error. // If result status is 70, then the driver command reported a fatal error. // In these cases, generate additional diagnostic information if possible. if (CommandRes < 0 || CommandRes == 70) { TheDriver.generateCompilationDiagnostics(*C, FailingCommand); break; } } // If any timers were active but haven't been destroyed yet, print their // results now. This happens in -disable-free mode. llvm::TimerGroup::printAll(llvm::errs()); llvm::llvm_shutdown(); #ifdef _WIN32 // Exit status should not be negative on Win32, unless abnormal termination. // Once abnormal termination was caught, negative status should not be // propagated. if (Res < 0) Res = 1; #endif // If we have multiple failing commands, we return the result of the first // failing command. return Res; }
/// Unroll the given loop by Count. The loop must be in LCSSA form. Returns true /// if unrolling was successful, or false if the loop was unmodified. Unrolling /// can only fail when the loop's latch block is not terminated by a conditional /// branch instruction. However, if the trip count (and multiple) are not known, /// loop unrolling will mostly produce more code that is no faster. /// /// TripCount is generally defined as the number of times the loop header /// executes. UnrollLoop relaxes the definition to permit early exits: here /// TripCount is the iteration on which control exits LatchBlock if no early /// exits were taken. Note that UnrollLoop assumes that the loop counter test /// terminates LatchBlock in order to remove unnecessary instances of the /// test. In other words, control may exit the loop prior to TripCount /// iterations via an early branch, but control may not exit the loop from the /// LatchBlock's terminator prior to TripCount iterations. /// /// Similarly, TripMultiple divides the number of times that the LatchBlock may /// execute without exiting the loop. /// /// If AllowRuntime is true then UnrollLoop will consider unrolling loops that /// have a runtime (i.e. not compile time constant) trip count. Unrolling these /// loops requires an unroll "prologue" that runs "RuntimeTripCount % Count" /// iterations before branching into the unrolled loop. UnrollLoop will not /// runtime-unroll the loop if computing RuntimeTripCount will be expensive and /// AllowExpensiveTripCount is false. /// /// The LoopInfo Analysis that is passed will be kept consistent. /// /// This utility preserves LoopInfo. It will also preserve ScalarEvolution and /// DominatorTree if they are non-null. bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, bool AllowRuntime, bool AllowExpensiveTripCount, unsigned TripMultiple, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, bool PreserveLCSSA) { BasicBlock *Preheader = L->getLoopPreheader(); if (!Preheader) { DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n"); return false; } BasicBlock *LatchBlock = L->getLoopLatch(); if (!LatchBlock) { DEBUG(dbgs() << " Can't unroll; loop exit-block-insertion failed.\n"); return false; } // Loops with indirectbr cannot be cloned. if (!L->isSafeToClone()) { DEBUG(dbgs() << " Can't unroll; Loop body cannot be cloned.\n"); return false; } BasicBlock *Header = L->getHeader(); BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator()); if (!BI || BI->isUnconditional()) { // The loop-rotate pass can be helpful to avoid this in many cases. DEBUG(dbgs() << " Can't unroll; loop not terminated by a conditional branch.\n"); return false; } if (Header->hasAddressTaken()) { // The loop-rotate pass can be helpful to avoid this in many cases. DEBUG(dbgs() << " Won't unroll loop: address of header block is taken.\n"); return false; } if (TripCount != 0) DEBUG(dbgs() << " Trip Count = " << TripCount << "\n"); if (TripMultiple != 1) DEBUG(dbgs() << " Trip Multiple = " << TripMultiple << "\n"); // Effectively "DCE" unrolled iterations that are beyond the tripcount // and will never be executed. if (TripCount != 0 && Count > TripCount) Count = TripCount; // Don't enter the unroll code if there is nothing to do. This way we don't // need to support "partial unrolling by 1".
if (TripCount == 0 && Count < 2) return false; assert(Count > 0); assert(TripMultiple > 0); assert(TripCount == 0 || TripCount % TripMultiple == 0); // Are we eliminating the loop control altogether? bool CompletelyUnroll = Count == TripCount; SmallVector<BasicBlock *, 4> ExitBlocks; L->getExitBlocks(ExitBlocks); std::vector<BasicBlock*> OriginalLoopBlocks = L->getBlocks(); // Go through all exits of L and see if there are any phi-nodes there. We just // conservatively assume that they're inserted to preserve LCSSA form, which // means that complete unrolling might break this form. We need to either fix // it in-place after the transformation, or entirely rebuild LCSSA. TODO: For // now we just recompute LCSSA for the outer loop, but it should be possible // to fix it in-place. bool NeedToFixLCSSA = PreserveLCSSA && CompletelyUnroll && std::any_of(ExitBlocks.begin(), ExitBlocks.end(), [&](BasicBlock *BB) { return isa<PHINode>(BB->begin()); }); // We assume a run-time trip count if the compiler cannot // figure out the loop trip count and the unroll-runtime // flag is specified. bool RuntimeTripCount = (TripCount == 0 && Count > 0 && AllowRuntime); // Loops containing convergent instructions must have a count that divides // their TripMultiple. DEBUG( { bool HasConvergent = false; for (auto &BB : L->blocks()) for (auto &I : *BB) if (auto CS = CallSite(&I)) HasConvergent |= CS.isConvergent(); assert((!HasConvergent || TripMultiple % Count == 0) && "Unroll count must divide trip multiple if loop contains a " "convergent operation."); });
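// A minimal standalone sketch (not UnrollLoop) of the count adjustment
// above: clamp the unroll count to a known trip count, and decide whether
// the unroll eliminates the loop control entirely. Names are illustrative.
#include <cassert>

struct UnrollDecision {
  unsigned Count;
  bool CompletelyUnroll;
};

static UnrollDecision adjustCount(unsigned Count, unsigned TripCount) {
  // Iterations beyond the trip count would never execute; drop them.
  if (TripCount != 0 && Count > TripCount)
    Count = TripCount;
  return {Count, Count == TripCount};
}

int main() {
  UnrollDecision D = adjustCount(8, 5);
  assert(D.Count == 5 && D.CompletelyUnroll);
  return 0;
}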
/// UnswitchNontrivialCondition - We determined that the loop is profitable /// to unswitch when LIC equals Val. Split it into loop versions and test the /// condition outside of either loop. void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val, Loop *L) { Function *F = loopHeader->getParent(); DEBUG(dbgs() << "loop-unswitch: Unswitching loop %" << loopHeader->getName() << " [" << L->getBlocks().size() << " blocks] in Function " << F->getName() << " when '" << *Val << "' == " << *LIC << "\n"); if (ScalarEvolution *SE = getAnalysisIfAvailable<ScalarEvolution>()) SE->forgetLoop(L); LoopBlocks.clear(); NewBlocks.clear(); // First step, split the preheader and exit blocks, and add these blocks to // the LoopBlocks list. BasicBlock *NewPreheader = SplitEdge(loopPreheader, loopHeader, this); LoopBlocks.push_back(NewPreheader); // We want the loop to come after the preheader, but before the exit blocks. LoopBlocks.insert(LoopBlocks.end(), L->block_begin(), L->block_end()); SmallVector<BasicBlock*, 8> ExitBlocks; L->getUniqueExitBlocks(ExitBlocks); // Split all of the edges from inside the loop to their exit blocks. Update // the appropriate Phi nodes as we do so. SplitExitEdges(L, ExitBlocks); // The exit blocks may have been changed due to edge splitting, recompute. ExitBlocks.clear(); L->getUniqueExitBlocks(ExitBlocks); // Add exit blocks to the loop blocks. LoopBlocks.insert(LoopBlocks.end(), ExitBlocks.begin(), ExitBlocks.end()); // Next step, clone all of the basic blocks that make up the loop (including // the loop preheader and exit blocks), keeping track of the mapping between // the instructions and blocks. NewBlocks.reserve(LoopBlocks.size()); ValueToValueMapTy VMap; for (unsigned i = 0, e = LoopBlocks.size(); i != e; ++i) { BasicBlock *NewBB = CloneBasicBlock(LoopBlocks[i], VMap, ".us", F); NewBlocks.push_back(NewBB); VMap[LoopBlocks[i]] = NewBB; // Keep the BB mapping. LPM->cloneBasicBlockSimpleAnalysis(LoopBlocks[i], NewBB, L); } // Splice the newly inserted blocks into the function right before the // original preheader. F->getBasicBlockList().splice(NewPreheader, F->getBasicBlockList(), NewBlocks[0], F->end()); // Now we create the new Loop object for the versioned loop. Loop *NewLoop = CloneLoop(L, L->getParentLoop(), VMap, LI, LPM); Loop *ParentLoop = L->getParentLoop(); if (ParentLoop) { // Make sure to add the cloned preheader and exit blocks to the parent loop // as well. ParentLoop->addBasicBlockToLoop(NewBlocks[0], LI->getBase()); } for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { BasicBlock *NewExit = cast<BasicBlock>(VMap[ExitBlocks[i]]); // The new exit block should be in the same loop as the old one. if (Loop *ExitBBLoop = LI->getLoopFor(ExitBlocks[i])) ExitBBLoop->addBasicBlockToLoop(NewExit, LI->getBase()); assert(NewExit->getTerminator()->getNumSuccessors() == 1 && "Exit block should have been split to have one successor!"); BasicBlock *ExitSucc = NewExit->getTerminator()->getSuccessor(0); // If the successor of the exit block had PHI nodes, add an entry for // NewExit. PHINode *PN; for (BasicBlock::iterator I = ExitSucc->begin(); isa<PHINode>(I); ++I) { PN = cast<PHINode>(I); Value *V = PN->getIncomingValueForBlock(ExitBlocks[i]); ValueToValueMapTy::iterator It = VMap.find(V); if (It != VMap.end()) V = It->second; PN->addIncoming(V, NewExit); } } // Rewrite the code to refer to itself.
for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i) for (BasicBlock::iterator I = NewBlocks[i]->begin(), E = NewBlocks[i]->end(); I != E; ++I) RemapInstruction(I, VMap,RF_NoModuleLevelChanges|RF_IgnoreMissingEntries); // Rewrite the original preheader to select between versions of the loop. BranchInst *OldBR = cast<BranchInst>(loopPreheader->getTerminator()); assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == LoopBlocks[0] && "Preheader splitting did not work correctly!"); // Emit the new branch that selects between the two versions of this loop. EmitPreheaderBranchOnCondition(LIC, Val, NewBlocks[0], LoopBlocks[0], OldBR); LPM->deleteSimpleAnalysisValue(OldBR, L); OldBR->eraseFromParent(); LoopProcessWorklist.push_back(NewLoop); redoLoop = true; // Keep a WeakVH holding onto LIC. If the first call to RewriteLoopBody // deletes the instruction (for example by simplifying a PHI that feeds into // the condition that we're unswitching on), we don't rewrite the second // iteration. WeakVH LICHandle(LIC); // Now we rewrite the original code to know that the condition is true and the // new code to know that the condition is false. RewriteLoopBodyWithConditionConstant(L, LIC, Val, false); // It's possible that simplifying one loop could cause the other to be // changed to another value or a constant. If it's a constant, don't simplify // it. if (!LoopProcessWorklist.empty() && LoopProcessWorklist.back() == NewLoop && LICHandle && !isa<Constant>(LICHandle)) RewriteLoopBodyWithConditionConstant(NewLoop, LICHandle, Val, true); }
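// A minimal standalone sketch (not LoopUnswitch) of the clone-and-remap
// pattern above: clone a set of objects while recording an old-to-new map,
// then rewrite references inside the clones through that map so the cloned
// version refers to itself. Types and names are illustrative.
#include <cassert>
#include <map>
#include <vector>

struct Node { Node *Target = nullptr; };

int main() {
  // Original "blocks": N0 branches to N1.
  std::vector<Node> Orig(2);
  Orig[0].Target = &Orig[1];

  // Clone while recording the mapping (the analogue of VMap).
  std::vector<Node> Clone(Orig);
  std::map<Node *, Node *> VMap;
  for (size_t i = 0; i < Orig.size(); ++i)
    VMap[&Orig[i]] = &Clone[i];

  // Remap references inside the clones ("rewrite the code to refer to
  // itself").
  for (Node &N : Clone)
    if (N.Target && VMap.count(N.Target))
      N.Target = VMap[N.Target];

  assert(Clone[0].Target == &Clone[1]); // Clone no longer points at Orig.
  return 0;
}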
// A soft instruction can be changed to work in other domains given by mask. void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { // Bitmask of available domains for this instruction after taking collapsed // operands into account. unsigned available = mask; // Scan the explicit use operands for incoming domains. SmallVector<int, 4> used; if (LiveRegs) for (unsigned i = mi->getDesc().getNumDefs(), e = mi->getDesc().getNumOperands(); i != e; ++i) { MachineOperand &mo = mi->getOperand(i); if (!mo.isReg()) continue; for (int rx : regIndices(mo.getReg())) { DomainValue *dv = LiveRegs[rx].Value; if (dv == nullptr) continue; // Bitmask of domains that dv and available have in common. unsigned common = dv->getCommonDomains(available); // Is it possible to use this collapsed register for free? if (dv->isCollapsed()) { // Restrict available domains to the ones in common with the operand. // If there are no common domains, we must pay the cross-domain // penalty for this operand. if (common) available = common; } else if (common) // Open DomainValue is compatible, save it for merging. used.push_back(rx); else // Open DomainValue is not compatible with instruction. It is useless // now. kill(rx); } } // If the collapsed operands force a single domain, propagate the collapse. if (isPowerOf2_32(available)) { unsigned domain = countTrailingZeros(available); TII->setExecutionDomain(mi, domain); visitHardInstr(mi, domain); return; } // Kill off any remaining uses that don't match available, and build a list of // incoming DomainValues that we want to merge. SmallVector<LiveReg, 4> Regs; for (SmallVectorImpl<int>::iterator i=used.begin(), e=used.end(); i!=e; ++i) { int rx = *i; assert(LiveRegs && "no space allocated for live registers"); const LiveReg &LR = LiveRegs[rx]; // This useless DomainValue could have been missed above. if (!LR.Value->getCommonDomains(available)) { kill(rx); continue; } // Sorted insertion. bool Inserted = false; for (SmallVectorImpl<LiveReg>::iterator i = Regs.begin(), e = Regs.end(); i != e && !Inserted; ++i) { if (LR.Def < i->Def) { Inserted = true; Regs.insert(i, LR); } } if (!Inserted) Regs.push_back(LR); } // The DomainValues are now sorted in order of appearance. Try to merge them // all, giving priority to the latest ones. DomainValue *dv = nullptr; while (!Regs.empty()) { if (!dv) { dv = Regs.pop_back_val().Value; // Force the first dv to match the current instruction. dv->AvailableDomains = dv->getCommonDomains(available); assert(dv->AvailableDomains && "Domain should have been filtered"); continue; } DomainValue *Latest = Regs.pop_back_val().Value; // Skip already merged values. if (Latest == dv || Latest->Next) continue; if (merge(dv, Latest)) continue; // If latest didn't merge, it is useless now. Kill all registers using it. for (int i : used) { assert(LiveRegs && "no space allocated for live registers"); if (LiveRegs[i].Value == Latest) kill(i); } } // dv is the DomainValue we are going to use for this instruction. if (!dv) { dv = alloc(); dv->AvailableDomains = available; } dv->Instrs.push_back(mi); // Finally set all defs and non-collapsed uses to dv. We must iterate through // all the operands, including imp-def ones. for (MachineInstr::mop_iterator ii = mi->operands_begin(), ee = mi->operands_end(); ii != ee; ++ii) { MachineOperand &mo = *ii; if (!mo.isReg()) continue; for (int rx : regIndices(mo.getReg())) { if (!LiveRegs[rx].Value || (mo.isDef() && LiveRegs[rx].Value != dv)) { kill(rx); setLiveReg(rx, dv); } } } }
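// A minimal standalone sketch (not ExeDepsFix) of the bitmask logic above:
// intersect available execution domains, and if exactly one domain remains
// (the mask is a power of two), "collapse" to that domain's index. Names
// and mask values are illustrative.
#include <cassert>

static bool isSingleDomain(unsigned Mask) {
  return Mask != 0 && (Mask & (Mask - 1)) == 0; // Power of two?
}

static unsigned domainIndex(unsigned Mask) {
  unsigned Index = 0;
  while ((Mask & 1) == 0) { // Count trailing zeros.
    Mask >>= 1;
    ++Index;
  }
  return Index;
}

int main() {
  unsigned Available = 0x6;          // Domains 1 and 2 available.
  unsigned Common = Available & 0x4; // Operand supports domain 2 only.
  assert(isSingleDomain(Common) && domainIndex(Common) == 2);
  return 0;
}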
void Diagnostic:: FormatDiagnostic(const char *DiagStr, const char *DiagEnd, SmallVectorImpl<char> &OutStr) const { /// FormattedArgs - Keep track of all of the arguments formatted by /// ConvertArgToString and pass them into subsequent calls to /// ConvertArgToString, allowing the implementation to avoid redundancies in /// obvious cases. SmallVector<DiagnosticsEngine::ArgumentValue, 8> FormattedArgs; /// QualTypeVals - Pass a vector of arrays so that QualType names can be /// compared to see if more information is needed to be printed. SmallVector<intptr_t, 2> QualTypeVals; SmallVector<char, 64> Tree; for (unsigned i = 0, e = getNumArgs(); i < e; ++i) if (getArgKind(i) == DiagnosticsEngine::ak_qualtype) QualTypeVals.push_back(getRawArg(i)); while (DiagStr != DiagEnd) { if (DiagStr[0] != '%') { // Append non-%0 substrings to OutStr if we have one. const char *StrEnd = std::find(DiagStr, DiagEnd, '%'); OutStr.append(DiagStr, StrEnd); DiagStr = StrEnd; continue; } else if (ispunct(DiagStr[1])) { OutStr.push_back(DiagStr[1]); // %% -> %. DiagStr += 2; continue; } // Skip the %. ++DiagStr; // This must be a placeholder for a diagnostic argument. The format for a // placeholder is one of "%0", "%modifier0", or "%modifier{arguments}0". // The digit is a number from 0-9 indicating which argument this comes from. // The modifier is a string of characters from the set [-a-z]; arguments is a // brace enclosed string. const char *Modifier = 0, *Argument = 0; unsigned ModifierLen = 0, ArgumentLen = 0; // Check to see if we have a modifier. If so eat it. if (!isdigit(DiagStr[0])) { Modifier = DiagStr; while (DiagStr[0] == '-' || (DiagStr[0] >= 'a' && DiagStr[0] <= 'z')) ++DiagStr; ModifierLen = DiagStr-Modifier; // If we have an argument, get it next. if (DiagStr[0] == '{') { ++DiagStr; // Skip {. Argument = DiagStr; DiagStr = ScanFormat(DiagStr, DiagEnd, '}'); assert(DiagStr != DiagEnd && "Mismatched {}'s in diagnostic string!"); ArgumentLen = DiagStr-Argument; ++DiagStr; // Skip }. } } assert(isdigit(*DiagStr) && "Invalid format for argument in diagnostic"); unsigned ArgNo = *DiagStr++ - '0'; // Only used for type diffing. unsigned ArgNo2 = ArgNo; DiagnosticsEngine::ArgumentKind Kind = getArgKind(ArgNo); if (Kind == DiagnosticsEngine::ak_qualtype && ModifierIs(Modifier, ModifierLen, "diff")) { Kind = DiagnosticsEngine::ak_qualtype_pair; assert(*DiagStr == ',' && isdigit(*(DiagStr + 1)) && "Invalid format for diff modifier"); ++DiagStr; // Comma. ArgNo2 = *DiagStr++ - '0'; assert(getArgKind(ArgNo2) == DiagnosticsEngine::ak_qualtype && "Second value of type diff must be a qualtype"); } switch (Kind) { // ---- STRINGS ---- case DiagnosticsEngine::ak_std_string: { const std::string &S = getArgStdStr(ArgNo); assert(ModifierLen == 0 && "No modifiers for strings yet"); OutStr.append(S.begin(), S.end()); break; } case DiagnosticsEngine::ak_c_string: { const char *S = getArgCStr(ArgNo); assert(ModifierLen == 0 && "No modifiers for strings yet"); // Don't crash if we get passed a null pointer by accident.
if (!S) S = "(null)"; OutStr.append(S, S + strlen(S)); break; } // ---- INTEGERS ---- case DiagnosticsEngine::ak_sint: { int Val = getArgSInt(ArgNo); if (ModifierIs(Modifier, ModifierLen, "select")) { HandleSelectModifier(*this, (unsigned)Val, Argument, ArgumentLen, OutStr); } else if (ModifierIs(Modifier, ModifierLen, "s")) { HandleIntegerSModifier(Val, OutStr); } else if (ModifierIs(Modifier, ModifierLen, "plural")) { HandlePluralModifier(*this, (unsigned)Val, Argument, ArgumentLen, OutStr); } else if (ModifierIs(Modifier, ModifierLen, "ordinal")) { HandleOrdinalModifier((unsigned)Val, OutStr); } else { assert(ModifierLen == 0 && "Unknown integer modifier"); llvm::raw_svector_ostream(OutStr) << Val; } break; } case DiagnosticsEngine::ak_uint: { unsigned Val = getArgUInt(ArgNo); if (ModifierIs(Modifier, ModifierLen, "select")) { HandleSelectModifier(*this, Val, Argument, ArgumentLen, OutStr); } else if (ModifierIs(Modifier, ModifierLen, "s")) { HandleIntegerSModifier(Val, OutStr); } else if (ModifierIs(Modifier, ModifierLen, "plural")) { HandlePluralModifier(*this, (unsigned)Val, Argument, ArgumentLen, OutStr); } else if (ModifierIs(Modifier, ModifierLen, "ordinal")) { HandleOrdinalModifier(Val, OutStr); } else { assert(ModifierLen == 0 && "Unknown integer modifier"); llvm::raw_svector_ostream(OutStr) << Val; } break; } // ---- NAMES and TYPES ---- case DiagnosticsEngine::ak_identifierinfo: { const IdentifierInfo *II = getArgIdentifier(ArgNo); assert(ModifierLen == 0 && "No modifiers for strings yet"); // Don't crash if we get passed a null pointer by accident. if (!II) { const char *S = "(null)"; OutStr.append(S, S + strlen(S)); continue; } llvm::raw_svector_ostream(OutStr) << '\'' << II->getName() << '\''; break; } case DiagnosticsEngine::ak_qualtype: case DiagnosticsEngine::ak_declarationname: case DiagnosticsEngine::ak_nameddecl: case DiagnosticsEngine::ak_nestednamespec: case DiagnosticsEngine::ak_declcontext: getDiags()->ConvertArgToString(Kind, getRawArg(ArgNo), Modifier, ModifierLen, Argument, ArgumentLen, FormattedArgs.data(), FormattedArgs.size(), OutStr, QualTypeVals); break; case DiagnosticsEngine::ak_qualtype_pair: // Create a struct with all the info needed for printing. TemplateDiffTypes TDT; TDT.FromType = getRawArg(ArgNo); TDT.ToType = getRawArg(ArgNo2); TDT.ElideType = getDiags()->ElideType; TDT.ShowColors = getDiags()->ShowColors; TDT.TemplateDiffUsed = false; intptr_t val = reinterpret_cast<intptr_t>(&TDT); const char *ArgumentEnd = Argument + ArgumentLen; const char *Pipe = ScanFormat(Argument, ArgumentEnd, '|'); // Print the tree. If this diagnostic already has a tree, skip the // second tree. if (getDiags()->PrintTemplateTree && Tree.empty()) { TDT.PrintFromType = true; TDT.PrintTree = true; getDiags()->ConvertArgToString(Kind, val, Modifier, ModifierLen, Argument, ArgumentLen, FormattedArgs.data(), FormattedArgs.size(), Tree, QualTypeVals); // If there is no tree information, fall back to regular printing. if (!Tree.empty()) { FormatDiagnostic(Pipe + 1, ArgumentEnd, OutStr); break; } } // Non-tree printing, also the fall-back when tree printing fails. // The fall-back is triggered when the types compared are not templates.
const char *FirstDollar = ScanFormat(Argument, ArgumentEnd, '$'); const char *SecondDollar = ScanFormat(FirstDollar + 1, ArgumentEnd, '$'); // Append before text FormatDiagnostic(Argument, FirstDollar, OutStr); // Append first type TDT.PrintTree = false; TDT.PrintFromType = true; getDiags()->ConvertArgToString(Kind, val, Modifier, ModifierLen, Argument, ArgumentLen, FormattedArgs.data(), FormattedArgs.size(), OutStr, QualTypeVals); if (!TDT.TemplateDiffUsed) FormattedArgs.push_back(std::make_pair(DiagnosticsEngine::ak_qualtype, TDT.FromType)); // Append middle text FormatDiagnostic(FirstDollar + 1, SecondDollar, OutStr); // Append second type TDT.PrintFromType = false; getDiags()->ConvertArgToString(Kind, val, Modifier, ModifierLen, Argument, ArgumentLen, FormattedArgs.data(), FormattedArgs.size(), OutStr, QualTypeVals); if (!TDT.TemplateDiffUsed) FormattedArgs.push_back(std::make_pair(DiagnosticsEngine::ak_qualtype, TDT.ToType)); // Append end text FormatDiagnostic(SecondDollar + 1, Pipe, OutStr); break; } } // Remember this argument info for subsequent formatting operations. Turn // std::strings into null-terminated strings so they are handled the same way // as all the other kinds. if (Kind == DiagnosticsEngine::ak_qualtype_pair) continue; else if (Kind != DiagnosticsEngine::ak_std_string) FormattedArgs.push_back(std::make_pair(Kind, getRawArg(ArgNo))); else FormattedArgs.push_back(std::make_pair(DiagnosticsEngine::ak_c_string, (intptr_t)getArgStdStr(ArgNo).c_str())); } // Append the type tree to the end of the diagnostic message. OutStr.append(Tree.begin(), Tree.end()); }
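// A minimal, self-contained sketch of the placeholder grammar handled above
// ("%0", "%modifier0", "%modifier{arguments}0"). The Placeholder struct and
// parsePlaceholder helper are hypothetical and exist only to illustrate the
// scanning order: modifier characters first, then an optional brace-enclosed
// argument, then the mandatory digit. Unlike ScanFormat, this simplified
// version ignores nested braces and '%' escapes inside the argument.
struct Placeholder {
  llvm::StringRef Modifier; // e.g. "select" or "plural"; empty for plain "%0"
  llvm::StringRef Argument; // contents of the {...}, if present
  unsigned ArgNo;           // the trailing digit, 0-9
};

static const char *parsePlaceholder(const char *P, Placeholder &PH) {
  const char *ModBegin = P;
  while (*P == '-' || (*P >= 'a' && *P <= 'z'))
    ++P; // eat the modifier characters
  PH.Modifier = llvm::StringRef(ModBegin, P - ModBegin);
  if (*P == '{') {
    const char *ArgBegin = ++P;
    while (*P && *P != '}')
      ++P; // eat the brace-enclosed argument (no nesting handled here)
    PH.Argument = llvm::StringRef(ArgBegin, P - ArgBegin);
    ++P; // skip the '}'
  }
  assert(isdigit(*P) && "placeholder must end in an argument digit");
  PH.ArgNo = unsigned(*P++ - '0');
  return P; // points just past the placeholder
}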
/// Unroll the given loop by Count. The loop must be in LCSSA form. Returns true /// if unrolling was successful, or false if the loop was unmodified. Unrolling /// can only fail when the loop's latch block is not terminated by a conditional /// branch instruction. However, if the trip count (and multiple) are not known, /// loop unrolling will mostly produce more code that is no faster. /// /// TripCount is generally defined as the number of times the loop header /// executes. UnrollLoop relaxes the definition to permit early exits: here /// TripCount is the iteration on which control exits LatchBlock if no early /// exits were taken. Note that UnrollLoop assumes that the loop counter test /// terminates LatchBlock in order to remove unnecessary instances of the /// test. In other words, control may exit the loop prior to TripCount /// iterations via an early branch, but control may not exit the loop from the /// LatchBlock's terminator prior to TripCount iterations. /// /// Similarly, TripMultiple divides the number of times that the LatchBlock may /// execute without exiting the loop. /// /// If AllowRuntime is true then UnrollLoop will consider unrolling loops that /// have a runtime (i.e. not compile time constant) trip count. Unrolling these /// loops requires an unroll "prologue" that runs "RuntimeTripCount % Count" /// iterations before branching into the unrolled loop. UnrollLoop will not /// runtime-unroll the loop if computing RuntimeTripCount will be expensive and /// AllowExpensiveTripCount is false. /// /// This utility preserves LoopInfo; it will also preserve ScalarEvolution and /// DominatorTree if they are non-null. bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool AllowRuntime, bool AllowExpensiveTripCount, unsigned TripMultiple, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, bool PreserveLCSSA) { BasicBlock *Preheader = L->getLoopPreheader(); if (!Preheader) { DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n"); return false; } BasicBlock *LatchBlock = L->getLoopLatch(); if (!LatchBlock) { DEBUG(dbgs() << " Can't unroll; loop latch block could not be found.\n"); return false; } // Loops with indirectbr cannot be cloned. if (!L->isSafeToClone()) { DEBUG(dbgs() << " Can't unroll; Loop body cannot be cloned.\n"); return false; } BasicBlock *Header = L->getHeader(); BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator()); if (!BI || BI->isUnconditional()) { // The loop-rotate pass can be helpful to avoid this in many cases. DEBUG(dbgs() << " Can't unroll; loop not terminated by a conditional branch.\n"); return false; } if (Header->hasAddressTaken()) { // The loop-rotate pass can be helpful to avoid this in many cases. DEBUG(dbgs() << " Won't unroll loop: address of header block is taken.\n"); return false; } if (TripCount != 0) DEBUG(dbgs() << " Trip Count = " << TripCount << "\n"); if (TripMultiple != 1) DEBUG(dbgs() << " Trip Multiple = " << TripMultiple << "\n"); // Effectively "DCE" unrolled iterations that are beyond the tripcount // and will never be executed. if (TripCount != 0 && Count > TripCount) Count = TripCount; // Don't enter the unroll code if there is nothing to do. This way we don't // need to support "partial unrolling by 1".
if (TripCount == 0 && Count < 2) return false; assert(Count > 0); assert(TripMultiple > 0); assert(TripCount == 0 || TripCount % TripMultiple == 0); // Are we eliminating the loop control altogether? bool CompletelyUnroll = Count == TripCount; SmallVector<BasicBlock *, 4> ExitBlocks; L->getExitBlocks(ExitBlocks); Loop *ParentL = L->getParentLoop(); bool AllExitsAreInsideParentLoop = !ParentL || std::all_of(ExitBlocks.begin(), ExitBlocks.end(), [&](BasicBlock *BB) { return ParentL->contains(BB); }); // We assume a run-time trip count if the compiler cannot // figure out the loop trip count and the unroll-runtime // flag is specified. bool RuntimeTripCount = (TripCount == 0 && Count > 0 && AllowRuntime); if (RuntimeTripCount && !UnrollRuntimeLoopProlog(L, Count, AllowExpensiveTripCount, LI, SE, DT, PreserveLCSSA)) return false; // Notify ScalarEvolution that the loop will be substantially changed, // if not outright eliminated. if (SE) SE->forgetLoop(L); // If we know the trip count, we know the multiple... unsigned BreakoutTrip = 0; if (TripCount != 0) { BreakoutTrip = TripCount % Count; TripMultiple = 0; } else { // Figure out what multiple to use. BreakoutTrip = TripMultiple = (unsigned)GreatestCommonDivisor64(Count, TripMultiple); } // Report the unrolling decision. DebugLoc LoopLoc = L->getStartLoc(); Function *F = Header->getParent(); LLVMContext &Ctx = F->getContext(); if (CompletelyUnroll) { DEBUG(dbgs() << "COMPLETELY UNROLLING loop %" << Header->getName() << " with trip count " << TripCount << "!\n"); emitOptimizationRemark(Ctx, DEBUG_TYPE, *F, LoopLoc, Twine("completely unrolled loop with ") + Twine(TripCount) + " iterations"); } else { auto EmitDiag = [&](const Twine &T) { emitOptimizationRemark(Ctx, DEBUG_TYPE, *F, LoopLoc, "unrolled loop by a factor of " + Twine(Count) + T); }; DEBUG(dbgs() << "UNROLLING loop %" << Header->getName() << " by " << Count); if (TripMultiple == 0 || BreakoutTrip != TripMultiple) { DEBUG(dbgs() << " with a breakout at trip " << BreakoutTrip); EmitDiag(" with a breakout at trip " + Twine(BreakoutTrip)); } else if (TripMultiple != 1) { DEBUG(dbgs() << " with " << TripMultiple << " trips per branch"); EmitDiag(" with " + Twine(TripMultiple) + " trips per branch"); } else if (RuntimeTripCount) { DEBUG(dbgs() << " with run-time trip count"); EmitDiag(" with run-time trip count"); } DEBUG(dbgs() << "!\n"); } bool ContinueOnTrue = L->contains(BI->getSuccessor(0)); BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue); // For the first iteration of the loop, we should use the precloned values for // PHI nodes. Insert associations now. ValueToValueMapTy LastValueMap; std::vector<PHINode*> OrigPHINode; for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { OrigPHINode.push_back(cast<PHINode>(I)); } std::vector<BasicBlock*> Headers; std::vector<BasicBlock*> Latches; Headers.push_back(Header); Latches.push_back(LatchBlock); // The current on-the-fly SSA update requires blocks to be processed in // reverse postorder so that LastValueMap contains the correct value at each // exit. LoopBlocksDFS DFS(L); DFS.perform(LI); // Stash the DFS iterators before adding blocks to the loop. 
LoopBlocksDFS::RPOIterator BlockBegin = DFS.beginRPO(); LoopBlocksDFS::RPOIterator BlockEnd = DFS.endRPO(); for (unsigned It = 1; It != Count; ++It) { std::vector<BasicBlock*> NewBlocks; SmallDenseMap<const Loop *, Loop *, 4> NewLoops; NewLoops[L] = L; for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) { ValueToValueMapTy VMap; BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It)); Header->getParent()->getBasicBlockList().push_back(New); // Tell LI about New. if (*BB == Header) { assert(LI->getLoopFor(*BB) == L && "Header should not be in a sub-loop"); L->addBasicBlockToLoop(New, *LI); } else { // Figure out which loop New is in. const Loop *OldLoop = LI->getLoopFor(*BB); assert(OldLoop && "Should (at least) be in the loop being unrolled!"); Loop *&NewLoop = NewLoops[OldLoop]; if (!NewLoop) { // Found a new sub-loop. assert(*BB == OldLoop->getHeader() && "Header should be first in RPO"); Loop *NewLoopParent = NewLoops.lookup(OldLoop->getParentLoop()); assert(NewLoopParent && "Expected parent loop before sub-loop in RPO"); NewLoop = new Loop; NewLoopParent->addChildLoop(NewLoop); // Forget the old loop, since its inputs may have changed. if (SE) SE->forgetLoop(OldLoop); } NewLoop->addBasicBlockToLoop(New, *LI); } if (*BB == Header) // Loop over all of the PHI nodes in the block, changing them to use // the incoming values from the previous block. for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) { PHINode *NewPHI = cast<PHINode>(VMap[OrigPHINode[i]]); Value *InVal = NewPHI->getIncomingValueForBlock(LatchBlock); if (Instruction *InValI = dyn_cast<Instruction>(InVal)) if (It > 1 && L->contains(InValI)) InVal = LastValueMap[InValI]; VMap[OrigPHINode[i]] = InVal; New->getInstList().erase(NewPHI); } // Update our running map of newest clones LastValueMap[*BB] = New; for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end(); VI != VE; ++VI) LastValueMap[VI->first] = VI->second; // Add phi entries for newly created values to all exit blocks. for (succ_iterator SI = succ_begin(*BB), SE = succ_end(*BB); SI != SE; ++SI) { if (L->contains(*SI)) continue; for (BasicBlock::iterator BBI = (*SI)->begin(); PHINode *phi = dyn_cast<PHINode>(BBI); ++BBI) { Value *Incoming = phi->getIncomingValueForBlock(*BB); ValueToValueMapTy::iterator It = LastValueMap.find(Incoming); if (It != LastValueMap.end()) Incoming = It->second; phi->addIncoming(Incoming, New); } } // Keep track of new headers and latches as we create them, so that // we can insert the proper branches later. if (*BB == Header) Headers.push_back(New); if (*BB == LatchBlock) Latches.push_back(New); NewBlocks.push_back(New); } // Remap all instructions in the most recent iteration for (unsigned i = 0; i < NewBlocks.size(); ++i) for (BasicBlock::iterator I = NewBlocks[i]->begin(), E = NewBlocks[i]->end(); I != E; ++I) ::RemapInstruction(&*I, LastValueMap); } // Loop over the PHI nodes in the original block, setting incoming values. for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) { PHINode *PN = OrigPHINode[i]; if (CompletelyUnroll) { PN->replaceAllUsesWith(PN->getIncomingValueForBlock(Preheader)); Header->getInstList().erase(PN); } else if (Count > 1) { Value *InVal = PN->removeIncomingValue(LatchBlock, false); // If this value was defined in the loop, take the value defined by the // last iteration of the loop. 
if (Instruction *InValI = dyn_cast<Instruction>(InVal)) { if (L->contains(InValI)) InVal = LastValueMap[InVal]; } assert(Latches.back() == LastValueMap[LatchBlock] && "bad last latch"); PN->addIncoming(InVal, Latches.back()); } } // Now that all the basic blocks for the unrolled iterations are in place, // set up the branches to connect them. for (unsigned i = 0, e = Latches.size(); i != e; ++i) { // The original branch was replicated in each unrolled iteration. BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator()); // The branch destination. unsigned j = (i + 1) % e; BasicBlock *Dest = Headers[j]; bool NeedConditional = true; if (RuntimeTripCount && j != 0) { NeedConditional = false; } // For a complete unroll, make the last iteration end with a branch // to the exit block. if (CompletelyUnroll) { if (j == 0) Dest = LoopExit; NeedConditional = false; } // If we know the trip count or a multiple of it, we can safely use an // unconditional branch for some iterations. if (j != BreakoutTrip && (TripMultiple == 0 || j % TripMultiple != 0)) { NeedConditional = false; } if (NeedConditional) { // Update the conditional branch's successor for the following // iteration. Term->setSuccessor(!ContinueOnTrue, Dest); } else { // Remove phi operands at this loop exit if (Dest != LoopExit) { BasicBlock *BB = Latches[i]; for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) { if (*SI == Headers[i]) continue; for (BasicBlock::iterator BBI = (*SI)->begin(); PHINode *Phi = dyn_cast<PHINode>(BBI); ++BBI) { Phi->removeIncomingValue(BB, false); } } } // Replace the conditional branch with an unconditional one. BranchInst::Create(Dest, Term); Term->eraseFromParent(); } } // Merge adjacent basic blocks, if possible. SmallPtrSet<Loop *, 4> ForgottenLoops; for (unsigned i = 0, e = Latches.size(); i != e; ++i) { BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator()); if (Term->isUnconditional()) { BasicBlock *Dest = Term->getSuccessor(0); if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI, SE, ForgottenLoops)) std::replace(Latches.begin(), Latches.end(), Dest, Fold); } } // FIXME: We could register any cloned assumptions instead of clearing the // whole function's cache. AC->clear(); // FIXME: Reconstruct dom info, because it is not preserved properly. // Incrementally updating domtree after loop unrolling would be easy. if (DT) DT->recalculate(*L->getHeader()->getParent()); // Simplify any new induction variables in the partially unrolled loop. if (SE && !CompletelyUnroll) { SmallVector<WeakVH, 16> DeadInsts; simplifyLoopIVs(L, SE, DT, LI, DeadInsts); // Aggressively clean up dead instructions that simplifyLoopIVs already // identified. Any remaining should be cleaned up below. while (!DeadInsts.empty()) if (Instruction *Inst = dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val())) RecursivelyDeleteTriviallyDeadInstructions(Inst); } // At this point, the code is well formed. We now do a quick sweep over the // inserted code, doing constant propagation and dead code elimination as we // go. 
const DataLayout &DL = Header->getModule()->getDataLayout(); const std::vector<BasicBlock*> &NewLoopBlocks = L->getBlocks(); for (std::vector<BasicBlock*>::const_iterator BB = NewLoopBlocks.begin(), BBE = NewLoopBlocks.end(); BB != BBE; ++BB) for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); I != E; ) { Instruction *Inst = &*I++; if (isInstructionTriviallyDead(Inst)) (*BB)->getInstList().erase(Inst); else if (Value *V = SimplifyInstruction(Inst, DL)) if (LI->replacementPreservesLCSSAForm(Inst, V)) { Inst->replaceAllUsesWith(V); (*BB)->getInstList().erase(Inst); } } NumCompletelyUnrolled += CompletelyUnroll; ++NumUnrolled; Loop *OuterL = L->getParentLoop(); // Update LoopInfo if the loop is completely removed. if (CompletelyUnroll) LI->updateUnloop(L); // If we have a pass and a DominatorTree we should re-simplify impacted loops // to ensure subsequent analyses can rely on this form. We want to simplify // at least one layer outside of the loop that was unrolled so that any // changes to the parent loop exposed by the unrolling are considered. if (DT) { if (!OuterL && !CompletelyUnroll) OuterL = L; if (OuterL) { bool Simplified = simplifyLoop(OuterL, DT, LI, SE, AC, PreserveLCSSA); // LCSSA must be performed on the outermost affected loop. The unrolled // loop's last latch is guaranteed to be in the outermost loop after // LoopInfo's been updated by updateUnloop. Loop *LatchLoop = LI->getLoopFor(Latches.back()); if (!OuterL->contains(LatchLoop)) while (OuterL->getParentLoop() != LatchLoop) OuterL = OuterL->getParentLoop(); if (CompletelyUnroll && (!AllExitsAreInsideParentLoop || Simplified)) formLCSSARecursively(*OuterL, *DT, LI, SE); else assert(OuterL->isLCSSAForm(*DT) && "Loops should be in LCSSA form after loop-unroll."); } } return true; }
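// A hedged sketch of how a caller might drive UnrollLoop once the analyses
// are in hand; the helper name and the fixed factor of 4 are invented for
// illustration, and the trip-count queries are the usual ScalarEvolution
// ones (0 means "unknown" for the count, 1 for the multiple).
static bool unrollByFour(Loop *L, LoopInfo *LI, ScalarEvolution *SE,
                         DominatorTree *DT, AssumptionCache *AC,
                         bool PreserveLCSSA) {
  unsigned TripCount = SE->getSmallConstantTripCount(L);
  unsigned TripMultiple = SE->getSmallConstantTripMultiple(L);
  return UnrollLoop(L, /*Count=*/4, TripCount, /*AllowRuntime=*/true,
                    /*AllowExpensiveTripCount=*/false, TripMultiple, LI, SE,
                    DT, AC, PreserveLCSSA);
}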
/// ParseMicrosoftAsmStatement. When -fms-extensions/-fasm-blocks is enabled, /// this routine is called to collect the tokens for an MS asm statement. /// /// [MS] ms-asm-statement: /// ms-asm-block /// ms-asm-block ms-asm-statement /// /// [MS] ms-asm-block: /// '__asm' ms-asm-line '\n' /// '__asm' '{' ms-asm-instruction-block[opt] '}' ';'[opt] /// /// [MS] ms-asm-instruction-block /// ms-asm-line /// ms-asm-line '\n' ms-asm-instruction-block /// StmtResult Parser::ParseMicrosoftAsmStatement(SourceLocation AsmLoc) { SourceManager &SrcMgr = PP.getSourceManager(); SourceLocation EndLoc = AsmLoc; SmallVector<Token, 4> AsmToks; bool SingleLineMode = true; unsigned BraceNesting = 0; unsigned short savedBraceCount = BraceCount; bool InAsmComment = false; FileID FID; unsigned LineNo = 0; unsigned NumTokensRead = 0; SmallVector<SourceLocation, 4> LBraceLocs; bool SkippedStartOfLine = false; if (Tok.is(tok::l_brace)) { // Braced inline asm: consume the opening brace. SingleLineMode = false; BraceNesting = 1; EndLoc = ConsumeBrace(); LBraceLocs.push_back(EndLoc); ++NumTokensRead; } else { // Single-line inline asm; compute which line it is on. std::pair<FileID, unsigned> ExpAsmLoc = SrcMgr.getDecomposedExpansionLoc(EndLoc); FID = ExpAsmLoc.first; LineNo = SrcMgr.getLineNumber(FID, ExpAsmLoc.second); LBraceLocs.push_back(SourceLocation()); } SourceLocation TokLoc = Tok.getLocation(); do { // If we hit EOF, we're done, period. if (isEofOrEom()) break; if (!InAsmComment && Tok.is(tok::l_brace)) { // Consume the opening brace. SkippedStartOfLine = Tok.isAtStartOfLine(); EndLoc = ConsumeBrace(); BraceNesting++; LBraceLocs.push_back(EndLoc); TokLoc = Tok.getLocation(); ++NumTokensRead; continue; } else if (!InAsmComment && Tok.is(tok::semi)) { // A semicolon in an asm is the start of a comment. InAsmComment = true; if (!SingleLineMode) { // Compute which line the comment is on. std::pair<FileID, unsigned> ExpSemiLoc = SrcMgr.getDecomposedExpansionLoc(TokLoc); FID = ExpSemiLoc.first; LineNo = SrcMgr.getLineNumber(FID, ExpSemiLoc.second); } } else if (SingleLineMode || InAsmComment) { // If end-of-line is significant, check whether this token is on a // new line. std::pair<FileID, unsigned> ExpLoc = SrcMgr.getDecomposedExpansionLoc(TokLoc); if (ExpLoc.first != FID || SrcMgr.getLineNumber(ExpLoc.first, ExpLoc.second) != LineNo) { // If this is a single-line __asm, we're done, except if the next // line begins with an __asm too, in which case we finish a comment // if needed and then keep processing the next line as a single // line __asm. bool isAsm = Tok.is(tok::kw_asm); if (SingleLineMode && !isAsm) break; // We're no longer in a comment. InAsmComment = false; if (isAsm) { LineNo = SrcMgr.getLineNumber(ExpLoc.first, ExpLoc.second); SkippedStartOfLine = Tok.isAtStartOfLine(); } } else if (!InAsmComment && Tok.is(tok::r_brace)) { // In MSVC mode, braces only participate in brace matching and // separating the asm statements. This is an intentional // departure from the Apple gcc behavior. if (!BraceNesting) break; } } if (!InAsmComment && BraceNesting && Tok.is(tok::r_brace) && BraceCount == (savedBraceCount + BraceNesting)) { // Consume the closing brace. SkippedStartOfLine = Tok.isAtStartOfLine(); EndLoc = ConsumeBrace(); BraceNesting--; // Finish if all of the opened braces in the inline asm section were // consumed. 
if (BraceNesting == 0 && !SingleLineMode) break; else { LBraceLocs.pop_back(); TokLoc = Tok.getLocation(); ++NumTokensRead; continue; } } // Consume the next token; make sure we don't modify the brace count etc. // if we are in a comment. EndLoc = TokLoc; if (InAsmComment) PP.Lex(Tok); else { // Set the token as the start of line if we skipped the original start // of line token in case it was a nested brace. if (SkippedStartOfLine) Tok.setFlag(Token::StartOfLine); AsmToks.push_back(Tok); ConsumeAnyToken(); } TokLoc = Tok.getLocation(); ++NumTokensRead; SkippedStartOfLine = false; } while (1); if (BraceNesting && BraceCount != savedBraceCount) { // __asm without closing brace (this can happen at EOF). for (unsigned i = 0; i < BraceNesting; ++i) { Diag(Tok, diag::err_expected) << tok::r_brace; Diag(LBraceLocs.back(), diag::note_matching) << tok::l_brace; LBraceLocs.pop_back(); } return StmtError(); } else if (NumTokensRead == 0) { // Empty __asm. Diag(Tok, diag::err_expected) << tok::l_brace; return StmtError(); } // Okay, prepare to use MC to parse the assembly. SmallVector<StringRef, 4> ConstraintRefs; SmallVector<Expr *, 4> Exprs; SmallVector<StringRef, 4> ClobberRefs; // We need an actual supported target. const llvm::Triple &TheTriple = Actions.Context.getTargetInfo().getTriple(); llvm::Triple::ArchType ArchTy = TheTriple.getArch(); const std::string &TT = TheTriple.getTriple(); const llvm::Target *TheTarget = nullptr; bool UnsupportedArch = (ArchTy != llvm::Triple::x86 && ArchTy != llvm::Triple::x86_64); if (UnsupportedArch) { Diag(AsmLoc, diag::err_msasm_unsupported_arch) << TheTriple.getArchName(); } else { std::string Error; TheTarget = llvm::TargetRegistry::lookupTarget(TT, Error); if (!TheTarget) Diag(AsmLoc, diag::err_msasm_unable_to_create_target) << Error; } assert(!LBraceLocs.empty() && "Should have at least one location here"); // If we don't support assembly, or the assembly is empty, we don't // need to instantiate the AsmParser, etc. if (!TheTarget || AsmToks.empty()) { return Actions.ActOnMSAsmStmt(AsmLoc, LBraceLocs[0], AsmToks, StringRef(), /*NumOutputs*/ 0, /*NumInputs*/ 0, ConstraintRefs, ClobberRefs, Exprs, EndLoc); } // Expand the tokens into a string buffer. SmallString<512> AsmString; SmallVector<unsigned, 8> TokOffsets; if (buildMSAsmString(PP, AsmLoc, AsmToks, TokOffsets, AsmString)) return StmtError(); std::unique_ptr<llvm::MCRegisterInfo> MRI(TheTarget->createMCRegInfo(TT)); std::unique_ptr<llvm::MCAsmInfo> MAI(TheTarget->createMCAsmInfo(*MRI, TT)); // Get the instruction descriptor. std::unique_ptr<llvm::MCInstrInfo> MII(TheTarget->createMCInstrInfo()); std::unique_ptr<llvm::MCObjectFileInfo> MOFI(new llvm::MCObjectFileInfo()); std::unique_ptr<llvm::MCSubtargetInfo> STI( TheTarget->createMCSubtargetInfo(TT, "", "")); llvm::SourceMgr TempSrcMgr; llvm::MCContext Ctx(MAI.get(), MRI.get(), MOFI.get(), &TempSrcMgr); MOFI->InitMCObjectFileInfo(TheTriple, llvm::Reloc::Default, llvm::CodeModel::Default, Ctx); std::unique_ptr<llvm::MemoryBuffer> Buffer = llvm::MemoryBuffer::getMemBuffer(AsmString, "<MS inline asm>"); // Tell SrcMgr about this buffer, which is what the parser will pick up. TempSrcMgr.AddNewSourceBuffer(std::move(Buffer), llvm::SMLoc()); std::unique_ptr<llvm::MCStreamer> Str(createNullStreamer(Ctx)); std::unique_ptr<llvm::MCAsmParser> Parser( createMCAsmParser(TempSrcMgr, Ctx, *Str.get(), *MAI)); // FIXME: init MCOptions from sanitizer flags here. 
llvm::MCTargetOptions MCOptions; std::unique_ptr<llvm::MCTargetAsmParser> TargetParser( TheTarget->createMCAsmParser(*STI, *Parser, *MII, MCOptions)); std::unique_ptr<llvm::MCInstPrinter> IP( TheTarget->createMCInstPrinter(llvm::Triple(TT), 1, *MAI, *MII, *MRI)); // Change to the Intel dialect. Parser->setAssemblerDialect(1); Parser->setTargetParser(*TargetParser.get()); Parser->setParsingInlineAsm(true); TargetParser->setParsingInlineAsm(true); ClangAsmParserCallback Callback(*this, AsmLoc, AsmString, AsmToks, TokOffsets); TargetParser->setSemaCallback(&Callback); TempSrcMgr.setDiagHandler(ClangAsmParserCallback::DiagHandlerCallback, &Callback); unsigned NumOutputs; unsigned NumInputs; std::string AsmStringIR; SmallVector<std::pair<void *, bool>, 4> OpExprs; SmallVector<std::string, 4> Constraints; SmallVector<std::string, 4> Clobbers; if (Parser->parseMSInlineAsm(AsmLoc.getPtrEncoding(), AsmStringIR, NumOutputs, NumInputs, OpExprs, Constraints, Clobbers, MII.get(), IP.get(), Callback)) return StmtError(); // Filter out "fpsw". Clang doesn't accept it, and it always lists flags and // fpsr as clobbers. auto End = std::remove(Clobbers.begin(), Clobbers.end(), "fpsw"); Clobbers.erase(End, Clobbers.end()); // Build the vector of clobber StringRefs. ClobberRefs.insert(ClobberRefs.end(), Clobbers.begin(), Clobbers.end()); // Recast the void pointers and build the vector of constraint StringRefs. unsigned NumExprs = NumOutputs + NumInputs; ConstraintRefs.resize(NumExprs); Exprs.resize(NumExprs); for (unsigned i = 0, e = NumExprs; i != e; ++i) { Expr *OpExpr = static_cast<Expr *>(OpExprs[i].first); if (!OpExpr) return StmtError(); // Need address of variable. if (OpExprs[i].second) OpExpr = Actions.BuildUnaryOp(getCurScope(), AsmLoc, UO_AddrOf, OpExpr).get(); ConstraintRefs[i] = StringRef(Constraints[i]); Exprs[i] = OpExpr; } // FIXME: We should be passing source locations for better diagnostics. return Actions.ActOnMSAsmStmt(AsmLoc, LBraceLocs[0], AsmToks, AsmStringIR, NumOutputs, NumInputs, ConstraintRefs, ClobberRefs, Exprs, EndLoc); }
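// For reference, the two surface forms the grammar above accepts; both are
// collected token-by-token and handed to parseMSInlineAsm (x86/x86-64 only,
// per the architecture check):
//
//   __asm mov eax, ebx          ; single-line form, ended by end-of-line
//
//   __asm {
//     mov eax, ebx              ; braced form: a ';' starts a comment
//     add eax, 1
//   }                           ; an optional ';' may follow the '}'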
/// runOnLoop - Remove dead loops, by which we mean loops that do not impact the /// observable behavior of the program other than finite running time. Note that /// we ensure this never removes a loop that might be infinite, as doing so /// could change the halting/non-halting nature of a program. /// NOTE: This entire process relies pretty heavily on LoopSimplify and LCSSA /// in order to make various safety checks work. bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) { // We can only remove the loop if there is a preheader that we can // branch from after removing it. BasicBlock* preheader = L->getLoopPreheader(); if (!preheader) return false; // If LoopSimplify form is not available, stay out of trouble. if (!L->hasDedicatedExits()) return false; // We can't remove loops that contain subloops. If the subloops were dead, // they would already have been removed in earlier executions of this pass. if (L->begin() != L->end()) return false; SmallVector<BasicBlock*, 4> exitingBlocks; L->getExitingBlocks(exitingBlocks); SmallVector<BasicBlock*, 4> exitBlocks; L->getUniqueExitBlocks(exitBlocks); // We require that the loop only have a single exit block. Otherwise, we'd // be in the situation of needing to be able to solve statically which exit // block will be branched to, or trying to preserve the branching logic in // a loop invariant manner. if (exitBlocks.size() != 1) return false; // Finally, we have to check that the loop really is dead. bool Changed = false; if (!IsLoopDead(L, exitingBlocks, exitBlocks, Changed, preheader)) return Changed; // Don't remove loops for which we can't solve the trip count. // They could be infinite, in which case we'd be changing program behavior. ScalarEvolution& SE = getAnalysis<ScalarEvolution>(); const SCEV *S = SE.getMaxBackedgeTakenCount(L); if (isa<SCEVCouldNotCompute>(S)) return Changed; // Now that we know the removal is safe, remove the loop by changing the // branch from the preheader to go to the single exit block. BasicBlock* exitBlock = exitBlocks[0]; // Because we're deleting a large chunk of code at once, the sequence in which // we remove things is very important to avoid invalidation issues. Don't // mess with this unless you have good reason and know what you're doing. // Tell ScalarEvolution that the loop is deleted. Do this before // deleting the loop so that ScalarEvolution can look at the loop // to determine what it needs to clean up. SE.forgetLoop(L); // Connect the preheader directly to the exit block. TerminatorInst* TI = preheader->getTerminator(); TI->replaceUsesOfWith(L->getHeader(), exitBlock); // Rewrite phis in the exit block to get their inputs from // the preheader instead of the exiting block. BasicBlock* exitingBlock = exitingBlocks[0]; BasicBlock::iterator BI = exitBlock->begin(); while (PHINode* P = dyn_cast<PHINode>(BI)) { int j = P->getBasicBlockIndex(exitingBlock); assert(j >= 0 && "Can't find exiting block in exit block's phi node!"); P->setIncomingBlock(j, preheader); for (unsigned i = 1; i < exitingBlocks.size(); ++i) P->removeIncomingValue(exitingBlocks[i]); ++BI; } // Update the dominator tree and remove the instructions and blocks that will // be deleted from the reference counting scheme. DominatorTree& DT = getAnalysis<DominatorTree>(); SmallVector<DomTreeNode*, 8> ChildNodes; for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end(); LI != LE; ++LI) { // Move all of the block's children to be children of the preheader, which // allows us to remove the domtree entry for the block.
ChildNodes.insert(ChildNodes.begin(), DT[*LI]->begin(), DT[*LI]->end()); for (SmallVector<DomTreeNode*, 8>::iterator DI = ChildNodes.begin(), DE = ChildNodes.end(); DI != DE; ++DI) { DT.changeImmediateDominator(*DI, DT[preheader]); } ChildNodes.clear(); DT.eraseNode(*LI); // Remove the block from the reference counting scheme, so that we can // delete it freely later. (*LI)->dropAllReferences(); } // Erase the instructions and the blocks without having to worry // about ordering because we already dropped the references. // NOTE: This iteration is safe because erasing the block does not remove its // entry from the loop's block list. We do that in the next section. for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end(); LI != LE; ++LI) (*LI)->eraseFromParent(); // Finally, remove the blocks from LoopInfo. This has to happen late because // otherwise our loop iterators won't work. LoopInfo& loopInfo = getAnalysis<LoopInfo>(); SmallPtrSet<BasicBlock*, 8> blocks; blocks.insert(L->block_begin(), L->block_end()); for (SmallPtrSet<BasicBlock*,8>::iterator I = blocks.begin(), E = blocks.end(); I != E; ++I) loopInfo.removeBlock(*I); // The last step is to inform the loop pass manager that we've // eliminated this loop. LPM.deleteLoopFromQueue(L); Changed = true; ++NumDeleted; return Changed; }
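// Shape of a loop this pass removes, sketched at the source level (the
// function and names are illustrative, not from the pass): the loop writes
// only values that are dead after it, has a single exit block, and has a
// computable backedge-taken count, so deleting it changes neither the
// observable behavior nor the termination of the program.
static int deadLoopExample(int n) {
  int acc = 0;
  for (int i = 0; i < n; ++i)
    acc += i; // 'acc' is never read after the loop
  return n;   // the loop contributes nothing to the result
}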
/// Remove dead functions that are not included in DNR (Do Not Remove) list. bool LegacyInlinerBase::removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly) { SmallVector<CallGraphNode *, 16> FunctionsToRemove; SmallVector<Function *, 16> DeadFunctionsInComdats; auto RemoveCGN = [&](CallGraphNode *CGN) { // Remove any call graph edges from the function to its callees. CGN->removeAllCalledFunctions(); // Remove any edges from the external node to the function's call graph // node. These edges might have been made irrelevant due to // optimization of the program. CG.getExternalCallingNode()->removeAnyCallEdgeTo(CGN); // Remove the node for the callee from the call graph and delete it. FunctionsToRemove.push_back(CGN); }; // Scan for all of the functions, looking for ones that should now be removed // from the program. Insert the dead ones in the FunctionsToRemove list. for (const auto &I : CG) { CallGraphNode *CGN = I.second.get(); Function *F = CGN->getFunction(); if (!F || F->isDeclaration()) continue; // Handle the case when this function is called and we only want to care // about always-inline functions. This is a bit of a hack to share code // between here and the InlineAlways pass. if (AlwaysInlineOnly && !F->hasFnAttribute(Attribute::AlwaysInline)) continue; // If the only remaining users of the function are dead constants, remove // them. F->removeDeadConstantUsers(); if (!F->isDefTriviallyDead()) continue; // It is unsafe to drop a function with discardable linkage from a COMDAT // without also dropping the other members of the COMDAT. // The inliner doesn't visit non-function entities which are in COMDAT // groups so it is unsafe to do so *unless* the linkage is local. if (!F->hasLocalLinkage()) { if (F->hasComdat()) { DeadFunctionsInComdats.push_back(F); continue; } } RemoveCGN(CGN); } if (!DeadFunctionsInComdats.empty()) { // Filter out the functions whose comdats remain alive. filterDeadComdatFunctions(CG.getModule(), DeadFunctionsInComdats); // Remove the rest. for (Function *F : DeadFunctionsInComdats) RemoveCGN(CG[F]); } if (FunctionsToRemove.empty()) return false; // Now that we know which functions to delete, do so. We didn't want to do // this inline, because that would invalidate our CallGraph::iterator // objects. :( // // Note that it doesn't matter that we are iterating over a non-stable order // here to do this, it doesn't matter which order the functions are deleted // in. array_pod_sort(FunctionsToRemove.begin(), FunctionsToRemove.end()); FunctionsToRemove.erase( std::unique(FunctionsToRemove.begin(), FunctionsToRemove.end()), FunctionsToRemove.end()); for (CallGraphNode *CGN : FunctionsToRemove) { delete CG.removeFunctionFromModule(CGN); ++NumDeleted; } return true; }
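// The deduplication step above is the standard sort/unique/erase idiom; in
// isolation, over a plain std::vector for illustration, it looks like this:
#include <algorithm>
#include <vector>

static void dedup(std::vector<int> &V) {
  std::sort(V.begin(), V.end());                     // group equal elements
  V.erase(std::unique(V.begin(), V.end()), V.end()); // keep one of each run
}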
static void createVectorVariantWrapper(llvm::Function *ScalarFunc, llvm::Function *VectorFunc, unsigned VLen, const SmallVectorImpl<ParamInfo> &Info) { assert(ScalarFunc->arg_size() == Info.size() && "Wrong number of parameter infos"); assert((VLen & (VLen - 1)) == 0 && "VLen must be a power-of-2"); bool IsMasked = VectorFunc->arg_size() == ScalarFunc->arg_size() + 1; llvm::LLVMContext &Context = ScalarFunc->getContext(); llvm::BasicBlock *Entry = llvm::BasicBlock::Create(Context, "entry", VectorFunc); llvm::BasicBlock *LoopCond = llvm::BasicBlock::Create(Context, "loop.cond", VectorFunc); llvm::BasicBlock *LoopBody = llvm::BasicBlock::Create(Context, "loop.body", VectorFunc); llvm::BasicBlock *MaskOn = IsMasked ? llvm::BasicBlock::Create(Context, "mask_on", VectorFunc) : 0; llvm::BasicBlock *MaskOff = IsMasked ? llvm::BasicBlock::Create(Context, "mask_off", VectorFunc) : 0; llvm::BasicBlock *LoopStep = llvm::BasicBlock::Create(Context, "loop.step", VectorFunc); llvm::BasicBlock *LoopEnd = llvm::BasicBlock::Create(Context, "loop.end", VectorFunc); llvm::Value *VectorRet = 0; SmallVector<llvm::Value*, 4> VectorArgs; // The loop counter. llvm::Type *IndexTy = llvm::Type::getInt32Ty(Context); llvm::Value *Index = 0; llvm::Value *Mask = 0; // Copy the names from the scalar args to the vector args. { llvm::Function::arg_iterator SI = ScalarFunc->arg_begin(), SE = ScalarFunc->arg_end(), VI = VectorFunc->arg_begin(); for ( ; SI != SE; ++SI, ++VI) VI->setName(SI->getName()); if (IsMasked) VI->setName("mask"); } llvm::IRBuilder<> Builder(Entry); { if (!VectorFunc->getReturnType()->isVoidTy()) VectorRet = Builder.CreateAlloca(VectorFunc->getReturnType()); Index = Builder.CreateAlloca(IndexTy, 0, "index"); Builder.CreateStore(llvm::ConstantInt::get(IndexTy, 0), Index); llvm::Function::arg_iterator VI = VectorFunc->arg_begin(); for (SmallVectorImpl<ParamInfo>::const_iterator I = Info.begin(), IE = Info.end(); I != IE; ++I, ++VI) { llvm::Value *Arg = VI; switch (I->Kind) { case PK_Vector: assert(Arg->getType()->isVectorTy() && "Not a vector"); assert(VLen == Arg->getType()->getVectorNumElements() && "Wrong number of elements"); break; case PK_LinearConst: Arg = buildLinearArg(Builder, VLen, Arg, cast<llvm::ConstantAsMetadata>(I->Step)->getValue()); Arg->setName(VI->getName() + ".linear"); break; case PK_Linear: { unsigned Number = cast<llvm::ConstantInt>( cast<llvm::ConstantAsMetadata>(I->Step)->getValue())->getZExtValue(); llvm::Function::arg_iterator ArgI = VectorFunc->arg_begin(); std::advance(ArgI, Number); llvm::Value *Step = ArgI; Arg = buildLinearArg(Builder, VLen, Arg, Step); Arg->setName(VI->getName() + ".linear"); } break; case PK_Uniform: Arg = Builder.CreateVectorSplat(VLen, Arg); Arg->setName(VI->getName() + ".uniform"); break; } VectorArgs.push_back(Arg); } if (IsMasked) Mask = buildMask(Builder, VLen, VI); Builder.CreateBr(LoopCond); } Builder.SetInsertPoint(LoopCond); { llvm::Value *Cond = Builder.CreateICmpULT( Builder.CreateLoad(Index), llvm::ConstantInt::get(IndexTy, VLen)); Builder.CreateCondBr(Cond, LoopBody, LoopEnd); } llvm::Value *VecIndex = 0; Builder.SetInsertPoint(LoopBody); { VecIndex = Builder.CreateLoad(Index); if (IsMasked) { llvm::Value *ScalarMask = Builder.CreateExtractElement(Mask, VecIndex); Builder.CreateCondBr(ScalarMask, MaskOn, MaskOff); } } Builder.SetInsertPoint(IsMasked ? MaskOn : LoopBody); { // Build the argument list for the scalar function by extracting element // 'VecIndex' from the vector arguments. 
SmallVector<llvm::Value*, 4> ScalarArgs; for (SmallVectorImpl<llvm::Value*>::iterator VI = VectorArgs.begin(), VE = VectorArgs.end(); VI != VE; ++VI) { assert((*VI)->getType()->isVectorTy() && "Not a vector"); ScalarArgs.push_back(Builder.CreateExtractElement(*VI, VecIndex)); } // Call the scalar function with the extracted scalar arguments. llvm::Value *ScalarRet = Builder.CreateCall(ScalarFunc, ScalarArgs); // If the function returns a value, insert the scalar return value into the // vector return value. if (VectorRet) { llvm::Value *V = Builder.CreateLoad(VectorRet); V = Builder.CreateInsertElement(V, ScalarRet, VecIndex); Builder.CreateStore(V, VectorRet); } Builder.CreateBr(LoopStep); } if (IsMasked) { Builder.SetInsertPoint(MaskOff); if (VectorRet) { llvm::Value *V = Builder.CreateLoad(VectorRet); llvm::Value *Zero = llvm::Constant::getNullValue(ScalarFunc->getReturnType()); V = Builder.CreateInsertElement(V, Zero, VecIndex); Builder.CreateStore(V, VectorRet); } Builder.CreateBr(LoopStep); } Builder.SetInsertPoint(LoopStep); { // Index = Index + 1 VecIndex = Builder.CreateAdd(VecIndex, llvm::ConstantInt::get(IndexTy, 1)); Builder.CreateStore(VecIndex, Index); Builder.CreateBr(LoopCond); } Builder.SetInsertPoint(LoopEnd); { if (VectorRet) Builder.CreateRet(Builder.CreateLoad(VectorRet)); else Builder.CreateRetVoid(); } }
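// What the generated wrapper computes, re-expressed as plain C++ for the
// masked case; scalar_op and the fixed VLen of 4 are hypothetical stand-ins.
// Each lane either calls the scalar function or stores a zero, matching the
// mask_on / mask_off blocks built above.
extern float scalar_op(float); // hypothetical scalar function
static void vectorWrapperEquivalent(const float In[4], const bool Mask[4],
                                    float Out[4]) {
  for (unsigned Lane = 0; Lane != 4; ++Lane) {
    if (Mask[Lane])
      Out[Lane] = scalar_op(In[Lane]); // mask_on: call the scalar function
    else
      Out[Lane] = 0.0f;                // mask_off: insert the null value
  }
}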
static void emitImplicitValueConstructor(SILGenFunction &gen, ConstructorDecl *ctor) { RegularLocation Loc(ctor); Loc.markAutoGenerated(); // FIXME: Handle 'self' along with the other arguments. auto *paramList = ctor->getParameterList(1); auto *selfDecl = ctor->getImplicitSelfDecl(); auto selfTyCan = selfDecl->getType()->getInOutObjectType(); auto selfIfaceTyCan = selfDecl->getInterfaceType()->getInOutObjectType(); SILType selfTy = gen.getLoweredType(selfTyCan); // Emit the indirect return argument, if any. SILValue resultSlot; if (selfTy.isAddressOnly(gen.SGM.M) && gen.silConv.useLoweredAddresses()) { auto &AC = gen.getASTContext(); auto VD = new (AC) ParamDecl(/*IsLet*/ false, SourceLoc(), SourceLoc(), AC.getIdentifier("$return_value"), SourceLoc(), AC.getIdentifier("$return_value"), Type(), ctor); VD->setInterfaceType(selfIfaceTyCan); resultSlot = gen.F.begin()->createFunctionArgument(selfTy, VD); } // Emit the elementwise arguments. SmallVector<RValue, 4> elements; for (size_t i = 0, size = paramList->size(); i < size; ++i) { auto ¶m = paramList->get(i); elements.push_back( emitImplicitValueConstructorArg( gen, Loc, param->getInterfaceType()->getCanonicalType(), ctor)); } emitConstructorMetatypeArg(gen, ctor); auto *decl = selfTy.getStructOrBoundGenericStruct(); assert(decl && "not a struct?!"); // If we have an indirect return slot, initialize it in-place. if (resultSlot) { auto elti = elements.begin(), eltEnd = elements.end(); for (VarDecl *field : decl->getStoredProperties()) { auto fieldTy = selfTy.getFieldType(field, gen.SGM.M); auto &fieldTL = gen.getTypeLowering(fieldTy); SILValue slot = gen.B.createStructElementAddr(Loc, resultSlot, field, fieldTL.getLoweredType().getAddressType()); InitializationPtr init(new KnownAddressInitialization(slot)); // An initialized 'let' property has a single value specified by the // initializer - it doesn't come from an argument. if (!field->isStatic() && field->isLet() && field->getParentInitializer()) { #ifndef NDEBUG auto fieldTy = decl->getDeclContext()->mapTypeIntoContext( field->getInterfaceType()); assert(fieldTy->isEqual(field->getParentInitializer()->getType()) && "Checked by sema"); #endif // Cleanup after this initialization. FullExpr scope(gen.Cleanups, field->getParentPatternBinding()); gen.emitExprInto(field->getParentInitializer(), init.get()); continue; } assert(elti != eltEnd && "number of args does not match number of fields"); (void)eltEnd; std::move(*elti).forwardInto(gen, Loc, init.get()); ++elti; } gen.B.createReturn(ImplicitReturnLocation::getImplicitReturnLoc(Loc), gen.emitEmptyTuple(Loc)); return; } // Otherwise, build a struct value directly from the elements. SmallVector<SILValue, 4> eltValues; auto elti = elements.begin(), eltEnd = elements.end(); for (VarDecl *field : decl->getStoredProperties()) { auto fieldTy = selfTy.getFieldType(field, gen.SGM.M); SILValue v; // An initialized 'let' property has a single value specified by the // initializer - it doesn't come from an argument. if (!field->isStatic() && field->isLet() && field->getParentInitializer()) { // Cleanup after this initialization. 
FullExpr scope(gen.Cleanups, field->getParentPatternBinding()); v = gen.emitRValue(field->getParentInitializer()) .forwardAsSingleStorageValue(gen, fieldTy, Loc); } else { assert(elti != eltEnd && "number of args does not match number of fields"); (void)eltEnd; v = std::move(*elti).forwardAsSingleStorageValue(gen, fieldTy, Loc); ++elti; } eltValues.push_back(v); } SILValue selfValue = gen.B.createStruct(Loc, selfTy, eltValues); gen.B.createReturn(ImplicitReturnLocation::getImplicitReturnLoc(Loc), selfValue); return; }
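// At the source level this emits the implicit memberwise initializer for a
// struct: each stored property is either taken from the matching constructor
// argument or, for a 'let' property with an initial-value expression, from
// that expression. Illustrative Swift shape (shown here as a comment only):
//
//   struct S {
//     var a: Int   // initialized from the 'a:' constructor argument
//     let b = 42   // initialized from its initializer, not from an argument
//   }
//
// so S(a: 1) yields a value whose fields are { 1, 42 }.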
/// Emit the language-specific data that _except_handler3 and 4 expect. This is /// functionally equivalent to the __C_specific_handler table, except it is /// indexed by state number instead of IP. void WinException::emitExceptHandlerTable(const MachineFunction *MF) { MCStreamer &OS = *Asm->OutStreamer; const Function *F = MF->getFunction(); StringRef FLinkageName = GlobalValue::getRealLinkageName(F->getName()); WinEHFuncInfo &FuncInfo = MMI->getWinEHFuncInfo(F); emitEHRegistrationOffsetLabel(FuncInfo, FLinkageName); // Emit the __ehtable label that we use for llvm.x86.seh.lsda. MCSymbol *LSDALabel = Asm->OutContext.getOrCreateLSDASymbol(FLinkageName); OS.EmitLabel(LSDALabel); const Function *Per = MMI->getPersonality(); StringRef PerName = Per->getName(); int BaseState = -1; if (PerName == "_except_handler4") { // The LSDA for _except_handler4 starts with this struct, followed by the // scope table: // // struct EH4ScopeTable { // int32_t GSCookieOffset; // int32_t GSCookieXOROffset; // int32_t EHCookieOffset; // int32_t EHCookieXOROffset; // ScopeTableEntry ScopeRecord[]; // }; // // Only the EHCookieOffset field appears to vary, and it appears to be the // offset from the final saved SP value to the retaddr. OS.EmitIntValue(-2, 4); OS.EmitIntValue(0, 4); // FIXME: Calculate. OS.EmitIntValue(9999, 4); OS.EmitIntValue(0, 4); BaseState = -2; } // Build a list of pointers to LandingPadInfos and then sort by WinEHState. const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads(); SmallVector<const LandingPadInfo *, 4> LPads; LPads.reserve((PadInfos.size())); for (const LandingPadInfo &LPInfo : PadInfos) LPads.push_back(&LPInfo); std::sort(LPads.begin(), LPads.end(), [](const LandingPadInfo *L, const LandingPadInfo *R) { return L->WinEHState < R->WinEHState; }); // For each action in each lpad, emit one of these: // struct ScopeTableEntry { // int32_t EnclosingLevel; // int32_t (__cdecl *Filter)(); // void *HandlerOrFinally; // }; // // The "outermost" action will use BaseState as its enclosing level. Each // other action will refer to the previous state as its enclosing level. int CurState = 0; for (const LandingPadInfo *LPInfo : LPads) { int EnclosingLevel = BaseState; assert(CurState + int(LPInfo->SEHHandlers.size()) - 1 == LPInfo->WinEHState && "gaps in the SEH scope table"); for (auto I = LPInfo->SEHHandlers.rbegin(), E = LPInfo->SEHHandlers.rend(); I != E; ++I) { const SEHHandler &Handler = *I; const BlockAddress *BA = Handler.RecoverBA; const Function *F = Handler.FilterOrFinally; assert(F && "cannot catch all in 32-bit SEH without filter function"); const MCExpr *FilterOrNull = create32bitRef(BA ? Asm->getSymbol(F) : nullptr); const MCExpr *ExceptOrFinally = create32bitRef( BA ? Asm->GetBlockAddressSymbol(BA) : Asm->getSymbol(F)); OS.EmitIntValue(EnclosingLevel, 4); OS.EmitValue(FilterOrNull, 4); OS.EmitValue(ExceptOrFinally, 4); // The next state unwinds to this state. EnclosingLevel = CurState; CurState++; } } }
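// Worked example of the EnclosingLevel chaining above, for a function with
// three SEH actions and no gaps under _except_handler4 (so BaseState == -2);
// the values are schematic, not taken from real output:
//
//   state 0: { EnclosingLevel: -2, Filter: filt0, HandlerOrFinally: h0 }
//   state 1: { EnclosingLevel:  0, Filter: filt1, HandlerOrFinally: h1 }
//   state 2: { EnclosingLevel:  1, Filter: filt2, HandlerOrFinally: h2 }
//
// The outermost action refers to BaseState, and every other action refers to
// the state emitted just before it.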
Function* generateFunctionWrapperWithParams(const std::string& wrapper_name, Function* f, Module* mod, std::vector<const Type*>& additionalParams, const bool inlineCall) { assert (f && mod); assert (f->getParent()); if (f->getParent() != mod) { errs() << "WARNING: generateFunctionWrapperWithParams(): module '" << mod->getModuleIdentifier() << "' is not the parent of function '" << f->getNameStr() << "' (parent: '" << f->getParent()->getModuleIdentifier() << "')!\n"; } // first make sure there is no function with that name in mod // TODO: Implement logic from LLVM tutorial that checks for matching extern // declaration without body and, in this case, goes on. if (mod->getFunction(wrapper_name)) { errs() << "ERROR: generateFunctionWrapperWithParams(): Function with name '" << wrapper_name << "' already exists in module '" << mod->getModuleIdentifier() << "'!\n"; return NULL; } // warn if f has a return value (the wrapper returns void) if (!f->getReturnType()->isVoidTy()) { errs() << "WARNING: generateFunctionWrapperWithParams(): target function '" << f->getNameStr() << "' has a return value, which will be ignored!\n"; } LLVMContext& context = mod->getContext(); IRBuilder<> builder(context); // determine all arguments of f std::vector<const Argument*> oldArgs; std::vector<const Type*> oldArgTypes; for (Function::const_arg_iterator A=f->arg_begin(), AE=f->arg_end(); A!=AE; ++A) { oldArgs.push_back(A); oldArgTypes.push_back(A->getType()); } // create a struct type with a member for each argument const StructType* argStructType = StructType::get(context, oldArgTypes, false); // create function //const FunctionType* fType = TypeBuilder<void(void*), true>::get(context); std::vector<const Type*> params; params.push_back(PointerType::getUnqual(argStructType)); for (std::vector<const Type*>::const_iterator it=additionalParams.begin(), E=additionalParams.end(); it!=E; ++it) { params.push_back(*it); } const FunctionType* fType = FunctionType::get(Type::getVoidTy(context), params, false); Function* wrapper = Function::Create(fType, Function::ExternalLinkage, wrapper_name, mod); // set name of the struct argument Argument* arg_str = wrapper->arg_begin(); arg_str->setName("arg_struct"); // create entry block BasicBlock* entryBB = BasicBlock::Create(context, "entry", wrapper); builder.SetInsertPoint(entryBB); // create extractions of arguments out of the struct SmallVector<Value*, 8> extractedArgs; for (unsigned i=0, e=oldArgTypes.size(); i<e; ++i) { // create GEP std::vector<Value*> indices; indices.push_back(Constant::getNullValue(Type::getInt32Ty(context))); // step through pointer indices.push_back(ConstantInt::get(context, APInt(32, i))); // index of argument Value* gep = builder.CreateGEP(arg_str, indices.begin(), indices.end(), ""); // create load LoadInst* load = builder.CreateLoad(gep, false, ""); // store as argument for call to f extractedArgs.push_back(load); } // create the call to f CallInst* call = builder.CreateCall(f, extractedArgs.begin(), extractedArgs.end(), ""); // the function returns void builder.CreateRetVoid(); //wrapper->addAttribute(0, Attribute::NoUnwind); // function does not unwind stack -> why is there an index required ???
wrapper->setDoesNotCapture(1, true); // arg ptr does not capture wrapper->setDoesNotAlias(1, true); // arg ptr does not alias // inline call if required if (inlineCall) { InlineFunctionInfo IFI(NULL, new TargetData(mod)); const bool success = InlineFunction(call, IFI); if (!success) { errs() << "WARNING: could not inline function call inside wrapper: " << *call << "\n"; } assert (success); } //verifyFunction(*wrapper, NULL); return wrapper; }
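// The transformation at the C level, with hypothetical names: a target
// void f(int, float) gets a wrapper that receives every argument through one
// struct pointer, which is exactly what the GEP/load sequence above builds.
struct f_args { int a; float b; };
extern void f(int a, float b);
static void f_wrapper(f_args *args) {
  f(args->a, args->b); // load each member and forward it to the wrapped call
}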
bool PPCCTRLoops::convertToCTRLoop(Loop *L) { bool MadeChange = false; // Do not convert small loops with a short trip count to CTR loops. unsigned ConstTripCount = SE->getSmallConstantTripCount(L); if (ConstTripCount && ConstTripCount < SmallCTRLoopThreshold) { SmallPtrSet<const Value *, 32> EphValues; auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache( *L->getHeader()->getParent()); CodeMetrics::collectEphemeralValues(L, &AC, EphValues); CodeMetrics Metrics; for (BasicBlock *BB : L->blocks()) Metrics.analyzeBasicBlock(BB, *TTI, EphValues); // 6 is an approximate latency for the mtctr instruction. if (Metrics.NumInsts <= (6 * SchedModel.getIssueWidth())) return false; } // Process nested loops first. for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) { if (convertToCTRLoop(*I)) { MadeChange = true; LLVM_DEBUG(dbgs() << "Nested loop converted\n"); } } // If a nested loop has been converted, then we can't convert this loop. if (MadeChange) return MadeChange; // Bail out if the loop has irreducible control flow. LoopBlocksRPO RPOT(L); RPOT.perform(LI); if (containsIrreducibleCFG<const BasicBlock *>(RPOT, *LI)) return false; #ifndef NDEBUG // Stop trying after reaching the limit (if any). int Limit = CTRLoopLimit; if (Limit >= 0) { if (Counter >= Limit) return false; Counter++; } #endif // We don't want to spill/restore the counter register, and so we don't // want to use the counter register if the loop contains calls. for (Loop::block_iterator I = L->block_begin(), IE = L->block_end(); I != IE; ++I) if (mightUseCTR(*I)) return MadeChange; SmallVector<BasicBlock*, 4> ExitingBlocks; L->getExitingBlocks(ExitingBlocks); // If there is an exit edge known to be frequently taken, // we should not transform this loop. for (auto &BB : ExitingBlocks) { Instruction *TI = BB->getTerminator(); if (!TI) continue; if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { uint64_t TrueWeight = 0, FalseWeight = 0; if (!BI->isConditional() || !BI->extractProfMetadata(TrueWeight, FalseWeight)) continue; // If the exit path is more frequent than the loop path, // we return here without further analysis for this loop. bool TrueIsExit = !L->contains(BI->getSuccessor(0)); if (( TrueIsExit && FalseWeight < TrueWeight) || (!TrueIsExit && FalseWeight > TrueWeight)) return MadeChange; } } BasicBlock *CountedExitBlock = nullptr; const SCEV *ExitCount = nullptr; BranchInst *CountedExitBranch = nullptr; for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(), IE = ExitingBlocks.end(); I != IE; ++I) { const SCEV *EC = SE->getExitCount(L, *I); LLVM_DEBUG(dbgs() << "Exit Count for " << *L << " from block " << (*I)->getName() << ": " << *EC << "\n"); if (isa<SCEVCouldNotCompute>(EC)) continue; if (const SCEVConstant *ConstEC = dyn_cast<SCEVConstant>(EC)) { if (ConstEC->getValue()->isZero()) continue; } else if (!SE->isLoopInvariant(EC, L)) continue; if (SE->getTypeSizeInBits(EC->getType()) > (TM->isPPC64() ? 64 : 32)) continue; // If this exiting block is contained in a nested loop, it is not eligible // for insertion of the branch-and-decrement since the inner loop would // end up messing up the value in the CTR. if (LI->getLoopFor(*I) != L) continue; // We now have a loop-invariant count of loop iterations (which is not the // constant zero) for which we know that this loop will not exit via this // exiting block. // We need to make sure that this block will run on every loop iteration. // For this to be true, we must dominate all blocks with backedges.
Such blocks are in-loop predecessors to the header block. bool NotAlways = false; for (pred_iterator PI = pred_begin(L->getHeader()), PIE = pred_end(L->getHeader()); PI != PIE; ++PI) { if (!L->contains(*PI)) continue; if (!DT->dominates(*I, *PI)) { NotAlways = true; break; } } if (NotAlways) continue; // Make sure this block ends with a conditional branch. Instruction *TI = (*I)->getTerminator(); if (!TI) continue; if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { if (!BI->isConditional()) continue; CountedExitBranch = BI; } else continue; // Note that this block may not be the loop latch block, even if the loop // has a latch block. CountedExitBlock = *I; ExitCount = EC; break; } if (!CountedExitBlock) return MadeChange; BasicBlock *Preheader = L->getLoopPreheader(); // If we don't have a preheader, then insert one. If we already have a // preheader, then we can use it (except if the preheader contains a use of // the CTR register because some such uses might be reordered by the // selection DAG after the mtctr instruction). if (!Preheader || mightUseCTR(Preheader)) Preheader = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA); if (!Preheader) return MadeChange; LLVM_DEBUG(dbgs() << "Preheader for exit count: " << Preheader->getName() << "\n"); // Insert the count into the preheader and replace the condition used by the // selected branch. MadeChange = true; SCEVExpander SCEVE(*SE, *DL, "loopcnt"); LLVMContext &C = SE->getContext(); Type *CountType = TM->isPPC64() ? Type::getInt64Ty(C) : Type::getInt32Ty(C); if (!ExitCount->getType()->isPointerTy() && ExitCount->getType() != CountType) ExitCount = SE->getZeroExtendExpr(ExitCount, CountType); ExitCount = SE->getAddExpr(ExitCount, SE->getOne(CountType)); Value *ECValue = SCEVE.expandCodeFor(ExitCount, CountType, Preheader->getTerminator()); IRBuilder<> CountBuilder(Preheader->getTerminator()); Module *M = Preheader->getParent()->getParent(); Function *MTCTRFunc = Intrinsic::getDeclaration(M, Intrinsic::ppc_mtctr, CountType); CountBuilder.CreateCall(MTCTRFunc, ECValue); IRBuilder<> CondBuilder(CountedExitBranch); Function *DecFunc = Intrinsic::getDeclaration(M, Intrinsic::ppc_is_decremented_ctr_nonzero); Value *NewCond = CondBuilder.CreateCall(DecFunc, {}); Value *OldCond = CountedExitBranch->getCondition(); CountedExitBranch->setCondition(NewCond); // The false branch must exit the loop. if (!L->contains(CountedExitBranch->getSuccessor(0))) CountedExitBranch->swapSuccessors(); // The old condition may be dead now, and may have even created a dead PHI // (the original induction variable). RecursivelyDeleteTriviallyDeadInstructions(OldCond); // Run through the basic blocks of the loop and see if any of them have dead // PHIs that can be removed. for (auto I : L->blocks()) DeleteDeadPHIs(I); ++NumCTRLoops; return MadeChange; }
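// The net effect in C terms (a sketch; 'body' is a hypothetical stand-in for
// the loop body): the compare-and-branch exit test is replaced by a CTR
// decrement-and-test, with mtctr(ExitCount + 1) in the preheader and bdnz as
// the new latch branch. ExitCount here is the backedge-taken count, so the
// body runs exitCount + 1 times.
extern void body();
static void ctrLoopEquivalent(unsigned exitCount) {
  unsigned ctr = exitCount + 1; // mtctr in the preheader
  do {
    body();
  } while (--ctr != 0); // bdnz: decrement CTR, loop while it is non-zero
}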