/// Reload the values that were packed into \p Struct.
///
/// For every position in \p OldValues a GEP to the struct field with the same
/// index is created, the field is loaded, and the pair (original value,
/// loaded value) is inserted into \p Map.
///
/// \param OldValues The original values, in struct-field order.
/// \param Struct    The struct value the fields are read from.
/// \param Map       Receives the original-value -> loaded-value pairs.
void OMPGenerator::extractValuesFromStruct(SetVector<Value *> OldValues,
                                           Value *Struct,
                                           ValueToValueMapTy &Map) {
  unsigned FieldNo = 0;
  while (FieldNo < OldValues.size()) {
    // The field index in the struct equals the position in OldValues.
    Value *FieldPtr = Builder.CreateStructGEP(Struct, FieldNo);
    Value *Reloaded = Builder.CreateLoad(FieldPtr);
    Map.insert(std::make_pair(OldValues[FieldNo], Reloaded));
    ++FieldNo;
  }
}
/// Reload the values that were packed into \p Struct.
///
/// Field \c i of \p Struct (of type \p Ty) is loaded and stored in \p Map
/// under the key \c OldValues[i]; an existing entry for that key is
/// overwritten by the assignment.
///
/// \param OldValues The original values, in struct-field order.
/// \param Ty        The type passed to the struct GEP.
/// \param Struct    The struct value the fields are read from.
/// \param Map       Receives the original-value -> loaded-value mapping.
void ParallelLoopGenerator::extractValuesFromStruct(
    SetVector<Value *> OldValues, Type *Ty, Value *Struct,
    ValueToValueMapTy &Map) {
  unsigned FieldNo = 0;
  while (FieldNo < OldValues.size()) {
    // The field index in the struct equals the position in OldValues.
    Value *FieldPtr = Builder.CreateStructGEP(Ty, Struct, FieldNo);
    Value *Reloaded = Builder.CreateLoad(FieldPtr);
    Map[OldValues[FieldNo]] = Reloaded;
    ++FieldNo;
  }
}
/// Reload the values that were packed into \p Struct and name the loads.
///
/// Field \c i of \p Struct (of type \p Ty) is loaded, the load is named
/// "polly.subfunc.arg." followed by the name of \c OldValues[i], and the
/// result is stored in \p Map under the key \c OldValues[i].
///
/// \param OldValues The original values, in struct-field order.
/// \param Ty        The type passed to the struct GEP.
/// \param Struct    The struct value the fields are read from.
/// \param Map       Receives the original-value -> loaded-value mapping.
void ParallelLoopGenerator::extractValuesFromStruct(
    SetVector<Value *> OldValues, Type *Ty, Value *Struct, ValueMapT &Map) {
  unsigned FieldNo = 0;
  while (FieldNo < OldValues.size()) {
    // The field index in the struct equals the position in OldValues.
    Value *FieldPtr = Builder.CreateStructGEP(Ty, Struct, FieldNo);
    Value *Reloaded = Builder.CreateLoad(FieldPtr);
    Reloaded->setName("polly.subfunc.arg." + OldValues[FieldNo]->getName());
    Map[OldValues[FieldNo]] = Reloaded;
    ++FieldNo;
  }
}
/// Compute the set of registers covered by \p Regs.
///
/// The result contains the given registers, everything added for them by
/// addSubRegsPreOrder(), and every super-register that has CoveredBySubRegs
/// set and whose sub-registers are all in the set already.
///
/// \returns A BitVector of size Registers.size() + 1 with the bit at each
///          covered register's EnumValue set.
BitVector CodeGenRegBank::computeCoveredRegisters(ArrayRef<Record*> Regs) {
  typedef SetVector<const CodeGenRegister*> RegSetVector;
  RegSetVector Covered;

  // Phase 1: seed the set with the requested registers. A register that is
  // seen for the first time also contributes its sub-registers (the
  // pre-order in which they are added does not matter here).
  for (unsigned Idx = 0, End = Regs.size(); Idx != End; ++Idx) {
    CodeGenRegister *Seed = getReg(Regs[Idx]);
    if (!Covered.insert(Seed))
      continue;
    Seed->addSubRegsPreOrder(Covered, *this);
  }

  // Phase 2: Covered doubles as a worklist. Its size is re-read on every
  // iteration so that super-registers appended below get visited in turn and
  // have their own super-registers checked.
  for (unsigned Next = 0; Next != Covered.size(); ++Next) {
    const CodeGenRegister::SuperRegList &Supers = Covered[Next]->getSuperRegs();
    for (unsigned S = 0, SEnd = Supers.size(); S != SEnd; ++S) {
      const CodeGenRegister *Candidate = Supers[S];
      if (!Candidate->CoveredBySubRegs)
        continue;
      if (Covered.count(Candidate))
        continue;

      // Advance past every sub-register that is already in the set; reaching
      // the end means the candidate is completely covered.
      const CodeGenRegister::SubRegMap &Subs = Candidate->getSubRegs();
      CodeGenRegister::SubRegMap::const_iterator SubI = Subs.begin();
      CodeGenRegister::SubRegMap::const_iterator SubE = Subs.end();
      while (SubI != SubE && Covered.count(SubI->second))
        ++SubI;

      if (SubI == SubE)
        Covered.insert(Candidate);
    }
  }

  // Phase 3: translate the set into a bit vector indexed by EnumValue.
  BitVector Result(Registers.size() + 1);
  for (RegSetVector::iterator I = Covered.begin(), E = Covered.end();
       I != E; ++I)
    Result.set((*I)->EnumValue);
  return Result;
}
// Starting from the single store V, repeatedly map the current set through
// get_loads() and then get_stores() until an iteration yields a set of the
// same size as the one it started from. The set produced by that last
// iteration is returned.
SetVector<Value*> qdp_jit_vec::get_all_linked_stores_from_store( Value* V )
{
  SetVector<Value*> stores;
  stores.insert(V);

  for (;;) {
    SetVector<Value*> expanded = get_stores( get_loads( stores ) );

    // Convergence is detected purely by comparing set sizes.
    const bool converged = ( expanded.size() == stores.size() );
    stores = expanded;
    if (converged)
      break;
  }

  return stores;
}
// Erasing the middle element of {0, 1, 2} must leave {0, 2} and hand back an
// iterator to the element that followed the erased one.
TEST(SetVector, EraseTest) {
  SetVector<int> Numbers;
  for (int N = 0; N != 3; ++N)
    Numbers.insert(N);

  auto AfterErased = Numbers.erase(std::next(Numbers.begin()));

  // The returned iterator is the one-after-erase position ...
  EXPECT_EQ(std::next(Numbers.begin()), AfterErased);
  // ... and the remaining contents are exactly the sequence {0, 2}.
  EXPECT_EQ(2u, Numbers.size());
  EXPECT_EQ(0, *Numbers.begin());
  EXPECT_EQ(2, *std::next(Numbers.begin()));
}
// If a linkonce global is present in the MustPreserveSymbols, we need to make // sure we honor this. To force the compiler to not drop it, we add it to the // "llvm.compiler.used" global. void LTOCodeGenerator::preserveDiscardableGVs( Module &TheModule, llvm::function_ref<bool(const GlobalValue &)> mustPreserveGV) { SetVector<Constant *> UsedValuesSet; if (GlobalVariable *LLVMUsed = TheModule.getGlobalVariable("llvm.compiler.used")) { ConstantArray *Inits = cast<ConstantArray>(LLVMUsed->getInitializer()); for (auto &V : Inits->operands()) UsedValuesSet.insert(cast<Constant>(&V)); LLVMUsed->eraseFromParent(); } llvm::Type *i8PTy = llvm::Type::getInt8PtrTy(TheModule.getContext()); auto mayPreserveGlobal = [&](GlobalValue &GV) { if (!GV.isDiscardableIfUnused() || GV.isDeclaration()) return; if (!mustPreserveGV(GV)) return; if (GV.hasAvailableExternallyLinkage()) { emitWarning( (Twine("Linker asked to preserve available_externally global: '") + GV.getName() + "'").str()); return; } if (GV.hasInternalLinkage()) { emitWarning((Twine("Linker asked to preserve internal global: '") + GV.getName() + "'").str()); return; } UsedValuesSet.insert(ConstantExpr::getBitCast(&GV, i8PTy)); }; for (auto &GV : TheModule) mayPreserveGlobal(GV); for (auto &GV : TheModule.globals()) mayPreserveGlobal(GV); for (auto &GV : TheModule.aliases()) mayPreserveGlobal(GV); if (UsedValuesSet.empty()) return; llvm::ArrayType *ATy = llvm::ArrayType::get(i8PTy, UsedValuesSet.size()); auto *LLVMUsed = new llvm::GlobalVariable( TheModule, ATy, false, llvm::GlobalValue::AppendingLinkage, llvm::ConstantArray::get(ATy, UsedValuesSet.getArrayRef()), "llvm.compiler.used"); LLVMUsed->setSection("llvm.metadata"); }
bool LiveRangeCalc::isJointlyDominated(const MachineBasicBlock *MBB, ArrayRef<SlotIndex> Defs, const SlotIndexes &Indexes) { const MachineFunction &MF = *MBB->getParent(); BitVector DefBlocks(MF.getNumBlockIDs()); for (SlotIndex I : Defs) DefBlocks.set(Indexes.getMBBFromIndex(I)->getNumber()); SetVector<unsigned> PredQueue; PredQueue.insert(MBB->getNumber()); for (unsigned i = 0; i != PredQueue.size(); ++i) { unsigned BN = PredQueue[i]; if (DefBlocks[BN]) return true; const MachineBasicBlock *B = MF.getBlockNumbered(BN); for (const MachineBasicBlock *P : B->predecessors()) PredQueue.insert(P->getNumber()); } return false; }
//
// runMCDesc - Print out MC register descriptions.
//
// Everything is written to OS between "#ifdef GET_REGINFO_MC_DESC" and the
// matching "#endif", inside "namespace llvm". In order, the function emits:
//   - <Target>RegDiffLists:    shared table of differentially encoded
//                              sub-register, super-register, overlap and
//                              register unit lists,
//   - <Target>SubRegIdxLists:  shared table of sub-register indexes,
//   - <Target>RegStrings:      the register name string table,
//   - <Target>RegDesc:         one descriptor per register, preceded by a
//                              descriptor whose numeric fields are all 0,
//   - <Target>RegUnitRoots:    the root register(s) of every native regunit,
//   - per-class register arrays and bit sets in an anonymous namespace,
//   - <Target>MCRegisterClasses,
//   - the tables written by EmitRegMappingTables(),
//   - <Target>RegEncodingTable: the "HWEncoding" value of every register,
//   - Init<Target>MCRegisterInfo(), which passes the tables above to
//     MCRegisterInfo::InitMCRegisterInfo().
//
// OS      - stream the generated text is written to.
// Target  - supplies the target name and, for the class bit sets, the
//           register bank used to look up enum values.
// RegBank - supplies registers, register units, register classes and
//           sub-register indices.
//
void RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target,
                                    CodeGenRegBank &RegBank) {
  emitSourceFileHeader("MC Register Information", OS);

  OS << "\n#ifdef GET_REGINFO_MC_DESC\n";
  OS << "#undef GET_REGINFO_MC_DESC\n";

  const std::vector<CodeGenRegister*> &Regs = RegBank.getRegisters();

  // The lists of sub-registers, super-registers, and overlaps all go in the
  // same array. That allows us to share suffixes.
  typedef std::vector<const CodeGenRegister*> RegVec;

  // Differentially encoded lists. All per-register vectors below are indexed
  // by the register's position in Regs.
  SequenceToOffsetTable<DiffVec> DiffSeqs;
  SmallVector<DiffVec, 4> SubRegLists(Regs.size());
  SmallVector<DiffVec, 4> SuperRegLists(Regs.size());
  SmallVector<DiffVec, 4> OverlapLists(Regs.size());
  SmallVector<DiffVec, 4> RegUnitLists(Regs.size());
  SmallVector<unsigned, 4> RegUnitInitScale(Regs.size());

  // Keep track of sub-register names as well. These are not differentially
  // encoded.
  typedef SmallVector<const CodeGenSubRegIndex*, 4> SubRegIdxVec;
  SequenceToOffsetTable<SubRegIdxVec> SubRegIdxSeqs;
  SmallVector<SubRegIdxVec, 4> SubRegIdxLists(Regs.size());

  SequenceToOffsetTable<std::string> RegStrings;

  // Precompute register lists for the SequenceToOffsetTable.
  for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
    const CodeGenRegister *Reg = Regs[i];

    RegStrings.add(Reg->getName());

    // Compute the ordered sub-register list.
    SetVector<const CodeGenRegister*> SR;
    Reg->addSubRegsPreOrder(SR, RegBank);
    diffEncode(SubRegLists[i], Reg->EnumValue, SR.begin(), SR.end());
    DiffSeqs.add(SubRegLists[i]);

    // Compute the corresponding sub-register indexes.
    SubRegIdxVec &SRIs = SubRegIdxLists[i];
    for (unsigned j = 0, je = SR.size(); j != je; ++j)
      SRIs.push_back(Reg->getSubRegIndex(SR[j]));
    SubRegIdxSeqs.add(SRIs);

    // Super-registers are already computed.
    const RegVec &SuperRegList = Reg->getSuperRegs();
    diffEncode(SuperRegLists[i], Reg->EnumValue,
               SuperRegList.begin(), SuperRegList.end());
    DiffSeqs.add(SuperRegLists[i]);

    // The list of overlaps doesn't need to have any particular order, and Reg
    // itself must be omitted.
    DiffVec &OverlapList = OverlapLists[i];
    CodeGenRegister::Set OSet;
    Reg->computeOverlaps(OSet, RegBank);
    OSet.erase(Reg);
    diffEncode(OverlapList, Reg->EnumValue, OSet.begin(), OSet.end());
    DiffSeqs.add(OverlapList);

    // Differentially encode the register unit list, seeded by register number.
    // First compute a scale factor that allows more diff-lists to be reused:
    //
    //   D0 -> (S0, S1)
    //   D1 -> (S2, S3)
    //
    // A scale factor of 2 allows D0 and D1 to share a diff-list. The initial
    // value for the differential decoder is the register number multiplied by
    // the scale.
    //
    // Check the neighboring registers for arithmetic progressions.
    // ScaleB is the distance to the previous register's first regunit, ScaleA
    // the distance to the next register's; each stays at ~0u when that
    // neighbor does not exist or has a different number of regunits.
    // NOTE(review): front() is called whenever the neighbor's regunit count
    // equals this register's, so this presumably relies on registers having
    // at least one native regunit - confirm.
    unsigned ScaleA = ~0u, ScaleB = ~0u;
    ArrayRef<unsigned> RUs = Reg->getNativeRegUnits();
    if (i > 0 && Regs[i-1]->getNativeRegUnits().size() == RUs.size())
      ScaleB = RUs.front() - Regs[i-1]->getNativeRegUnits().front();
    if (i+1 != Regs.size() &&
        Regs[i+1]->getNativeRegUnits().size() == RUs.size())
      ScaleA = Regs[i+1]->getNativeRegUnits().front() - RUs.front();
    unsigned Scale = std::min(ScaleB, ScaleA);
    // Default the scale to 0 if it can't be encoded in 4 bits.
    if (Scale >= 16)
      Scale = 0;
    RegUnitInitScale[i] = Scale;
    DiffSeqs.add(diffEncode(RegUnitLists[i], Scale * Reg->EnumValue, RUs));
  }

  // Compute the final layout of the sequence table.
  DiffSeqs.layout();
  SubRegIdxSeqs.layout();

  OS << "namespace llvm {\n\n";

  const std::string &TargetName = Target.getName();

  // Emit the shared table of differential lists.
  OS << "extern const uint16_t " << TargetName << "RegDiffLists[] = {\n";
  DiffSeqs.emit(OS, printDiff16);
  OS << "};\n\n";

  // Emit the table of sub-register indexes.
  OS << "extern const uint16_t " << TargetName << "SubRegIdxLists[] = {\n";
  SubRegIdxSeqs.emit(OS, printSubRegIndex);
  OS << "};\n\n";

  // Emit the string table.
  RegStrings.layout();
  OS << "extern const char " << TargetName << "RegStrings[] = {\n";
  RegStrings.emit(OS, printChar);
  OS << "};\n\n";

  // The first descriptor precedes those of the registers in Regs; it refers
  // to the empty string and has all remaining fields set to 0.
  OS << "extern const MCRegisterDesc " << TargetName
     << "RegDesc[] = { // Descriptors\n";
  OS << " { " << RegStrings.get("") << ", 0, 0, 0, 0, 0 },\n";

  // Emit the register descriptors now.
  for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
    const CodeGenRegister *Reg = Regs[i];
    // The last field packs the regunit diff-list offset (multiplied by 16)
    // together with the scale computed above, which is always below 16.
    OS << " { " << RegStrings.get(Reg->getName()) << ", "
       << DiffSeqs.get(OverlapLists[i]) << ", "
       << DiffSeqs.get(SubRegLists[i]) << ", "
       << DiffSeqs.get(SuperRegLists[i]) << ", "
       << SubRegIdxSeqs.get(SubRegIdxLists[i]) << ", "
       << (DiffSeqs.get(RegUnitLists[i])*16 + RegUnitInitScale[i]) << " },\n";
  }
  OS << "};\n\n"; // End of register descriptors...

  // Emit the table of register unit roots. Each regunit has one or two root
  // registers.
  OS << "extern const uint16_t " << TargetName << "RegUnitRoots[][2] = {\n";
  for (unsigned i = 0, e = RegBank.getNumNativeRegUnits(); i != e; ++i) {
    ArrayRef<const CodeGenRegister*> Roots = RegBank.getRegUnit(i).getRoots();
    assert(!Roots.empty() && "All regunits must have a root register.");
    assert(Roots.size() <= 2 && "More than two roots not supported yet.");
    OS << " { " << getQualifiedName(Roots.front()->TheDef);
    for (unsigned r = 1; r != Roots.size(); ++r)
      OS << ", " << getQualifiedName(Roots[r]->TheDef);
    OS << " },\n";
  }
  OS << "};\n\n";

  ArrayRef<CodeGenRegisterClass*> RegisterClasses = RegBank.getRegClasses();

  // Loop over all of the register classes... emitting each one.
  OS << "namespace { // Register classes...\n";

  // Emit the register enum value arrays for each RegisterClass
  for (unsigned rc = 0, e = RegisterClasses.size(); rc != e; ++rc) {
    const CodeGenRegisterClass &RC = *RegisterClasses[rc];
    ArrayRef<Record*> Order = RC.getOrder();

    // Give the register class a legal C name if it's anonymous.
    std::string Name = RC.getName();

    // Emit the register list now.
    OS << " // " << Name << " Register Class...\n"
       << " const uint16_t " << Name
       << "[] = {\n ";
    for (unsigned i = 0, e = Order.size(); i != e; ++i) {
      Record *Reg = Order[i];
      OS << getQualifiedName(Reg) << ", ";
    }
    OS << "\n };\n\n";

    // The bit set is built from the enum values of the same registers.
    OS << " // " << Name << " Bit set.\n"
       << " const uint8_t " << Name
       << "Bits[] = {\n ";
    BitVectorEmitter BVE;
    for (unsigned i = 0, e = Order.size(); i != e; ++i) {
      Record *Reg = Order[i];
      BVE.add(Target.getRegBank().getReg(Reg)->EnumValue);
    }
    BVE.print(OS);
    OS << "\n };\n\n";

  }
  OS << "}\n\n";

  OS << "extern const MCRegisterClass " << TargetName
     << "MCRegisterClasses[] = {\n";

  for (unsigned rc = 0, e = RegisterClasses.size(); rc != e; ++rc) {
    const CodeGenRegisterClass &RC = *RegisterClasses[rc];

    // Asserts to make sure values will fit in table assuming types from
    // MCRegisterInfo.h
    assert((RC.SpillSize/8) <= 0xffff && "SpillSize too large.");
    assert((RC.SpillAlignment/8) <= 0xffff && "SpillAlignment too large.");
    assert(RC.CopyCost >= -128 && RC.CopyCost <= 127 && "Copy cost too large.");

    // SpillSize and SpillAlignment are divided by 8 before being emitted.
    OS << " { " << '\"' << RC.getName() << "\", "
       << RC.getName() << ", " << RC.getName() << "Bits, "
       << RC.getOrder().size() << ", sizeof(" << RC.getName() << "Bits), "
       << RC.getQualifiedName() + "RegClassID" << ", "
       << RC.SpillSize/8 << ", "
       << RC.SpillAlignment/8 << ", "
       << RC.CopyCost << ", "
       << RC.Allocatable << " },\n";
  }

  OS << "};\n\n";

  ArrayRef<CodeGenSubRegIndex*> SubRegIndices = RegBank.getSubRegIndices();

  EmitRegMappingTables(OS, Regs, false);

  // Emit Reg encoding table
  OS << "extern const uint16_t " << TargetName;
  OS << "RegEncodingTable[] = {\n";
  // Add entry for NoRegister
  OS << " 0,\n";
  for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
    Record *Reg = Regs[i]->TheDef;
    BitsInit *BI = Reg->getValueAsBitsInit("HWEncoding");
    uint64_t Value = 0;
    // Assemble the encoding bit by bit; bits that are not a BitInit are
    // skipped and therefore stay 0.
    for (unsigned b = 0, be = BI->getNumBits(); b != be; ++b) {
      if (BitInit *B = dynamic_cast<BitInit*>(BI->getBit(b)))
        Value |= (uint64_t)B->getValue() << b;
    }
    OS << " " << Value << ",\n";
  }
  OS << "};\n"; // End of HW encoding table

  // MCRegisterInfo initialization routine.
  // The register count passed on is Regs.size()+1, matching the extra leading
  // entry in the descriptor and encoding tables.
  OS << "static inline void Init" << TargetName
     << "MCRegisterInfo(MCRegisterInfo *RI, unsigned RA, "
     << "unsigned DwarfFlavour = 0, unsigned EHFlavour = 0) {\n"
     << " RI->InitMCRegisterInfo(" << TargetName << "RegDesc, "
     << Regs.size()+1 << ", RA, " << TargetName << "MCRegisterClasses, "
     << RegisterClasses.size() << ", "
     << TargetName << "RegUnitRoots, "
     << RegBank.getNumNativeRegUnits() << ", "
     << TargetName << "RegDiffLists, "
     << TargetName << "RegStrings, "
     << TargetName << "SubRegIdxLists, "
     << SubRegIndices.size() << ",\n"
     << " " << TargetName << "RegEncodingTable);\n\n";

  EmitRegMapping(OS, Regs, false);

  OS << "}\n\n";

  OS << "} // End llvm namespace \n";
  OS << "#endif // GET_REGINFO_MC_DESC\n\n";
}
// Collect the defs that reach the reference RefA with respect to the register
// RefRR.
//
// The function works in three stages:
//   1. Starting at RefA's reaching def, follow reaching-def links upwards
//      (through all related refs of each def) until a phi def or a
//      non-preserving def covering RefRR is met.
//   2. Drop the non-phi defs that do not alias RefRR and order the owning
//      instructions of the rest with the "Less" comparator below.
//   3. Walk the ordered instructions and pick their defs, tracking in a
//      register set what has been defined so far.
//
// RefRR     - the register reference the reaching defs are computed for.
// RefA      - the reference node whose reaching-def chain is followed.
// FullChain - when true, stage 3 neither stops once RefRR is covered nor
//             drops defs that are already covered, and phi defs are not
//             added to the tracking set.
// DefRRs    - initial contents of the tracking set used in stage 3.
//
// Returns the list of selected def nodes, in the order they were picked.
NodeList Liveness::getAllReachingDefs(RegisterRef RefRR,
      NodeAddr<RefNode*> RefA, bool FullChain, const RegisterSet &DefRRs) {
  // DefQ is used both as the set of visited defs and as the work queue.
  SetVector<NodeId> DefQ;
  SetVector<NodeId> Owners;

  // The initial queue should not have reaching defs for shadows. The
  // whole point of a shadow is that it will have a reaching def that
  // is not aliased to the reaching defs of the related shadows.
  NodeId Start = RefA.Id;
  auto SNA = DFG.addr<RefNode*>(Start);
  if (NodeId RD = SNA.Addr->getReachingDef())
    DefQ.insert(RD);

  // Collect all the reaching defs, going up until a phi node is encountered,
  // or there are no more reaching defs. From this set, the actual set of
  // reaching defs will be selected.
  // The traversal upwards must go on until a covering def is encountered.
  // It is possible that a collection of non-covering (individually) defs
  // will be sufficient, but keep going until a covering one is found.
  // DefQ grows while it is being traversed, hence the index-based loop.
  for (unsigned i = 0; i < DefQ.size(); ++i) {
    auto TA = DFG.addr<DefNode*>(DefQ[i]);
    if (TA.Addr->getFlags() & NodeAttrs::PhiRef)
      continue;
    // Stop at the covering/overwriting def of the initial register reference.
    // A covering def flagged as Preserving does not stop the traversal.
    RegisterRef RR = TA.Addr->getRegRef();
    if (RAI.covers(RR, RefRR)) {
      uint16_t Flags = TA.Addr->getFlags();
      if (!(Flags & NodeAttrs::Preserving))
        continue;
    }
    // Get the next level of reaching defs. This will include multiple
    // reaching defs for shadows.
    for (auto S : DFG.getRelatedRefs(TA.Addr->getOwner(DFG), TA))
      if (auto RD = NodeAddr<RefNode*>(S).Addr->getReachingDef())
        DefQ.insert(RD);
  }

  // Remove all non-phi defs that are not aliased to RefRR, and collect
  // the owners of the remaining defs.
  SetVector<NodeId> Defs;
  for (auto N : DefQ) {
    auto TA = DFG.addr<DefNode*>(N);
    bool IsPhi = TA.Addr->getFlags() & NodeAttrs::PhiRef;
    if (!IsPhi && !RAI.alias(RefRR, TA.Addr->getRegRef()))
      continue;
    Defs.insert(TA.Id);
    Owners.insert(TA.Addr->getOwner(DFG).Id);
  }

  // Return the MachineBasicBlock containing a given instruction.
  auto Block = [this] (NodeAddr<InstrNode*> IA) -> MachineBasicBlock* {
    if (IA.Addr->getKind() == NodeAttrs::Stmt)
      return NodeAddr<StmtNode*>(IA).Addr->getCode()->getParent();
    assert(IA.Addr->getKind() == NodeAttrs::Phi);
    NodeAddr<PhiNode*> PA = IA;
    NodeAddr<BlockNode*> BA = PA.Addr->getOwner(DFG);
    return BA.Addr->getCode();
  };
  // Less(A,B) iff instruction A is further down in the dominator tree than B.
  auto Less = [&Block,this] (NodeId A, NodeId B) -> bool {
    if (A == B)
      return false;
    auto OA = DFG.addr<InstrNode*>(A), OB = DFG.addr<InstrNode*>(B);
    MachineBasicBlock *BA = Block(OA), *BB = Block(OB);
    if (BA != BB)
      return MDT.dominates(BB, BA);
    // They are in the same block.
    bool StmtA = OA.Addr->getKind() == NodeAttrs::Stmt;
    bool StmtB = OB.Addr->getKind() == NodeAttrs::Stmt;
    if (StmtA) {
      if (!StmtB)   // OB is a phi and phis dominate statements.
        return true;
      auto CA = NodeAddr<StmtNode*>(OA).Addr->getCode();
      auto CB = NodeAddr<StmtNode*>(OB).Addr->getCode();
      // The order must be linear, so tie-break such equalities.
      if (CA == CB)
        return A < B;
      return MDT.dominates(CB, CA);
    } else {
      // OA is a phi.
      if (StmtB)
        return false;
      // Both are phis. There is no ordering between phis (in terms of
      // the data-flow), so tie-break this via node id comparison.
      return A < B;
    }
  };

  // Order the owning instructions so that those further down in the
  // dominator tree come first.
  std::vector<NodeId> Tmp(Owners.begin(), Owners.end());
  std::sort(Tmp.begin(), Tmp.end(), Less);

  // The vector is a list of instructions, so that defs coming from
  // the same instruction don't need to be artificially ordered.
  // Then, when computing the initial segment, and iterating over an
  // instruction, pick the defs that contribute to the covering (i.e. is
  // not covered by previously added defs). Check the defs individually,
  // i.e. first check each def if is covered or not (without adding them
  // to the tracking set), and then add all the selected ones.

  // The reason for this is this example:
  // *d1<A>, *d2<B>, ... Assume A and B are aliased (can happen in phi nodes).
  // *d3<C>              If A \incl BuC, and B \incl AuC, then *d2 would be
  //                     covered if we added A first, and A would be covered
  //                     if we added B first.

  NodeList RDefs;
  // RRs is the tracking set of registers defined by the defs picked so far.
  RegisterSet RRs = DefRRs;

  // True for def nodes that survived the filtering above.
  auto DefInSet = [&Defs] (NodeAddr<RefNode*> TA) -> bool {
    return TA.Addr->getKind() == NodeAttrs::Def &&
           Defs.count(TA.Id);
  };
  for (auto T : Tmp) {
    // Unless the full chain was requested, stop once the picked defs cover
    // the register of interest.
    if (!FullChain && RAI.covers(RRs, RefRR))
      break;
    auto TA = DFG.addr<InstrNode*>(T);
    bool IsPhi = DFG.IsCode<NodeAttrs::Phi>(TA);
    // Ds holds the defs selected from this instruction; they are added to the
    // tracking set only after all of them have been checked.
    NodeList Ds;
    for (NodeAddr<DefNode*> DA : TA.Addr->members_if(DefInSet, DFG)) {
      auto QR = DA.Addr->getRegRef();
      // Add phi defs even if they are covered by subsequent defs. This is
      // for cases where the reached use is not covered by any of the defs
      // encountered so far: the phi def is needed to expose the liveness
      // of that use to the entry of the block.
      // Example:
      //   phi d1<R3>(,d2,), ...  Phi def d1 is covered by d2.
      //   d2<R3>(d1,,u3), ...
      //   ..., u3<D1>(d2)        This use needs to be live on entry.
      if (FullChain || IsPhi || !RAI.covers(RRs, QR))
        Ds.push_back(DA);
    }
    RDefs.insert(RDefs.end(), Ds.begin(), Ds.end());
    for (NodeAddr<DefNode*> DA : Ds) {
      // When collecting a full chain of definitions, do not consider phi
      // defs to actually define a register.
      // Preserving defs are never added to the tracking set.
      uint16_t Flags = DA.Addr->getFlags();
      if (!FullChain || !(Flags & NodeAttrs::PhiRef))
        if (!(Flags & NodeAttrs::Preserving))
          RRs.insert(DA.Addr->getRegRef());
    }
  }

  return RDefs;
}
// Recompute RealUseMap: for every phi node, the non-phi ("real") uses reached
// by the phi's defs, grouped by register.
//
// The computation has two parts:
//   1. For each phi, follow the reached-use/reached-def chains from its defs
//      to collect candidate real uses, then discard the uses that are not
//      actually reached by any of the phi's defs. At the same time record,
//      for each phi use, which phis are among its reaching defs and which
//      registers are defined in between (PhiUp).
//   2. Propagate the real uses upwards along PhiUp until nothing changes.
//
// When Trace is set, the intermediate PhiUp map and the final RealUseMap are
// printed to dbgs().
void Liveness::computePhiInfo() {
  RealUseMap.clear();

  // Gather the phi nodes of all blocks in the function.
  NodeList Phis;
  NodeAddr<FuncNode*> FA = DFG.getFunc();
  auto Blocks = FA.Addr->members(DFG);
  for (NodeAddr<BlockNode*> BA : Blocks) {
    auto Ps = BA.Addr->members_if(DFG.IsCode<NodeAttrs::Phi>, DFG);
    Phis.insert(Phis.end(), Ps.begin(), Ps.end());
  }

  // phi use -> (map: reaching phi -> set of registers defined in between)
  std::map<NodeId,std::map<NodeId,RegisterSet>> PhiUp;
  std::vector<NodeId> PhiUQ;  // Work list of phis for upward propagation.

  // Go over all phis.
  for (NodeAddr<PhiNode*> PhiA : Phis) {
    // Go over all defs and collect the reached uses that are non-phi uses
    // (i.e. the "real uses").
    auto &RealUses = RealUseMap[PhiA.Id];
    auto PhiRefs = PhiA.Addr->members(DFG);

    // Have a work queue of defs whose reached uses need to be found.
    // For each def, add to the queue all reached (non-phi) defs.
    SetVector<NodeId> DefQ;
    NodeSet PhiDefs;
    for (auto R : PhiRefs) {
      if (!DFG.IsRef<NodeAttrs::Def>(R))
        continue;
      DefQ.insert(R.Id);
      PhiDefs.insert(R.Id);
    }
    // DefQ grows while it is being traversed, hence the index-based loop.
    for (unsigned i = 0; i < DefQ.size(); ++i) {
      NodeAddr<DefNode*> DA = DFG.addr<DefNode*>(DefQ[i]);
      // Walk the sibling chain of reached uses and record the non-phi ones.
      NodeId UN = DA.Addr->getReachedUse();
      while (UN != 0) {
        NodeAddr<UseNode*> A = DFG.addr<UseNode*>(UN);
        if (!(A.Addr->getFlags() & NodeAttrs::PhiRef))
          RealUses[getRestrictedRegRef(A)].insert(A.Id);
        UN = A.Addr->getSibling();
      }
      // Walk the sibling chain of reached defs and queue their related
      // non-phi refs.
      NodeId DN = DA.Addr->getReachedDef();
      while (DN != 0) {
        NodeAddr<DefNode*> A = DFG.addr<DefNode*>(DN);
        for (auto T : DFG.getRelatedRefs(A.Addr->getOwner(DFG), A)) {
          uint16_t Flags = NodeAddr<DefNode*>(T).Addr->getFlags();
          // Must traverse the reached-def chain. Consider:
          //   def(D0) -> def(R0) -> def(R0) -> use(D0)
          // The reachable use of D0 passes through a def of R0.
          if (!(Flags & NodeAttrs::PhiRef))
            DefQ.insert(T.Id);
        }
        DN = A.Addr->getSibling();
      }
    }
    // Filter out these uses that appear to be reachable, but really
    // are not. For example:
    //
    // R1:0 =          d1
    //      = R1:0     u2     Reached by d1.
    //   R0 =          d3
    //      = R1:0     u4     Still reached by d1: indirectly through
    //                        the def d3.
    //   R1 =          d5
    //      = R1:0     u6     Not reached by d1 (covered collectively
    //                        by d3 and d5), but following reached
    //                        defs and uses from d1 will lead here.
    auto HasDef = [&PhiDefs] (NodeAddr<DefNode*> DA) -> bool {
      return PhiDefs.count(DA.Id);
    };
    // Both loops advance their iterator in the body because entries may be
    // erased while iterating.
    for (auto UI = RealUses.begin(), UE = RealUses.end(); UI != UE; ) {
      // For each reached register UI->first, there is a set UI->second, of
      // uses of it. For each such use, check if it is reached by this phi,
      // i.e. check if the set of its reaching defs intersects the set of
      // this phi's defs.
      auto &Uses = UI->second;
      for (auto I = Uses.begin(), E = Uses.end(); I != E; ) {
        auto UA = DFG.addr<UseNode*>(*I);
        NodeList RDs = getAllReachingDefs(UI->first, UA);
        if (std::any_of(RDs.begin(), RDs.end(), HasDef))
          ++I;
        else
          I = Uses.erase(I);
      }
      if (Uses.empty())
        UI = RealUses.erase(UI);
      else
        ++UI;
    }

    // If this phi reaches some "real" uses, add it to the queue for upward
    // propagation.
    if (!RealUses.empty())
      PhiUQ.push_back(PhiA.Id);

    // Go over all phi uses and check if the reaching def is another phi.
    // Collect the phis that are among the reaching defs of these uses.
    // While traversing the list of reaching defs for each phi use, collect
    // the set of registers defined between this phi (Phi) and the owner phi
    // of the reaching def.
    for (auto I : PhiRefs) {
      if (!DFG.IsRef<NodeAttrs::Use>(I))
        continue;
      NodeAddr<UseNode*> UA = I;
      auto &UpMap = PhiUp[UA.Id];
      RegisterSet DefRRs;
      for (NodeAddr<DefNode*> DA : getAllReachingDefs(UA)) {
        // A phi def records a snapshot of the registers defined so far; any
        // other def adds its register to that running set.
        if (DA.Addr->getFlags() & NodeAttrs::PhiRef)
          UpMap[DA.Addr->getOwner(DFG).Id] = DefRRs;
        else
          DefRRs.insert(DA.Addr->getRegRef());
      }
    }
  }

  if (Trace) {
    dbgs() << "Phi-up-to-phi map:\n";
    for (auto I : PhiUp) {
      dbgs() << "phi " << Print<NodeId>(I.first, DFG) << " -> {";
      for (auto R : I.second)
        dbgs() << ' ' << Print<NodeId>(R.first, DFG)
               << Print<RegisterSet>(R.second, DFG);
      dbgs() << " }\n";
    }
  }

  // Propagate the reached registers up in the phi chain.
  //
  // The following type of situation needs careful handling:
  //
  //   phi d1<R1:0>  (1)
  //        |
  //   ... d2<R1>
  //        |
  //   phi u3<R1:0>  (2)
  //        |
  //   ... u4<R1>
  //
  // The phi node (2) defines a register pair R1:0, and reaches a "real"
  // use u4 of just R1. The same phi node is also known to reach (upwards)
  // the phi node (1). However, the use u4 is not reached by phi (1),
  // because of the intervening definition d2 of R1. The data flow between
  // phis (1) and (2) is restricted to R1:0 minus R1, i.e. R0.
  //
  // When propagating uses up the phi chains, get all the reaching defs
  // for a given phi use, and traverse the list until the propagated ref
  // is covered, or until reaching the final phi. Only assume
  // that the reference reaches the phi in the latter case.

  // PhiUQ grows while it is being traversed: a phi whose set of real uses
  // changed is appended again so that the change is propagated further up.
  for (unsigned i = 0; i < PhiUQ.size(); ++i) {
    auto PA = DFG.addr<PhiNode*>(PhiUQ[i]);
    auto &RealUses = RealUseMap[PA.Id];
    for (auto U : PA.Addr->members_if(DFG.IsRef<NodeAttrs::Use>, DFG)) {
      NodeAddr<UseNode*> UA = U;
      auto &UpPhis = PhiUp[UA.Id];
      for (auto UP : UpPhis) {
        bool Changed = false;
        auto &MidDefs = UP.second;
        // Collect the set UpReached of uses that are reached by the current
        // phi PA, and are not covered by any intervening def between PA and
        // the upward phi UP.
        RegisterSet UpReached;
        for (auto T : RealUses) {
          if (!isRestricted(PA, UA, T.first))
            continue;
          if (!RAI.covers(MidDefs, T.first))
            UpReached.insert(T.first);
        }
        if (UpReached.empty())
          continue;
        // Update the set PRUs of real uses reached by the upward phi UP with
        // the actual set of uses (UpReached) that the UP phi reaches.
        auto &PRUs = RealUseMap[UP.first];
        for (auto R : UpReached) {
          // A size change after the insertion means new uses were added.
          unsigned Z = PRUs[R].size();
          PRUs[R].insert(RealUses[R].begin(), RealUses[R].end());
          Changed |= (PRUs[R].size() != Z);
        }
        if (Changed)
          PhiUQ.push_back(UP.first);
      }
    }
  }

  if (Trace) {
    dbgs() << "Real use map:\n";
    for (auto I : RealUseMap) {
      dbgs() << "phi " << Print<NodeId>(I.first, DFG);
      NodeAddr<PhiNode*> PA = DFG.addr<PhiNode*>(I.first);
      NodeList Ds = PA.Addr->members_if(DFG.IsRef<NodeAttrs::Def>, DFG);
      // Print the register of the phi's first def, if it has any.
      if (!Ds.empty()) {
        RegisterRef RR = NodeAddr<DefNode*>(Ds[0]).Addr->getRegRef();
        dbgs() << '<' << Print<RegisterRef>(RR, DFG) << '>';
      } else {
        dbgs() << "<noreg>";
      }
      dbgs() << " -> " << Print<RefMap>(I.second, DFG) << '\n';
    }
  }
}
//
// runMCDesc - Print out MC register descriptions.
//
// Everything is written to OS between "#ifdef GET_REGINFO_MC_DESC" and the
// matching "#endif", inside "namespace llvm". In order, the function emits:
//   - in an anonymous namespace, one 0-terminated "<Reg>_Overlaps" array per
//     register, plus "<Reg>_SubRegsSet" / "<Reg>_SuperRegsSet" arrays for the
//     registers that have sub-/super-registers, and the shared
//     Empty_SubRegsSet / Empty_SuperRegsSet arrays,
//   - <Target>RegDesc: a "NOREG" descriptor followed by one descriptor per
//     register referring to the arrays above,
//   - per-class register arrays and bit sets in an anonymous namespace,
//   - <Target>MCRegisterClasses,
//   - Init<Target>MCRegisterInfo(), which passes the tables to
//     MCRegisterInfo::InitMCRegisterInfo() and whose body is completed by
//     EmitRegMapping().
//
// OS      - stream the generated text is written to.
// Target  - supplies the target name and, for the class bit sets, the
//           register bank used to look up enum values.
// RegBank - supplies registers, their overlaps, and register classes.
//
void RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target,
                                    CodeGenRegBank &RegBank) {
  EmitSourceFileHeader("MC Register Information", OS);

  OS << "\n#ifdef GET_REGINFO_MC_DESC\n";
  OS << "#undef GET_REGINFO_MC_DESC\n";

  // Overlaps for all registers are computed up front in a single call.
  std::map<const CodeGenRegister*, CodeGenRegister::Set> Overlaps;
  RegBank.computeOverlaps(Overlaps);

  OS << "namespace llvm {\n\n";

  const std::string &TargetName = Target.getName();

  OS << "\nnamespace {\n";

  const std::vector<CodeGenRegister*> &Regs = RegBank.getRegisters();

  // Emit an overlap list for all registers.
  for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
    const CodeGenRegister *Reg = Regs[i];
    const CodeGenRegister::Set &O = Overlaps[Reg];
    // Move Reg to the front so TRI::getAliasSet can share the list.
    OS << " const unsigned " << Reg->getName() << "_Overlaps[] = { "
       << getQualifiedName(Reg->TheDef) << ", ";
    // Reg was already written first, so it is skipped here.
    for (CodeGenRegister::Set::const_iterator I = O.begin(), E = O.end();
         I != E; ++I)
      if (*I != Reg)
        OS << getQualifiedName((*I)->TheDef) << ", ";
    OS << "0 };\n";
  }

  // Emit the empty sub-registers list
  OS << " const unsigned Empty_SubRegsSet[] = { 0 };\n";
  // Loop over all of the registers which have sub-registers, emitting the
  // sub-registers list to memory.
  for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
    const CodeGenRegister &Reg = *Regs[i];
    if (Reg.getSubRegs().empty())
      continue;
    // getSubRegs() orders by SubRegIndex. We want a topological order.
    SetVector<CodeGenRegister*> SR;
    Reg.addSubRegsPreOrder(SR);
    OS << " const unsigned " << Reg.getName() << "_SubRegsSet[] = { ";
    for (unsigned j = 0, je = SR.size(); j != je; ++j)
      OS << getQualifiedName(SR[j]->TheDef) << ", ";
    OS << "0 };\n";
  }

  // Emit the empty super-registers list
  OS << " const unsigned Empty_SuperRegsSet[] = { 0 };\n";
  // Loop over all of the registers which have super-registers, emitting the
  // super-registers list to memory.
  for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
    const CodeGenRegister &Reg = *Regs[i];
    const CodeGenRegister::SuperRegList &SR = Reg.getSuperRegs();
    if (SR.empty())
      continue;
    OS << " const unsigned " << Reg.getName() << "_SuperRegsSet[] = { ";
    for (unsigned j = 0, je = SR.size(); j != je; ++j)
      OS << getQualifiedName(SR[j]->TheDef) << ", ";
    OS << "0 };\n";
  }
  OS << "}\n"; // End of anonymous namespace...

  OS << "\nextern const MCRegisterDesc " << TargetName
     << "RegDesc[] = { // Descriptors\n";
  OS << " { \"NOREG\",\t0,\t0,\t0 },\n";

  // Now that register alias and sub-registers sets have been emitted, emit the
  // register descriptors now.
  for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
    const CodeGenRegister &Reg = *Regs[i];
    OS << " { \"";
    OS << Reg.getName() << "\",\t" << Reg.getName() << "_Overlaps,\t";
    // Registers without sub-/super-registers refer to the shared empty sets,
    // mirroring the conditions under which the per-register arrays were
    // emitted above.
    if (!Reg.getSubRegs().empty())
      OS << Reg.getName() << "_SubRegsSet,\t";
    else
      OS << "Empty_SubRegsSet,\t";
    if (!Reg.getSuperRegs().empty())
      OS << Reg.getName() << "_SuperRegsSet";
    else
      OS << "Empty_SuperRegsSet";
    OS << " },\n";
  }
  OS << "};\n\n"; // End of register descriptors...

  ArrayRef<CodeGenRegisterClass*> RegisterClasses = RegBank.getRegClasses();

  // Loop over all of the register classes... emitting each one.
  OS << "namespace { // Register classes...\n";

  // Emit the register enum value arrays for each RegisterClass
  for (unsigned rc = 0, e = RegisterClasses.size(); rc != e; ++rc) {
    const CodeGenRegisterClass &RC = *RegisterClasses[rc];
    ArrayRef<Record*> Order = RC.getOrder();

    // Give the register class a legal C name if it's anonymous.
    std::string Name = RC.getName();

    // Emit the register list now.
    OS << " // " << Name << " Register Class...\n"
       << " static const unsigned " << Name
       << "[] = {\n ";
    for (unsigned i = 0, e = Order.size(); i != e; ++i) {
      Record *Reg = Order[i];
      OS << getQualifiedName(Reg) << ", ";
    }
    OS << "\n };\n\n";

    // The bit set is built from the enum values of the same registers.
    OS << " // " << Name << " Bit set.\n"
       << " static const unsigned char " << Name
       << "Bits[] = {\n ";
    BitVectorEmitter BVE;
    for (unsigned i = 0, e = Order.size(); i != e; ++i) {
      Record *Reg = Order[i];
      BVE.add(Target.getRegBank().getReg(Reg)->EnumValue);
    }
    BVE.print(OS);
    OS << "\n };\n\n";

  }
  OS << "}\n\n";

  OS << "extern const MCRegisterClass " << TargetName
     << "MCRegisterClasses[] = {\n";

  for (unsigned rc = 0, e = RegisterClasses.size(); rc != e; ++rc) {
    const CodeGenRegisterClass &RC = *RegisterClasses[rc];
    // SpillSize and SpillAlignment are divided by 8 before being emitted; the
    // register range is given as a begin pointer and begin + number of
    // registers.
    OS << " MCRegisterClass(" << RC.getQualifiedName() + "RegClassID" << ", "
       << '\"' << RC.getName() << "\", "
       << RC.SpillSize/8 << ", "
       << RC.SpillAlignment/8 << ", "
       << RC.CopyCost << ", "
       << RC.Allocatable << ", "
       << RC.getName() << ", " << RC.getName() << " + "
       << RC.getOrder().size() << ", "
       << RC.getName() << "Bits, sizeof(" << RC.getName() << "Bits)"
       << "),\n";
  }

  OS << "};\n\n";

  // MCRegisterInfo initialization routine.
  // The register count passed on is Regs.size()+1, matching the extra leading
  // "NOREG" descriptor.
  OS << "static inline void Init" << TargetName
     << "MCRegisterInfo(MCRegisterInfo *RI, unsigned RA, "
     << "unsigned DwarfFlavour = 0, unsigned EHFlavour = 0) {\n";
  OS << " RI->InitMCRegisterInfo(" << TargetName << "RegDesc, "
     << Regs.size()+1 << ", RA, " << TargetName << "MCRegisterClasses, "
     << RegisterClasses.size() << ");\n\n";

  EmitRegMapping(OS, Regs, false);

  OS << "}\n\n";

  OS << "} // End llvm namespace \n";
  OS << "#endif // GET_REGINFO_MC_DESC\n\n";
}
int main(int argc, char **argv) { // Print a stack trace if we signal out. sys::PrintStackTraceOnErrorSignal(argv[0]); PrettyStackTraceProgram X(argc, argv); LLVMContext Context; llvm_shutdown_obj Y; // Call llvm_shutdown() on exit. cl::ParseCommandLineOptions(argc, argv, "llvm extractor\n"); // Use lazy loading, since we only care about selected global values. SMDiagnostic Err; std::unique_ptr<Module> M = getLazyIRFileModule(InputFilename, Err, Context); if (!M.get()) { Err.print(argv[0], errs()); return 1; } // Use SetVector to avoid duplicates. SetVector<GlobalValue *> GVs; // Figure out which aliases we should extract. for (size_t i = 0, e = ExtractAliases.size(); i != e; ++i) { GlobalAlias *GA = M->getNamedAlias(ExtractAliases[i]); if (!GA) { errs() << argv[0] << ": program doesn't contain alias named '" << ExtractAliases[i] << "'!\n"; return 1; } GVs.insert(GA); } // Extract aliases via regular expression matching. for (size_t i = 0, e = ExtractRegExpAliases.size(); i != e; ++i) { std::string Error; Regex RegEx(ExtractRegExpAliases[i]); if (!RegEx.isValid(Error)) { errs() << argv[0] << ": '" << ExtractRegExpAliases[i] << "' " "invalid regex: " << Error; } bool match = false; for (Module::alias_iterator GA = M->alias_begin(), E = M->alias_end(); GA != E; GA++) { if (RegEx.match(GA->getName())) { GVs.insert(&*GA); match = true; } } if (!match) { errs() << argv[0] << ": program doesn't contain global named '" << ExtractRegExpAliases[i] << "'!\n"; return 1; } } // Figure out which globals we should extract. for (size_t i = 0, e = ExtractGlobals.size(); i != e; ++i) { GlobalValue *GV = M->getNamedGlobal(ExtractGlobals[i]); if (!GV) { errs() << argv[0] << ": program doesn't contain global named '" << ExtractGlobals[i] << "'!\n"; return 1; } GVs.insert(GV); } // Extract globals via regular expression matching. 
for (size_t i = 0, e = ExtractRegExpGlobals.size(); i != e; ++i) { std::string Error; Regex RegEx(ExtractRegExpGlobals[i]); if (!RegEx.isValid(Error)) { errs() << argv[0] << ": '" << ExtractRegExpGlobals[i] << "' " "invalid regex: " << Error; } bool match = false; for (auto &GV : M->globals()) { if (RegEx.match(GV.getName())) { GVs.insert(&GV); match = true; } } if (!match) { errs() << argv[0] << ": program doesn't contain global named '" << ExtractRegExpGlobals[i] << "'!\n"; return 1; } } // Figure out which functions we should extract. for (size_t i = 0, e = ExtractFuncs.size(); i != e; ++i) { GlobalValue *GV = M->getFunction(ExtractFuncs[i]); if (!GV) { errs() << argv[0] << ": program doesn't contain function named '" << ExtractFuncs[i] << "'!\n"; return 1; } GVs.insert(GV); } // Extract functions via regular expression matching. for (size_t i = 0, e = ExtractRegExpFuncs.size(); i != e; ++i) { std::string Error; StringRef RegExStr = ExtractRegExpFuncs[i]; Regex RegEx(RegExStr); if (!RegEx.isValid(Error)) { errs() << argv[0] << ": '" << ExtractRegExpFuncs[i] << "' " "invalid regex: " << Error; } bool match = false; for (Module::iterator F = M->begin(), E = M->end(); F != E; F++) { if (RegEx.match(F->getName())) { GVs.insert(&*F); match = true; } } if (!match) { errs() << argv[0] << ": program doesn't contain global named '" << ExtractRegExpFuncs[i] << "'!\n"; return 1; } } auto Materialize = [&](GlobalValue &GV) { if (std::error_code EC = GV.materialize()) { errs() << argv[0] << ": error reading input: " << EC.message() << "\n"; exit(1); } }; // Materialize requisite global values. if (!DeleteFn) { for (size_t i = 0, e = GVs.size(); i != e; ++i) Materialize(*GVs[i]); } else { // Deleting. Materialize every GV that's *not* in GVs. 
SmallPtrSet<GlobalValue *, 8> GVSet(GVs.begin(), GVs.end()); for (auto &F : *M) { if (!GVSet.count(&F)) Materialize(F); } } { std::vector<GlobalValue *> Gvs(GVs.begin(), GVs.end()); legacy::PassManager Extract; Extract.add(createGVExtractionPass(Gvs, DeleteFn)); Extract.run(*M); // Now that we have all the GVs we want, mark the module as fully // materialized. // FIXME: should the GVExtractionPass handle this? M->materializeAll(); } // In addition to deleting all other functions, we also want to spiff it // up a little bit. Do this now. legacy::PassManager Passes; if (!DeleteFn) Passes.add(createGlobalDCEPass()); // Delete unreachable globals Passes.add(createStripDeadDebugInfoPass()); // Remove dead debug info Passes.add(createStripDeadPrototypesPass()); // Remove dead func decls std::error_code EC; tool_output_file Out(OutputFilename, EC, sys::fs::F_None); if (EC) { errs() << EC.message() << '\n'; return 1; } if (OutputAssembly) Passes.add( createPrintModulePass(Out.os(), "", PreserveAssemblyUseListOrder)); else if (Force || !CheckBitcodeOutputToConsole(Out.os(), true)) Passes.add(createBitcodeWriterPass(Out.os(), PreserveBitcodeUseListOrder)); Passes.run(*M.get()); // Declare success. Out.keep(); return 0; }
int main(int argc, char **argv) { InitLLVM X(argc, argv); LLVMContext Context; cl::ParseCommandLineOptions(argc, argv, "llvm extractor\n"); // Use lazy loading, since we only care about selected global values. SMDiagnostic Err; std::unique_ptr<Module> M = getLazyIRFileModule(InputFilename, Err, Context); if (!M.get()) { Err.print(argv[0], errs()); return 1; } // Use SetVector to avoid duplicates. SetVector<GlobalValue *> GVs; // Figure out which aliases we should extract. for (size_t i = 0, e = ExtractAliases.size(); i != e; ++i) { GlobalAlias *GA = M->getNamedAlias(ExtractAliases[i]); if (!GA) { errs() << argv[0] << ": program doesn't contain alias named '" << ExtractAliases[i] << "'!\n"; return 1; } GVs.insert(GA); } // Extract aliases via regular expression matching. for (size_t i = 0, e = ExtractRegExpAliases.size(); i != e; ++i) { std::string Error; Regex RegEx(ExtractRegExpAliases[i]); if (!RegEx.isValid(Error)) { errs() << argv[0] << ": '" << ExtractRegExpAliases[i] << "' " "invalid regex: " << Error; } bool match = false; for (Module::alias_iterator GA = M->alias_begin(), E = M->alias_end(); GA != E; GA++) { if (RegEx.match(GA->getName())) { GVs.insert(&*GA); match = true; } } if (!match) { errs() << argv[0] << ": program doesn't contain global named '" << ExtractRegExpAliases[i] << "'!\n"; return 1; } } // Figure out which globals we should extract. for (size_t i = 0, e = ExtractGlobals.size(); i != e; ++i) { GlobalValue *GV = M->getNamedGlobal(ExtractGlobals[i]); if (!GV) { errs() << argv[0] << ": program doesn't contain global named '" << ExtractGlobals[i] << "'!\n"; return 1; } GVs.insert(GV); } // Extract globals via regular expression matching. 
for (size_t i = 0, e = ExtractRegExpGlobals.size(); i != e; ++i) { std::string Error; Regex RegEx(ExtractRegExpGlobals[i]); if (!RegEx.isValid(Error)) { errs() << argv[0] << ": '" << ExtractRegExpGlobals[i] << "' " "invalid regex: " << Error; } bool match = false; for (auto &GV : M->globals()) { if (RegEx.match(GV.getName())) { GVs.insert(&GV); match = true; } } if (!match) { errs() << argv[0] << ": program doesn't contain global named '" << ExtractRegExpGlobals[i] << "'!\n"; return 1; } } // Figure out which functions we should extract. for (size_t i = 0, e = ExtractFuncs.size(); i != e; ++i) { GlobalValue *GV = M->getFunction(ExtractFuncs[i]); if (!GV) { errs() << argv[0] << ": program doesn't contain function named '" << ExtractFuncs[i] << "'!\n"; return 1; } GVs.insert(GV); } // Extract functions via regular expression matching. for (size_t i = 0, e = ExtractRegExpFuncs.size(); i != e; ++i) { std::string Error; StringRef RegExStr = ExtractRegExpFuncs[i]; Regex RegEx(RegExStr); if (!RegEx.isValid(Error)) { errs() << argv[0] << ": '" << ExtractRegExpFuncs[i] << "' " "invalid regex: " << Error; } bool match = false; for (Module::iterator F = M->begin(), E = M->end(); F != E; F++) { if (RegEx.match(F->getName())) { GVs.insert(&*F); match = true; } } if (!match) { errs() << argv[0] << ": program doesn't contain global named '" << ExtractRegExpFuncs[i] << "'!\n"; return 1; } } // Figure out which BasicBlocks we should extract. SmallVector<BasicBlock *, 4> BBs; for (StringRef StrPair : ExtractBlocks) { auto BBInfo = StrPair.split(':'); // Get the function. Function *F = M->getFunction(BBInfo.first); if (!F) { errs() << argv[0] << ": program doesn't contain a function named '" << BBInfo.first << "'!\n"; return 1; } // Do not materialize this function. GVs.insert(F); // Get the basic block. 
auto Res = llvm::find_if(*F, [&](const BasicBlock &BB) { return BB.getName().equals(BBInfo.second); }); if (Res == F->end()) { errs() << argv[0] << ": function " << F->getName() << " doesn't contain a basic block named '" << BBInfo.second << "'!\n"; return 1; } BBs.push_back(&*Res); } // Use *argv instead of argv[0] to work around a wrong GCC warning. ExitOnError ExitOnErr(std::string(*argv) + ": error reading input: "); if (Recursive) { std::vector<llvm::Function *> Workqueue; for (GlobalValue *GV : GVs) { if (auto *F = dyn_cast<Function>(GV)) { Workqueue.push_back(F); } } while (!Workqueue.empty()) { Function *F = &*Workqueue.back(); Workqueue.pop_back(); ExitOnErr(F->materialize()); for (auto &BB : *F) { for (auto &I : BB) { auto *CI = dyn_cast<CallInst>(&I); if (!CI) continue; Function *CF = CI->getCalledFunction(); if (!CF) continue; if (CF->isDeclaration() || GVs.count(CF)) continue; GVs.insert(CF); Workqueue.push_back(CF); } } } } auto Materialize = [&](GlobalValue &GV) { ExitOnErr(GV.materialize()); }; // Materialize requisite global values. if (!DeleteFn) { for (size_t i = 0, e = GVs.size(); i != e; ++i) Materialize(*GVs[i]); } else { // Deleting. Materialize every GV that's *not* in GVs. SmallPtrSet<GlobalValue *, 8> GVSet(GVs.begin(), GVs.end()); for (auto &F : *M) { if (!GVSet.count(&F)) Materialize(F); } } { std::vector<GlobalValue *> Gvs(GVs.begin(), GVs.end()); legacy::PassManager Extract; Extract.add(createGVExtractionPass(Gvs, DeleteFn)); Extract.run(*M); // Now that we have all the GVs we want, mark the module as fully // materialized. // FIXME: should the GVExtractionPass handle this? ExitOnErr(M->materializeAll()); } // Extract the specified basic blocks from the module and erase the existing // functions. if (!ExtractBlocks.empty()) { legacy::PassManager PM; PM.add(createBlockExtractorPass(BBs, true)); PM.run(*M); } // In addition to deleting all other functions, we also want to spiff it // up a little bit. Do this now. 
legacy::PassManager Passes; if (!DeleteFn) Passes.add(createGlobalDCEPass()); // Delete unreachable globals Passes.add(createStripDeadDebugInfoPass()); // Remove dead debug info Passes.add(createStripDeadPrototypesPass()); // Remove dead func decls std::error_code EC; ToolOutputFile Out(OutputFilename, EC, sys::fs::F_None); if (EC) { errs() << EC.message() << '\n'; return 1; } if (OutputAssembly) Passes.add( createPrintModulePass(Out.os(), "", PreserveAssemblyUseListOrder)); else if (Force || !CheckBitcodeOutputToConsole(Out.os(), true)) Passes.add(createBitcodeWriterPass(Out.os(), PreserveBitcodeUseListOrder)); Passes.run(*M.get()); // Declare success. Out.keep(); return 0; }
int main(int argc, char **argv) { // Print a stack trace if we signal out. sys::PrintStackTraceOnErrorSignal(); PrettyStackTraceProgram X(argc, argv); LLVMContext &Context = getGlobalContext(); llvm_shutdown_obj Y; // Call llvm_shutdown() on exit. cl::ParseCommandLineOptions(argc, argv, "llvm extractor\n"); // Use lazy loading, since we only care about selected global values. SMDiagnostic Err; std::auto_ptr<Module> M; M.reset(getLazyIRFileModule(InputFilename, Err, Context)); if (M.get() == 0) { Err.print(argv[0], errs()); return 1; } // Use SetVector to avoid duplicates. SetVector<GlobalValue *> GVs; // Figure out which globals we should extract. for (size_t i = 0, e = ExtractGlobals.size(); i != e; ++i) { GlobalValue *GV = M.get()->getNamedGlobal(ExtractGlobals[i]); if (!GV) { errs() << argv[0] << ": program doesn't contain global named '" << ExtractGlobals[i] << "'!\n"; return 1; } GVs.insert(GV); } // Extract globals via regular expression matching. for (size_t i = 0, e = ExtractRegExpGlobals.size(); i != e; ++i) { std::string Error; Regex RegEx(ExtractRegExpGlobals[i]); if (!RegEx.isValid(Error)) { errs() << argv[0] << ": '" << ExtractRegExpGlobals[i] << "' " "invalid regex: " << Error; } bool match = false; for (Module::global_iterator GV = M.get()->global_begin(), E = M.get()->global_end(); GV != E; GV++) { if (RegEx.match(GV->getName())) { GVs.insert(&*GV); match = true; } } if (!match) { errs() << argv[0] << ": program doesn't contain global named '" << ExtractRegExpGlobals[i] << "'!\n"; return 1; } } // Figure out which functions we should extract. for (size_t i = 0, e = ExtractFuncs.size(); i != e; ++i) { GlobalValue *GV = M.get()->getFunction(ExtractFuncs[i]); if (!GV) { errs() << argv[0] << ": program doesn't contain function named '" << ExtractFuncs[i] << "'!\n"; return 1; } GVs.insert(GV); } // Extract functions via regular expression matching. 
for (size_t i = 0, e = ExtractRegExpFuncs.size(); i != e; ++i) { std::string Error; StringRef RegExStr = ExtractRegExpFuncs[i]; Regex RegEx(RegExStr); if (!RegEx.isValid(Error)) { errs() << argv[0] << ": '" << ExtractRegExpFuncs[i] << "' " "invalid regex: " << Error; } bool match = false; for (Module::iterator F = M.get()->begin(), E = M.get()->end(); F != E; F++) { if (RegEx.match(F->getName())) { GVs.insert(&*F); match = true; } } if (!match) { errs() << argv[0] << ": program doesn't contain global named '" << ExtractRegExpFuncs[i] << "'!\n"; return 1; } } // Materialize requisite global values. if (!DeleteFn) for (size_t i = 0, e = GVs.size(); i != e; ++i) { GlobalValue *GV = GVs[i]; if (GV->isMaterializable()) { std::string ErrInfo; if (GV->Materialize(&ErrInfo)) { errs() << argv[0] << ": error reading input: " << ErrInfo << "\n"; return 1; } } } else { // Deleting. Materialize every GV that's *not* in GVs. SmallPtrSet<GlobalValue *, 8> GVSet(GVs.begin(), GVs.end()); for (Module::global_iterator I = M->global_begin(), E = M->global_end(); I != E; ++I) { GlobalVariable *G = I; if (!GVSet.count(G) && G->isMaterializable()) { std::string ErrInfo; if (G->Materialize(&ErrInfo)) { errs() << argv[0] << ": error reading input: " << ErrInfo << "\n"; return 1; } } } for (Module::iterator I = M->begin(), E = M->end(); I != E; ++I) { Function *F = I; if (!GVSet.count(F) && F->isMaterializable()) { std::string ErrInfo; if (F->Materialize(&ErrInfo)) { errs() << argv[0] << ": error reading input: " << ErrInfo << "\n"; return 1; } } } } // In addition to deleting all other functions, we also want to spiff it // up a little bit. Do this now. 
PassManager Passes; Passes.add(new TargetData(M.get())); // Use correct TargetData std::vector<GlobalValue*> Gvs(GVs.begin(), GVs.end()); Passes.add(createGVExtractionPass(Gvs, DeleteFn)); if (!DeleteFn) Passes.add(createGlobalDCEPass()); // Delete unreachable globals Passes.add(createStripDeadDebugInfoPass()); // Remove dead debug info Passes.add(createStripDeadPrototypesPass()); // Remove dead func decls std::string ErrorInfo; tool_output_file Out(OutputFilename.c_str(), ErrorInfo, raw_fd_ostream::F_Binary); if (!ErrorInfo.empty()) { errs() << ErrorInfo << '\n'; return 1; } if (OutputAssembly) Passes.add(createPrintModulePass(&Out.os())); else if (Force || !CheckBitcodeOutputToConsole(Out.os(), true)) Passes.add(createBitcodeWriterPass(Out.os())); Passes.run(*M.get()); // Declare success. Out.keep(); return 0; }
// Forward-mode Hessian sparsity pattern, std::set<size_t> flavour.
//
// set_type : unused; only its type selects this overload.
// r        : vector holding a single set; r[0] lists the indices of the
//            independent variables seeded in the forward Jacobian pattern.
// s        : vector holding a single set; s[0] lists the indices (< Range())
//            of the dependent variables seeded in the reverse Jacobian
//            pattern.
// h        : output; resized to Domain() sets. h[i] receives j-1 for every
//            element j found in the Hessian pattern row of the i-th
//            independent variable.
void ADFun<Base,RecBase>::ForSparseHesCase(
    const std::set<size_t>&   set_type         ,
    const SetVector&          r                ,
    const SetVector&          s                ,
    SetVector&                h                )
{
    // used to identify the RecBase type in calls to sweeps
    RecBase not_used_rec_base;
    //
    size_t n = Domain();
    // m is only referenced inside assertions
# ifndef NDEBUG
    size_t m = Range();
# endif
    std::set<size_t>::const_iterator itr_1;
    //
    // check SetVector is Simple Vector class with sets for elements
    CheckSimpleVector<std::set<size_t>, SetVector>(
        local::one_element_std_set<size_t>(),
        local::two_element_std_set<size_t>()
    );
    CPPAD_ASSERT_KNOWN(
        r.size() == 1,
        "ForSparseHes: size of r is not equal to one."
    );
    CPPAD_ASSERT_KNOWN(
        s.size() == 1,
        "ForSparseHes: size of s is not equal to one."
    );
    //
    // sparsity pattern corresponding to r
    local::sparse_list for_jac_pattern;
    for_jac_pattern.resize(num_var_tape_, n + 1);
    itr_1 = r[0].begin();
    while( itr_1 != r[0].end() )
    {   size_t i = *itr_1++;
        CPPAD_ASSERT_UNKNOWN( ind_taddr_[i] < n + 1 );
        // ind_taddr_[i] is operator taddr for i-th independent variable
        CPPAD_ASSERT_UNKNOWN( play_.GetOp( ind_taddr_[i] ) == local::InvOp );
        //
        // Use add_element when only one element per set is added.
        for_jac_pattern.add_element( ind_taddr_[i], ind_taddr_[i] );
    }
    // compute forward Jacobian sparsity pattern
    bool dependency = false;
    local::sweep::for_jac<addr_t>(
        &play_,
        dependency,
        n,
        num_var_tape_,
        for_jac_pattern,
        not_used_rec_base
    );
    // sparsity pattern corresponding to s
    local::sparse_list rev_jac_pattern;
    rev_jac_pattern.resize(num_var_tape_, 1);
    itr_1 = s[0].begin();
    while( itr_1 != s[0].end() )
    {   size_t i = *itr_1++;
        CPPAD_ASSERT_KNOWN(
            i < m,
            "ForSparseHes: an element of the set s[0] has value "
            "greater than or equal m"
        );
        CPPAD_ASSERT_UNKNOWN( dep_taddr_[i] < num_var_tape_ );
        //
        // Use add_element when only one element per set is added.
        rev_jac_pattern.add_element( dep_taddr_[i], 0);
    }
    //
    // compute reverse sparsity pattern for dependency analysis
    // (note that we only want non-zero derivatives, not true dependency)
    local::sweep::rev_jac<addr_t>(
        &play_,
        dependency,
        n,
        num_var_tape_,
        rev_jac_pattern,
        not_used_rec_base
    );
    //
    // vector of sets that will hold the forward Hessian values
    local::sparse_list for_hes_pattern;
    for_hes_pattern.resize(n+1, n+1);
    //
    // compute the Hessian sparsity patterns
    local::sweep::for_hes<addr_t>(
        &play_,
        n,
        num_var_tape_,
        for_jac_pattern,
        rev_jac_pattern,
        for_hes_pattern,
        not_used_rec_base
    );
    // return values corresponding to independent variables
    h.resize(n);
    CPPAD_ASSERT_UNKNOWN( for_hes_pattern.end() == n+1 );
    for(size_t i = 0; i < n; i++)
    {   CPPAD_ASSERT_UNKNOWN( ind_taddr_[i] == i + 1 );
        CPPAD_ASSERT_UNKNOWN( play_.GetOp( ind_taddr_[i] ) == local::InvOp );

        // extract the result from for_hes_pattern; pattern elements are
        // shifted by one relative to the indices stored in h
        local::sparse_list::const_iterator itr_2(for_hes_pattern, ind_taddr_[i] );
        size_t j = *itr_2;
        while( j < for_hes_pattern.end() )
        {   CPPAD_ASSERT_UNKNOWN( 0 < j );
            h[i].insert(j-1);
            j = *(++itr_2);
        }
    }
}
// Forward-mode Hessian sparsity pattern, bool flavour.
//
// set_type : unused; only its type selects this overload.
// r        : Domain() flags; r[i] true seeds the i-th independent variable
//            in the forward Jacobian pattern.
// s        : Range() flags; s[i] true seeds the i-th dependent variable in
//            the reverse Jacobian pattern.
// h        : output; resized to Domain()*Domain() flags, all false except
//            h[i * Domain() + (j-1)] for every element j found in the Hessian
//            pattern row of the i-th independent variable.
void ADFun<Base,RecBase>::ForSparseHesCase(
    bool              set_type         ,
    const SetVector&  r                ,
    const SetVector&  s                ,
    SetVector&        h                )
{
    // object whose only purpose is to carry the RecBase type into the sweeps
    RecBase rec_base_tag;
    //
    size_t n_ind = Domain();
    size_t n_dep = Range();
    //
    // SetVector must be a simple vector class with bool elements
    CheckSimpleVector<bool, SetVector>();
    //
    CPPAD_ASSERT_KNOWN(
        size_t(r.size()) == n_ind,
        "ForSparseHes: size of r is not equal to\n"
        "domain dimension for ADFun object."
    );
    CPPAD_ASSERT_KNOWN(
        size_t(s.size()) == n_dep,
        "ForSparseHes: size of s is not equal to\n"
        "range dimension for ADFun object."
    );
    //
    // forward Jacobian pattern seeded from r
    local::sparse_pack jac_forward;
    jac_forward.resize(num_var_tape_, n_ind + 1);
    for(size_t k = 0; k < n_ind; k++)
    {   CPPAD_ASSERT_UNKNOWN( ind_taddr_[k] < n_ind + 1 );
        // ind_taddr_[k] is the tape address of the k-th independent variable
        CPPAD_ASSERT_UNKNOWN( play_.GetOp( ind_taddr_[k] ) == local::InvOp );
        //
        // at most one element goes into each set, so add_element suffices
        if( r[k] )
            jac_forward.add_element( ind_taddr_[k], ind_taddr_[k] );
    }
    // propagate the forward Jacobian sparsity pattern over the tape
    bool dependency = false;
    local::sweep::for_jac<addr_t>(
        &play_,
        dependency,
        n_ind,
        num_var_tape_,
        jac_forward,
        rec_base_tag
    );
    // reverse Jacobian pattern seeded from s
    local::sparse_pack jac_reverse;
    jac_reverse.resize(num_var_tape_, 1);
    for(size_t k = 0; k < n_dep; k++)
    {   CPPAD_ASSERT_UNKNOWN( dep_taddr_[k] < num_var_tape_ );
        //
        // at most one element goes into each set, so add_element suffices
        if( s[k] )
            jac_reverse.add_element( dep_taddr_[k], 0);
    }
    // propagate the reverse pattern; with dependency false this tracks
    // non-zero derivatives rather than true dependency
    local::sweep::rev_jac<addr_t>(
        &play_,
        dependency,
        n_ind,
        num_var_tape_,
        jac_reverse,
        rec_base_tag
    );
    // sets that receive the forward Hessian pattern
    local::sparse_pack hes_forward;
    hes_forward.resize(n_ind + 1, n_ind + 1);
    //
    // compute the Hessian sparsity pattern
    local::sweep::for_hes<addr_t>(
        &play_,
        n_ind,
        num_var_tape_,
        jac_forward,
        jac_reverse,
        hes_forward,
        rec_base_tag
    );
    // start from an all-false result of size n_ind * n_ind
    h.resize(n_ind * n_ind);
    for(size_t k = 0; k < n_ind * n_ind; k++)
        h[k] = false;
    //
    // transfer the pattern row of each independent variable into h
    CPPAD_ASSERT_UNKNOWN( hes_forward.end() == n_ind + 1 );
    for(size_t row = 0; row < n_ind; row++)
    {   // ind_taddr_[row] is the tape address of the row-th independent variable
        CPPAD_ASSERT_UNKNOWN( ind_taddr_[row] == row + 1 );
        CPPAD_ASSERT_UNKNOWN( play_.GetOp( ind_taddr_[row] ) == local::InvOp );

        // walk the elements of this row; a value >= end() terminates the walk
        local::sparse_pack::const_iterator element(hes_forward, ind_taddr_[row] );
        for(size_t col = *element; col < hes_forward.end(); col = *(++element) )
        {   CPPAD_ASSERT_UNKNOWN( 0 < col )
            h[ row * n_ind + (col-1) ] = true;
        }
    }
}
void ClastStmtCodeGen::codegenForGPGPU(const clast_for *F) { BasicBlock::iterator LoopBody; SetVector<Value *> Values; SetVector<Value *> IVS; std::vector<int> NumIterations; PTXGenerator::ValueToValueMapTy VMap; assert(!GPUTriple.empty() && "Target triple should be set properly for GPGPU code generation."); PTXGenerator PTXGen(Builder, P, GPUTriple); // Get original IVS and ScopStmt unsigned TiledLoopDepth, NonPLoopDepth; const clast_stmt *InnerStmt = getScheduleInfo(F, NumIterations, TiledLoopDepth, NonPLoopDepth); const clast_stmt *TmpStmt; const clast_user_stmt *U; const clast_for *InnerFor; if (CLAST_STMT_IS_A(InnerStmt, stmt_for)) { InnerFor = (const clast_for *)InnerStmt; TmpStmt = InnerFor->body; } else TmpStmt = InnerStmt; U = (const clast_user_stmt *)TmpStmt; ScopStmt *Statement = (ScopStmt *)U->statement->usr; for (unsigned i = 0; i < Statement->getNumIterators() - NonPLoopDepth; i++) { const Value *IV = Statement->getInductionVariableForDimension(i); IVS.insert(const_cast<Value *>(IV)); } unsigned OutBytes; Values = getGPUValues(OutBytes); PTXGen.setOutputBytes(OutBytes); PTXGen.startGeneration(Values, IVS, VMap, &LoopBody); BasicBlock::iterator AfterLoop = Builder.GetInsertPoint(); Builder.SetInsertPoint(LoopBody); BasicBlock *AfterBB = 0; if (NonPLoopDepth) { Value *LowerBound, *UpperBound, *IV, *Stride; Type *IntPtrTy = getIntPtrTy(); LowerBound = ExpGen.codegen(InnerFor->LB, IntPtrTy); UpperBound = ExpGen.codegen(InnerFor->UB, IntPtrTy); Stride = Builder.getInt(APInt_from_MPZ(InnerFor->stride)); IV = createLoop(LowerBound, UpperBound, Stride, Builder, P, AfterBB, CmpInst::ICMP_SLE); const Value *OldIV_ = Statement->getInductionVariableForDimension(2); Value *OldIV = const_cast<Value *>(OldIV_); VMap.insert(std::make_pair<Value *, Value *>(OldIV, IV)); } updateWithValueMap(VMap); BlockGenerator::generate(Builder, *Statement, ValueMap, P); if (AfterBB) Builder.SetInsertPoint(AfterBB->begin()); // FIXME: The replacement of the host base address with 
the parameter of ptx // subfunction should have been done by updateWithValueMap. We use the // following codes to avoid affecting other parts of Polly. This should be // fixed later. Function *FN = Builder.GetInsertBlock()->getParent(); for (unsigned j = 0; j < Values.size(); j++) { Value *baseAddr = Values[j]; for (Function::iterator B = FN->begin(); B != FN->end(); ++B) { for (BasicBlock::iterator I = B->begin(); I != B->end(); ++I) I->replaceUsesOfWith(baseAddr, ValueMap[baseAddr]); } } Builder.SetInsertPoint(AfterLoop); PTXGen.setLaunchingParameters(NumIterations[0], NumIterations[1], NumIterations[2], NumIterations[3]); PTXGen.finishGeneration(FN); }
// // RegisterInfoEmitter::run - Main register file description emitter. // void RegisterInfoEmitter::run(raw_ostream &OS) { CodeGenTarget Target(Records); CodeGenRegBank &RegBank = Target.getRegBank(); RegBank.computeDerivedInfo(); std::map<const CodeGenRegister*, CodeGenRegister::Set> Overlaps; RegBank.computeOverlaps(Overlaps); EmitSourceFileHeader("Register Information Source Fragment", OS); OS << "namespace llvm {\n\n"; // Start out by emitting each of the register classes. const std::vector<CodeGenRegisterClass> &RegisterClasses = Target.getRegisterClasses(); // Collect all registers belonging to any allocatable class. std::set<Record*> AllocatableRegs; // Loop over all of the register classes... emitting each one. OS << "namespace { // Register classes...\n"; // Emit the register enum value arrays for each RegisterClass for (unsigned rc = 0, e = RegisterClasses.size(); rc != e; ++rc) { const CodeGenRegisterClass &RC = RegisterClasses[rc]; ArrayRef<Record*> Order = RC.getOrder(); // Collect allocatable registers. if (RC.Allocatable) AllocatableRegs.insert(Order.begin(), Order.end()); // Give the register class a legal C name if it's anonymous. std::string Name = RC.getName(); // Emit the register list now. OS << " // " << Name << " Register Class...\n" << " static const unsigned " << Name << "[] = {\n "; for (unsigned i = 0, e = Order.size(); i != e; ++i) { Record *Reg = Order[i]; OS << getQualifiedName(Reg) << ", "; } OS << "\n };\n\n"; } // Emit the ValueType arrays for each RegisterClass for (unsigned rc = 0, e = RegisterClasses.size(); rc != e; ++rc) { const CodeGenRegisterClass &RC = RegisterClasses[rc]; // Give the register class a legal C name if it's anonymous. std::string Name = RC.getName() + "VTs"; // Emit the register list now. 
OS << " // " << Name << " Register Class Value Types...\n" << " static const EVT " << Name << "[] = {\n "; for (unsigned i = 0, e = RC.VTs.size(); i != e; ++i) OS << getEnumName(RC.VTs[i]) << ", "; OS << "MVT::Other\n };\n\n"; } OS << "} // end anonymous namespace\n\n"; // Now that all of the structs have been emitted, emit the instances. if (!RegisterClasses.empty()) { OS << "namespace " << RegisterClasses[0].Namespace << " { // Register class instances\n"; for (unsigned i = 0, e = RegisterClasses.size(); i != e; ++i) OS << " " << RegisterClasses[i].getName() << "Class\t" << RegisterClasses[i].getName() << "RegClass;\n"; std::map<unsigned, std::set<unsigned> > SuperClassMap; std::map<unsigned, std::set<unsigned> > SuperRegClassMap; OS << "\n"; unsigned NumSubRegIndices = RegBank.getSubRegIndices().size(); if (NumSubRegIndices) { // Emit the sub-register classes for each RegisterClass for (unsigned rc = 0, e = RegisterClasses.size(); rc != e; ++rc) { const CodeGenRegisterClass &RC = RegisterClasses[rc]; std::vector<Record*> SRC(NumSubRegIndices); for (DenseMap<Record*,Record*>::const_iterator i = RC.SubRegClasses.begin(), e = RC.SubRegClasses.end(); i != e; ++i) { // Build SRC array. unsigned idx = RegBank.getSubRegIndexNo(i->first); SRC.at(idx-1) = i->second; // Find the register class number of i->second for SuperRegClassMap. for (unsigned rc2 = 0, e2 = RegisterClasses.size(); rc2 != e2; ++rc2) { const CodeGenRegisterClass &RC2 = RegisterClasses[rc2]; if (RC2.TheDef == i->second) { SuperRegClassMap[rc2].insert(rc); break; } } } // Give the register class a legal C name if it's anonymous. 
std::string Name = RC.TheDef->getName(); OS << " // " << Name << " Sub-register Classes...\n" << " static const TargetRegisterClass* const " << Name << "SubRegClasses[] = {\n "; for (unsigned idx = 0; idx != NumSubRegIndices; ++idx) { if (idx) OS << ", "; if (SRC[idx]) OS << "&" << getQualifiedName(SRC[idx]) << "RegClass"; else OS << "0"; } OS << "\n };\n\n"; } // Emit the super-register classes for each RegisterClass for (unsigned rc = 0, e = RegisterClasses.size(); rc != e; ++rc) { const CodeGenRegisterClass &RC = RegisterClasses[rc]; // Give the register class a legal C name if it's anonymous. std::string Name = RC.TheDef->getName(); OS << " // " << Name << " Super-register Classes...\n" << " static const TargetRegisterClass* const " << Name << "SuperRegClasses[] = {\n "; bool Empty = true; std::map<unsigned, std::set<unsigned> >::iterator I = SuperRegClassMap.find(rc); if (I != SuperRegClassMap.end()) { for (std::set<unsigned>::iterator II = I->second.begin(), EE = I->second.end(); II != EE; ++II) { const CodeGenRegisterClass &RC2 = RegisterClasses[*II]; if (!Empty) OS << ", "; OS << "&" << getQualifiedName(RC2.TheDef) << "RegClass"; Empty = false; } } OS << (!Empty ? ", " : "") << "NULL"; OS << "\n };\n\n"; } } else { // No subregindices in this target OS << " static const TargetRegisterClass* const " << "NullRegClasses[] = { NULL };\n\n"; } // Emit the sub-classes array for each RegisterClass for (unsigned rc = 0, e = RegisterClasses.size(); rc != e; ++rc) { const CodeGenRegisterClass &RC = RegisterClasses[rc]; // Give the register class a legal C name if it's anonymous. 
std::string Name = RC.TheDef->getName(); OS << " // " << Name << " Register Class sub-classes...\n" << " static const TargetRegisterClass* const " << Name << "Subclasses[] = {\n "; bool Empty = true; for (unsigned rc2 = 0, e2 = RegisterClasses.size(); rc2 != e2; ++rc2) { const CodeGenRegisterClass &RC2 = RegisterClasses[rc2]; // Sub-classes are used to determine if a virtual register can be used // as an instruction operand, or if it must be copied first. if (rc == rc2 || !RC.hasSubClass(&RC2)) continue; if (!Empty) OS << ", "; OS << "&" << getQualifiedName(RC2.TheDef) << "RegClass"; Empty = false; std::map<unsigned, std::set<unsigned> >::iterator SCMI = SuperClassMap.find(rc2); if (SCMI == SuperClassMap.end()) { SuperClassMap.insert(std::make_pair(rc2, std::set<unsigned>())); SCMI = SuperClassMap.find(rc2); } SCMI->second.insert(rc); } OS << (!Empty ? ", " : "") << "NULL"; OS << "\n };\n\n"; } for (unsigned rc = 0, e = RegisterClasses.size(); rc != e; ++rc) { const CodeGenRegisterClass &RC = RegisterClasses[rc]; // Give the register class a legal C name if it's anonymous. std::string Name = RC.TheDef->getName(); OS << " // " << Name << " Register Class super-classes...\n" << " static const TargetRegisterClass* const " << Name << "Superclasses[] = {\n "; bool Empty = true; std::map<unsigned, std::set<unsigned> >::iterator I = SuperClassMap.find(rc); if (I != SuperClassMap.end()) { for (std::set<unsigned>::iterator II = I->second.begin(), EE = I->second.end(); II != EE; ++II) { const CodeGenRegisterClass &RC2 = RegisterClasses[*II]; if (!Empty) OS << ", "; OS << "&" << getQualifiedName(RC2.TheDef) << "RegClass"; Empty = false; } } OS << (!Empty ? ", " : "") << "NULL"; OS << "\n };\n\n"; } // Emit methods. 
for (unsigned i = 0, e = RegisterClasses.size(); i != e; ++i) { const CodeGenRegisterClass &RC = RegisterClasses[i]; OS << RC.getName() << "Class::" << RC.getName() << "Class() : TargetRegisterClass(" << RC.getName() + "RegClassID" << ", " << '\"' << RC.getName() << "\", " << RC.getName() + "VTs" << ", " << RC.getName() + "Subclasses" << ", " << RC.getName() + "Superclasses" << ", " << (NumSubRegIndices ? RC.getName() + "Sub" : std::string("Null")) << "RegClasses, " << (NumSubRegIndices ? RC.getName() + "Super" : std::string("Null")) << "RegClasses, " << RC.SpillSize/8 << ", " << RC.SpillAlignment/8 << ", " << RC.CopyCost << ", " << RC.Allocatable << ", " << RC.getName() << ", " << RC.getName() << " + " << RC.getOrder().size() << ") {}\n"; if (!RC.AltOrderSelect.empty()) { OS << "\nstatic inline unsigned " << RC.getName() << "AltOrderSelect(const MachineFunction &MF) {" << RC.AltOrderSelect << "}\n\nArrayRef<unsigned> " << RC.getName() << "Class::" << "getRawAllocationOrder(const MachineFunction &MF) const {\n"; for (unsigned oi = 1 , oe = RC.getNumOrders(); oi != oe; ++oi) { ArrayRef<Record*> Elems = RC.getOrder(oi); OS << " static const unsigned AltOrder" << oi << "[] = {"; for (unsigned elem = 0; elem != Elems.size(); ++elem) OS << (elem ? 
", " : " ") << getQualifiedName(Elems[elem]); OS << " };\n"; } OS << " static const ArrayRef<unsigned> Order[] = {\n" << " ArrayRef<unsigned>(" << RC.getName(); for (unsigned oi = 1, oe = RC.getNumOrders(); oi != oe; ++oi) OS << "),\n ArrayRef<unsigned>(AltOrder" << oi; OS << ")\n };\n const unsigned Select = " << RC.getName() << "AltOrderSelect(MF);\n assert(Select < " << RC.getNumOrders() << ");\n return Order[Select];\n}\n"; } } OS << "}\n"; } OS << "\nnamespace {\n"; OS << " const TargetRegisterClass* const RegisterClasses[] = {\n"; for (unsigned i = 0, e = RegisterClasses.size(); i != e; ++i) OS << " &" << getQualifiedName(RegisterClasses[i].TheDef) << "RegClass,\n"; OS << " };\n"; typedef std::map<Record*, std::vector<int64_t>, LessRecord> DwarfRegNumsMapTy; DwarfRegNumsMapTy DwarfRegNums; const std::vector<CodeGenRegister*> &Regs = RegBank.getRegisters(); // Emit an overlap list for all registers. for (unsigned i = 0, e = Regs.size(); i != e; ++i) { const CodeGenRegister *Reg = Regs[i]; const CodeGenRegister::Set &O = Overlaps[Reg]; // Move Reg to the front so TRI::getAliasSet can share the list. OS << " const unsigned " << Reg->getName() << "_Overlaps[] = { " << getQualifiedName(Reg->TheDef) << ", "; for (CodeGenRegister::Set::const_iterator I = O.begin(), E = O.end(); I != E; ++I) if (*I != Reg) OS << getQualifiedName((*I)->TheDef) << ", "; OS << "0 };\n"; } // Emit the empty sub-registers list OS << " const unsigned Empty_SubRegsSet[] = { 0 };\n"; // Loop over all of the registers which have sub-registers, emitting the // sub-registers list to memory. for (unsigned i = 0, e = Regs.size(); i != e; ++i) { const CodeGenRegister &Reg = *Regs[i]; if (Reg.getSubRegs().empty()) continue; // getSubRegs() orders by SubRegIndex. We want a topological order. 
SetVector<CodeGenRegister*> SR; Reg.addSubRegsPreOrder(SR); OS << " const unsigned " << Reg.getName() << "_SubRegsSet[] = { "; for (unsigned j = 0, je = SR.size(); j != je; ++j) OS << getQualifiedName(SR[j]->TheDef) << ", "; OS << "0 };\n"; } // Emit the empty super-registers list OS << " const unsigned Empty_SuperRegsSet[] = { 0 };\n"; // Loop over all of the registers which have super-registers, emitting the // super-registers list to memory. for (unsigned i = 0, e = Regs.size(); i != e; ++i) { const CodeGenRegister &Reg = *Regs[i]; const CodeGenRegister::SuperRegList &SR = Reg.getSuperRegs(); if (SR.empty()) continue; OS << " const unsigned " << Reg.getName() << "_SuperRegsSet[] = { "; for (unsigned j = 0, je = SR.size(); j != je; ++j) OS << getQualifiedName(SR[j]->TheDef) << ", "; OS << "0 };\n"; } OS<<"\n const TargetRegisterDesc RegisterDescriptors[] = { // Descriptors\n"; OS << " { \"NOREG\",\t0,\t0,\t0,\t0,\t0 },\n"; // Now that register alias and sub-registers sets have been emitted, emit the // register descriptors now. for (unsigned i = 0, e = Regs.size(); i != e; ++i) { const CodeGenRegister &Reg = *Regs[i]; OS << " { \""; OS << Reg.getName() << "\",\t" << Reg.getName() << "_Overlaps,\t"; if (!Reg.getSubRegs().empty()) OS << Reg.getName() << "_SubRegsSet,\t"; else OS << "Empty_SubRegsSet,\t"; if (!Reg.getSuperRegs().empty()) OS << Reg.getName() << "_SuperRegsSet,\t"; else OS << "Empty_SuperRegsSet,\t"; OS << Reg.CostPerUse << ",\t" << int(AllocatableRegs.count(Reg.TheDef)) << " },\n"; } OS << " };\n"; // End of register descriptors... // Calculate the mapping of subregister+index pairs to physical registers. // This will also create further anonymous indexes. 
unsigned NamedIndices = RegBank.getNumNamedIndices(); // Emit SubRegIndex names, skipping 0 const std::vector<Record*> &SubRegIndices = RegBank.getSubRegIndices(); OS << "\n const char *const SubRegIndexTable[] = { \""; for (unsigned i = 0, e = SubRegIndices.size(); i != e; ++i) { OS << SubRegIndices[i]->getName(); if (i+1 != e) OS << "\", \""; } OS << "\" };\n\n"; // Emit names of the anonymus subreg indexes. if (SubRegIndices.size() > NamedIndices) { OS << " enum {"; for (unsigned i = NamedIndices, e = SubRegIndices.size(); i != e; ++i) { OS << "\n " << SubRegIndices[i]->getName() << " = " << i+1; if (i+1 != e) OS << ','; } OS << "\n };\n\n"; } OS << "}\n\n"; // End of anonymous namespace... std::string ClassName = Target.getName() + "GenRegisterInfo"; // Emit the subregister + index mapping function based on the information // calculated above. OS << "unsigned " << ClassName << "::getSubReg(unsigned RegNo, unsigned Index) const {\n" << " switch (RegNo) {\n" << " default:\n return 0;\n"; for (unsigned i = 0, e = Regs.size(); i != e; ++i) { const CodeGenRegister::SubRegMap &SRM = Regs[i]->getSubRegs(); if (SRM.empty()) continue; OS << " case " << getQualifiedName(Regs[i]->TheDef) << ":\n"; OS << " switch (Index) {\n"; OS << " default: return 0;\n"; for (CodeGenRegister::SubRegMap::const_iterator ii = SRM.begin(), ie = SRM.end(); ii != ie; ++ii) OS << " case " << getQualifiedName(ii->first) << ": return " << getQualifiedName(ii->second->TheDef) << ";\n"; OS << " };\n" << " break;\n"; } OS << " };\n"; OS << " return 0;\n"; OS << "}\n\n"; OS << "unsigned " << ClassName << "::getSubRegIndex(unsigned RegNo, unsigned SubRegNo) const {\n" << " switch (RegNo) {\n" << " default:\n return 0;\n"; for (unsigned i = 0, e = Regs.size(); i != e; ++i) { const CodeGenRegister::SubRegMap &SRM = Regs[i]->getSubRegs(); if (SRM.empty()) continue; OS << " case " << getQualifiedName(Regs[i]->TheDef) << ":\n"; for (CodeGenRegister::SubRegMap::const_iterator ii = SRM.begin(), ie = 
SRM.end(); ii != ie; ++ii) OS << " if (SubRegNo == " << getQualifiedName(ii->second->TheDef) << ") return " << getQualifiedName(ii->first) << ";\n"; OS << " return 0;\n"; } OS << " };\n"; OS << " return 0;\n"; OS << "}\n\n"; // Emit composeSubRegIndices OS << "unsigned " << ClassName << "::composeSubRegIndices(unsigned IdxA, unsigned IdxB) const {\n" << " switch (IdxA) {\n" << " default:\n return IdxB;\n"; for (unsigned i = 0, e = SubRegIndices.size(); i != e; ++i) { bool Open = false; for (unsigned j = 0; j != e; ++j) { if (Record *Comp = RegBank.getCompositeSubRegIndex(SubRegIndices[i], SubRegIndices[j])) { if (!Open) { OS << " case " << getQualifiedName(SubRegIndices[i]) << ": switch(IdxB) {\n default: return IdxB;\n"; Open = true; } OS << " case " << getQualifiedName(SubRegIndices[j]) << ": return " << getQualifiedName(Comp) << ";\n"; } } if (Open) OS << " }\n"; } OS << " }\n}\n\n"; // Emit the constructor of the class... OS << ClassName << "::" << ClassName << "(int CallFrameSetupOpcode, int CallFrameDestroyOpcode)\n" << " : TargetRegisterInfo(RegisterDescriptors, " << Regs.size()+1 << ", RegisterClasses, RegisterClasses+" << RegisterClasses.size() <<",\n" << " SubRegIndexTable,\n" << " CallFrameSetupOpcode, CallFrameDestroyOpcode) {\n" << "}\n\n"; // Collect all information about dwarf register numbers // First, just pull all provided information to the map unsigned maxLength = 0; for (unsigned i = 0, e = Regs.size(); i != e; ++i) { Record *Reg = Regs[i]->TheDef; std::vector<int64_t> RegNums = Reg->getValueAsListOfInts("DwarfNumbers"); maxLength = std::max((size_t)maxLength, RegNums.size()); if (DwarfRegNums.count(Reg)) errs() << "Warning: DWARF numbers for register " << getQualifiedName(Reg) << "specified multiple times\n"; DwarfRegNums[Reg] = RegNums; } // Now we know maximal length of number list. 
Append -1's, where needed for (DwarfRegNumsMapTy::iterator I = DwarfRegNums.begin(), E = DwarfRegNums.end(); I != E; ++I) for (unsigned i = I->second.size(), e = maxLength; i != e; ++i) I->second.push_back(-1); // Emit reverse information about the dwarf register numbers. OS << "int " << ClassName << "::getLLVMRegNumFull(unsigned DwarfRegNum, " << "unsigned Flavour) const {\n" << " switch (Flavour) {\n" << " default:\n" << " assert(0 && \"Unknown DWARF flavour\");\n" << " return -1;\n"; for (unsigned i = 0, e = maxLength; i != e; ++i) { OS << " case " << i << ":\n" << " switch (DwarfRegNum) {\n" << " default:\n" << " assert(0 && \"Invalid DwarfRegNum\");\n" << " return -1;\n"; for (DwarfRegNumsMapTy::iterator I = DwarfRegNums.begin(), E = DwarfRegNums.end(); I != E; ++I) { int DwarfRegNo = I->second[i]; if (DwarfRegNo >= 0) OS << " case " << DwarfRegNo << ":\n" << " return " << getQualifiedName(I->first) << ";\n"; } OS << " };\n"; } OS << " };\n}\n\n"; for (unsigned i = 0, e = Regs.size(); i != e; ++i) { Record *Reg = Regs[i]->TheDef; const RecordVal *V = Reg->getValue("DwarfAlias"); if (!V || !V->getValue()) continue; DefInit *DI = dynamic_cast<DefInit*>(V->getValue()); Record *Alias = DI->getDef(); DwarfRegNums[Reg] = DwarfRegNums[Alias]; } // Emit information about the dwarf register numbers. OS << "int " << ClassName << "::getDwarfRegNumFull(unsigned RegNum, " << "unsigned Flavour) const {\n" << " switch (Flavour) {\n" << " default:\n" << " assert(0 && \"Unknown DWARF flavour\");\n" << " return -1;\n"; for (unsigned i = 0, e = maxLength; i != e; ++i) { OS << " case " << i << ":\n" << " switch (RegNum) {\n" << " default:\n" << " assert(0 && \"Invalid RegNum\");\n" << " return -1;\n"; // Sort by name to get a stable order. 
for (DwarfRegNumsMapTy::iterator I = DwarfRegNums.begin(), E = DwarfRegNums.end(); I != E; ++I) { int RegNo = I->second[i]; OS << " case " << getQualifiedName(I->first) << ":\n" << " return " << RegNo << ";\n"; } OS << " };\n"; } OS << " };\n}\n\n"; OS << "} // End llvm namespace \n"; }
bool AAEval::runOnFunction(Function &F) { AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); SetVector<Value *> Pointers; SetVector<CallSite> CallSites; SetVector<Value *> Loads; SetVector<Value *> Stores; for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) if (I->getType()->isPointerTy()) // Add all pointer arguments. Pointers.insert(I); for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) { if (I->getType()->isPointerTy()) // Add all pointer instructions. Pointers.insert(&*I); if (EvalTBAA && isa<LoadInst>(&*I)) Loads.insert(&*I); if (EvalTBAA && isa<StoreInst>(&*I)) Stores.insert(&*I); Instruction &Inst = *I; if (CallSite CS = cast<Value>(&Inst)) { Value *Callee = CS.getCalledValue(); // Skip actual functions for direct function calls. if (!isa<Function>(Callee) && isInterestingPointer(Callee)) Pointers.insert(Callee); // Consider formals. for (CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end(); AI != AE; ++AI) if (isInterestingPointer(*AI)) Pointers.insert(*AI); CallSites.insert(CS); } else { // Consider all operands. 
for (Instruction::op_iterator OI = Inst.op_begin(), OE = Inst.op_end(); OI != OE; ++OI) if (isInterestingPointer(*OI)) Pointers.insert(*OI); } } if (PrintNoAlias || PrintMayAlias || PrintPartialAlias || PrintMustAlias || PrintNoModRef || PrintMod || PrintRef || PrintModRef) errs() << "Function: " << F.getName() << ": " << Pointers.size() << " pointers, " << CallSites.size() << " call sites\n"; // iterate over the worklist, and run the full (n^2)/2 disambiguations for (SetVector<Value *>::iterator I1 = Pointers.begin(), E = Pointers.end(); I1 != E; ++I1) { uint64_t I1Size = AliasAnalysis::UnknownSize; Type *I1ElTy = cast<PointerType>((*I1)->getType())->getElementType(); if (I1ElTy->isSized()) I1Size = AA.getTypeStoreSize(I1ElTy); for (SetVector<Value *>::iterator I2 = Pointers.begin(); I2 != I1; ++I2) { uint64_t I2Size = AliasAnalysis::UnknownSize; Type *I2ElTy =cast<PointerType>((*I2)->getType())->getElementType(); if (I2ElTy->isSized()) I2Size = AA.getTypeStoreSize(I2ElTy); switch (AA.alias(*I1, I1Size, *I2, I2Size)) { case AliasAnalysis::NoAlias: PrintResults("NoAlias", PrintNoAlias, *I1, *I2, F.getParent()); ++NoAlias; break; case AliasAnalysis::MayAlias: PrintResults("MayAlias", PrintMayAlias, *I1, *I2, F.getParent()); ++MayAlias; break; case AliasAnalysis::PartialAlias: PrintResults("PartialAlias", PrintPartialAlias, *I1, *I2, F.getParent()); ++PartialAlias; break; case AliasAnalysis::MustAlias: PrintResults("MustAlias", PrintMustAlias, *I1, *I2, F.getParent()); ++MustAlias; break; } } } if (EvalTBAA) { // iterate over all pairs of load, store for (SetVector<Value *>::iterator I1 = Loads.begin(), E = Loads.end(); I1 != E; ++I1) { for (SetVector<Value *>::iterator I2 = Stores.begin(), E2 = Stores.end(); I2 != E2; ++I2) { switch (AA.alias(AA.getLocation(cast<LoadInst>(*I1)), AA.getLocation(cast<StoreInst>(*I2)))) { case AliasAnalysis::NoAlias: PrintLoadStoreResults("NoAlias", PrintNoAlias, *I1, *I2, F.getParent()); ++NoAlias; break; case AliasAnalysis::MayAlias: 
PrintLoadStoreResults("MayAlias", PrintMayAlias, *I1, *I2, F.getParent()); ++MayAlias; break; case AliasAnalysis::PartialAlias: PrintLoadStoreResults("PartialAlias", PrintPartialAlias, *I1, *I2, F.getParent()); ++PartialAlias; break; case AliasAnalysis::MustAlias: PrintLoadStoreResults("MustAlias", PrintMustAlias, *I1, *I2, F.getParent()); ++MustAlias; break; } } } // iterate over all pairs of store, store for (SetVector<Value *>::iterator I1 = Stores.begin(), E = Stores.end(); I1 != E; ++I1) { for (SetVector<Value *>::iterator I2 = Stores.begin(); I2 != I1; ++I2) { switch (AA.alias(AA.getLocation(cast<StoreInst>(*I1)), AA.getLocation(cast<StoreInst>(*I2)))) { case AliasAnalysis::NoAlias: PrintLoadStoreResults("NoAlias", PrintNoAlias, *I1, *I2, F.getParent()); ++NoAlias; break; case AliasAnalysis::MayAlias: PrintLoadStoreResults("MayAlias", PrintMayAlias, *I1, *I2, F.getParent()); ++MayAlias; break; case AliasAnalysis::PartialAlias: PrintLoadStoreResults("PartialAlias", PrintPartialAlias, *I1, *I2, F.getParent()); ++PartialAlias; break; case AliasAnalysis::MustAlias: PrintLoadStoreResults("MustAlias", PrintMustAlias, *I1, *I2, F.getParent()); ++MustAlias; break; } } } } // Mod/ref alias analysis: compare all pairs of calls and values for (SetVector<CallSite>::iterator C = CallSites.begin(), Ce = CallSites.end(); C != Ce; ++C) { Instruction *I = C->getInstruction(); for (SetVector<Value *>::iterator V = Pointers.begin(), Ve = Pointers.end(); V != Ve; ++V) { uint64_t Size = AliasAnalysis::UnknownSize; Type *ElTy = cast<PointerType>((*V)->getType())->getElementType(); if (ElTy->isSized()) Size = AA.getTypeStoreSize(ElTy); switch (AA.getModRefInfo(*C, *V, Size)) { case AliasAnalysis::NoModRef: PrintModRefResults("NoModRef", PrintNoModRef, I, *V, F.getParent()); ++NoModRef; break; case AliasAnalysis::Mod: PrintModRefResults("Just Mod", PrintMod, I, *V, F.getParent()); ++Mod; break; case AliasAnalysis::Ref: PrintModRefResults("Just Ref", PrintRef, I, *V, F.getParent()); 
++Ref; break; case AliasAnalysis::ModRef: PrintModRefResults("Both ModRef", PrintModRef, I, *V, F.getParent()); ++ModRef; break; } } } // Mod/ref alias analysis: compare all pairs of calls for (SetVector<CallSite>::iterator C = CallSites.begin(), Ce = CallSites.end(); C != Ce; ++C) { for (SetVector<CallSite>::iterator D = CallSites.begin(); D != Ce; ++D) { if (D == C) continue; switch (AA.getModRefInfo(*C, *D)) { case AliasAnalysis::NoModRef: PrintModRefResults("NoModRef", PrintNoModRef, *C, *D, F.getParent()); ++NoModRef; break; case AliasAnalysis::Mod: PrintModRefResults("Just Mod", PrintMod, *C, *D, F.getParent()); ++Mod; break; case AliasAnalysis::Ref: PrintModRefResults("Just Ref", PrintRef, *C, *D, F.getParent()); ++Ref; break; case AliasAnalysis::ModRef: PrintModRefResults("Both ModRef", PrintModRef, *C, *D, F.getParent()); ++ModRef; break; } } } return false; }
void AAEvaluator::runInternal(Function &F, AAResults &AA) { const DataLayout &DL = F.getParent()->getDataLayout(); ++FunctionCount; SetVector<Value *> Pointers; SmallSetVector<CallBase *, 16> Calls; SetVector<Value *> Loads; SetVector<Value *> Stores; for (auto &I : F.args()) if (I.getType()->isPointerTy()) // Add all pointer arguments. Pointers.insert(&I); for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) { if (I->getType()->isPointerTy()) // Add all pointer instructions. Pointers.insert(&*I); if (EvalAAMD && isa<LoadInst>(&*I)) Loads.insert(&*I); if (EvalAAMD && isa<StoreInst>(&*I)) Stores.insert(&*I); Instruction &Inst = *I; if (auto *Call = dyn_cast<CallBase>(&Inst)) { Value *Callee = Call->getCalledValue(); // Skip actual functions for direct function calls. if (!isa<Function>(Callee) && isInterestingPointer(Callee)) Pointers.insert(Callee); // Consider formals. for (Use &DataOp : Call->data_ops()) if (isInterestingPointer(DataOp)) Pointers.insert(DataOp); Calls.insert(Call); } else { // Consider all operands. 
for (Instruction::op_iterator OI = Inst.op_begin(), OE = Inst.op_end(); OI != OE; ++OI) if (isInterestingPointer(*OI)) Pointers.insert(*OI); } } if (PrintAll || PrintNoAlias || PrintMayAlias || PrintPartialAlias || PrintMustAlias || PrintNoModRef || PrintMod || PrintRef || PrintModRef) errs() << "Function: " << F.getName() << ": " << Pointers.size() << " pointers, " << Calls.size() << " call sites\n"; // iterate over the worklist, and run the full (n^2)/2 disambiguations for (SetVector<Value *>::iterator I1 = Pointers.begin(), E = Pointers.end(); I1 != E; ++I1) { auto I1Size = LocationSize::unknown(); Type *I1ElTy = cast<PointerType>((*I1)->getType())->getElementType(); if (I1ElTy->isSized()) I1Size = LocationSize::precise(DL.getTypeStoreSize(I1ElTy)); for (SetVector<Value *>::iterator I2 = Pointers.begin(); I2 != I1; ++I2) { auto I2Size = LocationSize::unknown(); Type *I2ElTy = cast<PointerType>((*I2)->getType())->getElementType(); if (I2ElTy->isSized()) I2Size = LocationSize::precise(DL.getTypeStoreSize(I2ElTy)); AliasResult AR = AA.alias(*I1, I1Size, *I2, I2Size); switch (AR) { case NoAlias: PrintResults(AR, PrintNoAlias, *I1, *I2, F.getParent()); ++NoAliasCount; break; case MayAlias: PrintResults(AR, PrintMayAlias, *I1, *I2, F.getParent()); ++MayAliasCount; break; case PartialAlias: PrintResults(AR, PrintPartialAlias, *I1, *I2, F.getParent()); ++PartialAliasCount; break; case MustAlias: PrintResults(AR, PrintMustAlias, *I1, *I2, F.getParent()); ++MustAliasCount; break; } } } if (EvalAAMD) { // iterate over all pairs of load, store for (Value *Load : Loads) { for (Value *Store : Stores) { AliasResult AR = AA.alias(MemoryLocation::get(cast<LoadInst>(Load)), MemoryLocation::get(cast<StoreInst>(Store))); switch (AR) { case NoAlias: PrintLoadStoreResults(AR, PrintNoAlias, Load, Store, F.getParent()); ++NoAliasCount; break; case MayAlias: PrintLoadStoreResults(AR, PrintMayAlias, Load, Store, F.getParent()); ++MayAliasCount; break; case PartialAlias: 
PrintLoadStoreResults(AR, PrintPartialAlias, Load, Store, F.getParent()); ++PartialAliasCount; break; case MustAlias: PrintLoadStoreResults(AR, PrintMustAlias, Load, Store, F.getParent()); ++MustAliasCount; break; } } } // iterate over all pairs of store, store for (SetVector<Value *>::iterator I1 = Stores.begin(), E = Stores.end(); I1 != E; ++I1) { for (SetVector<Value *>::iterator I2 = Stores.begin(); I2 != I1; ++I2) { AliasResult AR = AA.alias(MemoryLocation::get(cast<StoreInst>(*I1)), MemoryLocation::get(cast<StoreInst>(*I2))); switch (AR) { case NoAlias: PrintLoadStoreResults(AR, PrintNoAlias, *I1, *I2, F.getParent()); ++NoAliasCount; break; case MayAlias: PrintLoadStoreResults(AR, PrintMayAlias, *I1, *I2, F.getParent()); ++MayAliasCount; break; case PartialAlias: PrintLoadStoreResults(AR, PrintPartialAlias, *I1, *I2, F.getParent()); ++PartialAliasCount; break; case MustAlias: PrintLoadStoreResults(AR, PrintMustAlias, *I1, *I2, F.getParent()); ++MustAliasCount; break; } } } } // Mod/ref alias analysis: compare all pairs of calls and values for (CallBase *Call : Calls) { for (auto Pointer : Pointers) { auto Size = LocationSize::unknown(); Type *ElTy = cast<PointerType>(Pointer->getType())->getElementType(); if (ElTy->isSized()) Size = LocationSize::precise(DL.getTypeStoreSize(ElTy)); switch (AA.getModRefInfo(Call, Pointer, Size)) { case ModRefInfo::NoModRef: PrintModRefResults("NoModRef", PrintNoModRef, Call, Pointer, F.getParent()); ++NoModRefCount; break; case ModRefInfo::Mod: PrintModRefResults("Just Mod", PrintMod, Call, Pointer, F.getParent()); ++ModCount; break; case ModRefInfo::Ref: PrintModRefResults("Just Ref", PrintRef, Call, Pointer, F.getParent()); ++RefCount; break; case ModRefInfo::ModRef: PrintModRefResults("Both ModRef", PrintModRef, Call, Pointer, F.getParent()); ++ModRefCount; break; case ModRefInfo::Must: PrintModRefResults("Must", PrintMust, Call, Pointer, F.getParent()); ++MustCount; break; case ModRefInfo::MustMod: PrintModRefResults("Just 
Mod (MustAlias)", PrintMustMod, Call, Pointer, F.getParent()); ++MustModCount; break; case ModRefInfo::MustRef: PrintModRefResults("Just Ref (MustAlias)", PrintMustRef, Call, Pointer, F.getParent()); ++MustRefCount; break; case ModRefInfo::MustModRef: PrintModRefResults("Both ModRef (MustAlias)", PrintMustModRef, Call, Pointer, F.getParent()); ++MustModRefCount; break; } } } // Mod/ref alias analysis: compare all pairs of calls for (CallBase *CallA : Calls) { for (CallBase *CallB : Calls) { if (CallA == CallB) continue; switch (AA.getModRefInfo(CallA, CallB)) { case ModRefInfo::NoModRef: PrintModRefResults("NoModRef", PrintNoModRef, CallA, CallB, F.getParent()); ++NoModRefCount; break; case ModRefInfo::Mod: PrintModRefResults("Just Mod", PrintMod, CallA, CallB, F.getParent()); ++ModCount; break; case ModRefInfo::Ref: PrintModRefResults("Just Ref", PrintRef, CallA, CallB, F.getParent()); ++RefCount; break; case ModRefInfo::ModRef: PrintModRefResults("Both ModRef", PrintModRef, CallA, CallB, F.getParent()); ++ModRefCount; break; case ModRefInfo::Must: PrintModRefResults("Must", PrintMust, CallA, CallB, F.getParent()); ++MustCount; break; case ModRefInfo::MustMod: PrintModRefResults("Just Mod (MustAlias)", PrintMustMod, CallA, CallB, F.getParent()); ++MustModCount; break; case ModRefInfo::MustRef: PrintModRefResults("Just Ref (MustAlias)", PrintMustRef, CallA, CallB, F.getParent()); ++MustRefCount; break; case ModRefInfo::MustModRef: PrintModRefResults("Both ModRef (MustAlias)", PrintMustModRef, CallA, CallB, F.getParent()); ++MustModRefCount; break; } } } }
// // runMCDesc - Print out MC register descriptions. // void RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target, CodeGenRegBank &RegBank) { EmitSourceFileHeader("MC Register Information", OS); OS << "\n#ifdef GET_REGINFO_MC_DESC\n"; OS << "#undef GET_REGINFO_MC_DESC\n"; std::map<const CodeGenRegister*, CodeGenRegister::Set> Overlaps; RegBank.computeOverlaps(Overlaps); OS << "namespace llvm {\n\n"; const std::string &TargetName = Target.getName(); const std::vector<CodeGenRegister*> &Regs = RegBank.getRegisters(); OS << "extern const uint16_t " << TargetName << "RegOverlaps[] = {\n"; // Emit an overlap list for all registers. for (unsigned i = 0, e = Regs.size(); i != e; ++i) { const CodeGenRegister *Reg = Regs[i]; const CodeGenRegister::Set &O = Overlaps[Reg]; // Move Reg to the front so TRI::getAliasSet can share the list. OS << " /* " << Reg->getName() << "_Overlaps */ " << getQualifiedName(Reg->TheDef) << ", "; for (CodeGenRegister::Set::const_iterator I = O.begin(), E = O.end(); I != E; ++I) if (*I != Reg) OS << getQualifiedName((*I)->TheDef) << ", "; OS << "0,\n"; } OS << "};\n\n"; OS << "extern const uint16_t " << TargetName << "SubRegsSet[] = {\n"; // Emit the empty sub-registers list OS << " /* Empty_SubRegsSet */ 0,\n"; // Loop over all of the registers which have sub-registers, emitting the // sub-registers list to memory. for (unsigned i = 0, e = Regs.size(); i != e; ++i) { const CodeGenRegister &Reg = *Regs[i]; if (Reg.getSubRegs().empty()) continue; // getSubRegs() orders by SubRegIndex. We want a topological order. 
SetVector<CodeGenRegister*> SR; Reg.addSubRegsPreOrder(SR, RegBank); OS << " /* " << Reg.getName() << "_SubRegsSet */ "; for (unsigned j = 0, je = SR.size(); j != je; ++j) OS << getQualifiedName(SR[j]->TheDef) << ", "; OS << "0,\n"; } OS << "};\n\n"; OS << "extern const uint16_t " << TargetName << "SuperRegsSet[] = {\n"; // Emit the empty super-registers list OS << " /* Empty_SuperRegsSet */ 0,\n"; // Loop over all of the registers which have super-registers, emitting the // super-registers list to memory. for (unsigned i = 0, e = Regs.size(); i != e; ++i) { const CodeGenRegister &Reg = *Regs[i]; const CodeGenRegister::SuperRegList &SR = Reg.getSuperRegs(); if (SR.empty()) continue; OS << " /* " << Reg.getName() << "_SuperRegsSet */ "; for (unsigned j = 0, je = SR.size(); j != je; ++j) OS << getQualifiedName(SR[j]->TheDef) << ", "; OS << "0,\n"; } OS << "};\n\n"; OS << "extern const MCRegisterDesc " << TargetName << "RegDesc[] = { // Descriptors\n"; OS << " { \"NOREG\", 0, 0, 0 },\n"; // Now that register alias and sub-registers sets have been emitted, emit the // register descriptors now. 
unsigned OverlapsIndex = 0; unsigned SubRegIndex = 1; // skip 1 for empty set unsigned SuperRegIndex = 1; // skip 1 for empty set for (unsigned i = 0, e = Regs.size(); i != e; ++i) { const CodeGenRegister *Reg = Regs[i]; OS << " { \""; OS << Reg->getName() << "\", /* " << Reg->getName() << "_Overlaps */ " << OverlapsIndex << ", "; OverlapsIndex += Overlaps[Reg].size() + 1; if (!Reg->getSubRegs().empty()) { OS << "/* " << Reg->getName() << "_SubRegsSet */ " << SubRegIndex << ", "; // FIXME not very nice to recalculate this SetVector<CodeGenRegister*> SR; Reg->addSubRegsPreOrder(SR, RegBank); SubRegIndex += SR.size() + 1; } else OS << "/* Empty_SubRegsSet */ 0, "; if (!Reg->getSuperRegs().empty()) { OS << "/* " << Reg->getName() << "_SuperRegsSet */ " << SuperRegIndex; SuperRegIndex += Reg->getSuperRegs().size() + 1; } else OS << "/* Empty_SuperRegsSet */ 0"; OS << " },\n"; } OS << "};\n\n"; // End of register descriptors... ArrayRef<CodeGenRegisterClass*> RegisterClasses = RegBank.getRegClasses(); // Loop over all of the register classes... emitting each one. OS << "namespace { // Register classes...\n"; // Emit the register enum value arrays for each RegisterClass for (unsigned rc = 0, e = RegisterClasses.size(); rc != e; ++rc) { const CodeGenRegisterClass &RC = *RegisterClasses[rc]; ArrayRef<Record*> Order = RC.getOrder(); // Give the register class a legal C name if it's anonymous. std::string Name = RC.getName(); // Emit the register list now. 
OS << " // " << Name << " Register Class...\n" << " const uint16_t " << Name << "[] = {\n "; for (unsigned i = 0, e = Order.size(); i != e; ++i) { Record *Reg = Order[i]; OS << getQualifiedName(Reg) << ", "; } OS << "\n };\n\n"; OS << " // " << Name << " Bit set.\n" << " const uint8_t " << Name << "Bits[] = {\n "; BitVectorEmitter BVE; for (unsigned i = 0, e = Order.size(); i != e; ++i) { Record *Reg = Order[i]; BVE.add(Target.getRegBank().getReg(Reg)->EnumValue); } BVE.print(OS); OS << "\n };\n\n"; } OS << "}\n\n"; OS << "extern const MCRegisterClass " << TargetName << "MCRegisterClasses[] = {\n"; for (unsigned rc = 0, e = RegisterClasses.size(); rc != e; ++rc) { const CodeGenRegisterClass &RC = *RegisterClasses[rc]; // Asserts to make sure values will fit in table assuming types from // MCRegisterInfo.h assert((RC.SpillSize/8) <= 0xffff && "SpillSize too large."); assert((RC.SpillAlignment/8) <= 0xffff && "SpillAlignment too large."); assert(RC.CopyCost >= -128 && RC.CopyCost <= 127 && "Copy cost too large."); OS << " { " << '\"' << RC.getName() << "\", " << RC.getName() << ", " << RC.getName() << "Bits, " << RC.getOrder().size() << ", sizeof(" << RC.getName() << "Bits), " << RC.getQualifiedName() + "RegClassID" << ", " << RC.SpillSize/8 << ", " << RC.SpillAlignment/8 << ", " << RC.CopyCost << ", " << RC.Allocatable << " },\n"; } OS << "};\n\n"; // Emit the data table for getSubReg(). ArrayRef<CodeGenSubRegIndex*> SubRegIndices = RegBank.getSubRegIndices(); if (SubRegIndices.size()) { OS << "const uint16_t " << TargetName << "SubRegTable[][" << SubRegIndices.size() << "] = {\n"; for (unsigned i = 0, e = Regs.size(); i != e; ++i) { const CodeGenRegister::SubRegMap &SRM = Regs[i]->getSubRegs(); OS << " /* " << Regs[i]->TheDef->getName() << " */\n"; if (SRM.empty()) { OS << " {0},\n"; continue; } OS << " {"; for (unsigned j = 0, je = SubRegIndices.size(); j != je; ++j) { // FIXME: We really should keep this to 80 columns... 
CodeGenRegister::SubRegMap::const_iterator SubReg = SRM.find(SubRegIndices[j]); if (SubReg != SRM.end()) OS << getQualifiedName(SubReg->second->TheDef); else OS << "0"; if (j != je - 1) OS << ", "; } OS << "}" << (i != e ? "," : "") << "\n"; } OS << "};\n\n"; OS << "const uint16_t *get" << TargetName << "SubRegTable() {\n return (const uint16_t *)" << TargetName << "SubRegTable;\n}\n\n"; } // MCRegisterInfo initialization routine. OS << "static inline void Init" << TargetName << "MCRegisterInfo(MCRegisterInfo *RI, unsigned RA, " << "unsigned DwarfFlavour = 0, unsigned EHFlavour = 0) {\n"; OS << " RI->InitMCRegisterInfo(" << TargetName << "RegDesc, " << Regs.size()+1 << ", RA, " << TargetName << "MCRegisterClasses, " << RegisterClasses.size() << ", " << TargetName << "RegOverlaps, " << TargetName << "SubRegsSet, " << TargetName << "SuperRegsSet, "; if (SubRegIndices.size() != 0) OS << "(uint16_t*)" << TargetName << "SubRegTable, " << SubRegIndices.size() << ");\n\n"; else OS << "NULL, 0);\n\n"; EmitRegMapping(OS, Regs, false); OS << "}\n\n"; OS << "} // End llvm namespace \n"; OS << "#endif // GET_REGINFO_MC_DESC\n\n"; }
bool LiveRangeCalc::isDefOnEntry(LiveRange &LR, ArrayRef<SlotIndex> Undefs, MachineBasicBlock &MBB, BitVector &DefOnEntry, BitVector &UndefOnEntry) { unsigned BN = MBB.getNumber(); if (DefOnEntry[BN]) return true; if (UndefOnEntry[BN]) return false; auto MarkDefined = [BN, &DefOnEntry](MachineBasicBlock &B) -> bool { for (MachineBasicBlock *S : B.successors()) DefOnEntry[S->getNumber()] = true; DefOnEntry[BN] = true; return true; }; SetVector<unsigned> WorkList; // Checking if the entry of MBB is reached by some def: add all predecessors // that are potentially defined-on-exit to the work list. for (MachineBasicBlock *P : MBB.predecessors()) WorkList.insert(P->getNumber()); for (unsigned i = 0; i != WorkList.size(); ++i) { // Determine if the exit from the block is reached by some def. unsigned N = WorkList[i]; MachineBasicBlock &B = *MF->getBlockNumbered(N); if (Seen[N]) { const LiveOutPair &LOB = Map[&B]; if (LOB.first != nullptr && LOB.first != &UndefVNI) return MarkDefined(B); } SlotIndex Begin, End; std::tie(Begin, End) = Indexes->getMBBRange(&B); // Treat End as not belonging to B. // If LR has a segment S that starts at the next block, i.e. [End, ...), // std::upper_bound will return the segment following S. Instead, // S should be treated as the first segment that does not overlap B. LiveRange::iterator UB = std::upper_bound(LR.begin(), LR.end(), End.getPrevSlot()); if (UB != LR.begin()) { LiveRange::Segment &Seg = *std::prev(UB); if (Seg.end > Begin) { // There is a segment that overlaps B. If the range is not explicitly // undefined between the end of the segment and the end of the block, // treat the block as defined on exit. If it is, go to the next block // on the work list. if (LR.isUndefIn(Undefs, Seg.end, End)) continue; return MarkDefined(B); } } // No segment overlaps with this block. If this block is not defined on // entry, or it undefines the range, do not process its predecessors. 
if (UndefOnEntry[N] || LR.isUndefIn(Undefs, Begin, End)) { UndefOnEntry[N] = true; continue; } if (DefOnEntry[N]) return MarkDefined(B); // Still don't know: add all predecessors to the work list. for (MachineBasicBlock *P : B.predecessors()) WorkList.insert(P->getNumber()); } UndefOnEntry[BN] = true; return false; }
void Liveness::computePhiInfo() { RealUseMap.clear(); NodeList Phis; NodeAddr<FuncNode*> FA = DFG.getFunc(); NodeList Blocks = FA.Addr->members(DFG); for (NodeAddr<BlockNode*> BA : Blocks) { auto Ps = BA.Addr->members_if(DFG.IsCode<NodeAttrs::Phi>, DFG); Phis.insert(Phis.end(), Ps.begin(), Ps.end()); } // phi use -> (map: reaching phi -> set of registers defined in between) std::map<NodeId,std::map<NodeId,RegisterAggr>> PhiUp; std::vector<NodeId> PhiUQ; // Work list of phis for upward propagation. // Go over all phis. for (NodeAddr<PhiNode*> PhiA : Phis) { // Go over all defs and collect the reached uses that are non-phi uses // (i.e. the "real uses"). RefMap &RealUses = RealUseMap[PhiA.Id]; NodeList PhiRefs = PhiA.Addr->members(DFG); // Have a work queue of defs whose reached uses need to be found. // For each def, add to the queue all reached (non-phi) defs. SetVector<NodeId> DefQ; NodeSet PhiDefs; for (NodeAddr<RefNode*> R : PhiRefs) { if (!DFG.IsRef<NodeAttrs::Def>(R)) continue; DefQ.insert(R.Id); PhiDefs.insert(R.Id); } // Collect the super-set of all possible reached uses. This set will // contain all uses reached from this phi, either directly from the // phi defs, or (recursively) via non-phi defs reached by the phi defs. // This set of uses will later be trimmed to only contain these uses that // are actually reached by the phi defs. for (unsigned i = 0; i < DefQ.size(); ++i) { NodeAddr<DefNode*> DA = DFG.addr<DefNode*>(DefQ[i]); // Visit all reached uses. Phi defs should not really have the "dead" // flag set, but check it anyway for consistency. bool IsDead = DA.Addr->getFlags() & NodeAttrs::Dead; NodeId UN = !IsDead ? 
DA.Addr->getReachedUse() : 0; while (UN != 0) { NodeAddr<UseNode*> A = DFG.addr<UseNode*>(UN); uint16_t F = A.Addr->getFlags(); if ((F & (NodeAttrs::Undef | NodeAttrs::PhiRef)) == 0) { RegisterRef R = PRI.normalize(A.Addr->getRegRef(DFG)); RealUses[R.Reg].insert({A.Id,R.Mask}); } UN = A.Addr->getSibling(); } // Visit all reached defs, and add them to the queue. These defs may // override some of the uses collected here, but that will be handled // later. NodeId DN = DA.Addr->getReachedDef(); while (DN != 0) { NodeAddr<DefNode*> A = DFG.addr<DefNode*>(DN); for (auto T : DFG.getRelatedRefs(A.Addr->getOwner(DFG), A)) { uint16_t Flags = NodeAddr<DefNode*>(T).Addr->getFlags(); // Must traverse the reached-def chain. Consider: // def(D0) -> def(R0) -> def(R0) -> use(D0) // The reachable use of D0 passes through a def of R0. if (!(Flags & NodeAttrs::PhiRef)) DefQ.insert(T.Id); } DN = A.Addr->getSibling(); } } // Filter out these uses that appear to be reachable, but really // are not. For example: // // R1:0 = d1 // = R1:0 u2 Reached by d1. // R0 = d3 // = R1:0 u4 Still reached by d1: indirectly through // the def d3. // R1 = d5 // = R1:0 u6 Not reached by d1 (covered collectively // by d3 and d5), but following reached // defs and uses from d1 will lead here. auto InPhiDefs = [&PhiDefs] (NodeAddr<DefNode*> DA) -> bool { return PhiDefs.count(DA.Id); }; for (auto UI = RealUses.begin(), UE = RealUses.end(); UI != UE; ) { // For each reached register UI->first, there is a set UI->second, of // uses of it. For each such use, check if it is reached by this phi, // i.e. check if the set of its reaching uses intersects the set of // this phi's defs. NodeRefSet &Uses = UI->second; for (auto I = Uses.begin(), E = Uses.end(); I != E; ) { auto UA = DFG.addr<UseNode*>(I->first); // Undef flag is checked above. 
assert((UA.Addr->getFlags() & NodeAttrs::Undef) == 0); RegisterRef R(UI->first, I->second); NodeList RDs = getAllReachingDefs(R, UA); // If none of the reaching defs of R are from this phi, remove this // use of R. I = any_of(RDs, InPhiDefs) ? std::next(I) : Uses.erase(I); } UI = Uses.empty() ? RealUses.erase(UI) : std::next(UI); } // If this phi reaches some "real" uses, add it to the queue for upward // propagation. if (!RealUses.empty()) PhiUQ.push_back(PhiA.Id); // Go over all phi uses and check if the reaching def is another phi. // Collect the phis that are among the reaching defs of these uses. // While traversing the list of reaching defs for each phi use, accumulate // the set of registers defined between this phi (PhiA) and the owner phi // of the reaching def. NodeSet SeenUses; for (auto I : PhiRefs) { if (!DFG.IsRef<NodeAttrs::Use>(I) || SeenUses.count(I.Id)) continue; NodeAddr<PhiUseNode*> PUA = I; if (PUA.Addr->getReachingDef() == 0) continue; RegisterRef UR = PUA.Addr->getRegRef(DFG); NodeList Ds = getAllReachingDefs(UR, PUA, true, false, NoRegs); RegisterAggr DefRRs(PRI); for (NodeAddr<DefNode*> D : Ds) { if (D.Addr->getFlags() & NodeAttrs::PhiRef) { NodeId RP = D.Addr->getOwner(DFG).Id; std::map<NodeId,RegisterAggr> &M = PhiUp[PUA.Id]; auto F = M.find(RP); if (F == M.end()) M.insert(std::make_pair(RP, DefRRs)); else F->second.insert(DefRRs); } DefRRs.insert(D.Addr->getRegRef(DFG)); } for (NodeAddr<PhiUseNode*> T : DFG.getRelatedRefs(PhiA, PUA)) SeenUses.insert(T.Id); } } if (Trace) { dbgs() << "Phi-up-to-phi map with intervening defs:\n"; for (auto I : PhiUp) { dbgs() << "phi " << Print<NodeId>(I.first, DFG) << " -> {"; for (auto R : I.second) dbgs() << ' ' << Print<NodeId>(R.first, DFG) << Print<RegisterAggr>(R.second, DFG); dbgs() << " }\n"; } } // Propagate the reached registers up in the phi chain. // // The following type of situation needs careful handling: // // phi d1<R1:0> (1) // | // ... d2<R1> // | // phi u3<R1:0> (2) // | // ... 
u4<R1> // // The phi node (2) defines a register pair R1:0, and reaches a "real" // use u4 of just R1. The same phi node is also known to reach (upwards) // the phi node (1). However, the use u4 is not reached by phi (1), // because of the intervening definition d2 of R1. The data flow between // phis (1) and (2) is restricted to R1:0 minus R1, i.e. R0. // // When propagating uses up the phi chains, get the all reaching defs // for a given phi use, and traverse the list until the propagated ref // is covered, or until reaching the final phi. Only assume that the // reference reaches the phi in the latter case. for (unsigned i = 0; i < PhiUQ.size(); ++i) { auto PA = DFG.addr<PhiNode*>(PhiUQ[i]); NodeList PUs = PA.Addr->members_if(DFG.IsRef<NodeAttrs::Use>, DFG); RefMap &RUM = RealUseMap[PA.Id]; for (NodeAddr<UseNode*> UA : PUs) { std::map<NodeId,RegisterAggr> &PUM = PhiUp[UA.Id]; RegisterRef UR = PRI.normalize(UA.Addr->getRegRef(DFG)); for (const std::pair<NodeId,RegisterAggr> &P : PUM) { bool Changed = false; const RegisterAggr &MidDefs = P.second; // Collect the set PropUp of uses that are reached by the current // phi PA, and are not covered by any intervening def between the // currently visited use UA and the the upward phi P. 
if (MidDefs.hasCoverOf(UR)) continue; // General algorithm: // for each (R,U) : U is use node of R, U is reached by PA // if MidDefs does not cover (R,U) // then add (R-MidDefs,U) to RealUseMap[P] // for (const std::pair<RegisterId,NodeRefSet> &T : RUM) { RegisterRef R = DFG.restrictRef(RegisterRef(T.first), UR); if (!R) continue; for (std::pair<NodeId,LaneBitmask> V : T.second) { RegisterRef S = DFG.restrictRef(RegisterRef(R.Reg, V.second), R); if (!S) continue; if (RegisterRef SS = MidDefs.clearIn(S)) { NodeRefSet &RS = RealUseMap[P.first][SS.Reg]; Changed |= RS.insert({V.first,SS.Mask}).second; } } } if (Changed) PhiUQ.push_back(P.first); } } } if (Trace) { dbgs() << "Real use map:\n"; for (auto I : RealUseMap) { dbgs() << "phi " << Print<NodeId>(I.first, DFG); NodeAddr<PhiNode*> PA = DFG.addr<PhiNode*>(I.first); NodeList Ds = PA.Addr->members_if(DFG.IsRef<NodeAttrs::Def>, DFG); if (!Ds.empty()) { RegisterRef RR = NodeAddr<DefNode*>(Ds[0]).Addr->getRegRef(DFG); dbgs() << '<' << Print<RegisterRef>(RR, DFG) << '>'; } else { dbgs() << "<noreg>"; } dbgs() << " -> " << Print<RefMap>(I.second, DFG) << '\n'; } } }
void PTXGenerator::createSubfunction(SetVector<Value *> &UsedValues, SetVector<Value *> &OriginalIVS, PTXGenerator::ValueToValueMapTy &VMap, Function **SubFunction) { Function *FN = createSubfunctionDefinition(UsedValues.size()); Module *M = getModule(); LLVMContext &Context = FN->getContext(); IntegerType *Ty = Builder.getInt64Ty(); // Store the previous basic block. BasicBlock *PrevBB = Builder.GetInsertBlock(); // Create basic blocks. BasicBlock *HeaderBB = BasicBlock::Create(Context, "ptx.setup", FN); BasicBlock *ExitBB = BasicBlock::Create(Context, "ptx.exit", FN); BasicBlock *BodyBB = BasicBlock::Create(Context, "ptx.loop_body", FN); DominatorTree &DT = P->getAnalysis<DominatorTree>(); DT.addNewBlock(HeaderBB, PrevBB); DT.addNewBlock(ExitBB, HeaderBB); DT.addNewBlock(BodyBB, HeaderBB); Builder.SetInsertPoint(HeaderBB); // Insert VMap items with maps of array base address on the host to base // address on the device. Function::arg_iterator AI = FN->arg_begin(); for (unsigned j = 0; j < UsedValues.size(); j++) { Value *BaseAddr = UsedValues[j]; Type *ArrayTy = BaseAddr->getType(); Value *Param = Builder.CreateBitCast(AI, ArrayTy); VMap.insert(std::make_pair<Value *, Value *>(BaseAddr, Param)); AI++; } // FIXME: These intrinsics should be inserted on-demand. However, we insert // them all currently for simplicity. 
Function *GetNctaidX = Intrinsic::getDeclaration(M, Intrinsic::ptx_read_nctaid_x); Function *GetNctaidY = Intrinsic::getDeclaration(M, Intrinsic::ptx_read_nctaid_y); Function *GetCtaidX = Intrinsic::getDeclaration(M, Intrinsic::ptx_read_ctaid_x); Function *GetCtaidY = Intrinsic::getDeclaration(M, Intrinsic::ptx_read_ctaid_y); Function *GetNtidX = Intrinsic::getDeclaration(M, Intrinsic::ptx_read_ntid_x); Function *GetNtidY = Intrinsic::getDeclaration(M, Intrinsic::ptx_read_ntid_y); Function *GetTidX = Intrinsic::getDeclaration(M, Intrinsic::ptx_read_tid_x); Function *GetTidY = Intrinsic::getDeclaration(M, Intrinsic::ptx_read_tid_y); Value *GridWidth = Builder.CreateCall(GetNctaidX); GridWidth = Builder.CreateIntCast(GridWidth, Ty, false); Value *GridHeight = Builder.CreateCall(GetNctaidY); GridHeight = Builder.CreateIntCast(GridHeight, Ty, false); Value *BlockWidth = Builder.CreateCall(GetNtidX); BlockWidth = Builder.CreateIntCast(BlockWidth, Ty, false); Value *BlockHeight = Builder.CreateCall(GetNtidY); BlockHeight = Builder.CreateIntCast(BlockHeight, Ty, false); Value *BIDx = Builder.CreateCall(GetCtaidX); BIDx = Builder.CreateIntCast(BIDx, Ty, false); Value *BIDy = Builder.CreateCall(GetCtaidY); BIDy = Builder.CreateIntCast(BIDy, Ty, false); Value *TIDx = Builder.CreateCall(GetTidX); TIDx = Builder.CreateIntCast(TIDx, Ty, false); Value *TIDy = Builder.CreateCall(GetTidY); TIDy = Builder.CreateIntCast(TIDy, Ty, false); Builder.CreateBr(BodyBB); Builder.SetInsertPoint(BodyBB); unsigned NumDims = OriginalIVS.size(); std::vector<Value *> Substitutions; Value *BlockID, *ThreadID; switch (NumDims) { case 1: { Value *BlockSize = Builder.CreateMul(BlockWidth, BlockHeight, "p_gpu_blocksize"); BlockID = Builder.CreateMul(BIDy, GridWidth, "p_gpu_index_i"); BlockID = Builder.CreateAdd(BlockID, BIDx); BlockID = Builder.CreateMul(BlockID, BlockSize); ThreadID = Builder.CreateMul(TIDy, BlockWidth, "p_gpu_index_j"); ThreadID = Builder.CreateAdd(ThreadID, TIDx); ThreadID = 
Builder.CreateAdd(ThreadID, BlockID); Substitutions.push_back(ThreadID); break; } case 2: { BlockID = Builder.CreateMul(BIDy, GridWidth, "p_gpu_index_i"); BlockID = Builder.CreateAdd(BlockID, BIDx); Substitutions.push_back(BlockID); ThreadID = Builder.CreateMul(TIDy, BlockWidth, "p_gpu_index_j"); ThreadID = Builder.CreateAdd(ThreadID, TIDx); Substitutions.push_back(ThreadID); break; } case 3: { BlockID = Builder.CreateMul(BIDy, GridWidth, "p_gpu_index_i"); BlockID = Builder.CreateAdd(BlockID, BIDx); Substitutions.push_back(BlockID); Substitutions.push_back(TIDy); Substitutions.push_back(TIDx); break; } case 4: { Substitutions.push_back(BIDy); Substitutions.push_back(BIDx); Substitutions.push_back(TIDy); Substitutions.push_back(TIDx); break; } default: assert(true && "We cannot transform parallel loops whose depth is larger than 4."); return; } assert(OriginalIVS.size() == Substitutions.size() && "The size of IVS should be equal to the size of substitutions."); for (unsigned i = 0; i < OriginalIVS.size(); ++i) { VMap.insert( std::make_pair<Value *, Value *>(OriginalIVS[i], Substitutions[i])); } Builder.CreateBr(ExitBB); Builder.SetInsertPoint(--Builder.GetInsertPoint()); BasicBlock::iterator LoopBody = Builder.GetInsertPoint(); // Add the termination of the ptx-device subfunction. Builder.SetInsertPoint(ExitBB); Builder.CreateRetVoid(); Builder.SetInsertPoint(LoopBody); *SubFunction = FN; }