// // Method: findGlobalPoolNodes() // // Description: // This method finds DSNodes that are reachable from globals and that need a // pool. The Automatic Pool Allocation transform will use the returned // information to build global pools for the DSNodes in question. // // Note that this method does not assign DSNodes to pools; it merely decides // which DSNodes are reachable from globals and will need a pool of global // scope. // // Outputs: // Nodes - The DSNodes that are both reachable from globals and which should // have global pools will be *added* to this container. // void AllHeapNodesHeuristic::findGlobalPoolNodes (DSNodeSet_t & Nodes) { // Get the globals graph for the program. DSGraph* GG = Graphs->getGlobalsGraph(); // Get all of the nodes reachable from globals. DenseSet<const DSNode*> GlobalHeapNodes; GetNodesReachableFromGlobals (GG, GlobalHeapNodes); // // Create a global pool for each global DSNode. // for (DenseSet<const DSNode *>::iterator NI = GlobalHeapNodes.begin(); NI != GlobalHeapNodes.end();++NI) { const DSNode * N = *NI; PoolMap[N] = OnePool(N); } // // Now find all DSNodes belonging to function-local DSGraphs which are // mirrored in the globals graph. These DSNodes require a global pool, too. // for (Module::iterator F = M->begin(); F != M->end(); ++F) { if (Graphs->hasDSGraph(*F)) { DSGraph* G = Graphs->getDSGraph(*F); DSGraph::NodeMapTy NodeMap; G->computeGToGGMapping (NodeMap); // // Scan through all DSNodes in the local graph. If a local DSNode has a // corresponding DSNode in the globals graph that is reachable from a // global, then add the local DSNode to the set of DSNodes reachable from // a global. // DSGraph::node_iterator ni = G->node_begin(); for (; ni != G->node_end(); ++ni) { DSNode * N = ni; DSNode * GGN = NodeMap[N].getNode(); //assert (!GGN || GlobalHeapNodes.count (GGN)); if (GGN && GlobalHeapNodes.count (GGN)) PoolMap[GGN].NodesInPool.push_back (N); } } } // // Copy the values into the output container. Note that DenseSet has no // iterator traits (or whatever allows us to treat DenseSet has a generic // container), so we have to use a loop to copy values from the DenseSet into // the output container. // for (DenseSet<const DSNode*>::iterator I = GlobalHeapNodes.begin(), E = GlobalHeapNodes.end(); I != E; ++I) { Nodes.insert (*I); } return; }
static void MarkNodesWhichMustBePassedIn(DenseSet<const DSNode*> &MarkedNodes,
                                         Function &F, DSGraph* G,
                                         EntryPointAnalysis* EPA) {
  // All DSNodes reachable from arguments must be passed in...
  // unless this is an entry point to the program.
  if (!EPA->isEntryPoint(&F)) {
    for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end();
         I != E; ++I) {
      DSGraph::ScalarMapTy::iterator AI = G->getScalarMap().find(I);
      if (AI != G->getScalarMap().end())
        if (DSNode * N = AI->second.getNode())
          N->markReachableNodes(MarkedNodes);
    }
  }

  // Mark the returned node as needing to be passed in.
  if (DSNode * RetNode = G->getReturnNodeFor(F).getNode())
    RetNode->markReachableNodes(MarkedNodes);

  // Calculate which DSNodes are reachable from globals. If a node is reachable
  // from a global, we will create a global pool for it, so no argument passing
  // is required.
  DenseSet<const DSNode*> NodesFromGlobals;
  GetNodesReachableFromGlobals(G, NodesFromGlobals);

  // Remove any nodes reachable from a global. These nodes will be put into
  // global pools, which do not require arguments to be passed in.
  for (DenseSet<const DSNode*>::iterator I = NodesFromGlobals.begin(),
       E = NodesFromGlobals.end(); I != E; ++I)
    MarkedNodes.erase(*I);
}
void AliasAnalysisChecker::collectMissingAliases(
    const DenseSet<ValuePair> &DynamicAliases,
    vector<ValuePair> &MissingAliases) {
  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
  AliasAnalysis &BaselineAA = getAnalysis<BaselineAliasAnalysis>();

  MissingAliases.clear();
  for (DenseSet<ValuePair>::const_iterator I = DynamicAliases.begin();
       I != DynamicAliases.end(); ++I) {
    Value *V1 = I->first, *V2 = I->second;
    if (IntraProc && !DynAAUtils::IsIntraProcQuery(V1, V2)) {
      continue;
    }

    // Ignore BitCasts and PhiNodes. The reports on them are typically
    // redundant.
    if (isa<BitCastInst>(V1) || isa<BitCastInst>(V2))
      continue;
    if (isa<PHINode>(V1) || isa<PHINode>(V2))
      continue;

    if (!CheckAllPointers) {
      if (!DynAAUtils::PointerIsDereferenced(V1) ||
          !DynAAUtils::PointerIsDereferenced(V2)) {
        continue;
      }
    }

    if (BaselineAA.alias(V1, V2) != AliasAnalysis::NoAlias &&
        AA.alias(V1, V2) == AliasAnalysis::NoAlias) {
      MissingAliases.push_back(make_pair(V1, V2));
    }
  }
}
// // Method: findGlobalPoolNodes() // // Description: // This method finds DSNodes that are reachable from globals and that need a // pool. The Automatic Pool Allocation transform will use the returned // information to build global pools for the DSNodes in question. // // Note that this method does not assign DSNodes to pools; it merely decides // which DSNodes are reachable from globals and will need a pool of global // scope. // // Outputs: // Nodes - The DSNodes that are both reachable from globals and which should // have global pools will be *added* to this container. // void Heuristic::findGlobalPoolNodes (DSNodeSet_t & Nodes) { // Get the globals graph for the program. DSGraph* GG = Graphs->getGlobalsGraph(); // Get all of the nodes reachable from globals. DenseSet<const DSNode*> GlobalHeapNodes; GetNodesReachableFromGlobals (GG, GlobalHeapNodes); // // Now find all DSNodes belonging to function-local DSGraphs which are // mirrored in the globals graph. These DSNodes require a global pool, too. // for (Module::iterator F = M->begin(); F != M->end(); ++F) { if (Graphs->hasDSGraph(*F)) { DSGraph* G = Graphs->getDSGraph(*F); GetNodesReachableFromGlobals (G, GlobalHeapNodes); } } // // Copy the values into the output container. Note that DenseSet has no // iterator traits (or whatever allows us to treat DenseSet has a generic // container), so we have to use a loop to copy values from the DenseSet into // the output container. // for (DenseSet<const DSNode*>::iterator I = GlobalHeapNodes.begin(), E = GlobalHeapNodes.end(); I != E; ++I) { Nodes.insert (*I); } return; }
// Collects missing aliases to <MissingAliases>.
void AliasAnalysisChecker::collectMissingAliases(
    const DenseSet<ValuePair> &DynamicAliases) {
  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
  AliasAnalysis &BaselineAA = getAnalysis<BaselineAliasAnalysis>();

  MissingAliases.clear();
  for (DenseSet<ValuePair>::const_iterator I = DynamicAliases.begin();
       I != DynamicAliases.end(); ++I) {
    Value *V1 = I->first, *V2 = I->second;
    if (IntraProc && !DynAAUtils::IsIntraProcQuery(V1, V2)) {
      continue;
    }

    if (!CheckAllPointers) {
      if (!DynAAUtils::PointerIsDereferenced(V1) ||
          !DynAAUtils::PointerIsDereferenced(V2)) {
        continue;
      }
    }

    if (BaselineAA.alias(V1, V2) != AliasAnalysis::NoAlias &&
        AA.alias(V1, V2) == AliasAnalysis::NoAlias) {
      MissingAliases.push_back(make_pair(V1, V2));
    }
  }
}
void LazyValueInfoCache::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc, BasicBlock *NewSucc) { // When an edge in the graph has been threaded, values that we could not // determine a value for before (i.e. were marked overdefined) may be possible // to solve now. We do NOT try to proactively update these values. Instead, // we clear their entries from the cache, and allow lazy updating to recompute // them when needed. // The updating process is fairly simple: we need to dropped cached info // for all values that were marked overdefined in OldSucc, and for those same // values in any successor of OldSucc (except NewSucc) in which they were // also marked overdefined. std::vector<BasicBlock*> worklist; worklist.push_back(OldSucc); DenseSet<Value*> ClearSet; for (DenseSet<OverDefinedPairTy>::iterator I = OverDefinedCache.begin(), E = OverDefinedCache.end(); I != E; ++I) { if (I->first == OldSucc) ClearSet.insert(I->second); } // Use a worklist to perform a depth-first search of OldSucc's successors. // NOTE: We do not need a visited list since any blocks we have already // visited will have had their overdefined markers cleared already, and we // thus won't loop to their successors. while (!worklist.empty()) { BasicBlock *ToUpdate = worklist.back(); worklist.pop_back(); // Skip blocks only accessible through NewSucc. if (ToUpdate == NewSucc) continue; bool changed = false; for (DenseSet<Value*>::iterator I = ClearSet.begin(), E = ClearSet.end(); I != E; ++I) { // If a value was marked overdefined in OldSucc, and is here too... DenseSet<OverDefinedPairTy>::iterator OI = OverDefinedCache.find(std::make_pair(ToUpdate, *I)); if (OI == OverDefinedCache.end()) continue; // Remove it from the caches. ValueCacheEntryTy &Entry = ValueCache[LVIValueHandle(*I, this)]; ValueCacheEntryTy::iterator CI = Entry.find(ToUpdate); assert(CI != Entry.end() && "Couldn't find entry to update?"); Entry.erase(CI); OverDefinedCache.erase(OI); // If we removed anything, then we potentially need to update // blocks successors too. changed = true; } if (!changed) continue; worklist.insert(worklist.end(), succ_begin(ToUpdate), succ_end(ToUpdate)); } }
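// A minimal, self-contained sketch of the invalidation idiom threadEdge() uses.
// The block/value IDs and the toy successor map below are illustrative stand-ins
// for the LVI types above, not the real cache: snapshot which values were
// overdefined in the threaded-from block, then walk successors with a worklist,
// erasing matching (block, value) pairs and only expanding a block's successors
// when something was actually erased.
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallVector.h"
#include <utility>
#include <vector>

using BlockId = int;
using ValueId = int;
using OverDefinedPair = std::pair<BlockId, ValueId>;

void clearThreadedEntries(
    llvm::DenseSet<OverDefinedPair> &OverDefinedCache,
    const llvm::DenseMap<BlockId, llvm::SmallVector<BlockId, 2>> &Succs,
    BlockId OldSucc, BlockId NewSucc) {
  // Only values that were overdefined in OldSucc are candidates for clearing.
  llvm::DenseSet<ValueId> ClearSet;
  for (const OverDefinedPair &P : OverDefinedCache)
    if (P.first == OldSucc)
      ClearSet.insert(P.second);

  std::vector<BlockId> Worklist{OldSucc};
  while (!Worklist.empty()) {
    BlockId BB = Worklist.back();
    Worklist.pop_back();
    if (BB == NewSucc)  // Blocks only reachable through NewSucc keep their info.
      continue;

    bool Changed = false;
    for (ValueId V : ClearSet)
      Changed |= OverDefinedCache.erase(std::make_pair(BB, V));

    // No visited set is needed: once a block's entries are gone, revisiting it
    // erases nothing and its successors are not pushed again.
    if (!Changed)
      continue;
    auto It = Succs.find(BB);
    if (It != Succs.end())
      Worklist.insert(Worklist.end(), It->second.begin(), It->second.end());
  }
}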
// Find the number of arguments we need to add to the functions.
void CSDataRando::findFunctionArgNodes(const std::vector<const Function *> &Functions) {
  std::vector<DSNodeHandle> RootNodes;
  for (const Function *F : Functions) {
    DSGraph *G = DSA->getDSGraph(*F);
    G->getFunctionArgumentsForCall(F, RootNodes);
  }

  // No additional args to pass.
  if (RootNodes.size() == 0) {
    return;
  }

  DenseSet<const DSNode*> MarkedNodes;
  for (DSNodeHandle &NH : RootNodes) {
    if (DSNode *N = NH.getNode()) {
      N->markReachableNodes(MarkedNodes);
    }
  }

  // Remove global nodes from the arg nodes. If we are using the bottom-up
  // analysis, then whenever a node is a global node, all contexts will use the
  // global map instead.
  for (auto i : GlobalNodes) {
    MarkedNodes.erase(i);
  }

  // Remove any nodes that are marked do-not-encrypt.
  SmallVector<const DSNode*, 8> MarkedNodeWorkList;
  for (auto i : MarkedNodes) {
    if (i->isDoNotEncryptNode()) {
      MarkedNodeWorkList.push_back(i);
    }
  }
  for (auto i : MarkedNodeWorkList) {
    MarkedNodes.erase(i);
  }

  if (MarkedNodes.empty()) {
    return;
  }

  // Create a FuncInfo entry for each of the functions with the arg nodes that
  // need to be passed.
  for (const Function *F : Functions) {
    FuncInfo &FI = FunctionInfo[F];
    FI.ArgNodes.insert(FI.ArgNodes.end(), MarkedNodes.begin(), MarkedNodes.end());
  }
}
/// FindFunctionPoolArgs - In the first pass over the program, we decide which
/// arguments will have to be added for each function, build the FunctionInfo
/// map, and record this info in the ArgNodes set.
static void FindFunctionPoolArgs(Function &F, FuncInfo& FI,
                                 EntryPointAnalysis* EPA) {
  DenseSet<const DSNode*> MarkedNodes;
  if (FI.G->node_begin() == FI.G->node_end())
    return;  // No memory activity, nothing is required

  // Find DataStructure nodes which are allocated in pools non-local to the
  // current function. This set will contain all of the DSNodes which require
  // pools to be passed in from outside of the function.
  MarkNodesWhichMustBePassedIn(MarkedNodes, F, FI.G, EPA);

  //FI.ArgNodes.insert(FI.ArgNodes.end(), MarkedNodes.begin(), MarkedNodes.end());
  // Work around DenseSet not having iterator traits.
  for (DenseSet<const DSNode*>::iterator ii = MarkedNodes.begin(),
       ee = MarkedNodes.end(); ii != ee; ++ii)
    FI.ArgNodes.insert(FI.ArgNodes.end(), *ii);
}
// // Method: eraseCallsTo() // // Description: // This method removes the specified function from DSCallsites within the // specified function. We do not do anything with call sites that call this // function indirectly (for which there is not much point as we do not yet // know the targets of indirect function calls). // void StdLibDataStructures::eraseCallsTo(Function* F) { typedef std::pair<DSGraph*,Function*> RemovalPair; DenseSet<RemovalPair> ToRemove; for (Value::use_iterator ii = F->use_begin(), ee = F->use_end(); ii != ee; ++ii) if (CallInst* CI = dyn_cast<CallInst>(*ii)){ if (CI->getCalledValue() == F) { DSGraph* Graph = getDSGraph(*CI->getParent()->getParent()); //delete the call DEBUG(errs() << "Removing " << F->getName().str() << " from " << CI->getParent()->getParent()->getName().str() << "\n"); ToRemove.insert(std::make_pair(Graph, F)); } }else if (InvokeInst* CI = dyn_cast<InvokeInst>(*ii)){ if (CI->getCalledValue() == F) { DSGraph* Graph = getDSGraph(*CI->getParent()->getParent()); //delete the call DEBUG(errs() << "Removing " << F->getName().str() << " from " << CI->getParent()->getParent()->getName().str() << "\n"); ToRemove.insert(std::make_pair(Graph, F)); } } else if(ConstantExpr *CE = dyn_cast<ConstantExpr>(*ii)) { if(CE->isCast()) { for (Value::use_iterator ci = CE->use_begin(), ce = CE->use_end(); ci != ce; ++ci) { if (CallInst* CI = dyn_cast<CallInst>(*ci)){ if(CI->getCalledValue() == CE) { DSGraph* Graph = getDSGraph(*CI->getParent()->getParent()); //delete the call DEBUG(errs() << "Removing " << F->getName().str() << " from " << CI->getParent()->getParent()->getName().str() << "\n"); ToRemove.insert(std::make_pair(Graph, F)); } } } } } for(DenseSet<RemovalPair>::iterator I = ToRemove.begin(), E = ToRemove.end(); I != E; ++I) I->first->removeFunctionCalls(*I->second); }
bool SIDebuggerInsertNops::runOnMachineFunction(MachineFunction &MF) {
  // Skip this pass if "amdgpu-debugger-insert-nops" attribute was not
  // specified.
  const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
  if (!ST.debuggerInsertNops())
    return false;

  // Skip machine functions without debug info.
  if (!MF.getMMI().hasDebugInfo())
    return false;

  // Target instruction info.
  const SIInstrInfo *TII =
    static_cast<const SIInstrInfo*>(MF.getSubtarget().getInstrInfo());

  // Set containing line numbers that have nop inserted.
  DenseSet<unsigned> NopInserted;

  for (auto &MBB : MF) {
    for (auto MI = MBB.begin(); MI != MBB.end(); ++MI) {
      // Skip DBG_VALUE instructions and instructions without location.
      if (MI->isDebugValue() || !MI->getDebugLoc())
        continue;

      // Insert nop instruction if line number does not have nop inserted.
      auto DL = MI->getDebugLoc();
      if (NopInserted.find(DL.getLine()) == NopInserted.end()) {
        BuildMI(MBB, *MI, DL, TII->get(AMDGPU::S_NOP))
          .addImm(0);
        NopInserted.insert(DL.getLine());
      }
    }
  }

  return true;
}
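// A small stand-alone sketch of the dedup idiom the pass above relies on: a
// DenseSet<unsigned> remembers which source lines already received a nop, so
// each line is instrumented at most once no matter how many instructions map
// to it. The insertNopPerLine/emitNop names are illustrative, not LLVM APIs;
// using insert()'s returned bool avoids the separate find().
#include "llvm/ADT/DenseSet.h"
#include <vector>

void insertNopPerLine(const std::vector<unsigned> &InstructionLines,
                      void (*emitNop)(unsigned Line)) {
  llvm::DenseSet<unsigned> NopInserted;
  for (unsigned Line : InstructionLines)
    if (NopInserted.insert(Line).second)  // .second is true only for new lines.
      emitNop(Line);
}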
// Reroll the provided loop with respect to the provided induction variable. // Generally, we're looking for a loop like this: // // %iv = phi [ (preheader, ...), (body, %iv.next) ] // f(%iv) // %iv.1 = add %iv, 1 <-- a root increment // f(%iv.1) // %iv.2 = add %iv, 2 <-- a root increment // f(%iv.2) // %iv.scale_m_1 = add %iv, scale-1 <-- a root increment // f(%iv.scale_m_1) // ... // %iv.next = add %iv, scale // %cmp = icmp(%iv, ...) // br %cmp, header, exit // // Notably, we do not require that f(%iv), f(%iv.1), etc. be isolated groups of // instructions. In other words, the instructions in f(%iv), f(%iv.1), etc. can // be intermixed with eachother. The restriction imposed by this algorithm is // that the relative order of the isomorphic instructions in f(%iv), f(%iv.1), // etc. be the same. // // First, we collect the use set of %iv, excluding the other increment roots. // This gives us f(%iv). Then we iterate over the loop instructions (scale-1) // times, having collected the use set of f(%iv.(i+1)), during which we: // - Ensure that the next unmatched instruction in f(%iv) is isomorphic to // the next unmatched instruction in f(%iv.(i+1)). // - Ensure that both matched instructions don't have any external users // (with the exception of last-in-chain reduction instructions). // - Track the (aliasing) write set, and other side effects, of all // instructions that belong to future iterations that come before the matched // instructions. If the matched instructions read from that write set, then // f(%iv) or f(%iv.(i+1)) has some dependency on instructions in // f(%iv.(j+1)) for some j > i, and we cannot reroll the loop. Similarly, // if any of these future instructions had side effects (could not be // speculatively executed), and so do the matched instructions, when we // cannot reorder those side-effect-producing instructions, and rerolling // fails. // // Finally, we make sure that all loop instructions are either loop increment // roots, belong to simple latch code, parts of validated reductions, part of // f(%iv) or part of some f(%iv.i). If all of that is true (and all reductions // have been validated), then we reroll the loop. bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header, const SCEV *IterCount, ReductionTracker &Reductions) { const SCEVAddRecExpr *RealIVSCEV = cast<SCEVAddRecExpr>(SE->getSCEV(IV)); uint64_t Inc = cast<SCEVConstant>(RealIVSCEV->getOperand(1))-> getValue()->getZExtValue(); // The collection of loop increment instructions. SmallInstructionVector LoopIncs; uint64_t Scale = Inc; // The effective induction variable, IV, is normally also the real induction // variable. When we're dealing with a loop like: // for (int i = 0; i < 500; ++i) // x[3*i] = ...; // x[3*i+1] = ...; // x[3*i+2] = ...; // then the real IV is still i, but the effective IV is (3*i). Instruction *RealIV = IV; if (Inc == 1 && !findScaleFromMul(RealIV, Scale, IV, LoopIncs)) return false; assert(Scale <= MaxInc && "Scale is too large"); assert(Scale > 1 && "Scale must be at least 2"); // The set of increment instructions for each increment value. SmallVector<SmallInstructionVector, 32> Roots(Scale-1); SmallInstructionSet AllRoots; if (!collectAllRoots(L, Inc, Scale, IV, Roots, AllRoots, LoopIncs)) return false; DEBUG(dbgs() << "LRR: Found all root induction increments for: " << *RealIV << "\n"); // An array of just the possible reductions for this scale factor. 
When we // collect the set of all users of some root instructions, these reduction // instructions are treated as 'final' (their uses are not considered). // This is important because we don't want the root use set to search down // the reduction chain. SmallInstructionSet PossibleRedSet; SmallInstructionSet PossibleRedLastSet, PossibleRedPHISet; Reductions.restrictToScale(Scale, PossibleRedSet, PossibleRedPHISet, PossibleRedLastSet); // We now need to check for equivalence of the use graph of each root with // that of the primary induction variable (excluding the roots). Our goal // here is not to solve the full graph isomorphism problem, but rather to // catch common cases without a lot of work. As a result, we will assume // that the relative order of the instructions in each unrolled iteration // is the same (although we will not make an assumption about how the // different iterations are intermixed). Note that while the order must be // the same, the instructions may not be in the same basic block. SmallInstructionSet Exclude(AllRoots); Exclude.insert(LoopIncs.begin(), LoopIncs.end()); DenseSet<Instruction *> BaseUseSet; collectInLoopUserSet(L, IV, Exclude, PossibleRedSet, BaseUseSet); DenseSet<Instruction *> AllRootUses; std::vector<DenseSet<Instruction *> > RootUseSets(Scale-1); bool MatchFailed = false; for (unsigned i = 0; i < Scale-1 && !MatchFailed; ++i) { DenseSet<Instruction *> &RootUseSet = RootUseSets[i]; collectInLoopUserSet(L, Roots[i], SmallInstructionSet(), PossibleRedSet, RootUseSet); DEBUG(dbgs() << "LRR: base use set size: " << BaseUseSet.size() << " vs. iteration increment " << (i+1) << " use set size: " << RootUseSet.size() << "\n"); if (BaseUseSet.size() != RootUseSet.size()) { MatchFailed = true; break; } // In addition to regular aliasing information, we need to look for // instructions from later (future) iterations that have side effects // preventing us from reordering them past other instructions with side // effects. bool FutureSideEffects = false; AliasSetTracker AST(*AA); // The map between instructions in f(%iv.(i+1)) and f(%iv). DenseMap<Value *, Value *> BaseMap; assert(L->getNumBlocks() == 1 && "Cannot handle multi-block loops"); for (BasicBlock::iterator J1 = Header->begin(), J2 = Header->begin(), JE = Header->end(); J1 != JE && !MatchFailed; ++J1) { if (cast<Instruction>(J1) == RealIV) continue; if (cast<Instruction>(J1) == IV) continue; if (!BaseUseSet.count(J1)) continue; if (PossibleRedPHISet.count(J1)) // Skip reduction PHIs. continue; while (J2 != JE && (!RootUseSet.count(J2) || std::find(Roots[i].begin(), Roots[i].end(), J2) != Roots[i].end())) { // As we iterate through the instructions, instructions that don't // belong to previous iterations (or the base case), must belong to // future iterations. We want to track the alias set of writes from // previous iterations. if (!isa<PHINode>(J2) && !BaseUseSet.count(J2) && !AllRootUses.count(J2)) { if (J2->mayWriteToMemory()) AST.add(J2); // Note: This is specifically guarded by a check on isa<PHINode>, // which while a valid (somewhat arbitrary) micro-optimization, is // needed because otherwise isSafeToSpeculativelyExecute returns // false on PHI nodes. if (!isSimpleLoadStore(J2) && !isSafeToSpeculativelyExecute(J2, DL)) FutureSideEffects = true; } ++J2; } if (!J1->isSameOperationAs(J2)) { DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 << " vs. 
" << *J2 << "\n"); MatchFailed = true; break; } // Make sure that this instruction, which is in the use set of this // root instruction, does not also belong to the base set or the set of // some previous root instruction. if (BaseUseSet.count(J2) || AllRootUses.count(J2)) { DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 << " vs. " << *J2 << " (prev. case overlap)\n"); MatchFailed = true; break; } // Make sure that we don't alias with any instruction in the alias set // tracker. If we do, then we depend on a future iteration, and we // can't reroll. if (J2->mayReadFromMemory()) { for (AliasSetTracker::iterator K = AST.begin(), KE = AST.end(); K != KE && !MatchFailed; ++K) { if (K->aliasesUnknownInst(J2, *AA)) { DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 << " vs. " << *J2 << " (depends on future store)\n"); MatchFailed = true; break; } } } // If we've past an instruction from a future iteration that may have // side effects, and this instruction might also, then we can't reorder // them, and this matching fails. As an exception, we allow the alias // set tracker to handle regular (simple) load/store dependencies. if (FutureSideEffects && ((!isSimpleLoadStore(J1) && !isSafeToSpeculativelyExecute(J1)) || (!isSimpleLoadStore(J2) && !isSafeToSpeculativelyExecute(J2)))) { DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 << " vs. " << *J2 << " (side effects prevent reordering)\n"); MatchFailed = true; break; } // For instructions that are part of a reduction, if the operation is // associative, then don't bother matching the operands (because we // already know that the instructions are isomorphic, and the order // within the iteration does not matter). For non-associative reductions, // we do need to match the operands, because we need to reject // out-of-order instructions within an iteration! // For example (assume floating-point addition), we need to reject this: // x += a[i]; x += b[i]; // x += a[i+1]; x += b[i+1]; // x += b[i+2]; x += a[i+2]; bool InReduction = Reductions.isPairInSame(J1, J2); if (!(InReduction && J1->isAssociative())) { bool Swapped = false, SomeOpMatched = false;; for (unsigned j = 0; j < J1->getNumOperands() && !MatchFailed; ++j) { Value *Op2 = J2->getOperand(j); // If this is part of a reduction (and the operation is not // associatve), then we match all operands, but not those that are // part of the reduction. if (InReduction) if (Instruction *Op2I = dyn_cast<Instruction>(Op2)) if (Reductions.isPairInSame(J2, Op2I)) continue; DenseMap<Value *, Value *>::iterator BMI = BaseMap.find(Op2); if (BMI != BaseMap.end()) Op2 = BMI->second; else if (std::find(Roots[i].begin(), Roots[i].end(), (Instruction*) Op2) != Roots[i].end()) Op2 = IV; if (J1->getOperand(Swapped ? unsigned(!j) : j) != Op2) { // If we've not already decided to swap the matched operands, and // we've not already matched our first operand (note that we could // have skipped matching the first operand because it is part of a // reduction above), and the instruction is commutative, then try // the swapped match. if (!Swapped && J1->isCommutative() && !SomeOpMatched && J1->getOperand(!j) == Op2) { Swapped = true; } else { DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 << " vs. 
" << *J2 << " (operand " << j << ")\n"); MatchFailed = true; break; } } SomeOpMatched = true; } } if ((!PossibleRedLastSet.count(J1) && hasUsesOutsideLoop(J1, L)) || (!PossibleRedLastSet.count(J2) && hasUsesOutsideLoop(J2, L))) { DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 << " vs. " << *J2 << " (uses outside loop)\n"); MatchFailed = true; break; } if (!MatchFailed) BaseMap.insert(std::pair<Value *, Value *>(J2, J1)); AllRootUses.insert(J2); Reductions.recordPair(J1, J2, i+1); ++J2; } } if (MatchFailed) return false; DEBUG(dbgs() << "LRR: Matched all iteration increments for " << *RealIV << "\n"); DenseSet<Instruction *> LoopIncUseSet; collectInLoopUserSet(L, LoopIncs, SmallInstructionSet(), SmallInstructionSet(), LoopIncUseSet); DEBUG(dbgs() << "LRR: Loop increment set size: " << LoopIncUseSet.size() << "\n"); // Make sure that all instructions in the loop have been included in some // use set. for (BasicBlock::iterator J = Header->begin(), JE = Header->end(); J != JE; ++J) { if (isa<DbgInfoIntrinsic>(J)) continue; if (cast<Instruction>(J) == RealIV) continue; if (cast<Instruction>(J) == IV) continue; if (BaseUseSet.count(J) || AllRootUses.count(J) || (LoopIncUseSet.count(J) && (J->isTerminator() || isSafeToSpeculativelyExecute(J, DL)))) continue; if (AllRoots.count(J)) continue; if (Reductions.isSelectedPHI(J)) continue; DEBUG(dbgs() << "LRR: aborting reroll based on " << *RealIV << " unprocessed instruction found: " << *J << "\n"); MatchFailed = true; break; } if (MatchFailed) return false; DEBUG(dbgs() << "LRR: all instructions processed from " << *RealIV << "\n"); if (!Reductions.validateSelected()) return false; // At this point, we've validated the rerolling, and we're committed to // making changes! Reductions.replaceSelected(); // Remove instructions associated with non-base iterations. for (BasicBlock::reverse_iterator J = Header->rbegin(); J != Header->rend();) { if (AllRootUses.count(&*J)) { Instruction *D = &*J; DEBUG(dbgs() << "LRR: removing: " << *D << "\n"); D->eraseFromParent(); continue; } ++J; } // Insert the new induction variable. const SCEV *Start = RealIVSCEV->getStart(); if (Inc == 1) Start = SE->getMulExpr(Start, SE->getConstant(Start->getType(), Scale)); const SCEVAddRecExpr *H = cast<SCEVAddRecExpr>(SE->getAddRecExpr(Start, SE->getConstant(RealIVSCEV->getType(), 1), L, SCEV::FlagAnyWrap)); { // Limit the lifetime of SCEVExpander. 
SCEVExpander Expander(*SE, "reroll"); Value *NewIV = Expander.expandCodeFor(H, IV->getType(), Header->begin()); for (DenseSet<Instruction *>::iterator J = BaseUseSet.begin(), JE = BaseUseSet.end(); J != JE; ++J) (*J)->replaceUsesOfWith(IV, NewIV); if (BranchInst *BI = dyn_cast<BranchInst>(Header->getTerminator())) { if (LoopIncUseSet.count(BI)) { const SCEV *ICSCEV = RealIVSCEV->evaluateAtIteration(IterCount, *SE); if (Inc == 1) ICSCEV = SE->getMulExpr(ICSCEV, SE->getConstant(ICSCEV->getType(), Scale)); // Iteration count SCEV minus 1 const SCEV *ICMinus1SCEV = SE->getMinusSCEV(ICSCEV, SE->getConstant(ICSCEV->getType(), 1)); Value *ICMinus1; // Iteration count minus 1 if (isa<SCEVConstant>(ICMinus1SCEV)) { ICMinus1 = Expander.expandCodeFor(ICMinus1SCEV, NewIV->getType(), BI); } else { BasicBlock *Preheader = L->getLoopPreheader(); if (!Preheader) Preheader = InsertPreheaderForLoop(L, this); ICMinus1 = Expander.expandCodeFor(ICMinus1SCEV, NewIV->getType(), Preheader->getTerminator()); } Value *Cond = new ICmpInst(BI, CmpInst::ICMP_EQ, NewIV, ICMinus1, "exitcond"); BI->setCondition(Cond); if (BI->getSuccessor(1) != Header) BI->swapSuccessors(); } } } SimplifyInstructionsInBlock(Header, DL, TLI); DeleteDeadPHIs(Header, TLI); ++NumRerolledLoops; return true; }
MCFunction MCFunction::createFunctionFromMC(StringRef Name, const MCDisassembler *DisAsm, const MemoryObject &Region, uint64_t Start, uint64_t End, const MCInstrAnalysis *Ana, raw_ostream &DebugOut, SmallVectorImpl<uint64_t> &Calls) { std::vector<MCDecodedInst> Instructions; std::set<uint64_t> Splits; Splits.insert(Start); uint64_t Size; MCFunction f(Name); { DenseSet<uint64_t> VisitedInsts; SmallVector<uint64_t, 16> WorkList; WorkList.push_back(Start); // Disassemble code and gather basic block split points. while (!WorkList.empty()) { uint64_t Index = WorkList.pop_back_val(); if (VisitedInsts.find(Index) != VisitedInsts.end()) continue; // Already visited this location. for (;Index < End; Index += Size) { VisitedInsts.insert(Index); MCInst Inst; if (DisAsm->getInstruction(Inst, Size, Region, Index, DebugOut, nulls())){ Instructions.push_back(MCDecodedInst(Index, Size, Inst)); if (Ana->isBranch(Inst)) { uint64_t targ = Ana->evaluateBranch(Inst, Index, Size); if (targ != -1ULL && targ == Index+Size) continue; // Skip nop jumps. // If we could determine the branch target, make a note to start a // new basic block there and add the target to the worklist. if (targ != -1ULL) { Splits.insert(targ); WorkList.push_back(targ); WorkList.push_back(Index+Size); } Splits.insert(Index+Size); break; } else if (Ana->isReturn(Inst)) { // Return instruction. This basic block ends here. Splits.insert(Index+Size); break; } else if (Ana->isCall(Inst)) { uint64_t targ = Ana->evaluateBranch(Inst, Index, Size); // Add the call to the call list if the destination is known. if (targ != -1ULL && targ != Index+Size) Calls.push_back(targ); } } else { errs().write_hex(Index) << ": warning: invalid instruction encoding\n"; if (Size == 0) Size = 1; // skip illegible bytes } } } } // Make sure the instruction list is sorted. std::sort(Instructions.begin(), Instructions.end()); // Create basic blocks. unsigned ii = 0, ie = Instructions.size(); for (std::set<uint64_t>::iterator spi = Splits.begin(), spe = llvm::prior(Splits.end()); spi != spe; ++spi) { MCBasicBlock BB; uint64_t BlockEnd = *llvm::next(spi); // Add instructions to the BB. for (; ii != ie; ++ii) { if (Instructions[ii].Address < *spi || Instructions[ii].Address >= BlockEnd) break; BB.addInst(Instructions[ii]); } f.addBlock(*spi, BB); } std::sort(f.Blocks.begin(), f.Blocks.end()); // Calculate successors of each block. for (MCFunction::iterator i = f.begin(), e = f.end(); i != e; ++i) { MCBasicBlock &BB = const_cast<MCBasicBlock&>(i->second); if (BB.getInsts().empty()) continue; const MCDecodedInst &Inst = BB.getInsts().back(); if (Ana->isBranch(Inst.Inst)) { uint64_t targ = Ana->evaluateBranch(Inst.Inst, Inst.Address, Inst.Size); if (targ == -1ULL) { // Indirect branch. Bail and add all blocks of the function as a // successor. for (MCFunction::iterator i = f.begin(), e = f.end(); i != e; ++i) BB.addSucc(i->first); } else if (targ != Inst.Address+Inst.Size) BB.addSucc(targ); // Conditional branches can also fall through to the next block. if (Ana->isConditionalBranch(Inst.Inst) && llvm::next(i) != e) BB.addSucc(llvm::next(i)->first); } else { // No branch. Fall through to the next block. if (!Ana->isReturn(Inst.Inst) && llvm::next(i) != e) BB.addSucc(llvm::next(i)->first); } } return f; }
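// A compact sketch of the second half of createFunctionFromMC(): given
// instructions sorted by address and a std::set of block-start addresses,
// carve the instruction list into half-open [start, nextStart) basic blocks.
// The Inst/Block types here are hypothetical placeholders for the MC classes
// above; unlike the original, the last split point is closed with UINT64_MAX
// so the trailing block is also emitted.
#include <cstdint>
#include <iterator>
#include <map>
#include <set>
#include <vector>

struct Inst { uint64_t Address; };
using Block = std::vector<Inst>;

std::map<uint64_t, Block> splitIntoBlocks(const std::vector<Inst> &Insts,
                                          const std::set<uint64_t> &Splits) {
  std::map<uint64_t, Block> Blocks;
  std::size_t i = 0;
  for (auto SI = Splits.begin(); SI != Splits.end(); ++SI) {
    auto Next = std::next(SI);
    uint64_t End = (Next == Splits.end()) ? UINT64_MAX : *Next;
    Block &BB = Blocks[*SI];
    // Instructions are sorted, so each block takes a contiguous run.
    while (i < Insts.size() && Insts[i].Address >= *SI && Insts[i].Address < End)
      BB.push_back(Insts[i++]);
  }
  return Blocks;
}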
/// canonicalizeInputFunction - Functions like swift_retain return an /// argument as a low-level performance optimization. This makes it difficult /// to reason about pointer equality though, so undo it as an initial /// canonicalization step. After this step, all swift_retain's have been /// replaced with swift_retain. /// /// This also does some trivial peep-hole optimizations as we go. static bool canonicalizeInputFunction(Function &F, ARCEntryPointBuilder &B, SwiftRCIdentity *RC) { bool Changed = false; DenseSet<Value *> NativeRefs; DenseMap<Value *, TinyPtrVector<Instruction *>> UnknownRetains; DenseMap<Value *, TinyPtrVector<Instruction *>> UnknownReleases; for (auto &BB : F) { UnknownRetains.clear(); UnknownReleases.clear(); NativeRefs.clear(); for (auto I = BB.begin(); I != BB.end(); ) { Instruction &Inst = *I++; switch (classifyInstruction(Inst)) { // These instructions should not reach here based on the pass ordering. // i.e. LLVMARCOpt -> LLVMContractOpt. case RT_RetainN: case RT_UnknownRetainN: case RT_BridgeRetainN: case RT_ReleaseN: case RT_UnknownReleaseN: case RT_BridgeReleaseN: llvm_unreachable("These are only created by LLVMARCContract !"); case RT_Unknown: case RT_BridgeRelease: case RT_AllocObject: case RT_FixLifetime: case RT_NoMemoryAccessed: case RT_RetainUnowned: case RT_CheckUnowned: break; case RT_Retain: { CallInst &CI = cast<CallInst>(Inst); Value *ArgVal = RC->getSwiftRCIdentityRoot(CI.getArgOperand(0)); // retain(null) is a no-op. if (isa<ConstantPointerNull>(ArgVal)) { CI.eraseFromParent(); Changed = true; ++NumNoopDeleted; continue; } // Rewrite unknown retains into swift_retains. NativeRefs.insert(ArgVal); for (auto &X : UnknownRetains[ArgVal]) { B.setInsertPoint(X); B.createRetain(ArgVal, cast<CallInst>(X)); X->eraseFromParent(); ++NumUnknownRetainReleaseSRed; Changed = true; } UnknownRetains[ArgVal].clear(); break; } case RT_UnknownRetain: { CallInst &CI = cast<CallInst>(Inst); Value *ArgVal = RC->getSwiftRCIdentityRoot(CI.getArgOperand(0)); // unknownRetain(null) is a no-op. if (isa<ConstantPointerNull>(ArgVal)) { CI.eraseFromParent(); Changed = true; ++NumNoopDeleted; continue; } // Have not encountered a strong retain/release. keep it in the // unknown retain/release list for now. It might get replaced // later. if (NativeRefs.find(ArgVal) == NativeRefs.end()) { UnknownRetains[ArgVal].push_back(&CI); } else { B.setInsertPoint(&CI); B.createRetain(ArgVal, &CI); CI.eraseFromParent(); ++NumUnknownRetainReleaseSRed; Changed = true; } break; } case RT_Release: { CallInst &CI = cast<CallInst>(Inst); Value *ArgVal = RC->getSwiftRCIdentityRoot(CI.getArgOperand(0)); // release(null) is a no-op. if (isa<ConstantPointerNull>(ArgVal)) { CI.eraseFromParent(); Changed = true; ++NumNoopDeleted; continue; } // Rewrite unknown releases into swift_releases. NativeRefs.insert(ArgVal); for (auto &X : UnknownReleases[ArgVal]) { B.setInsertPoint(X); B.createRelease(ArgVal, cast<CallInst>(X)); X->eraseFromParent(); ++NumUnknownRetainReleaseSRed; Changed = true; } UnknownReleases[ArgVal].clear(); break; } case RT_UnknownRelease: { CallInst &CI = cast<CallInst>(Inst); Value *ArgVal = RC->getSwiftRCIdentityRoot(CI.getArgOperand(0)); // unknownRelease(null) is a no-op. if (isa<ConstantPointerNull>(ArgVal)) { CI.eraseFromParent(); Changed = true; ++NumNoopDeleted; continue; } // Have not encountered a strong retain/release. keep it in the // unknown retain/release list for now. It might get replaced // later. 
if (NativeRefs.find(ArgVal) == NativeRefs.end()) { UnknownReleases[ArgVal].push_back(&CI); } else { B.setInsertPoint(&CI); B.createRelease(ArgVal, &CI); CI.eraseFromParent(); ++NumUnknownRetainReleaseSRed; Changed = true; } break; } case RT_ObjCRelease: { CallInst &CI = cast<CallInst>(Inst); Value *ArgVal = RC->getSwiftRCIdentityRoot(CI.getArgOperand(0)); // objc_release(null) is a noop, zap it. if (isa<ConstantPointerNull>(ArgVal)) { CI.eraseFromParent(); Changed = true; ++NumNoopDeleted; continue; } break; } // These retain instructions return their argument so must be processed // specially. case RT_BridgeRetain: case RT_ObjCRetain: { // Canonicalize the retain so that nothing uses its result. CallInst &CI = cast<CallInst>(Inst); // Do not get RC identical value here, could end up with a // crash in replaceAllUsesWith as the type maybe different. Value *ArgVal = CI.getArgOperand(0); if (!CI.use_empty()) { CI.replaceAllUsesWith(ArgVal); Changed = true; } // {objc_retain,swift_unknownRetain}(null) is a noop, delete it. if (isa<ConstantPointerNull>(ArgVal)) { CI.eraseFromParent(); Changed = true; ++NumNoopDeleted; continue; } break; } } } } return Changed; }
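// A simplified sketch of the deferral idiom canonicalizeInputFunction() uses
// per basic block: "unknown" retain/release operations on a value are queued
// until a native retain/release of the same value proves it is a Swift
// reference, at which point the queued operations are flushed and rewritten.
// The generic Key/Event/Rewrite names are illustrative; the ARC entry points
// above are the real thing.
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include <vector>

using Key = const void *;
using Event = int;

struct UnknownOpQueue {
  llvm::DenseSet<Key> NativeRefs;                   // Keys proven native so far.
  llvm::DenseMap<Key, std::vector<Event>> Pending;  // Unknown ops queued per key.

  // An unknown op either rewrites immediately (key already proven) or waits.
  template <typename RewriteFn>
  void onUnknownOp(Key K, Event E, RewriteFn Rewrite) {
    if (NativeRefs.count(K))
      Rewrite(E);
    else
      Pending[K].push_back(E);
  }

  // A native op proves the key and flushes everything queued behind it.
  template <typename RewriteFn>
  void onNativeOp(Key K, RewriteFn Rewrite) {
    NativeRefs.insert(K);
    for (Event E : Pending[K])
      Rewrite(E);
    Pending[K].clear();
  }
};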
// // Function: GetNodesReachableFromGlobals() // // Description: // This function finds all DSNodes which are reachable from globals. It finds // DSNodes both within the local DSGraph as well as in the Globals graph that // are reachable from globals. It does, however, filter out those DSNodes // which are of no interest to automatic pool allocation. // // Inputs: // G - The DSGraph for which to find DSNodes which are reachable by globals. // This DSGraph can either by a DSGraph associated with a function *or* // it can be the globals graph itself. // // Outputs: // NodesFromGlobals - A reference to a container object in which to record // DSNodes reachable from globals. DSNodes are *added* to // this container; it is not cleared by this function. // DSNodes from both the local and globals graph are added. void AllHeapNodesHeuristic::GetNodesReachableFromGlobals (DSGraph* G, DenseSet<const DSNode*> &NodesFromGlobals) { // // Get the globals graph associated with this DSGraph. If the globals graph // is NULL, then the graph that was passed in *is* the globals graph. // DSGraph * GlobalsGraph = G->getGlobalsGraph(); if (!GlobalsGraph) GlobalsGraph = G; // // Find all DSNodes which are reachable in the globals graph. // for (DSGraph::node_iterator NI = GlobalsGraph->node_begin(); NI != GlobalsGraph->node_end(); ++NI) { NI->markReachableNodes(NodesFromGlobals); } // // Remove those global nodes which we know will never be pool allocated. // std::vector<const DSNode *> toRemove; for (DenseSet<const DSNode*>::iterator I = NodesFromGlobals.begin(), E = NodesFromGlobals.end(); I != E; ) { DenseSet<const DSNode*>::iterator Last = I; ++I; const DSNode *tmp = *Last; if (!(tmp->isHeapNode())) toRemove.push_back (tmp); // Do not poolallocate nodes that are cast to Int. // As we do not track through ints, these could be escaping if (tmp->isPtrToIntNode()) toRemove.push_back(tmp); } // // Remove all globally reachable DSNodes which do not require pools. // for (unsigned index = 0; index < toRemove.size(); ++index) { NodesFromGlobals.erase(toRemove[index]); } // // Now the fun part. Find DSNodes in the local graph that correspond to // those nodes reachable in the globals graph. Add them to the set of // reachable nodes, too. // if (G->getGlobalsGraph()) { // // Compute a mapping between local DSNodes and DSNodes in the globals // graph. // DSGraph::NodeMapTy NodeMap; G->computeGToGGMapping (NodeMap); // // Scan through all DSNodes in the local graph. If a local DSNode has a // corresponding DSNode in the globals graph that is reachable from a // global, then add the local DSNode to the set of DSNodes reachable from a // global. // // FIXME: A node's existance within the global DSGraph is probably // sufficient evidence that it is reachable from a global. // DSGraph::node_iterator ni = G->node_begin(); for (; ni != G->node_end(); ++ni) { DSNode * N = ni; if (NodesFromGlobals.count (NodeMap[N].getNode())) NodesFromGlobals.insert (N); } } }
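// A minimal sketch of the filtering idiom GetNodesReachableFromGlobals() uses
// with its toRemove vector: erasing from a DenseSet invalidates iterators, so
// unwanted elements are first collected into a separate container and only
// erased afterwards. The Node type and its fields are hypothetical stand-ins
// for DSNode; the criterion mirrors the pass (keep only heap nodes that are
// not cast to int).
#include "llvm/ADT/DenseSet.h"
#include <vector>

struct Node { bool Heap; bool PtrToInt; };

void filterPoolCandidates(llvm::DenseSet<const Node *> &Reachable) {
  std::vector<const Node *> ToRemove;
  for (const Node *N : Reachable)
    if (!N->Heap || N->PtrToInt)
      ToRemove.push_back(N);
  for (const Node *N : ToRemove)  // Safe: we are no longer iterating the set.
    Reachable.erase(N);
}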
/// \brief Figure out if the loop is worth full unrolling. /// /// Complete loop unrolling can make some loads constant, and we need to know /// if that would expose any further optimization opportunities. This routine /// estimates this optimization. It computes cost of unrolled loop /// (UnrolledCost) and dynamic cost of the original loop (RolledDynamicCost). By /// dynamic cost we mean that we won't count costs of blocks that are known not /// to be executed (i.e. if we have a branch in the loop and we know that at the /// given iteration its condition would be resolved to true, we won't add up the /// cost of the 'false'-block). /// \returns Optional value, holding the RolledDynamicCost and UnrolledCost. If /// the analysis failed (no benefits expected from the unrolling, or the loop is /// too big to analyze), the returned value is None. static Optional<EstimatedUnrollCost> analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT, ScalarEvolution &SE, const TargetTransformInfo &TTI, int MaxUnrolledLoopSize) { // We want to be able to scale offsets by the trip count and add more offsets // to them without checking for overflows, and we already don't want to // analyze *massive* trip counts, so we force the max to be reasonably small. assert(UnrollMaxIterationsCountToAnalyze < (INT_MAX / 2) && "The unroll iterations max is too large!"); // Only analyze inner loops. We can't properly estimate cost of nested loops // and we won't visit inner loops again anyway. if (!L->empty()) return None; // Don't simulate loops with a big or unknown tripcount if (!UnrollMaxIterationsCountToAnalyze || !TripCount || TripCount > UnrollMaxIterationsCountToAnalyze) return None; SmallSetVector<BasicBlock *, 16> BBWorklist; SmallSetVector<std::pair<BasicBlock *, BasicBlock *>, 4> ExitWorklist; DenseMap<Value *, Constant *> SimplifiedValues; SmallVector<std::pair<Value *, Constant *>, 4> SimplifiedInputValues; // The estimated cost of the unrolled form of the loop. We try to estimate // this by simplifying as much as we can while computing the estimate. int UnrolledCost = 0; // We also track the estimated dynamic (that is, actually executed) cost in // the rolled form. This helps identify cases when the savings from unrolling // aren't just exposing dead control flows, but actual reduced dynamic // instructions due to the simplifications which we expect to occur after // unrolling. int RolledDynamicCost = 0; // We track the simplification of each instruction in each iteration. We use // this to recursively merge costs into the unrolled cost on-demand so that // we don't count the cost of any dead code. This is essentially a map from // <instruction, int> to <bool, bool>, but stored as a densely packed struct. DenseSet<UnrolledInstState, UnrolledInstStateKeyInfo> InstCostMap; // A small worklist used to accumulate cost of instructions from each // observable and reached root in the loop. SmallVector<Instruction *, 16> CostWorklist; // PHI-used worklist used between iterations while accumulating cost. SmallVector<Instruction *, 4> PHIUsedList; // Helper function to accumulate cost for instructions in the loop. 
auto AddCostRecursively = [&](Instruction &RootI, int Iteration) { assert(Iteration >= 0 && "Cannot have a negative iteration!"); assert(CostWorklist.empty() && "Must start with an empty cost list"); assert(PHIUsedList.empty() && "Must start with an empty phi used list"); CostWorklist.push_back(&RootI); for (;; --Iteration) { do { Instruction *I = CostWorklist.pop_back_val(); // InstCostMap only uses I and Iteration as a key, the other two values // don't matter here. auto CostIter = InstCostMap.find({I, Iteration, 0, 0}); if (CostIter == InstCostMap.end()) // If an input to a PHI node comes from a dead path through the loop // we may have no cost data for it here. What that actually means is // that it is free. continue; auto &Cost = *CostIter; if (Cost.IsCounted) // Already counted this instruction. continue; // Mark that we are counting the cost of this instruction now. Cost.IsCounted = true; // If this is a PHI node in the loop header, just add it to the PHI set. if (auto *PhiI = dyn_cast<PHINode>(I)) if (PhiI->getParent() == L->getHeader()) { assert(Cost.IsFree && "Loop PHIs shouldn't be evaluated as they " "inherently simplify during unrolling."); if (Iteration == 0) continue; // Push the incoming value from the backedge into the PHI used list // if it is an in-loop instruction. We'll use this to populate the // cost worklist for the next iteration (as we count backwards). if (auto *OpI = dyn_cast<Instruction>( PhiI->getIncomingValueForBlock(L->getLoopLatch()))) if (L->contains(OpI)) PHIUsedList.push_back(OpI); continue; } // First accumulate the cost of this instruction. if (!Cost.IsFree) { UnrolledCost += TTI.getUserCost(I); DEBUG(dbgs() << "Adding cost of instruction (iteration " << Iteration << "): "); DEBUG(I->dump()); } // We must count the cost of every operand which is not free, // recursively. If we reach a loop PHI node, simply add it to the set // to be considered on the next iteration (backwards!). for (Value *Op : I->operands()) { // Check whether this operand is free due to being a constant or // outside the loop. auto *OpI = dyn_cast<Instruction>(Op); if (!OpI || !L->contains(OpI)) continue; // Otherwise accumulate its cost. CostWorklist.push_back(OpI); } } while (!CostWorklist.empty()); if (PHIUsedList.empty()) // We've exhausted the search. break; assert(Iteration > 0 && "Cannot track PHI-used values past the first iteration!"); CostWorklist.append(PHIUsedList.begin(), PHIUsedList.end()); PHIUsedList.clear(); } }; // Ensure that we don't violate the loop structure invariants relied on by // this analysis. assert(L->isLoopSimplifyForm() && "Must put loop into normal form first."); assert(L->isLCSSAForm(DT) && "Must have loops in LCSSA form to track live-out values."); DEBUG(dbgs() << "Starting LoopUnroll profitability analysis...\n"); // Simulate execution of each iteration of the loop counting instructions, // which would be simplified. // Since the same load will take different values on different iterations, // we literally have to go through all loop's iterations. for (unsigned Iteration = 0; Iteration < TripCount; ++Iteration) { DEBUG(dbgs() << " Analyzing iteration " << Iteration << "\n"); // Prepare for the iteration by collecting any simplified entry or backedge // inputs. for (Instruction &I : *L->getHeader()) { auto *PHI = dyn_cast<PHINode>(&I); if (!PHI) break; // The loop header PHI nodes must have exactly two input: one from the // loop preheader and one from the loop latch. 
assert( PHI->getNumIncomingValues() == 2 && "Must have an incoming value only for the preheader and the latch."); Value *V = PHI->getIncomingValueForBlock( Iteration == 0 ? L->getLoopPreheader() : L->getLoopLatch()); Constant *C = dyn_cast<Constant>(V); if (Iteration != 0 && !C) C = SimplifiedValues.lookup(V); if (C) SimplifiedInputValues.push_back({PHI, C}); } // Now clear and re-populate the map for the next iteration. SimplifiedValues.clear(); while (!SimplifiedInputValues.empty()) SimplifiedValues.insert(SimplifiedInputValues.pop_back_val()); UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, SE, L); BBWorklist.clear(); BBWorklist.insert(L->getHeader()); // Note that we *must not* cache the size, this loop grows the worklist. for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) { BasicBlock *BB = BBWorklist[Idx]; // Visit all instructions in the given basic block and try to simplify // it. We don't change the actual IR, just count optimization // opportunities. for (Instruction &I : *BB) { // Track this instruction's expected baseline cost when executing the // rolled loop form. RolledDynamicCost += TTI.getUserCost(&I); // Visit the instruction to analyze its loop cost after unrolling, // and if the visitor returns true, mark the instruction as free after // unrolling and continue. bool IsFree = Analyzer.visit(I); bool Inserted = InstCostMap.insert({&I, (int)Iteration, (unsigned)IsFree, /*IsCounted*/ false}).second; (void)Inserted; assert(Inserted && "Cannot have a state for an unvisited instruction!"); if (IsFree) continue; // If the instruction might have a side-effect recursively account for // the cost of it and all the instructions leading up to it. if (I.mayHaveSideEffects()) AddCostRecursively(I, Iteration); // Can't properly model a cost of a call. // FIXME: With a proper cost model we should be able to do it. if(isa<CallInst>(&I)) return None; // If unrolled body turns out to be too big, bail out. if (UnrolledCost > MaxUnrolledLoopSize) { DEBUG(dbgs() << " Exceeded threshold.. exiting.\n" << " UnrolledCost: " << UnrolledCost << ", MaxUnrolledLoopSize: " << MaxUnrolledLoopSize << "\n"); return None; } } TerminatorInst *TI = BB->getTerminator(); // Add in the live successors by first checking whether we have terminator // that may be simplified based on the values simplified by this call. BasicBlock *KnownSucc = nullptr; if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { if (BI->isConditional()) { if (Constant *SimpleCond = SimplifiedValues.lookup(BI->getCondition())) { // Just take the first successor if condition is undef if (isa<UndefValue>(SimpleCond)) KnownSucc = BI->getSuccessor(0); else if (ConstantInt *SimpleCondVal = dyn_cast<ConstantInt>(SimpleCond)) KnownSucc = BI->getSuccessor(SimpleCondVal->isZero() ? 1 : 0); } } } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) { if (Constant *SimpleCond = SimplifiedValues.lookup(SI->getCondition())) { // Just take the first successor if condition is undef if (isa<UndefValue>(SimpleCond)) KnownSucc = SI->getSuccessor(0); else if (ConstantInt *SimpleCondVal = dyn_cast<ConstantInt>(SimpleCond)) KnownSucc = SI->findCaseValue(SimpleCondVal).getCaseSuccessor(); } } if (KnownSucc) { if (L->contains(KnownSucc)) BBWorklist.insert(KnownSucc); else ExitWorklist.insert({BB, KnownSucc}); continue; } // Add BB's successors to the worklist. 
for (BasicBlock *Succ : successors(BB)) if (L->contains(Succ)) BBWorklist.insert(Succ); else ExitWorklist.insert({BB, Succ}); AddCostRecursively(*TI, Iteration); } // If we found no optimization opportunities on the first iteration, we // won't find them on later ones too. if (UnrolledCost == RolledDynamicCost) { DEBUG(dbgs() << " No opportunities found.. exiting.\n" << " UnrolledCost: " << UnrolledCost << "\n"); return None; } } while (!ExitWorklist.empty()) { BasicBlock *ExitingBB, *ExitBB; std::tie(ExitingBB, ExitBB) = ExitWorklist.pop_back_val(); for (Instruction &I : *ExitBB) { auto *PN = dyn_cast<PHINode>(&I); if (!PN) break; Value *Op = PN->getIncomingValueForBlock(ExitingBB); if (auto *OpI = dyn_cast<Instruction>(Op)) if (L->contains(OpI)) AddCostRecursively(*OpI, TripCount - 1); } } DEBUG(dbgs() << "Analysis finished:\n" << "UnrolledCost: " << UnrolledCost << ", " << "RolledDynamicCost: " << RolledDynamicCost << "\n"); return {{UnrolledCost, RolledDynamicCost}}; }
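// A self-contained sketch of the "state keyed by a packed struct" idiom behind
// InstCostMap: a DenseSet whose key-info hashes and compares only part of the
// element, so the remaining fields act as mutable per-key state. The InstState
// struct, its fields, and markCounted() are illustrative, not the pass's real
// UnrolledInstState/UnrolledInstStateKeyInfo types.
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Hashing.h"

struct InstState {
  const void *Inst;  // Key part 1: the instruction.
  int Iteration;     // Key part 2: which unrolled iteration.
  bool IsFree;       // State: not part of the hash or equality.
  bool IsCounted;    // State: flipped once the cost is accumulated.
};

struct InstStateKeyInfo {
  using PtrInfo = llvm::DenseMapInfo<const void *>;
  static InstState getEmptyKey() { return {PtrInfo::getEmptyKey(), 0, false, false}; }
  static InstState getTombstoneKey() { return {PtrInfo::getTombstoneKey(), 0, false, false}; }
  static unsigned getHashValue(const InstState &S) {
    return llvm::hash_combine(S.Inst, S.Iteration);
  }
  static bool isEqual(const InstState &L, const InstState &R) {
    return L.Inst == R.Inst && L.Iteration == R.Iteration;
  }
};

using InstStateSet = llvm::DenseSet<InstState, InstStateKeyInfo>;

// Usage: insert with placeholder state, then look an entry up by its key
// fields alone and mutate the non-key fields in place, exactly how the cost
// walk marks IsCounted.
inline void markCounted(InstStateSet &Set, const void *I, int Iter) {
  auto It = Set.find(InstState{I, Iter, false, false});
  if (It != Set.end())
    It->IsCounted = true;
}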
Result::Sat AttemptSolutionSDP::attempt(const ApproximateSimplex::Solution& sol){ const DenseSet& newBasis = sol.newBasis; const DenseMap<DeltaRational>& newValues = sol.newValues; DenseSet needsToBeAdded; for(DenseSet::const_iterator i = newBasis.begin(), i_end = newBasis.end(); i != i_end; ++i){ ArithVar b = *i; if(!d_tableau.isBasic(b)){ needsToBeAdded.add(b); } } DenseMap<DeltaRational>::const_iterator nvi = newValues.begin(), nvi_end = newValues.end(); for(; nvi != nvi_end; ++nvi){ ArithVar currentlyNb = *nvi; if(!d_tableau.isBasic(currentlyNb)){ if(!matchesNewValue(newValues, currentlyNb)){ const DeltaRational& newValue = newValues[currentlyNb]; Trace("arith::updateMany") << "updateMany:" << currentlyNb << " " << d_variables.getAssignment(currentlyNb) << " to "<< newValue << endl; d_linEq.update(currentlyNb, newValue); Assert(d_variables.assignmentIsConsistent(currentlyNb)); } } } d_errorSet.reduceToSignals(); d_errorSet.setSelectionRule(VAR_ORDER); static int instance = 0; ++instance; if(processSignals()){ Debug("arith::findModel") << "attemptSolution("<< instance <<") early conflict" << endl; d_conflictVariables.purge(); return Result::UNSAT; }else if(d_errorSet.errorEmpty()){ Debug("arith::findModel") << "attemptSolution("<< instance <<") fixed itself" << endl; return Result::SAT; } while(!needsToBeAdded.empty() && !d_errorSet.errorEmpty()){ ArithVar toRemove = ARITHVAR_SENTINEL; ArithVar toAdd = ARITHVAR_SENTINEL; DenseSet::const_iterator i = needsToBeAdded.begin(), i_end = needsToBeAdded.end(); for(; toAdd == ARITHVAR_SENTINEL && i != i_end; ++i){ ArithVar v = *i; Tableau::ColIterator colIter = d_tableau.colIterator(v); for(; !colIter.atEnd(); ++colIter){ const Tableau::Entry& entry = *colIter; Assert(entry.getColVar() == v); ArithVar b = d_tableau.rowIndexToBasic(entry.getRowIndex()); if(!newBasis.isMember(b)){ toAdd = v; bool favorBOverToRemove = (toRemove == ARITHVAR_SENTINEL) || (matchesNewValue(newValues, toRemove) && !matchesNewValue(newValues, b)) || (d_tableau.basicRowLength(toRemove) > d_tableau.basicRowLength(b)); if(favorBOverToRemove){ toRemove = b; } } } } Assert(toRemove != ARITHVAR_SENTINEL); Assert(toAdd != ARITHVAR_SENTINEL); Trace("arith::forceNewBasis") << toRemove << " " << toAdd << endl; //Message() << toRemove << " " << toAdd << endl; d_linEq.pivotAndUpdate(toRemove, toAdd, newValues[toRemove]); Trace("arith::forceNewBasis") << needsToBeAdded.size() << "to go" << endl; //Message() << needsToBeAdded.size() << "to go" << endl; needsToBeAdded.remove(toAdd); bool conflict = processSignals(); if(conflict){ d_errorSet.reduceToSignals(); d_conflictVariables.purge(); return Result::UNSAT; } } Assert( d_conflictVariables.empty() ); if(d_errorSet.errorEmpty()){ return Result::SAT; }else{ d_errorSet.reduceToSignals(); return Result::SAT_UNKNOWN; } }
Error AnalysisStyle::dump() { auto Tpi = File.getPDBTpiStream(); if (!Tpi) return Tpi.takeError(); TypeDatabase TypeDB(Tpi->getNumTypeRecords()); TypeDatabaseVisitor DBV(TypeDB); TypeVisitorCallbackPipeline Pipeline; HashLookupVisitor Hasher(*Tpi); // Add them to the database Pipeline.addCallbackToPipeline(DBV); // Store their hash values Pipeline.addCallbackToPipeline(Hasher); if (auto EC = codeview::visitTypeStream(Tpi->typeArray(), Pipeline)) return EC; auto &Adjusters = Tpi->getHashAdjusters(); DenseSet<uint32_t> AdjusterSet; for (const auto &Adj : Adjusters) { assert(AdjusterSet.find(Adj.second) == AdjusterSet.end()); AdjusterSet.insert(Adj.second); } uint32_t Count = 0; outs() << "Searching for hash collisions\n"; for (const auto &H : Hasher.Lookup) { if (H.second.size() <= 1) continue; ++Count; outs() << formatv("Hash: {0}, Count: {1} records\n", H.first, H.second.size()); for (const auto &R : H.second) { auto Iter = AdjusterSet.find(R.TI.getIndex()); StringRef Prefix; if (Iter != AdjusterSet.end()) { Prefix = "[HEAD]"; AdjusterSet.erase(Iter); } StringRef LeafName = getLeafTypeName(R.Record.Type); uint32_t TI = R.TI.getIndex(); StringRef TypeName = TypeDB.getTypeName(R.TI); outs() << formatv("{0,-6} {1} ({2:x}) {3}\n", Prefix, LeafName, TI, TypeName); } } outs() << "\n"; outs() << "Dumping hash adjustment chains\n"; for (const auto &A : Tpi->getHashAdjusters()) { TypeIndex TI(A.second); StringRef TypeName = TypeDB.getTypeName(TI); const CVType &HeadRecord = TypeDB.getTypeRecord(TI); assert(HeadRecord.Hash.hasValue()); auto CollisionsIter = Hasher.Lookup.find(*HeadRecord.Hash); if (CollisionsIter == Hasher.Lookup.end()) continue; const auto &Collisions = CollisionsIter->second; outs() << TypeName << "\n"; outs() << formatv(" [HEAD] {0:x} {1} {2}\n", A.second, getLeafTypeName(HeadRecord.Type), TypeName); for (const auto &Chain : Collisions) { if (Chain.TI == TI) continue; const CVType &TailRecord = TypeDB.getTypeRecord(Chain.TI); outs() << formatv(" {0:x} {1} {2}\n", Chain.TI.getIndex(), getLeafTypeName(TailRecord.Type), TypeDB.getTypeName(Chain.TI)); } } outs() << formatv("There are {0} orphaned hash adjusters\n", AdjusterSet.size()); for (const auto &Adj : AdjusterSet) { outs() << formatv(" {0}\n", Adj); } uint32_t DistinctHashValues = Hasher.Lookup.size(); outs() << formatv("{0}/{1} hash collisions", Count, DistinctHashValues); return Error::success(); }
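// A tiny sketch of the bookkeeping pattern AnalysisStyle::dump() applies to the
// hash adjusters: load every expected ID into a DenseSet, erase each one as it
// is matched, and whatever remains at the end is reported as orphaned. The
// findOrphans name and uint32_t IDs are illustrative, not the PDB types above.
#include "llvm/ADT/DenseSet.h"
#include <cstdint>
#include <vector>

std::vector<uint32_t> findOrphans(const std::vector<uint32_t> &Expected,
                                  const std::vector<uint32_t> &Seen) {
  llvm::DenseSet<uint32_t> Pending;
  for (uint32_t Id : Expected)
    Pending.insert(Id);
  for (uint32_t Id : Seen)
    Pending.erase(Id);  // Matched entries are consumed.
  return std::vector<uint32_t>(Pending.begin(), Pending.end());  // Never matched.
}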
// // Method: findGlobalPoolNodes() // // Description: // This method finds DSNodes that are reachable from globals and that need a // pool. The Automatic Pool Allocation transform will use the returned // information to build global pools for the DSNodes in question. // // For efficiency, this method also determines which DSNodes should be in the // same pool. // // Outputs: // Nodes - The DSNodes that are both reachable from globals and which should // have global pools will be *added* to this container. // void AllNodesHeuristic::findGlobalPoolNodes (DSNodeSet_t & Nodes) { // Get the globals graph for the program. DSGraph* GG = Graphs->getGlobalsGraph(); // // Get all of the nodes reachable from globals. // DenseSet<const DSNode*> GlobalNodes; GetNodesReachableFromGlobals (GG, GlobalNodes); // // Create a global pool for each global DSNode. // for (DenseSet<const DSNode *>::iterator NI = GlobalNodes.begin(); NI != GlobalNodes.end(); ++NI) { const DSNode * N = *NI; PoolMap[N] = OnePool(N); } // // Now find all DSNodes belonging to function-local DSGraphs which are // mirrored in the globals graph. These DSNodes require a global pool, too, // but must use the same pool as the one assigned to the corresponding global // DSNode. // for (Module::iterator F = M->begin(); F != M->end(); ++F) { // // Ignore functions that have no DSGraph. // if (!(Graphs->hasDSGraph(*F))) continue; // // Compute a mapping between local DSNodes and DSNodes in the globals // graph. // DSGraph* G = Graphs->getDSGraph(*F); DSGraph::NodeMapTy NodeMap; G->computeGToGGMapping (NodeMap); // // Scan through all DSNodes in the local graph. If a local DSNode has a // corresponding DSNode in the globals graph that is reachable from a // global, then add the local DSNode to the set of DSNodes reachable from // a global. // DSGraph::node_iterator ni = G->node_begin(); for (; ni != G->node_end(); ++ni) { DSNode * N = ni; DSNode * GGN = NodeMap[N].getNode(); assert (!GGN || GlobalNodes.count (GGN)); if (GGN && GlobalNodes.count (GGN)) PoolMap[GGN].NodesInPool.push_back (N); } } // // Scan through all the local graphs looking for DSNodes which may be // reachable by a global. These nodes may not end up in the globals graph // because of the fact that DSA doesn't actually know what is happening to // them. // // FIXME: I believe this code causes a condition in which a local DSNode is // given a local pool in one function but not in other functions. // Someone needs to investigate whether DSA is being consistent here, // and if not, if that inconsistency is correct. // #if 0 for (Module::iterator F = M->begin(); F != M->end(); ++F) { if (F->isDeclaration()) continue; DSGraph* G = Graphs->getDSGraph(*F); for (DSGraph::node_iterator I = G->node_begin(), E = G->node_end(); I != E; ++I) { DSNode * Node = I; if (Node->isExternalNode() || Node->isUnknownNode()) { GlobalNodes.insert (Node); } } } #endif // // Copy the values into the output container. Note that DenseSet has no // iterator traits (or whatever allows us to treat DenseSet has a generic // container), so we have to use a loop to copy values from the DenseSet into // the output container. // // Note that we do not copy local DSNodes into the output container; we // merely copy those nodes in the globals graph. // for (DenseSet<const DSNode*>::iterator I = GlobalNodes.begin(), E = GlobalNodes.end(); I != E; ++I) { Nodes.insert (*I); } return; }
//
// Benchmark: reads one escaped string per line from stdin and measures how
// fast the strings (or StringRef references to them, optionally copied into an
// arena) can be inserted into several hash table implementations.
//
int main(int argc, char ** argv)
{
    std::cerr << std::fixed << std::setprecision(3);
    std::ofstream devnull("/dev/null");

    DB::ReadBufferFromFileDescriptor in(STDIN_FILENO);
    size_t n = atoi(argv[1]);
    size_t elems_show = 1;

    using Vec = std::vector<std::string>;
    using Set = std::unordered_map<std::string, int>;
    using RefsSet = std::unordered_map<StringRef, int, StringRefHash>;
    using DenseSet = google::dense_hash_map<std::string, int>;
    using RefsDenseSet = google::dense_hash_map<StringRef, int, StringRefHash>;
    using RefsHashMap = HashMap<StringRef, int, StringRefHash>;

    Vec vec;
    vec.reserve(n);

    {
        Stopwatch watch;

        std::string s;
        for (size_t i = 0; i < n && !in.eof(); ++i)
        {
            DB::readEscapedString(s, in);
            DB::assertChar('\n', in);
            vec.push_back(s);
        }

        std::cerr << "Read and inserted into vector in " << watch.elapsedSeconds() << " sec, "
            << vec.size() / watch.elapsedSeconds() << " rows/sec., "
            << in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec." << std::endl;
    }

    {
        DB::Arena pool;
        Stopwatch watch;
        const char * res = nullptr;

        for (Vec::iterator it = vec.begin(); it != vec.end(); ++it)
        {
            const char * tmp = pool.insert(it->data(), it->size());
            if (it == vec.begin())
                res = tmp;
        }

        std::cerr << "Inserted into pool in " << watch.elapsedSeconds() << " sec, "
            << vec.size() / watch.elapsedSeconds() << " rows/sec., "
            << in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec." << std::endl;

        devnull.write(res, 100);
        devnull << std::endl;
    }

    {
        Set set;
        Stopwatch watch;

        for (Vec::iterator it = vec.begin(); it != vec.end(); ++it)
            set[*it] = 0;

        std::cerr << "Inserted into std::unordered_map in " << watch.elapsedSeconds() << " sec, "
            << vec.size() / watch.elapsedSeconds() << " rows/sec., "
            << in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec." << std::endl;

        size_t i = 0;
        for (Set::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
        {
            devnull << it->first;
            devnull << std::endl;
        }
    }

    {
        RefsSet set;
        Stopwatch watch;

        for (Vec::iterator it = vec.begin(); it != vec.end(); ++it)
            set[StringRef(*it)] = 0;

        std::cerr << "Inserted refs into std::unordered_map in " << watch.elapsedSeconds() << " sec, "
            << vec.size() / watch.elapsedSeconds() << " rows/sec., "
            << in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec." << std::endl;

        size_t i = 0;
        for (RefsSet::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
        {
            devnull.write(it->first.data, it->first.size);
            devnull << std::endl;
        }
    }

    {
        DB::Arena pool;
        RefsSet set;
        Stopwatch watch;

        for (Vec::iterator it = vec.begin(); it != vec.end(); ++it)
            set[StringRef(pool.insert(it->data(), it->size()), it->size())] = 0;

        std::cerr << "Inserted into pool and refs into std::unordered_map in " << watch.elapsedSeconds() << " sec, "
            << vec.size() / watch.elapsedSeconds() << " rows/sec., "
            << in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec." << std::endl;

        size_t i = 0;
        for (RefsSet::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
        {
            devnull.write(it->first.data, it->first.size);
            devnull << std::endl;
        }
    }

    {
        DenseSet set;
        set.set_empty_key(DenseSet::key_type());
        Stopwatch watch;

        for (Vec::iterator it = vec.begin(); it != vec.end(); ++it)
            set[*it] = 0;

        std::cerr << "Inserted into google::dense_hash_map in " << watch.elapsedSeconds() << " sec, "
            << vec.size() / watch.elapsedSeconds() << " rows/sec., "
            << in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec." << std::endl;

        size_t i = 0;
        for (DenseSet::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
        {
            devnull << it->first;
            devnull << std::endl;
        }
    }

    {
        RefsDenseSet set;
        set.set_empty_key(RefsDenseSet::key_type());
        Stopwatch watch;

        for (Vec::iterator it = vec.begin(); it != vec.end(); ++it)
            set[StringRef(it->data(), it->size())] = 0;

        std::cerr << "Inserted refs into google::dense_hash_map in " << watch.elapsedSeconds() << " sec, "
            << vec.size() / watch.elapsedSeconds() << " rows/sec., "
            << in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec." << std::endl;

        size_t i = 0;
        for (RefsDenseSet::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
        {
            devnull.write(it->first.data, it->first.size);
            devnull << std::endl;
        }
    }

    {
        DB::Arena pool;
        RefsDenseSet set;
        set.set_empty_key(RefsDenseSet::key_type());
        Stopwatch watch;

        for (Vec::iterator it = vec.begin(); it != vec.end(); ++it)
            set[StringRef(pool.insert(it->data(), it->size()), it->size())] = 0;

        std::cerr << "Inserted into pool and refs into google::dense_hash_map in " << watch.elapsedSeconds() << " sec, "
            << vec.size() / watch.elapsedSeconds() << " rows/sec., "
            << in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec." << std::endl;

        size_t i = 0;
        for (RefsDenseSet::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
        {
            devnull.write(it->first.data, it->first.size);
            devnull << std::endl;
        }
    }

    {
        RefsHashMap set;
        Stopwatch watch;

        for (Vec::iterator it = vec.begin(); it != vec.end(); ++it)
        {
            RefsHashMap::iterator inserted_it;
            bool inserted;
            set.emplace(StringRef(*it), inserted_it, inserted);
        }

        std::cerr << "Inserted refs into HashMap in " << watch.elapsedSeconds() << " sec, "
            << vec.size() / watch.elapsedSeconds() << " rows/sec., "
            << in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec." << std::endl;

        size_t i = 0;
        for (RefsHashMap::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
        {
            devnull.write(it->first.data, it->first.size);
            devnull << std::endl;
        }

        //std::cerr << set.size() << ", " << set.getCollisions() << std::endl;
    }

    {
        DB::Arena pool;
        RefsHashMap set;
        Stopwatch watch;

        for (Vec::iterator it = vec.begin(); it != vec.end(); ++it)
        {
            RefsHashMap::iterator inserted_it;
            bool inserted;
            set.emplace(StringRef(pool.insert(it->data(), it->size()), it->size()), inserted_it, inserted);
        }

        std::cerr << "Inserted into pool and refs into HashMap in " << watch.elapsedSeconds() << " sec, "
            << vec.size() / watch.elapsedSeconds() << " rows/sec., "
            << in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec." << std::endl;

        size_t i = 0;
        for (RefsHashMap::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
        {
            devnull.write(it->first.data, it->first.size);
            devnull << std::endl;
        }
    }

    return 0;
}
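//
// A plausible way to drive the benchmark above (the binary name and the data
// file are assumptions, not taken from the build system): feed one escaped
// string per line on stdin and pass the number of rows to read as the first
// argument, e.g.
//
//     ./hash_map_string 10000000 < strings.tsv
//
// Each block then prints elapsed time, rows/sec. and MB/sec. for the
// corresponding container to stderr.
//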