//
// Method: findGlobalPoolNodes()
//
// Description:
//  This method finds DSNodes that are reachable from globals and that need a
//  pool.  The Automatic Pool Allocation transform will use the returned
//  information to build global pools for the DSNodes in question.
//
//  Note that this method does not assign DSNodes to pools; it merely decides
//  which DSNodes are reachable from globals and will need a pool of global
//  scope.
//
// Outputs:
//  Nodes - The DSNodes that are both reachable from globals and which should
//          have global pools will be *added* to this container.
//
void
AllHeapNodesHeuristic::findGlobalPoolNodes (DSNodeSet_t & Nodes) {
  // Get the globals graph for the program.
  DSGraph* GG = Graphs->getGlobalsGraph();

  // Get all of the nodes reachable from globals.
  DenseSet<const DSNode*> GlobalHeapNodes;
  GetNodesReachableFromGlobals (GG, GlobalHeapNodes);

  //
  // Create a global pool for each global DSNode.
  //
  for (DenseSet<const DSNode *>::iterator NI = GlobalHeapNodes.begin();
       NI != GlobalHeapNodes.end(); ++NI) {
    const DSNode * N = *NI;
    PoolMap[N] = OnePool(N);
  }

  //
  // Now find all DSNodes belonging to function-local DSGraphs which are
  // mirrored in the globals graph.  These DSNodes require a global pool, too.
  //
  for (Module::iterator F = M->begin(); F != M->end(); ++F) {
    if (Graphs->hasDSGraph(*F)) {
      DSGraph* G = Graphs->getDSGraph(*F);
      DSGraph::NodeMapTy NodeMap;
      G->computeGToGGMapping (NodeMap);

      //
      // Scan through all DSNodes in the local graph.  If a local DSNode has
      // a corresponding DSNode in the globals graph that is reachable from a
      // global, then add the local DSNode to the set of DSNodes reachable
      // from a global.
      //
      DSGraph::node_iterator ni = G->node_begin();
      for (; ni != G->node_end(); ++ni) {
        DSNode * N = ni;
        DSNode * GGN = NodeMap[N].getNode();

        //assert (!GGN || GlobalHeapNodes.count (GGN));
        if (GGN && GlobalHeapNodes.count (GGN))
          PoolMap[GGN].NodesInPool.push_back (N);
      }
    }
  }

  //
  // Copy the values into the output container.  Note that DenseSet has no
  // iterator traits (or whatever allows us to treat DenseSet as a generic
  // container), so we have to use a loop to copy values from the DenseSet
  // into the output container.
  //
  for (DenseSet<const DSNode*>::iterator I = GlobalHeapNodes.begin(),
         E = GlobalHeapNodes.end(); I != E; ++I) {
    Nodes.insert (*I);
  }

  return;
}
void AliasAnalysisChecker::collectMissingAliases(
    const DenseSet<ValuePair> &DynamicAliases,
    vector<ValuePair> &MissingAliases) {
  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
  AliasAnalysis &BaselineAA = getAnalysis<BaselineAliasAnalysis>();

  MissingAliases.clear();
  for (DenseSet<ValuePair>::const_iterator I = DynamicAliases.begin();
       I != DynamicAliases.end(); ++I) {
    Value *V1 = I->first, *V2 = I->second;
    if (IntraProc && !DynAAUtils::IsIntraProcQuery(V1, V2)) {
      continue;
    }

    // Ignore BitCasts and PhiNodes. The reports on them are typically
    // redundant.
    if (isa<BitCastInst>(V1) || isa<BitCastInst>(V2))
      continue;
    if (isa<PHINode>(V1) || isa<PHINode>(V2))
      continue;

    if (!CheckAllPointers) {
      if (!DynAAUtils::PointerIsDereferenced(V1) ||
          !DynAAUtils::PointerIsDereferenced(V2)) {
        continue;
      }
    }

    if (BaselineAA.alias(V1, V2) != AliasAnalysis::NoAlias &&
        AA.alias(V1, V2) == AliasAnalysis::NoAlias) {
      MissingAliases.push_back(make_pair(V1, V2));
    }
  }
}
static void MarkNodesWhichMustBePassedIn(DenseSet<const DSNode*> &MarkedNodes,
                                         Function &F, DSGraph* G,
                                         EntryPointAnalysis* EPA) {
  // All DSNodes reachable from arguments must be passed in...
  // unless this is an entry point to the program.
  if (!EPA->isEntryPoint(&F)) {
    for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end();
         I != E; ++I) {
      DSGraph::ScalarMapTy::iterator AI = G->getScalarMap().find(I);
      if (AI != G->getScalarMap().end())
        if (DSNode * N = AI->second.getNode())
          N->markReachableNodes(MarkedNodes);
    }
  }

  // Mark the returned node as needing to be passed in.
  if (DSNode * RetNode = G->getReturnNodeFor(F).getNode())
    RetNode->markReachableNodes(MarkedNodes);

  // Calculate which DSNodes are reachable from globals.  If a node is
  // reachable from a global, we will create a global pool for it, so no
  // argument passing is required.
  DenseSet<const DSNode*> NodesFromGlobals;
  GetNodesReachableFromGlobals(G, NodesFromGlobals);

  // Remove any nodes reachable from a global.  These nodes will be put into
  // global pools, which do not require arguments to be passed in.
  for (DenseSet<const DSNode*>::iterator I = NodesFromGlobals.begin(),
         E = NodesFromGlobals.end(); I != E; ++I)
    MarkedNodes.erase(*I);
}
//
// Method: findGlobalPoolNodes()
//
// Description:
//  This method finds DSNodes that are reachable from globals and that need a
//  pool.  The Automatic Pool Allocation transform will use the returned
//  information to build global pools for the DSNodes in question.
//
//  Note that this method does not assign DSNodes to pools; it merely decides
//  which DSNodes are reachable from globals and will need a pool of global
//  scope.
//
// Outputs:
//  Nodes - The DSNodes that are both reachable from globals and which should
//          have global pools will be *added* to this container.
//
void
Heuristic::findGlobalPoolNodes (DSNodeSet_t & Nodes) {
  // Get the globals graph for the program.
  DSGraph* GG = Graphs->getGlobalsGraph();

  // Get all of the nodes reachable from globals.
  DenseSet<const DSNode*> GlobalHeapNodes;
  GetNodesReachableFromGlobals (GG, GlobalHeapNodes);

  //
  // Now find all DSNodes belonging to function-local DSGraphs which are
  // mirrored in the globals graph.  These DSNodes require a global pool, too.
  //
  for (Module::iterator F = M->begin(); F != M->end(); ++F) {
    if (Graphs->hasDSGraph(*F)) {
      DSGraph* G = Graphs->getDSGraph(*F);
      GetNodesReachableFromGlobals (G, GlobalHeapNodes);
    }
  }

  //
  // Copy the values into the output container.  Note that DenseSet has no
  // iterator traits (or whatever allows us to treat DenseSet as a generic
  // container), so we have to use a loop to copy values from the DenseSet
  // into the output container.
  //
  for (DenseSet<const DSNode*>::iterator I = GlobalHeapNodes.begin(),
         E = GlobalHeapNodes.end(); I != E; ++I) {
    Nodes.insert (*I);
  }

  return;
}
// Collects missing aliases to <MissingAliases>.
void AliasAnalysisChecker::collectMissingAliases(
    const DenseSet<ValuePair> &DynamicAliases) {
  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
  AliasAnalysis &BaselineAA = getAnalysis<BaselineAliasAnalysis>();

  MissingAliases.clear();
  for (DenseSet<ValuePair>::const_iterator I = DynamicAliases.begin();
       I != DynamicAliases.end(); ++I) {
    Value *V1 = I->first, *V2 = I->second;
    if (IntraProc && !DynAAUtils::IsIntraProcQuery(V1, V2)) {
      continue;
    }

    if (!CheckAllPointers) {
      if (!DynAAUtils::PointerIsDereferenced(V1) ||
          !DynAAUtils::PointerIsDereferenced(V2)) {
        continue;
      }
    }

    if (BaselineAA.alias(V1, V2) != AliasAnalysis::NoAlias &&
        AA.alias(V1, V2) == AliasAnalysis::NoAlias) {
      MissingAliases.push_back(make_pair(V1, V2));
    }
  }
}
// Calculate the largest possible vregsPassed sets. These are the registers
// that can pass through an MBB live, but may not be live every time. It is
// assumed that all vregsPassed sets are empty before the call.
void MachineVerifier::calcRegsPassed() {
  // First push live-out regs to successors' vregsPassed. Remember the MBBs
  // that have any vregsPassed.
  DenseSet<const MachineBasicBlock*> todo;
  for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
       MFI != MFE; ++MFI) {
    const MachineBasicBlock &MBB(*MFI);
    BBInfo &MInfo = MBBInfoMap[&MBB];
    if (!MInfo.reachable)
      continue;
    for (MachineBasicBlock::const_succ_iterator SuI = MBB.succ_begin(),
         SuE = MBB.succ_end(); SuI != SuE; ++SuI) {
      BBInfo &SInfo = MBBInfoMap[*SuI];
      if (SInfo.addPassed(MInfo.regsLiveOut))
        todo.insert(*SuI);
    }
  }

  // Iteratively push vregsPassed to successors. This will converge to the
  // same final state regardless of DenseSet iteration order.
  while (!todo.empty()) {
    const MachineBasicBlock *MBB = *todo.begin();
    todo.erase(MBB);
    BBInfo &MInfo = MBBInfoMap[MBB];
    for (MachineBasicBlock::const_succ_iterator SuI = MBB->succ_begin(),
         SuE = MBB->succ_end(); SuI != SuE; ++SuI) {
      if (*SuI == MBB)
        continue;
      BBInfo &SInfo = MBBInfoMap[*SuI];
      if (SInfo.addPassed(MInfo.vregsPassed))
        todo.insert(*SuI);
    }
  }
}
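The convergence claim in the comment above holds for any monotone worklist iteration: addPassed only ever grows a block's set, and a block is re-queued only when its set actually grew, so every pop order reaches the same fixed point. Below is a minimal, self-contained sketch of the same idiom (std::set standing in for DenseSet, a made-up four-node graph, and a single seeded "register"); the backward calcRegsRequired walk that follows is the same pattern run over predecessors instead of successors.

#include <cstdio>
#include <set>
#include <vector>

int main() {
  // Tiny CFG: node -> successors (0 branches to 1 and 2; both reach 3).
  std::vector<std::vector<int>> succ = {{1, 2}, {3}, {3}, {}};

  // passed[n] plays the role of vregsPassed; seed node 0 with "register" 7.
  std::vector<std::set<int>> passed(4);
  passed[0].insert(7);

  std::set<int> todo = {0};
  while (!todo.empty()) {
    int n = *todo.begin();
    todo.erase(todo.begin());
    for (int s : succ[n]) {
      auto before = passed[s].size();
      passed[s].insert(passed[n].begin(), passed[n].end());
      if (passed[s].size() != before)  // the set grew: revisit s
        todo.insert(s);
    }
  }
  std::printf("node 3 sees %zu value(s)\n", passed[3].size());  // prints 1
}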
// Calculate the set of virtual registers that must be passed through each
// basic block in order to satisfy the requirements of successor blocks. This
// is very similar to calcRegsPassed, only backwards.
void MachineVerifier::calcRegsRequired() {
  // First push live-in regs to predecessors' vregsRequired.
  DenseSet<const MachineBasicBlock*> todo;
  for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
       MFI != MFE; ++MFI) {
    const MachineBasicBlock &MBB(*MFI);
    BBInfo &MInfo = MBBInfoMap[&MBB];
    for (MachineBasicBlock::const_pred_iterator PrI = MBB.pred_begin(),
         PrE = MBB.pred_end(); PrI != PrE; ++PrI) {
      BBInfo &PInfo = MBBInfoMap[*PrI];
      if (PInfo.addRequired(MInfo.vregsLiveIn))
        todo.insert(*PrI);
    }
  }

  // Iteratively push vregsRequired to predecessors. This will converge to
  // the same final state regardless of DenseSet iteration order.
  while (!todo.empty()) {
    const MachineBasicBlock *MBB = *todo.begin();
    todo.erase(MBB);
    BBInfo &MInfo = MBBInfoMap[MBB];
    for (MachineBasicBlock::const_pred_iterator PrI = MBB->pred_begin(),
         PrE = MBB->pred_end(); PrI != PrE; ++PrI) {
      if (*PrI == MBB)
        continue;
      BBInfo &SInfo = MBBInfoMap[*PrI];
      if (SInfo.addRequired(MInfo.vregsRequired))
        todo.insert(*PrI);
    }
  }
}
void LazyValueInfoCache::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc,
                                    BasicBlock *NewSucc) {
  // When an edge in the graph has been threaded, values that we could not
  // determine a value for before (i.e. were marked overdefined) may be
  // possible to solve now. We do NOT try to proactively update these values.
  // Instead, we clear their entries from the cache, and allow lazy updating
  // to recompute them when needed.

  // The updating process is fairly simple: we need to drop cached info
  // for all values that were marked overdefined in OldSucc, and for those
  // same values in any successor of OldSucc (except NewSucc) in which they
  // were also marked overdefined.
  std::vector<BasicBlock*> worklist;
  worklist.push_back(OldSucc);

  DenseSet<Value*> ClearSet;
  for (DenseSet<OverDefinedPairTy>::iterator I = OverDefinedCache.begin(),
       E = OverDefinedCache.end(); I != E; ++I) {
    if (I->first == OldSucc)
      ClearSet.insert(I->second);
  }

  // Use a worklist to perform a depth-first search of OldSucc's successors.
  // NOTE: We do not need a visited list since any blocks we have already
  // visited will have had their overdefined markers cleared already, and we
  // thus won't loop to their successors.
  while (!worklist.empty()) {
    BasicBlock *ToUpdate = worklist.back();
    worklist.pop_back();

    // Skip blocks only accessible through NewSucc.
    if (ToUpdate == NewSucc) continue;

    bool changed = false;
    for (DenseSet<Value*>::iterator I = ClearSet.begin(), E = ClearSet.end();
         I != E; ++I) {
      // If a value was marked overdefined in OldSucc, and is here too...
      DenseSet<OverDefinedPairTy>::iterator OI =
        OverDefinedCache.find(std::make_pair(ToUpdate, *I));
      if (OI == OverDefinedCache.end()) continue;

      // Remove it from the caches.
      ValueCacheEntryTy &Entry = ValueCache[LVIValueHandle(*I, this)];
      ValueCacheEntryTy::iterator CI = Entry.find(ToUpdate);

      assert(CI != Entry.end() && "Couldn't find entry to update?");
      Entry.erase(CI);
      OverDefinedCache.erase(OI);

      // If we removed anything, then we potentially need to update the
      // block's successors too.
      changed = true;
    }

    if (!changed) continue;

    worklist.insert(worklist.end(), succ_begin(ToUpdate), succ_end(ToUpdate));
  }
}
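The "no visited list" trick in the NOTE above generalizes: when processing a block consumes the very state that made it worth visiting, a second visit is a cheap no-op and cycles terminate on their own. A minimal sketch of that idea, with a hypothetical three-block cyclic graph and a single "overdefined" marker per block:

#include <set>
#include <utility>
#include <vector>

int main() {
  // Cyclic three-block graph: 0 -> {1, 2}, 1 -> {2}, 2 -> {0}.
  std::vector<std::vector<int>> succ = {{1, 2}, {2}, {0}};

  // One "overdefined" marker per block for a single value, call it 7.
  std::set<std::pair<int, int>> overdefined = {{0, 7}, {1, 7}, {2, 7}};

  std::vector<int> worklist = {0};
  while (!worklist.empty()) {
    int b = worklist.back();
    worklist.pop_back();

    // Erasing the marker doubles as the visited check: revisiting a block
    // erases nothing, so its successors are not pushed again and the cycle
    // terminates without an explicit visited set.
    bool changed = overdefined.erase({b, 7}) > 0;
    if (!changed)
      continue;

    worklist.insert(worklist.end(), succ[b].begin(), succ[b].end());
  }
  return overdefined.empty() ? 0 : 1;  // every marker cleared exactly once
}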
// Find the number of arguments we need to add to the functions.
void CSDataRando::findFunctionArgNodes(
    const std::vector<const Function *> &Functions) {
  std::vector<DSNodeHandle> RootNodes;
  for (const Function *F : Functions) {
    DSGraph *G = DSA->getDSGraph(*F);
    G->getFunctionArgumentsForCall(F, RootNodes);
  }

  // No additional args to pass.
  if (RootNodes.size() == 0) {
    return;
  }

  DenseSet<const DSNode*> MarkedNodes;
  for (DSNodeHandle &NH : RootNodes) {
    if (DSNode *N = NH.getNode()) {
      N->markReachableNodes(MarkedNodes);
    }
  }

  // Remove global nodes from the arg nodes. If we are using the bottom-up
  // analysis, then every context will access a global node through the
  // global map instead.
  for (auto i : GlobalNodes) {
    MarkedNodes.erase(i);
  }

  // Remove any nodes that are marked do-not-encrypt.
  SmallVector<const DSNode*, 8> MarkedNodeWorkList;
  for (auto i : MarkedNodes) {
    if (i->isDoNotEncryptNode()) {
      MarkedNodeWorkList.push_back(i);
    }
  }
  for (auto i : MarkedNodeWorkList) {
    MarkedNodes.erase(i);
  }

  if (MarkedNodes.empty()) {
    return;
  }

  // Create a FuncInfo entry for each of the functions with the arg nodes
  // that need to be passed.
  for (const Function *F : Functions) {
    FuncInfo &FI = FunctionInfo[F];
    FI.ArgNodes.insert(FI.ArgNodes.end(), MarkedNodes.begin(),
                       MarkedNodes.end());
  }
}
/// FindFunctionPoolArgs - In the first pass over the program, we decide
/// which arguments will have to be added for each function, building the
/// FunctionInfo map and recording this info in the ArgNodes set.
static void FindFunctionPoolArgs(Function &F, FuncInfo& FI,
                                 EntryPointAnalysis* EPA) {
  DenseSet<const DSNode*> MarkedNodes;

  if (FI.G->node_begin() == FI.G->node_end())
    return; // No memory activity, nothing is required

  // Find DataStructure nodes which are allocated in pools non-local to the
  // current function. This set will contain all of the DSNodes which require
  // pools to be passed in from outside of the function.
  MarkNodesWhichMustBePassedIn(MarkedNodes, F, FI.G, EPA);

  //FI.ArgNodes.insert(FI.ArgNodes.end(), MarkedNodes.begin(),
  //                   MarkedNodes.end());
  // Work around DenseSet not having iterator traits.
  for (DenseSet<const DSNode*>::iterator ii = MarkedNodes.begin(),
         ee = MarkedNodes.end(); ii != ee; ++ii)
    FI.ArgNodes.insert(FI.ArgNodes.end(), *ii);
}
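The commented-out range insert above is the natural way to write this copy; it is worked around because the DenseSet of this vintage did not expose the std::iterator_traits typedefs that std-style range operations need to compile. A small self-contained sketch of the workaround idiom (std::set is used here only so the example runs; the manual element-by-element loop is the point):

#include <cstdio>
#include <set>
#include <vector>

template <typename SetT>
void appendSetToVector(const SetT &S,
                       std::vector<typename SetT::value_type> &V) {
  // V.insert(V.end(), S.begin(), S.end());  // needs iterator_traits
  for (typename SetT::const_iterator I = S.begin(), E = S.end(); I != E; ++I)
    V.push_back(*I);  // only requires ++ and * on the iterator
}

int main() {
  std::set<int> S = {1, 2, 3};
  std::vector<int> V;
  appendSetToVector(S, V);
  std::printf("%zu elements copied\n", V.size());  // prints "3 elements copied"
}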
//
// Method: eraseCallsTo()
//
// Description:
//  This method removes the specified function from DSCallsites within the
//  specified function.  We do not do anything with call sites that call this
//  function indirectly (for which there is not much point as we do not yet
//  know the targets of indirect function calls).
//
void
StdLibDataStructures::eraseCallsTo(Function* F) {
  typedef std::pair<DSGraph*, Function*> RemovalPair;
  DenseSet<RemovalPair> ToRemove;
  for (Value::use_iterator ii = F->use_begin(), ee = F->use_end();
       ii != ee; ++ii)
    if (CallInst* CI = dyn_cast<CallInst>(*ii)) {
      if (CI->getCalledValue() == F) {
        DSGraph* Graph = getDSGraph(*CI->getParent()->getParent());
        //delete the call
        DEBUG(errs() << "Removing " << F->getName().str() << " from "
                     << CI->getParent()->getParent()->getName().str()
                     << "\n");
        ToRemove.insert(std::make_pair(Graph, F));
      }
    } else if (InvokeInst* CI = dyn_cast<InvokeInst>(*ii)) {
      if (CI->getCalledValue() == F) {
        DSGraph* Graph = getDSGraph(*CI->getParent()->getParent());
        //delete the call
        DEBUG(errs() << "Removing " << F->getName().str() << " from "
                     << CI->getParent()->getParent()->getName().str()
                     << "\n");
        ToRemove.insert(std::make_pair(Graph, F));
      }
    } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(*ii)) {
      if (CE->isCast()) {
        for (Value::use_iterator ci = CE->use_begin(), ce = CE->use_end();
             ci != ce; ++ci) {
          if (CallInst* CI = dyn_cast<CallInst>(*ci)) {
            if (CI->getCalledValue() == CE) {
              DSGraph* Graph = getDSGraph(*CI->getParent()->getParent());
              //delete the call
              DEBUG(errs() << "Removing " << F->getName().str() << " from "
                           << CI->getParent()->getParent()->getName().str()
                           << "\n");
              ToRemove.insert(std::make_pair(Graph, F));
            }
          }
        }
      }
    }

  for (DenseSet<RemovalPair>::iterator I = ToRemove.begin(),
         E = ToRemove.end(); I != E; ++I)
    I->first->removeFunctionCalls(*I->second);
}
void RTAssociate::SetupGlobalPools(Module* M, DSGraph* GG) {
  // Get the globals graph for the program.
  // DSGraph* GG = Graphs->getGlobalsGraph();

  // Get all of the nodes reachable from globals.
  DenseSet<const DSNode*> GlobalHeapNodes;
  GetNodesReachableFromGlobals(GG, GlobalHeapNodes);

  errs() << "Pool allocating " << GlobalHeapNodes.size()
         << " global nodes!\n";

  FuncInfo& FI = makeFuncInfo(0, GG);
  while (GlobalHeapNodes.size()) {
    const DSNode* D = *GlobalHeapNodes.begin();
    GlobalHeapNodes.erase(D);
    FI.PoolDescriptors[D] = CreateGlobalPool(D, M);
  }
}
// Reroll the provided loop with respect to the provided induction variable.
// Generally, we're looking for a loop like this:
//
// %iv = phi [ (preheader, ...), (body, %iv.next) ]
// f(%iv)
// %iv.1 = add %iv, 1                <-- a root increment
// f(%iv.1)
// %iv.2 = add %iv, 2                <-- a root increment
// f(%iv.2)
// %iv.scale_m_1 = add %iv, scale-1  <-- a root increment
// f(%iv.scale_m_1)
// ...
// %iv.next = add %iv, scale
// %cmp = icmp(%iv, ...)
// br %cmp, header, exit
//
// Notably, we do not require that f(%iv), f(%iv.1), etc. be isolated groups
// of instructions. In other words, the instructions in f(%iv), f(%iv.1),
// etc. can be intermixed with each other. The restriction imposed by this
// algorithm is that the relative order of the isomorphic instructions in
// f(%iv), f(%iv.1), etc. be the same.
//
// First, we collect the use set of %iv, excluding the other increment roots.
// This gives us f(%iv). Then we iterate over the loop instructions (scale-1)
// times, having collected the use set of f(%iv.(i+1)), during which we:
//   - Ensure that the next unmatched instruction in f(%iv) is isomorphic to
//     the next unmatched instruction in f(%iv.(i+1)).
//   - Ensure that both matched instructions don't have any external users
//     (with the exception of last-in-chain reduction instructions).
//   - Track the (aliasing) write set, and other side effects, of all
//     instructions that belong to future iterations that come before the
//     matched instructions. If the matched instructions read from that write
//     set, then f(%iv) or f(%iv.(i+1)) has some dependency on instructions
//     in f(%iv.(j+1)) for some j > i, and we cannot reroll the loop.
//     Similarly, if any of these future instructions had side effects (could
//     not be speculatively executed), and so do the matched instructions,
//     then we cannot reorder those side-effect-producing instructions, and
//     rerolling fails.
//
// Finally, we make sure that all loop instructions are either loop increment
// roots, belong to simple latch code, parts of validated reductions, part of
// f(%iv) or part of some f(%iv.i). If all of that is true (and all
// reductions have been validated), then we reroll the loop.
bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header,
                        const SCEV *IterCount,
                        ReductionTracker &Reductions) {
  const SCEVAddRecExpr *RealIVSCEV = cast<SCEVAddRecExpr>(SE->getSCEV(IV));
  uint64_t Inc = cast<SCEVConstant>(RealIVSCEV->getOperand(1))->
                   getValue()->getZExtValue();
  // The collection of loop increment instructions.
  SmallInstructionVector LoopIncs;
  uint64_t Scale = Inc;

  // The effective induction variable, IV, is normally also the real
  // induction variable. When we're dealing with a loop like:
  //   for (int i = 0; i < 500; ++i)
  //     x[3*i] = ...;
  //     x[3*i+1] = ...;
  //     x[3*i+2] = ...;
  // then the real IV is still i, but the effective IV is (3*i).
  Instruction *RealIV = IV;
  if (Inc == 1 && !findScaleFromMul(RealIV, Scale, IV, LoopIncs))
    return false;

  assert(Scale <= MaxInc && "Scale is too large");
  assert(Scale > 1 && "Scale must be at least 2");

  // The set of increment instructions for each increment value.
  SmallVector<SmallInstructionVector, 32> Roots(Scale-1);
  SmallInstructionSet AllRoots;
  if (!collectAllRoots(L, Inc, Scale, IV, Roots, AllRoots, LoopIncs))
    return false;

  DEBUG(dbgs() << "LRR: Found all root induction increments for: " <<
                  *RealIV << "\n");

  // An array of just the possible reductions for this scale factor. When we
  // collect the set of all users of some root instructions, these reduction
  // instructions are treated as 'final' (their uses are not considered).
  // This is important because we don't want the root use set to search down
  // the reduction chain.
  SmallInstructionSet PossibleRedSet;
  SmallInstructionSet PossibleRedLastSet, PossibleRedPHISet;
  Reductions.restrictToScale(Scale, PossibleRedSet, PossibleRedPHISet,
                             PossibleRedLastSet);

  // We now need to check for equivalence of the use graph of each root with
  // that of the primary induction variable (excluding the roots). Our goal
  // here is not to solve the full graph isomorphism problem, but rather to
  // catch common cases without a lot of work. As a result, we will assume
  // that the relative order of the instructions in each unrolled iteration
  // is the same (although we will not make an assumption about how the
  // different iterations are intermixed). Note that while the order must be
  // the same, the instructions may not be in the same basic block.
  SmallInstructionSet Exclude(AllRoots);
  Exclude.insert(LoopIncs.begin(), LoopIncs.end());

  DenseSet<Instruction *> BaseUseSet;
  collectInLoopUserSet(L, IV, Exclude, PossibleRedSet, BaseUseSet);

  DenseSet<Instruction *> AllRootUses;
  std::vector<DenseSet<Instruction *> > RootUseSets(Scale-1);

  bool MatchFailed = false;
  for (unsigned i = 0; i < Scale-1 && !MatchFailed; ++i) {
    DenseSet<Instruction *> &RootUseSet = RootUseSets[i];
    collectInLoopUserSet(L, Roots[i], SmallInstructionSet(),
                         PossibleRedSet, RootUseSet);

    DEBUG(dbgs() << "LRR: base use set size: " << BaseUseSet.size() <<
                    " vs. iteration increment " << (i+1) <<
                    " use set size: " << RootUseSet.size() << "\n");

    if (BaseUseSet.size() != RootUseSet.size()) {
      MatchFailed = true;
      break;
    }

    // In addition to regular aliasing information, we need to look for
    // instructions from later (future) iterations that have side effects
    // preventing us from reordering them past other instructions with side
    // effects.
    bool FutureSideEffects = false;
    AliasSetTracker AST(*AA);

    // The map between instructions in f(%iv.(i+1)) and f(%iv).
    DenseMap<Value *, Value *> BaseMap;

    assert(L->getNumBlocks() == 1 && "Cannot handle multi-block loops");
    for (BasicBlock::iterator J1 = Header->begin(), J2 = Header->begin(),
         JE = Header->end(); J1 != JE && !MatchFailed; ++J1) {
      if (cast<Instruction>(J1) == RealIV)
        continue;
      if (cast<Instruction>(J1) == IV)
        continue;
      if (!BaseUseSet.count(J1))
        continue;
      if (PossibleRedPHISet.count(J1)) // Skip reduction PHIs.
        continue;

      while (J2 != JE && (!RootUseSet.count(J2) ||
             std::find(Roots[i].begin(), Roots[i].end(), J2) !=
               Roots[i].end())) {
        // As we iterate through the instructions, instructions that don't
        // belong to previous iterations (or the base case), must belong to
        // future iterations. We want to track the alias set of writes from
        // previous iterations.
        if (!isa<PHINode>(J2) && !BaseUseSet.count(J2) &&
            !AllRootUses.count(J2)) {
          if (J2->mayWriteToMemory())
            AST.add(J2);

          // Note: This is specifically guarded by a check on isa<PHINode>,
          // which while a valid (somewhat arbitrary) micro-optimization, is
          // needed because otherwise isSafeToSpeculativelyExecute returns
          // false on PHI nodes.
          if (!isSimpleLoadStore(J2) &&
              !isSafeToSpeculativelyExecute(J2, DL))
            FutureSideEffects = true;
        }

        ++J2;
      }

      if (!J1->isSameOperationAs(J2)) {
        DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 <<
                        " vs. " << *J2 << "\n");
        MatchFailed = true;
        break;
      }

      // Make sure that this instruction, which is in the use set of this
      // root instruction, does not also belong to the base set or the set
      // of some previous root instruction.
      if (BaseUseSet.count(J2) || AllRootUses.count(J2)) {
        DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 <<
                        " vs. " << *J2 << " (prev. case overlap)\n");
        MatchFailed = true;
        break;
      }

      // Make sure that we don't alias with any instruction in the alias set
      // tracker. If we do, then we depend on a future iteration, and we
      // can't reroll.
      if (J2->mayReadFromMemory()) {
        for (AliasSetTracker::iterator K = AST.begin(), KE = AST.end();
             K != KE && !MatchFailed; ++K) {
          if (K->aliasesUnknownInst(J2, *AA)) {
            DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 <<
                            " vs. " << *J2 <<
                            " (depends on future store)\n");
            MatchFailed = true;
            break;
          }
        }
      }

      // If we've passed an instruction from a future iteration that may
      // have side effects, and this instruction might also, then we can't
      // reorder them, and this matching fails. As an exception, we allow
      // the alias set tracker to handle regular (simple) load/store
      // dependencies.
      if (FutureSideEffects &&
            ((!isSimpleLoadStore(J1) && !isSafeToSpeculativelyExecute(J1)) ||
             (!isSimpleLoadStore(J2) && !isSafeToSpeculativelyExecute(J2)))) {
        DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 <<
                        " vs. " << *J2 <<
                        " (side effects prevent reordering)\n");
        MatchFailed = true;
        break;
      }

      // For instructions that are part of a reduction, if the operation is
      // associative, then don't bother matching the operands (because we
      // already know that the instructions are isomorphic, and the order
      // within the iteration does not matter). For non-associative
      // reductions, we do need to match the operands, because we need to
      // reject out-of-order instructions within an iteration!
      // For example (assume floating-point addition), we need to reject
      // this:
      //   x += a[i]; x += b[i];
      //   x += a[i+1]; x += b[i+1];
      //   x += b[i+2]; x += a[i+2];
      bool InReduction = Reductions.isPairInSame(J1, J2);

      if (!(InReduction && J1->isAssociative())) {
        bool Swapped = false, SomeOpMatched = false;
        for (unsigned j = 0; j < J1->getNumOperands() && !MatchFailed; ++j) {
          Value *Op2 = J2->getOperand(j);

          // If this is part of a reduction (and the operation is not
          // associative), then we match all operands, but not those that
          // are part of the reduction.
          if (InReduction)
            if (Instruction *Op2I = dyn_cast<Instruction>(Op2))
              if (Reductions.isPairInSame(J2, Op2I))
                continue;

          DenseMap<Value *, Value *>::iterator BMI = BaseMap.find(Op2);
          if (BMI != BaseMap.end())
            Op2 = BMI->second;
          else if (std::find(Roots[i].begin(), Roots[i].end(),
                             (Instruction*) Op2) != Roots[i].end())
            Op2 = IV;

          if (J1->getOperand(Swapped ? unsigned(!j) : j) != Op2) {
            // If we've not already decided to swap the matched operands,
            // and we've not already matched our first operand (note that we
            // could have skipped matching the first operand because it is
            // part of a reduction above), and the instruction is
            // commutative, then try the swapped match.
            if (!Swapped && J1->isCommutative() && !SomeOpMatched &&
                J1->getOperand(!j) == Op2) {
              Swapped = true;
            } else {
              DEBUG(dbgs() << "LRR: iteration root match failed at " <<
                              *J1 << " vs. " << *J2 <<
                              " (operand " << j << ")\n");
              MatchFailed = true;
              break;
            }
          }

          SomeOpMatched = true;
        }
      }

      if ((!PossibleRedLastSet.count(J1) && hasUsesOutsideLoop(J1, L)) ||
          (!PossibleRedLastSet.count(J2) && hasUsesOutsideLoop(J2, L))) {
        DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 <<
                        " vs. " << *J2 << " (uses outside loop)\n");
        MatchFailed = true;
        break;
      }

      if (!MatchFailed)
        BaseMap.insert(std::pair<Value *, Value *>(J2, J1));

      AllRootUses.insert(J2);
      Reductions.recordPair(J1, J2, i+1);

      ++J2;
    }
  }

  if (MatchFailed)
    return false;

  DEBUG(dbgs() << "LRR: Matched all iteration increments for " <<
                  *RealIV << "\n");

  DenseSet<Instruction *> LoopIncUseSet;
  collectInLoopUserSet(L, LoopIncs, SmallInstructionSet(),
                       SmallInstructionSet(), LoopIncUseSet);
  DEBUG(dbgs() << "LRR: Loop increment set size: " <<
                  LoopIncUseSet.size() << "\n");

  // Make sure that all instructions in the loop have been included in some
  // use set.
  for (BasicBlock::iterator J = Header->begin(), JE = Header->end();
       J != JE; ++J) {
    if (isa<DbgInfoIntrinsic>(J))
      continue;
    if (cast<Instruction>(J) == RealIV)
      continue;
    if (cast<Instruction>(J) == IV)
      continue;
    if (BaseUseSet.count(J) || AllRootUses.count(J) ||
        (LoopIncUseSet.count(J) && (J->isTerminator() ||
                                    isSafeToSpeculativelyExecute(J, DL))))
      continue;

    if (AllRoots.count(J))
      continue;

    if (Reductions.isSelectedPHI(J))
      continue;

    DEBUG(dbgs() << "LRR: aborting reroll based on " << *RealIV <<
                    " unprocessed instruction found: " << *J << "\n");
    MatchFailed = true;
    break;
  }

  if (MatchFailed)
    return false;

  DEBUG(dbgs() << "LRR: all instructions processed from " <<
                  *RealIV << "\n");

  if (!Reductions.validateSelected())
    return false;

  // At this point, we've validated the rerolling, and we're committed to
  // making changes!

  Reductions.replaceSelected();

  // Remove instructions associated with non-base iterations.
  for (BasicBlock::reverse_iterator J = Header->rbegin();
       J != Header->rend();) {
    if (AllRootUses.count(&*J)) {
      Instruction *D = &*J;
      DEBUG(dbgs() << "LRR: removing: " << *D << "\n");
      D->eraseFromParent();
      continue;
    }

    ++J;
  }

  // Insert the new induction variable.
  const SCEV *Start = RealIVSCEV->getStart();
  if (Inc == 1)
    Start = SE->getMulExpr(Start,
                           SE->getConstant(Start->getType(), Scale));
  const SCEVAddRecExpr *H =
    cast<SCEVAddRecExpr>(SE->getAddRecExpr(Start,
                           SE->getConstant(RealIVSCEV->getType(), 1),
                           L, SCEV::FlagAnyWrap));
  { // Limit the lifetime of SCEVExpander.
    SCEVExpander Expander(*SE, "reroll");
    Value *NewIV = Expander.expandCodeFor(H, IV->getType(), Header->begin());

    for (DenseSet<Instruction *>::iterator J = BaseUseSet.begin(),
         JE = BaseUseSet.end(); J != JE; ++J)
      (*J)->replaceUsesOfWith(IV, NewIV);

    if (BranchInst *BI = dyn_cast<BranchInst>(Header->getTerminator())) {
      if (LoopIncUseSet.count(BI)) {
        const SCEV *ICSCEV = RealIVSCEV->evaluateAtIteration(IterCount, *SE);
        if (Inc == 1)
          ICSCEV = SE->getMulExpr(ICSCEV,
                                  SE->getConstant(ICSCEV->getType(), Scale));
        // Iteration count SCEV minus 1
        const SCEV *ICMinus1SCEV =
          SE->getMinusSCEV(ICSCEV, SE->getConstant(ICSCEV->getType(), 1));

        Value *ICMinus1; // Iteration count minus 1
        if (isa<SCEVConstant>(ICMinus1SCEV)) {
          ICMinus1 = Expander.expandCodeFor(ICMinus1SCEV,
                                            NewIV->getType(), BI);
        } else {
          BasicBlock *Preheader = L->getLoopPreheader();
          if (!Preheader)
            Preheader = InsertPreheaderForLoop(L, this);

          ICMinus1 = Expander.expandCodeFor(ICMinus1SCEV, NewIV->getType(),
                                            Preheader->getTerminator());
        }

        Value *Cond = new ICmpInst(BI, CmpInst::ICMP_EQ, NewIV, ICMinus1,
                                   "exitcond");
        BI->setCondition(Cond);

        if (BI->getSuccessor(1) != Header)
          BI->swapSuccessors();
      }
    }
  }

  SimplifyInstructionsInBlock(Header, DL, TLI);
  DeleteDeadPHIs(Header, TLI);
  ++NumRerolledLoops;
  return true;
}
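For intuition, here is what a successful reroll does to a loop, as a schematic source-level before/after (hypothetical arrays and bounds; the pass itself works on IR, not on C++ source):

// Before: the body has been manually unrolled with scale == 3; each line is
// one of the f(%iv.k) groups from the comment above.
void unrolled(int *a, const int *b) {
  for (int i = 0; i < 600; i += 3) {
    a[i]     = b[i]     * 2;  // f(%iv)
    a[i + 1] = b[i + 1] * 2;  // f(%iv.1)
    a[i + 2] = b[i + 2] * 2;  // f(%iv.2)
  }
}

// After rerolling: a single copy of the body with a unit increment and a
// correspondingly adjusted exit condition. The compact body is easier for
// later passes (e.g. vectorization) to analyze.
void rerolled(int *a, const int *b) {
  for (int i = 0; i < 600; ++i)
    a[i] = b[i] * 2;
}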
//
// Function: GetNodesReachableFromGlobals()
//
// Description:
//  This function finds all DSNodes which are reachable from globals.  It
//  finds DSNodes both within the local DSGraph as well as in the Globals
//  graph that are reachable from globals.  It does, however, filter out
//  those DSNodes which are of no interest to automatic pool allocation.
//
// Inputs:
//  G - The DSGraph for which to find DSNodes which are reachable by
//      globals.  This DSGraph can either be a DSGraph associated with a
//      function *or* it can be the globals graph itself.
//
// Outputs:
//  NodesFromGlobals - A reference to a container object in which to record
//                     DSNodes reachable from globals.  DSNodes are *added*
//                     to this container; it is not cleared by this
//                     function.  DSNodes from both the local and globals
//                     graph are added.
void
AllHeapNodesHeuristic::GetNodesReachableFromGlobals (DSGraph* G,
                                DenseSet<const DSNode*> &NodesFromGlobals) {
  //
  // Get the globals graph associated with this DSGraph.  If the globals
  // graph is NULL, then the graph that was passed in *is* the globals
  // graph.
  //
  DSGraph * GlobalsGraph = G->getGlobalsGraph();
  if (!GlobalsGraph)
    GlobalsGraph = G;

  //
  // Find all DSNodes which are reachable in the globals graph.
  //
  for (DSGraph::node_iterator NI = GlobalsGraph->node_begin();
       NI != GlobalsGraph->node_end(); ++NI) {
    NI->markReachableNodes(NodesFromGlobals);
  }

  //
  // Remove those global nodes which we know will never be pool allocated.
  //
  std::vector<const DSNode *> toRemove;
  for (DenseSet<const DSNode*>::iterator I = NodesFromGlobals.begin(),
         E = NodesFromGlobals.end(); I != E; ) {
    DenseSet<const DSNode*>::iterator Last = I; ++I;

    const DSNode *tmp = *Last;
    if (!(tmp->isHeapNode()))
      toRemove.push_back (tmp);

    // Do not pool allocate nodes that are cast to Int.
    // As we do not track through ints, these could be escaping.
    if (tmp->isPtrToIntNode())
      toRemove.push_back(tmp);
  }

  //
  // Remove all globally reachable DSNodes which do not require pools.
  //
  for (unsigned index = 0; index < toRemove.size(); ++index) {
    NodesFromGlobals.erase(toRemove[index]);
  }

  //
  // Now the fun part.  Find DSNodes in the local graph that correspond to
  // those nodes reachable in the globals graph.  Add them to the set of
  // reachable nodes, too.
  //
  if (G->getGlobalsGraph()) {
    //
    // Compute a mapping between local DSNodes and DSNodes in the globals
    // graph.
    //
    DSGraph::NodeMapTy NodeMap;
    G->computeGToGGMapping (NodeMap);

    //
    // Scan through all DSNodes in the local graph.  If a local DSNode has a
    // corresponding DSNode in the globals graph that is reachable from a
    // global, then add the local DSNode to the set of DSNodes reachable
    // from a global.
    //
    // FIXME: A node's existence within the global DSGraph is probably
    //        sufficient evidence that it is reachable from a global.
    //
    DSGraph::node_iterator ni = G->node_begin();
    for (; ni != G->node_end(); ++ni) {
      DSNode * N = ni;
      if (NodesFromGlobals.count (NodeMap[N].getNode()))
        NodesFromGlobals.insert (N);
    }
  }
}
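The two-pass shape of the filter above (collect candidates into toRemove, then erase) is the usual way to prune a hash set: it keeps the erase calls out of the traversal entirely, so nothing depends on container-specific iterator-invalidation rules. The same idiom in a self-contained sketch, with std::unordered_set standing in for DenseSet:

#include <cstdio>
#include <unordered_set>
#include <vector>

int main() {
  std::unordered_set<int> Nodes = {1, 2, 3, 4, 5, 6};

  // Pass 1: record the elements to drop (here: the odd ones).
  std::vector<int> ToRemove;
  for (int N : Nodes)
    if (N % 2 != 0)
      ToRemove.push_back(N);

  // Pass 2: erase them only after the traversal has finished.
  for (int N : ToRemove)
    Nodes.erase(N);

  std::printf("%zu nodes remain\n", Nodes.size());  // prints "3 nodes remain"
}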
Result::Sat AttemptSolutionSDP::attempt(const ApproximateSimplex::Solution& sol){
  const DenseSet& newBasis = sol.newBasis;
  const DenseMap<DeltaRational>& newValues = sol.newValues;

  DenseSet needsToBeAdded;
  for(DenseSet::const_iterator i = newBasis.begin(), i_end = newBasis.end();
      i != i_end; ++i){
    ArithVar b = *i;
    if(!d_tableau.isBasic(b)){
      needsToBeAdded.add(b);
    }
  }

  DenseMap<DeltaRational>::const_iterator nvi = newValues.begin(),
    nvi_end = newValues.end();
  for(; nvi != nvi_end; ++nvi){
    ArithVar currentlyNb = *nvi;
    if(!d_tableau.isBasic(currentlyNb)){
      if(!matchesNewValue(newValues, currentlyNb)){
        const DeltaRational& newValue = newValues[currentlyNb];
        Trace("arith::updateMany")
          << "updateMany:" << currentlyNb << " "
          << d_variables.getAssignment(currentlyNb) << " to " << newValue
          << endl;
        d_linEq.update(currentlyNb, newValue);
        Assert(d_variables.assignmentIsConsistent(currentlyNb));
      }
    }
  }

  d_errorSet.reduceToSignals();
  d_errorSet.setSelectionRule(VAR_ORDER);

  static int instance = 0;
  ++instance;

  if(processSignals()){
    Debug("arith::findModel")
      << "attemptSolution(" << instance << ") early conflict" << endl;
    d_conflictVariables.purge();
    return Result::UNSAT;
  }else if(d_errorSet.errorEmpty()){
    Debug("arith::findModel")
      << "attemptSolution(" << instance << ") fixed itself" << endl;
    return Result::SAT;
  }

  while(!needsToBeAdded.empty() && !d_errorSet.errorEmpty()){
    ArithVar toRemove = ARITHVAR_SENTINEL;
    ArithVar toAdd = ARITHVAR_SENTINEL;

    DenseSet::const_iterator i = needsToBeAdded.begin(),
      i_end = needsToBeAdded.end();
    for(; toAdd == ARITHVAR_SENTINEL && i != i_end; ++i){
      ArithVar v = *i;

      Tableau::ColIterator colIter = d_tableau.colIterator(v);
      for(; !colIter.atEnd(); ++colIter){
        const Tableau::Entry& entry = *colIter;
        Assert(entry.getColVar() == v);
        ArithVar b = d_tableau.rowIndexToBasic(entry.getRowIndex());
        if(!newBasis.isMember(b)){
          toAdd = v;

          bool favorBOverToRemove =
            (toRemove == ARITHVAR_SENTINEL) ||
            (matchesNewValue(newValues, toRemove) &&
             !matchesNewValue(newValues, b)) ||
            (d_tableau.basicRowLength(toRemove) >
             d_tableau.basicRowLength(b));

          if(favorBOverToRemove){
            toRemove = b;
          }
        }
      }
    }
    Assert(toRemove != ARITHVAR_SENTINEL);
    Assert(toAdd != ARITHVAR_SENTINEL);

    Trace("arith::forceNewBasis") << toRemove << " " << toAdd << endl;
    //Message() << toRemove << " " << toAdd << endl;

    d_linEq.pivotAndUpdate(toRemove, toAdd, newValues[toRemove]);

    Trace("arith::forceNewBasis") << needsToBeAdded.size() << " to go" << endl;
    //Message() << needsToBeAdded.size() << " to go" << endl;
    needsToBeAdded.remove(toAdd);

    bool conflict = processSignals();
    if(conflict){
      d_errorSet.reduceToSignals();
      d_conflictVariables.purge();
      return Result::UNSAT;
    }
  }
  Assert(d_conflictVariables.empty());

  if(d_errorSet.errorEmpty()){
    return Result::SAT;
  }else{
    d_errorSet.reduceToSignals();
    return Result::SAT_UNKNOWN;
  }
}
//
// Method: findGlobalPoolNodes()
//
// Description:
//  This method finds DSNodes that are reachable from globals and that need
//  a pool.  The Automatic Pool Allocation transform will use the returned
//  information to build global pools for the DSNodes in question.
//
//  For efficiency, this method also determines which DSNodes should be in
//  the same pool.
//
// Outputs:
//  Nodes - The DSNodes that are both reachable from globals and which
//          should have global pools will be *added* to this container.
//
void
AllNodesHeuristic::findGlobalPoolNodes (DSNodeSet_t & Nodes) {
  // Get the globals graph for the program.
  DSGraph* GG = Graphs->getGlobalsGraph();

  //
  // Get all of the nodes reachable from globals.
  //
  DenseSet<const DSNode*> GlobalNodes;
  GetNodesReachableFromGlobals (GG, GlobalNodes);

  //
  // Create a global pool for each global DSNode.
  //
  for (DenseSet<const DSNode *>::iterator NI = GlobalNodes.begin();
       NI != GlobalNodes.end(); ++NI) {
    const DSNode * N = *NI;
    PoolMap[N] = OnePool(N);
  }

  //
  // Now find all DSNodes belonging to function-local DSGraphs which are
  // mirrored in the globals graph.  These DSNodes require a global pool,
  // too, but must use the same pool as the one assigned to the
  // corresponding global DSNode.
  //
  for (Module::iterator F = M->begin(); F != M->end(); ++F) {
    //
    // Ignore functions that have no DSGraph.
    //
    if (!(Graphs->hasDSGraph(*F)))
      continue;

    //
    // Compute a mapping between local DSNodes and DSNodes in the globals
    // graph.
    //
    DSGraph* G = Graphs->getDSGraph(*F);
    DSGraph::NodeMapTy NodeMap;
    G->computeGToGGMapping (NodeMap);

    //
    // Scan through all DSNodes in the local graph.  If a local DSNode has a
    // corresponding DSNode in the globals graph that is reachable from a
    // global, then add the local DSNode to the set of DSNodes reachable
    // from a global.
    //
    DSGraph::node_iterator ni = G->node_begin();
    for (; ni != G->node_end(); ++ni) {
      DSNode * N = ni;
      DSNode * GGN = NodeMap[N].getNode();

      assert (!GGN || GlobalNodes.count (GGN));
      if (GGN && GlobalNodes.count (GGN))
        PoolMap[GGN].NodesInPool.push_back (N);
    }
  }

  //
  // Scan through all the local graphs looking for DSNodes which may be
  // reachable by a global.  These nodes may not end up in the globals graph
  // because DSA does not actually know what is happening to them.
  //
  // FIXME: I believe this code causes a condition in which a local DSNode
  //        is given a local pool in one function but not in other
  //        functions.  Someone needs to investigate whether DSA is being
  //        consistent here, and if not, if that inconsistency is correct.
  //
#if 0
  for (Module::iterator F = M->begin(); F != M->end(); ++F) {
    if (F->isDeclaration())
      continue;
    DSGraph* G = Graphs->getDSGraph(*F);
    for (DSGraph::node_iterator I = G->node_begin(), E = G->node_end();
         I != E; ++I) {
      DSNode * Node = I;
      if (Node->isExternalNode() || Node->isUnknownNode()) {
        GlobalNodes.insert (Node);
      }
    }
  }
#endif

  //
  // Copy the values into the output container.  Note that DenseSet has no
  // iterator traits (or whatever allows us to treat DenseSet as a generic
  // container), so we have to use a loop to copy values from the DenseSet
  // into the output container.
  //
  // Note that we do not copy local DSNodes into the output container; we
  // merely copy those nodes in the globals graph.
  //
  for (DenseSet<const DSNode*>::iterator I = GlobalNodes.begin(),
         E = GlobalNodes.end(); I != E; ++I) {
    Nodes.insert (*I);
  }

  return;
}
int main(int argc, char ** argv)
{
    std::cerr << std::fixed << std::setprecision(3);
    std::ofstream devnull("/dev/null");

    DB::ReadBufferFromFileDescriptor in(STDIN_FILENO);
    size_t n = atoi(argv[1]);
    size_t elems_show = 1;

    using Vec = std::vector<std::string>;
    using Set = std::unordered_map<std::string, int>;
    using RefsSet = std::unordered_map<StringRef, int, StringRefHash>;
    using DenseSet = google::dense_hash_map<std::string, int>;
    using RefsDenseSet = google::dense_hash_map<StringRef, int, StringRefHash>;
    using RefsHashMap = HashMap<StringRef, int, StringRefHash>;

    Vec vec;
    vec.reserve(n);

    {
        Stopwatch watch;

        std::string s;
        for (size_t i = 0; i < n && !in.eof(); ++i)
        {
            DB::readEscapedString(s, in);
            DB::assertChar('\n', in);
            vec.push_back(s);
        }

        std::cerr << "Read and inserted into vector in " << watch.elapsedSeconds() << " sec, "
            << vec.size() / watch.elapsedSeconds() << " rows/sec., "
            << in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec."
            << std::endl;
    }

    {
        DB::Arena pool;
        Stopwatch watch;
        const char * res = nullptr;

        for (Vec::iterator it = vec.begin(); it != vec.end(); ++it)
        {
            const char * tmp = pool.insert(it->data(), it->size());
            if (it == vec.begin())
                res = tmp;
        }

        std::cerr << "Inserted into pool in " << watch.elapsedSeconds() << " sec, "
            << vec.size() / watch.elapsedSeconds() << " rows/sec., "
            << in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec."
            << std::endl;

        devnull.write(res, 100);
        devnull << std::endl;
    }

    {
        Set set;
        Stopwatch watch;

        for (Vec::iterator it = vec.begin(); it != vec.end(); ++it)
            set[*it] = 0;

        std::cerr << "Inserted into std::unordered_map in " << watch.elapsedSeconds() << " sec, "
            << vec.size() / watch.elapsedSeconds() << " rows/sec., "
            << in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec."
            << std::endl;

        size_t i = 0;
        for (Set::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
        {
            devnull << it->first;
            devnull << std::endl;
        }
    }

    {
        RefsSet set;
        Stopwatch watch;

        for (Vec::iterator it = vec.begin(); it != vec.end(); ++it)
            set[StringRef(*it)] = 0;

        std::cerr << "Inserted refs into std::unordered_map in " << watch.elapsedSeconds() << " sec, "
            << vec.size() / watch.elapsedSeconds() << " rows/sec., "
            << in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec."
            << std::endl;

        size_t i = 0;
        for (RefsSet::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
        {
            devnull.write(it->first.data, it->first.size);
            devnull << std::endl;
        }
    }

    {
        DB::Arena pool;
        RefsSet set;
        Stopwatch watch;

        for (Vec::iterator it = vec.begin(); it != vec.end(); ++it)
            set[StringRef(pool.insert(it->data(), it->size()), it->size())] = 0;

        std::cerr << "Inserted into pool and refs into std::unordered_map in " << watch.elapsedSeconds() << " sec, "
            << vec.size() / watch.elapsedSeconds() << " rows/sec., "
            << in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec."
            << std::endl;

        size_t i = 0;
        for (RefsSet::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
        {
            devnull.write(it->first.data, it->first.size);
            devnull << std::endl;
        }
    }

    {
        DenseSet set;
        set.set_empty_key(DenseSet::key_type());
        Stopwatch watch;

        for (Vec::iterator it = vec.begin(); it != vec.end(); ++it)
            set[*it] = 0;

        std::cerr << "Inserted into google::dense_hash_map in " << watch.elapsedSeconds() << " sec, "
            << vec.size() / watch.elapsedSeconds() << " rows/sec., "
            << in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec."
            << std::endl;

        size_t i = 0;
        for (DenseSet::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
        {
            devnull << it->first;
            devnull << std::endl;
        }
    }

    {
        RefsDenseSet set;
        set.set_empty_key(RefsDenseSet::key_type());
        Stopwatch watch;

        for (Vec::iterator it = vec.begin(); it != vec.end(); ++it)
            set[StringRef(it->data(), it->size())] = 0;

        std::cerr << "Inserted refs into google::dense_hash_map in " << watch.elapsedSeconds() << " sec, "
            << vec.size() / watch.elapsedSeconds() << " rows/sec., "
            << in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec."
            << std::endl;

        size_t i = 0;
        for (RefsDenseSet::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
        {
            devnull.write(it->first.data, it->first.size);
            devnull << std::endl;
        }
    }

    {
        DB::Arena pool;
        RefsDenseSet set;
        set.set_empty_key(RefsDenseSet::key_type());
        Stopwatch watch;

        for (Vec::iterator it = vec.begin(); it != vec.end(); ++it)
            set[StringRef(pool.insert(it->data(), it->size()), it->size())] = 0;

        std::cerr << "Inserted into pool and refs into google::dense_hash_map in " << watch.elapsedSeconds() << " sec, "
            << vec.size() / watch.elapsedSeconds() << " rows/sec., "
            << in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec."
            << std::endl;

        size_t i = 0;
        for (RefsDenseSet::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
        {
            devnull.write(it->first.data, it->first.size);
            devnull << std::endl;
        }
    }

    {
        RefsHashMap set;
        Stopwatch watch;

        for (Vec::iterator it = vec.begin(); it != vec.end(); ++it)
        {
            RefsHashMap::iterator inserted_it;
            bool inserted;
            set.emplace(StringRef(*it), inserted_it, inserted);
        }

        std::cerr << "Inserted refs into HashMap in " << watch.elapsedSeconds() << " sec, "
            << vec.size() / watch.elapsedSeconds() << " rows/sec., "
            << in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec."
            << std::endl;

        size_t i = 0;
        for (RefsHashMap::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
        {
            devnull.write(it->first.data, it->first.size);
            devnull << std::endl;
        }

        //std::cerr << set.size() << ", " << set.getCollisions() << std::endl;
    }

    {
        DB::Arena pool;
        RefsHashMap set;
        Stopwatch watch;

        for (Vec::iterator it = vec.begin(); it != vec.end(); ++it)
        {
            RefsHashMap::iterator inserted_it;
            bool inserted;
            set.emplace(StringRef(pool.insert(it->data(), it->size()), it->size()), inserted_it, inserted);
        }

        std::cerr << "Inserted into pool and refs into HashMap in " << watch.elapsedSeconds() << " sec, "
            << vec.size() / watch.elapsedSeconds() << " rows/sec., "
            << in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec."
            << std::endl;

        size_t i = 0;
        for (RefsHashMap::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
        {
            devnull.write(it->first.data, it->first.size);
            devnull << std::endl;
        }
    }

    return 0;
}