/// isFormingBranchFromSelectProfitable - Returns true if a SelectInst should be /// turned into an explicit branch. static bool isFormingBranchFromSelectProfitable(SelectInst *SI) { // FIXME: This should use the same heuristics as IfConversion to determine // whether a select is better represented as a branch. This requires that // branch probability metadata is preserved for the select, which is not the // case currently. CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition()); // If the branch is predicted right, an out of order CPU can avoid blocking on // the compare. Emit cmovs on compares with a memory operand as branches to // avoid stalls on the load from memory. If the compare has more than one use // there's probably another cmov or setcc around so it's not worth emitting a // branch. if (!Cmp) return false; Value *CmpOp0 = Cmp->getOperand(0); Value *CmpOp1 = Cmp->getOperand(1); // We check that the memory operand has one use to avoid uses of the loaded // value directly after the compare, making branches unprofitable. return Cmp->hasOneUse() && ((isa<LoadInst>(CmpOp0) && CmpOp0->hasOneUse()) || (isa<LoadInst>(CmpOp1) && CmpOp1->hasOneUse())); }
// Predict that a comparison in which a register is an operand, the register is // used before being defined in a successor block, and the successor block // does not post-dominate will reach the successor block. int BranchProbabilities::CheckGuardHeuristic() { BranchInst *BI = dyn_cast<BranchInst>(_TI); bool bUses[2] = {false, false}; // If we don't have a conditional branch, abandon if ((!BI) || (BI->isUnconditional())) return -1; // If the condition is not immediately dependent on a comparison, abandon CmpInst *cmp = dyn_cast<CmpInst>(BI->getCondition()); if (!cmp) return -1; for (int i = 0; i < 2; i++) { if (_bPostDoms[i]) continue; // Get the values being compared Value *v = cmp->getOperand(i); // For all uses of the first value check if the use post-dominates for (Value::use_iterator UI = v->use_begin(), UE = v->use_end(); UI != UE; ++UI) { // if the use is not an instruction, skip it Instruction *I = dyn_cast<Instruction>(*UI); if (!I) continue; BasicBlock *UsingBlock = I->getParent(); // Check if the use is in either successor for (int i = 0; i < 2; i++) if (UsingBlock == _Succ[i]) bUses[i] = true; } } if (bUses[0] == bUses[1]) return -1; if (bUses[0]) return 0; else return 1; }
// Converts LLVM encoding of comparison predicates to the // corresponding bitcode versions. static unsigned GetEncodedCmpPredicate(const CmpInst &Cmp) { switch (Cmp.getPredicate()) { default: report_fatal_error( "Comparison predicate not supported by PNaCl bitcode"); case CmpInst::FCMP_FALSE: return naclbitc::FCMP_FALSE; case CmpInst::FCMP_OEQ: return naclbitc::FCMP_OEQ; case CmpInst::FCMP_OGT: return naclbitc::FCMP_OGT; case CmpInst::FCMP_OGE: return naclbitc::FCMP_OGE; case CmpInst::FCMP_OLT: return naclbitc::FCMP_OLT; case CmpInst::FCMP_OLE: return naclbitc::FCMP_OLE; case CmpInst::FCMP_ONE: return naclbitc::FCMP_ONE; case CmpInst::FCMP_ORD: return naclbitc::FCMP_ORD; case CmpInst::FCMP_UNO: return naclbitc::FCMP_UNO; case CmpInst::FCMP_UEQ: return naclbitc::FCMP_UEQ; case CmpInst::FCMP_UGT: return naclbitc::FCMP_UGT; case CmpInst::FCMP_UGE: return naclbitc::FCMP_UGE; case CmpInst::FCMP_ULT: return naclbitc::FCMP_ULT; case CmpInst::FCMP_ULE: return naclbitc::FCMP_ULE; case CmpInst::FCMP_UNE: return naclbitc::FCMP_UNE; case CmpInst::FCMP_TRUE: return naclbitc::FCMP_TRUE; case CmpInst::ICMP_EQ: return naclbitc::ICMP_EQ; case CmpInst::ICMP_NE: return naclbitc::ICMP_NE; case CmpInst::ICMP_UGT: return naclbitc::ICMP_UGT; case CmpInst::ICMP_UGE: return naclbitc::ICMP_UGE; case CmpInst::ICMP_ULT: return naclbitc::ICMP_ULT; case CmpInst::ICMP_ULE: return naclbitc::ICMP_ULE; case CmpInst::ICMP_SGT: return naclbitc::ICMP_SGT; case CmpInst::ICMP_SGE: return naclbitc::ICMP_SGE; case CmpInst::ICMP_SLT: return naclbitc::ICMP_SLT; case CmpInst::ICMP_SLE: return naclbitc::ICMP_SLE; } }
/// Try to simplify cmp instruction. bool UnrolledInstAnalyzer::visitCmpInst(CmpInst &I) { Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); // First try to handle simplified comparisons. if (!isa<Constant>(LHS)) if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS)) LHS = SimpleLHS; if (!isa<Constant>(RHS)) if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS)) RHS = SimpleRHS; if (!isa<Constant>(LHS) && !isa<Constant>(RHS)) { auto SimplifiedLHS = SimplifiedAddresses.find(LHS); if (SimplifiedLHS != SimplifiedAddresses.end()) { auto SimplifiedRHS = SimplifiedAddresses.find(RHS); if (SimplifiedRHS != SimplifiedAddresses.end()) { SimplifiedAddress &LHSAddr = SimplifiedLHS->second; SimplifiedAddress &RHSAddr = SimplifiedRHS->second; if (LHSAddr.Base == RHSAddr.Base) { LHS = LHSAddr.Offset; RHS = RHSAddr.Offset; } } } } if (Constant *CLHS = dyn_cast<Constant>(LHS)) { if (Constant *CRHS = dyn_cast<Constant>(RHS)) { if (Constant *C = ConstantExpr::getCompare(I.getPredicate(), CLHS, CRHS)) { SimplifiedValues[&I] = C; return true; } } } return Base::visitCmpInst(I); }
/// Returns true if the select instruction has users in the compare-and-add /// reduction pattern below. The select instruction argument is the last one /// in the sequence. /// /// %sum.1 = phi ... /// ... /// %cmp = fcmp pred %0, %CFP /// %add = fadd %0, %sum.1 /// %sum.2 = select %cmp, %add, %sum.1 RecurrenceDescriptor::InstDesc RecurrenceDescriptor::isConditionalRdxPattern( RecurrenceKind Kind, Instruction *I) { SelectInst *SI = dyn_cast<SelectInst>(I); if (!SI) return InstDesc(false, I); CmpInst *CI = dyn_cast<CmpInst>(SI->getCondition()); // Only handle single use cases for now. if (!CI || !CI->hasOneUse()) return InstDesc(false, I); Value *TrueVal = SI->getTrueValue(); Value *FalseVal = SI->getFalseValue(); // Handle only when either of operands of select instruction is a PHI // node for now. if ((isa<PHINode>(*TrueVal) && isa<PHINode>(*FalseVal)) || (!isa<PHINode>(*TrueVal) && !isa<PHINode>(*FalseVal))) return InstDesc(false, I); Instruction *I1 = isa<PHINode>(*TrueVal) ? dyn_cast<Instruction>(FalseVal) : dyn_cast<Instruction>(TrueVal); if (!I1 || !I1->isBinaryOp()) return InstDesc(false, I); Value *Op1, *Op2; if ((m_FAdd(m_Value(Op1), m_Value(Op2)).match(I1) || m_FSub(m_Value(Op1), m_Value(Op2)).match(I1)) && I1->isFast()) return InstDesc(Kind == RK_FloatAdd, SI); if (m_FMul(m_Value(Op1), m_Value(Op2)).match(I1) && (I1->isFast())) return InstDesc(Kind == RK_FloatMult, SI); return InstDesc(false, I); }
/// check if value in memory at `memAddr` was changed when accessing `val`, store result into flag void RedoBBBuilder::insertCheck(Value *val, Value *memAddr, Value* flag) { for (auto it = val->use_begin(), ite = val->use_end(); it != ite; it++) { if (StoreInst *SI = dyn_cast<StoreInst>(*it)) { // skip those not in current top loop if (!isCurrentTopLoop(*SI)) { continue; } // skip instruction we don't interested in if (!shouldCheck(*SI)) { continue; } // if we have checked this store for this memAddr CmpInst *chkRes = 0; if (StoreToCheckMap.count(SI)) { for (auto pair : StoreToCheckMap[SI]) { if (pair.first == memAddr) { chkRes = pair.second; DEBUG(dbgs() << " Found existing check '" << chkRes->getName() << "' for (" << *SI << " ) in " << SI->getParent()->getName() << "\n"); } } } if (!chkRes) { // check if before and after the store, the memore address changed // // %orig = load %memAddr // the STORE we checking // %modified = load %memAddr // %cmp = icmp ne, %orig, %modified LoadInst *orig = new LoadInst(memAddr, "", SI); Instruction *next = SI->getNextNode(); LoadInst *modified = new LoadInst(memAddr, "", next); chkRes = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_NE, orig, modified, "chk", next); CheckingInstrs.insert(orig); CheckingInstrs.insert(modified); CheckingInstrs.insert(chkRes); // set consitant name orig->setName(chkRes->getName() + ".orig"); modified->setName(chkRes->getName() + ".mod"); StoreToCheckMap[SI].push_back({memAddr, chkRes}); DEBUG(dbgs() << " Inserted check '" << chkRes->getName() << "' for (" << *SI << " ) in " << SI->getParent()->getName() << "\n"); } // if we have stored the check result to the flag Check pair = {memAddr, chkRes}; for (auto f : CheckToFlagMap[pair]) { if (f == flag) { DEBUG(dbgs() << " Existing flag store found\n"); return; } } DEBUG(dbgs() << " Check result stored to " << flag->getName() << "\n"); // merge old value and new value with or // // %oldflgval = load %flag // %newflgval = or %oldflgval, %chkRes // 
store %newflgval, %flag // the next instr after STORE we checking Instruction *next = chkRes->getNextNode(); LoadInst *oldflgval = new LoadInst(flag, flag->getName() + ".oldval", next); auto *newflgval = BinaryOperator::Create(Instruction::Or, oldflgval, chkRes, flag->getName() + ".newval", next); StoreInst *st = new StoreInst(newflgval, flag, next); CheckingInstrs.insert(oldflgval); CheckingInstrs.insert(newflgval); CheckingInstrs.insert(st); CheckToFlagMap[pair].push_back(flag); } } }
void SystemZTDCPass::convertFCmp(CmpInst &I) { Value *Op0 = I.getOperand(0); auto *Const = dyn_cast<ConstantFP>(I.getOperand(1)); auto Pred = I.getPredicate(); // Only comparisons with consts are interesting. if (!Const) return; // Compute the smallest normal number (and its negation). auto &Sem = Op0->getType()->getFltSemantics(); APFloat Smallest = APFloat::getSmallestNormalized(Sem); APFloat NegSmallest = Smallest; NegSmallest.changeSign(); // Check if Const is one of our recognized consts. int WhichConst; if (Const->isZero()) { // All comparisons with 0 can be converted. WhichConst = 0; } else if (Const->isInfinity()) { // Likewise for infinities. WhichConst = Const->isNegative() ? 2 : 1; } else if (Const->isExactlyValue(Smallest)) { // For Smallest, we cannot do EQ separately from GT. if ((Pred & CmpInst::FCMP_OGE) != CmpInst::FCMP_OGE && (Pred & CmpInst::FCMP_OGE) != 0) return; WhichConst = 3; } else if (Const->isExactlyValue(NegSmallest)) { // Likewise for NegSmallest, we cannot do EQ separately from LT. if ((Pred & CmpInst::FCMP_OLE) != CmpInst::FCMP_OLE && (Pred & CmpInst::FCMP_OLE) != 0) return; WhichConst = 4; } else { // Not one of our special constants. return; } // Partial masks to use for EQ, GT, LT, UN comparisons, respectively. 
static const int Masks[][4] = { { // 0 SystemZ::TDCMASK_ZERO, // eq SystemZ::TDCMASK_POSITIVE, // gt SystemZ::TDCMASK_NEGATIVE, // lt SystemZ::TDCMASK_NAN, // un }, { // inf SystemZ::TDCMASK_INFINITY_PLUS, // eq 0, // gt (SystemZ::TDCMASK_ZERO | SystemZ::TDCMASK_NEGATIVE | SystemZ::TDCMASK_NORMAL_PLUS | SystemZ::TDCMASK_SUBNORMAL_PLUS), // lt SystemZ::TDCMASK_NAN, // un }, { // -inf SystemZ::TDCMASK_INFINITY_MINUS, // eq (SystemZ::TDCMASK_ZERO | SystemZ::TDCMASK_POSITIVE | SystemZ::TDCMASK_NORMAL_MINUS | SystemZ::TDCMASK_SUBNORMAL_MINUS), // gt 0, // lt SystemZ::TDCMASK_NAN, // un }, { // minnorm 0, // eq (unsupported) (SystemZ::TDCMASK_NORMAL_PLUS | SystemZ::TDCMASK_INFINITY_PLUS), // gt (actually ge) (SystemZ::TDCMASK_ZERO | SystemZ::TDCMASK_NEGATIVE | SystemZ::TDCMASK_SUBNORMAL_PLUS), // lt SystemZ::TDCMASK_NAN, // un }, { // -minnorm 0, // eq (unsupported) (SystemZ::TDCMASK_ZERO | SystemZ::TDCMASK_POSITIVE | SystemZ::TDCMASK_SUBNORMAL_MINUS), // gt (SystemZ::TDCMASK_NORMAL_MINUS | SystemZ::TDCMASK_INFINITY_MINUS), // lt (actually le) SystemZ::TDCMASK_NAN, // un } }; // Construct the mask as a combination of the partial masks. int Mask = 0; if (Pred & CmpInst::FCMP_OEQ) Mask |= Masks[WhichConst][0]; if (Pred & CmpInst::FCMP_OGT) Mask |= Masks[WhichConst][1]; if (Pred & CmpInst::FCMP_OLT) Mask |= Masks[WhichConst][2]; if (Pred & CmpInst::FCMP_UNO) Mask |= Masks[WhichConst][3]; // A lone fcmp is unworthy of tdc conversion on its own, but may become // worthy if combined with fabs. bool Worthy = false; if (CallInst *CI = dyn_cast<CallInst>(Op0)) { Function *F = CI->getCalledFunction(); if (F && F->getIntrinsicID() == Intrinsic::fabs) { // Fold with fabs - adjust the mask appropriately. Mask &= SystemZ::TDCMASK_PLUS; Mask |= Mask >> 1; Op0 = CI->getArgOperand(0); // A combination of fcmp with fabs is a win, unless the constant // involved is 0 (which is handled by later passes). Worthy = WhichConst != 0; PossibleJunk.insert(CI); }
/// \brief Simplify one loop and queue further loops for simplification.
///
/// The steps below run in order: delete edges from out-of-loop predecessors of
/// non-header blocks, resolve exiting branches on undef, insert a preheader if
/// one is missing, rewrite exit blocks that have out-of-loop predecessors,
/// establish a single latch (by separating a nested loop or inserting a unique
/// backedge block), simplify header PHI nodes, and finally try to fold away
/// exiting blocks when all exits go to the same block.
///
/// \param L        the loop to simplify.
/// \param Worklist receives the outer loop when a nested loop is separated.
/// \param SE       may be null; every use below is guarded by a null check.
/// \returns true if anything was changed.
///
/// FIXME: Currently this accepts both lots of analyses that it uses and a raw
/// Pass pointer. The Pass pointer is used by numerous utilities to update
/// specific analyses. Rather than a pass it would be much cleaner and more
/// explicit if they accepted the analysis directly and then updated it.
static bool simplifyOneLoop(Loop *L, SmallVectorImpl<Loop *> &Worklist,
                            DominatorTree *DT, LoopInfo *LI,
                            ScalarEvolution *SE, Pass *PP,
                            AssumptionCache *AC) {
  bool Changed = false;
ReprocessLoop:

  // Check to see that no blocks (other than the header) in this loop have
  // predecessors that are not in the loop. This is not valid for natural
  // loops, but can occur if the blocks are unreachable. Since they are
  // unreachable we can just shamelessly delete those CFG edges!
  for (Loop::block_iterator BB = L->block_begin(), E = L->block_end();
       BB != E; ++BB) {
    if (*BB == L->getHeader()) continue;

    // Collect the out-of-loop predecessors first; a set is used so that each
    // one is processed only once.
    SmallPtrSet<BasicBlock*, 4> BadPreds;
    for (pred_iterator PI = pred_begin(*BB),
         PE = pred_end(*BB); PI != PE; ++PI) {
      BasicBlock *P = *PI;
      if (!L->contains(P))
        BadPreds.insert(P);
    }

    // Delete each unique out-of-loop (and thus dead) predecessor.
    for (BasicBlock *P : BadPreds) {

      DEBUG(dbgs() << "LoopSimplify: Deleting edge from dead predecessor "
                   << P->getName() << "\n");

      // Inform each successor of each dead pred.
      // Note: the iterator named SE below shadows the ScalarEvolution
      // parameter for the duration of this for statement only.
      for (succ_iterator SI = succ_begin(P), SE = succ_end(P); SI != SE; ++SI)
        (*SI)->removePredecessor(P);
      // Zap the dead pred's terminator and replace it with unreachable.
      TerminatorInst *TI = P->getTerminator();
      TI->replaceAllUsesWith(UndefValue::get(TI->getType()));
      P->getTerminator()->eraseFromParent();
      new UnreachableInst(P->getContext(), P);
      Changed = true;
    }
  }

  // If there are exiting blocks with branches on undef, resolve the undef in
  // the direction which will exit the loop. This will help simplify loop
  // trip count computations.
  SmallVector<BasicBlock*, 8> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(),
       E = ExitingBlocks.end(); I != E; ++I)
    if (BranchInst *BI = dyn_cast<BranchInst>((*I)->getTerminator()))
      if (BI->isConditional()) {
        if (UndefValue *Cond = dyn_cast<UndefValue>(BI->getCondition())) {

          DEBUG(dbgs() << "LoopSimplify: Resolving \"br i1 undef\" to exit in "
                       << (*I)->getName() << "\n");

          // The condition becomes true exactly when successor 0 lies outside
          // the loop, so the branch always takes the exiting edge.
          BI->setCondition(ConstantInt::get(Cond->getType(),
                                            !L->contains(BI->getSuccessor(0))));

          // This may make the loop analyzable, force SCEV recomputation.
          if (SE)
            SE->forgetLoop(L);

          Changed = true;
        }
      }

  // Does the loop already have a preheader?  If so, don't insert one.
  BasicBlock *Preheader = L->getLoopPreheader();
  if (!Preheader) {
    Preheader = InsertPreheaderForLoop(L, PP);
    if (Preheader) {
      ++NumInserted;
      Changed = true;
    }
  }

  // Next, check to make sure that all exit nodes of the loop only have
  // predecessors that are inside of the loop.  This check guarantees that the
  // loop preheader/header will dominate the exit blocks.  If the exit block has
  // predecessors from outside of the loop, split the edge now.
  SmallVector<BasicBlock*, 8> ExitBlocks;
  L->getExitBlocks(ExitBlocks);

  // Visit each distinct exit block once.
  SmallSetVector<BasicBlock *, 8> ExitBlockSet(ExitBlocks.begin(),
                                               ExitBlocks.end());
  for (SmallSetVector<BasicBlock *, 8>::iterator I = ExitBlockSet.begin(),
         E = ExitBlockSet.end(); I != E; ++I) {
    BasicBlock *ExitBlock = *I;
    for (pred_iterator PI = pred_begin(ExitBlock), PE = pred_end(ExitBlock);
         PI != PE; ++PI)
      // Must be exactly this loop: no subloops, parent loops, or non-loop preds
      // allowed.
      if (!L->contains(*PI)) {
        if (rewriteLoopExitBlock(L, ExitBlock, DT, LI, PP)) {
          ++NumInserted;
          Changed = true;
        }
        // One rewrite attempt per exit block; stop scanning its predecessors.
        break;
      }
  }

  // If the header has more than two predecessors at this point (from the
  // preheader and from multiple backedges), we must adjust the loop.
  BasicBlock *LoopLatch = L->getLoopLatch();
  if (!LoopLatch) {
    // If this is really a nested loop, rip it out into a child loop.  Don't do
    // this for loops with a giant number of backedges, just factor them into a
    // common backedge instead.
    if (L->getNumBackEdges() < 8) {
      if (Loop *OuterL =
              separateNestedLoop(L, Preheader, DT, LI, SE, PP, AC)) {
        ++NumNested;
        // Enqueue the outer loop as it should be processed next in our
        // depth-first nest walk.
        Worklist.push_back(OuterL);

        // This is a big restructuring change, reprocess the whole loop.
        Changed = true;
        // GCC doesn't tail recursion eliminate this.
        // FIXME: It isn't clear we can't rely on LLVM to TRE this.
        goto ReprocessLoop;
      }
    }

    // If we either couldn't, or didn't want to, identify nesting of the loops,
    // insert a new block that all backedges target, then make it jump to the
    // loop header.
    LoopLatch = insertUniqueBackedgeBlock(L, Preheader, DT, LI);
    if (LoopLatch) {
      ++NumInserted;
      Changed = true;
    }
  }

  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();

  // Scan over the PHI nodes in the loop header.  Since they now have only two
  // incoming values (the loop is canonicalized), we may have simplified the PHI
  // down to 'X = phi [X, Y]', which should be replaced with 'Y'.
  // The iterator is advanced inside the loop condition, before the body may
  // erase the current PHI node.
  PHINode *PN;
  for (BasicBlock::iterator I = L->getHeader()->begin();
       (PN = dyn_cast<PHINode>(I++)); )
    if (Value *V = SimplifyInstruction(PN, DL, nullptr, DT, AC)) {
      if (SE) SE->forgetValue(PN);
      PN->replaceAllUsesWith(V);
      PN->eraseFromParent();
    }

  // If this loop has multiple exits and the exits all go to the same
  // block, attempt to merge the exits. This helps several passes, such
  // as LoopRotation, which do not support loops with multiple exits.
  // SimplifyCFG also does this (and this code uses the same utility
  // function), however this code is loop-aware, where SimplifyCFG is
  // not. That gives it the advantage of being able to hoist
  // loop-invariant instructions out of the way to open up more
  // opportunities, and the disadvantage of having the responsibility
  // to preserve dominator information.
  bool UniqueExit = true;
  if (!ExitBlocks.empty())
    for (unsigned i = 1, e = ExitBlocks.size(); i != e; ++i)
      if (ExitBlocks[i] != ExitBlocks[0]) {
        UniqueExit = false;
        break;
      }
  if (UniqueExit) {
    for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
      BasicBlock *ExitingBlock = ExitingBlocks[i];
      // Candidates need a single predecessor and must end in a conditional
      // branch whose condition is a compare defined in the same block.
      if (!ExitingBlock->getSinglePredecessor()) continue;
      BranchInst *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
      if (!BI || !BI->isConditional()) continue;
      CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition());
      if (!CI || CI->getParent() != ExitingBlock) continue;

      // Attempt to hoist out all instructions except for the
      // comparison and the branch.
      bool AllInvariant = true;
      bool AnyInvariant = false;
      for (BasicBlock::iterator I = ExitingBlock->begin(); &*I != BI; ) {
        // Advance before the instruction is possibly moved out of this block.
        Instruction *Inst = I++;
        // Skip debug info intrinsics.
        if (isa<DbgInfoIntrinsic>(Inst))
          continue;
        if (Inst == CI)
          continue;
        if (!L->makeLoopInvariant(Inst, AnyInvariant,
                                  Preheader ? Preheader->getTerminator()
                                            : nullptr)) {
          AllInvariant = false;
          break;
        }
      }
      if (AnyInvariant) {
        Changed = true;
        // The loop disposition of all SCEV expressions that depend on any
        // hoisted values have also changed.
        if (SE)
          SE->forgetLoopDispositions(L);
      }
      if (!AllInvariant) continue;

      // The block has now been cleared of all instructions except for
      // a comparison and a conditional branch. SimplifyCFG may be able
      // to fold it now.
      if (!FoldBranchToCommonDest(BI)) continue;

      // Success. The block is now dead, so remove it from the loop,
      // update the dominator tree and delete it.
      DEBUG(dbgs() << "LoopSimplify: Eliminating exiting block "
                   << ExitingBlock->getName() << "\n");

      // Notify ScalarEvolution before deleting this block. Currently assume the
      // parent loop doesn't change (splitting edges doesn't count). If blocks,
      // CFG edges, or other values in the parent loop change, then we need to
      // call forgetLoop() for the parent instead.
      if (SE)
        SE->forgetLoop(L);

      assert(pred_begin(ExitingBlock) == pred_end(ExitingBlock));
      Changed = true;
      LI->removeBlock(ExitingBlock);

      // Hand every dominator-tree child of the dead block to the block's own
      // immediate dominator. The loop relies on changeImmediateDominator
      // removing the child from Children, otherwise it would not terminate.
      DomTreeNode *Node = DT->getNode(ExitingBlock);
      const std::vector<DomTreeNodeBase<BasicBlock> *> &Children =
        Node->getChildren();
      while (!Children.empty()) {
        DomTreeNode *Child = Children.front();
        DT->changeImmediateDominator(Child, Node->getIDom());
      }
      DT->eraseNode(ExitingBlock);

      BI->getSuccessor(0)->removePredecessor(ExitingBlock);
      BI->getSuccessor(1)->removePredecessor(ExitingBlock);
      ExitingBlock->eraseFromParent();
    }
  }

  return Changed;
}
/// Try to simplify a compare instruction.
///
/// Returns true when the comparison has been handled: it folded to a constant
/// recorded in SimplifiedValues, or it is a comparison of an SROA candidate
/// against null. Returns false otherwise.
bool CallAnalyzer::visitCmpInst(CmpInst &I) {
  Value *Left = I.getOperand(0);
  Value *Right = I.getOperand(1);

  // Substitute operands that have already been simplified to constants.
  if (!isa<Constant>(Left)) {
    Constant *Known = SimplifiedValues.lookup(Left);
    if (Known)
      Left = Known;
  }
  if (!isa<Constant>(Right)) {
    Constant *Known = SimplifiedValues.lookup(Right);
    if (Known)
      Right = Known;
  }

  // Both sides constant: try to fold the comparison outright.
  if (Constant *ConstLeft = dyn_cast<Constant>(Left)) {
    if (Constant *ConstRight = dyn_cast<Constant>(Right)) {
      Constant *Folded =
          ConstantExpr::getCompare(I.getPredicate(), ConstLeft, ConstRight);
      if (Folded) {
        SimplifiedValues[&I] = Folded;
        return true;
      }
    }
  }

  // The remaining simplifications do not apply to floating-point compares.
  if (I.getOpcode() == Instruction::FCmp)
    return false;

  // Otherwise look for a comparison between constant offset pointers with
  // a common base.
  Value *LeftBase, *RightBase;
  APInt LeftOffset, RightOffset;
  std::tie(LeftBase, LeftOffset) = ConstantOffsetPtrs.lookup(Left);
  if (LeftBase) {
    std::tie(RightBase, RightOffset) = ConstantOffsetPtrs.lookup(Right);
    if (RightBase && LeftBase == RightBase) {
      // Common base: fold the icmp to a constant based on the offsets.
      Constant *LeftConst = ConstantInt::get(Left->getContext(), LeftOffset);
      Constant *RightConst =
          ConstantInt::get(Right->getContext(), RightOffset);
      Constant *Folded =
          ConstantExpr::getICmp(I.getPredicate(), LeftConst, RightConst);
      if (Folded) {
        SimplifiedValues[&I] = Folded;
        ++NumConstantPtrCmps;
        return true;
      }
    }
  }

  // If the comparison is an equality comparison with null, we can simplify it
  // for any alloca-derived argument. Note that this fires regardless of SROA
  // firing.
  if (I.isEquality() && isa<ConstantPointerNull>(I.getOperand(1))) {
    if (isAllocaDerivedArg(I.getOperand(0))) {
      // The result is true for ICMP_NE and false for any other predicate.
      const bool IsNotEqual = I.getPredicate() == CmpInst::ICMP_NE;
      if (IsNotEqual)
        SimplifiedValues[&I] = ConstantInt::getTrue(I.getType());
      else
        SimplifiedValues[&I] = ConstantInt::getFalse(I.getType());
      return true;
    }
  }

  // Finally check for SROA candidates in comparisons: a comparison against
  // null accumulates SROA cost, any other comparison disables SROA.
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (!lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
    return false;

  if (isa<ConstantPointerNull>(I.getOperand(1))) {
    accumulateSROACost(CostIt, InlineConstants::InstrCost);
    return true;
  }

  disableSROA(CostIt);
  return false;
}
/// If \param [in] BB has more than one predecessor that is a conditional /// branch, attempt to use parallel and/or for the branch condition. \returns /// true on success. /// /// Before: /// ...... /// %cmp10 = fcmp une float %tmp1, %tmp2 /// br i1 %cmp1, label %if.then, label %lor.rhs /// /// lor.rhs: /// ...... /// %cmp11 = fcmp une float %tmp3, %tmp4 /// br i1 %cmp11, label %if.then, label %ifend /// /// if.end: // the merge block /// ...... /// /// if.then: // has two predecessors, both of them contains conditional branch. /// ...... /// br label %if.end; /// /// After: /// ...... /// %cmp10 = fcmp une float %tmp1, %tmp2 /// ...... /// %cmp11 = fcmp une float %tmp3, %tmp4 /// %cmp12 = or i1 %cmp10, %cmp11 // parallel-or mode. /// br i1 %cmp12, label %if.then, label %ifend /// /// if.end: /// ...... /// /// if.then: /// ...... /// br label %if.end; /// /// Current implementation handles two cases. /// Case 1: \param BB is on the else-path. /// /// BB1 /// / | /// BB2 | /// / \ | /// BB3 \ | where, BB1, BB2 contain conditional branches. /// \ | / BB3 contains unconditional branch. /// \ | / BB4 corresponds to \param BB which is also the merge. /// BB => BB4 /// /// /// Corresponding source code: /// /// if (a == b && c == d) /// statement; // BB3 /// /// Case 2: \param BB BB is on the then-path. /// /// BB1 /// / | /// | BB2 /// \ / | where BB1, BB2 contain conditional branches. /// BB => BB3 | BB3 contains unconditiona branch and corresponds /// \ / to \param BB. BB4 is the merge. /// BB4 /// /// Corresponding source code: /// /// if (a == b || c == d) /// statement; // BB3 /// /// In both cases, \param BB is the common successor of conditional branches. /// In Case 1, \param BB (BB4) has an unconditional branch (BB3) as /// its predecessor. In Case 2, \param BB (BB3) only has conditional branches /// as its predecessors. 
/// bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder, Pass *P) { PHINode *PHI = dyn_cast<PHINode>(BB->begin()); if (PHI) return false; // For simplicity, avoid cases containing PHI nodes. BasicBlock *LastCondBlock = NULL; BasicBlock *FirstCondBlock = NULL; BasicBlock *UnCondBlock = NULL; int Idx = -1; // Check predecessors of \param BB. SmallPtrSet<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB)); for (SmallPtrSetIterator<BasicBlock *> PI = Preds.begin(), PE = Preds.end(); PI != PE; ++PI) { BasicBlock *Pred = *PI; BranchInst *PBI = dyn_cast<BranchInst>(Pred->getTerminator()); // All predecessors should terminate with a branch. if (!PBI) return false; BasicBlock *PP = Pred->getSinglePredecessor(); if (PBI->isUnconditional()) { // Case 1: Pred (BB3) is an unconditional block, it should // have a single predecessor (BB2) that is also a predecessor // of \param BB (BB4) and should not have address-taken. // There should exist only one such unconditional // branch among the predecessors. if (UnCondBlock || !PP || (Preds.count(PP) == 0) || Pred->hasAddressTaken()) return false; UnCondBlock = Pred; continue; } // Only conditional branches are allowed beyond this point. assert(PBI->isConditional()); // Condition's unique use should be the branch instruction. Value *PC = PBI->getCondition(); if (!PC || !PC->hasOneUse()) return false; if (PP && Preds.count(PP)) { // These are internal condition blocks to be merged from, e.g., // BB2 in both cases. // Should not be address-taken. if (Pred->hasAddressTaken()) return false; // Instructions in the internal condition blocks should be safe // to hoist up. for (BasicBlock::iterator BI = Pred->begin(), BE = PBI; BI != BE;) { Instruction *CI = BI++; if (isa<PHINode>(CI) || !isSafeToSpeculativelyExecute(CI)) return false; } } else { // This is the condition block to be merged into, e.g. BB1 in // both cases. 
if (FirstCondBlock) return false; FirstCondBlock = Pred; } // Find whether BB is uniformly on the true (or false) path // for all of its predecessors. BasicBlock *PS1 = PBI->getSuccessor(0); BasicBlock *PS2 = PBI->getSuccessor(1); BasicBlock *PS = (PS1 == BB) ? PS2 : PS1; int CIdx = (PS1 == BB) ? 0 : 1; if (Idx == -1) Idx = CIdx; else if (CIdx != Idx) return false; // PS is the successor which is not BB. Check successors to identify // the last conditional branch. if (Preds.count(PS) == 0) { // Case 2. LastCondBlock = Pred; } else { // Case 1 BranchInst *BPS = dyn_cast<BranchInst>(PS->getTerminator()); if (BPS && BPS->isUnconditional()) { // Case 1: PS(BB3) should be an unconditional branch. LastCondBlock = Pred; } } } if (!FirstCondBlock || !LastCondBlock || (FirstCondBlock == LastCondBlock)) return false; TerminatorInst *TBB = LastCondBlock->getTerminator(); BasicBlock *PS1 = TBB->getSuccessor(0); BasicBlock *PS2 = TBB->getSuccessor(1); BranchInst *PBI1 = dyn_cast<BranchInst>(PS1->getTerminator()); BranchInst *PBI2 = dyn_cast<BranchInst>(PS2->getTerminator()); // If PS1 does not jump into PS2, but PS2 jumps into PS1, // attempt branch inversion. if (!PBI1 || !PBI1->isUnconditional() || (PS1->getTerminator()->getSuccessor(0) != PS2)) { // Check whether PS2 jumps into PS1. if (!PBI2 || !PBI2->isUnconditional() || (PS2->getTerminator()->getSuccessor(0) != PS1)) return false; // Do branch inversion. 
BasicBlock *CurrBlock = LastCondBlock; bool EverChanged = false; while (1) { BranchInst *BI = dyn_cast<BranchInst>(CurrBlock->getTerminator()); CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition()); CmpInst::Predicate Predicate = CI->getPredicate(); // Cannonicalize icmp_ne -> icmp_eq, fcmp_one -> fcmp_oeq if ((Predicate == CmpInst::ICMP_NE) || (Predicate == CmpInst::FCMP_ONE)) { CI->setPredicate(ICmpInst::getInversePredicate(Predicate)); BI->swapSuccessors(); EverChanged = true; } if (CurrBlock == FirstCondBlock) break; CurrBlock = CurrBlock->getSinglePredecessor(); } return EverChanged; } // PS1 must have a conditional branch. if (!PBI1 || !PBI1->isUnconditional()) return false; // PS2 should not contain PHI node. PHI = dyn_cast<PHINode>(PS2->begin()); if (PHI) return false; // Do the transformation. BasicBlock *CB; BranchInst *PBI = dyn_cast<BranchInst>(FirstCondBlock->getTerminator()); bool Iteration = true; BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); Value *PC = PBI->getCondition(); do { CB = PBI->getSuccessor(1 - Idx); // Delete the conditional branch. FirstCondBlock->getInstList().pop_back(); FirstCondBlock->getInstList() .splice(FirstCondBlock->end(), CB->getInstList()); PBI = cast<BranchInst>(FirstCondBlock->getTerminator()); Value *CC = PBI->getCondition(); // Merge conditions. Builder.SetInsertPoint(PBI); Value *NC; if (Idx == 0) // Case 2, use parallel or. NC = Builder.CreateOr(PC, CC); else // Case 1, use parallel and. NC = Builder.CreateAnd(PC, CC); PBI->replaceUsesOfWith(CC, NC); PC = NC; if (CB == LastCondBlock) Iteration = false; // Remove internal conditional branches. CB->dropAllReferences(); // make CB unreachable and let downstream to delete the block. new UnreachableInst(CB->getContext(), CB); } while (Iteration); Builder.SetInsertPoint(SaveInsertBB, SaveInsertPt); DEBUG(dbgs() << "Use parallel and/or in:\n" << *FirstCondBlock); return true; }
void CGGraph::constructGraph() { std::vector<CGConstraint*>::iterator it; std::string nodeName; CGConstraint* curConstraint; CGNode* firstNode; CGNode* secondNode; std::vector<int>::iterator lengthIt; int curValue; ConstantInt* cInt; for (it = constraints.begin(); it != constraints.end(); ++it) { curConstraint = *it; Instruction* PP = curConstraint->programPoint; switch(curConstraint->type) { case CGConstraint::C1: { firstNode = getNode(getNameFromValue(curConstraint->programPoint->getOperand(0), PP)); secondNode = getNode(getNameFromValue(curConstraint->programPoint->getOperand(1), PP)); firstNode->connectTo(secondNode, 0); break; } case CGConstraint::C2: { if (isa<StoreInst>(PP)) { /* operand(0) = constant int or variable of int type operand(1) = var being stored into */ firstNode = getNode(getNameFromValue(PP->getOperand(0), PP)); secondNode = getNode(getNameFromValue(PP->getOperand(1), PP)); firstNode->connectTo(secondNode, 0); } else if (isa<LoadInst>(PP)) { /* operand(0) = pointer being loaded from (Value*)PP = var being loaded into */ firstNode = getNode(getNameFromValue(PP->getOperand(0), PP)); secondNode = getNode(getNameFromValue(PP, PP)); firstNode->connectTo(secondNode, 0); } else if (isa<CastInst>(PP)) { /* operand(0) = var being casted (Value*)PP = var getting the result of the cast */ firstNode = getNode(getNameFromValue(PP->getOperand(0), PP)); secondNode = getNode(getNameFromValue(PP, PP)); firstNode->connectTo(secondNode, 0); } break; } case CGConstraint::C3: { secondNode = getNode(getNameFromValue(PP, PP)); if ((cInt = dyn_cast<ConstantInt>(curConstraint->programPoint->getOperand(0)))) { firstNode = getNode(getNameFromValue(curConstraint->programPoint->getOperand(1), PP)); } else if ((cInt = dyn_cast<ConstantInt>(curConstraint->programPoint->getOperand(1)))) { firstNode = getNode(getNameFromValue(curConstraint->programPoint->getOperand(0), PP)); } else { return; } curValue = cInt->getSExtValue(); firstNode->connectTo(secondNode, curValue); break; 
} case CGConstraint::C4: { CmpInst* cmpInst; BranchInst* branchInst; if( !(branchInst = dyn_cast<BranchInst>(curConstraint->programPoint)) ) { errs() << "ERROR: BranchInst cast unsuccessful for C4 in CGGraph::constructGraph() \n"; return; } if( !(cmpInst = dyn_cast<CmpInst>(owner->branchToCompare[branchInst])) ) { errs() << "ERROR: CmpInst not found for C4 in CGGraph::constructGraph() \n"; return; } int size1, size2; size1 = curConstraint->piAssignments.size(); size2 = curConstraint->piAssignments2.size(); /* There are two possibilities here: (a) size1 = size2 = 2, or (b) size1 = size2 = 1; If (b), there will be one operand in the compare inst that is a literal value. That literal value still generates a constraint although it does not generate a pi assignment. We need to account for that. */ if (size1 != size2) { errs() << "ERROR: piAssignments not of equal length for C4 in CGGraph::constructGraph()\n"; return; } if ( (size1 < 1) || (size1 > 2) ) { errs() << "ERROR: piAssignments.size() != 1 or 2 for C4 in CGGraph::constructGraph()\n"; return; } //this takes care of the first two constraints for both cases (size = 1 or size = 2) //first branch for (int i = 0; i < size1; ++i) { firstNode = getNode(curConstraint->piAssignments[i]->getOperandName()); //vi - wr secondNode = getNode(curConstraint->piAssignments[i]->getAssignedName()); //vj - ws firstNode->connectTo(secondNode, 0); //vi -> vj 0 - wr -> ws 0 } //second branch for (int i = 0; i < size2; ++i) { firstNode = getNode(curConstraint->piAssignments2[i]->getOperandName()); //vi - wr secondNode = getNode(curConstraint->piAssignments2[i]->getAssignedName()); //vk - wt firstNode->connectTo(secondNode, 0); //vi -> vk 0 - wr -> wt 0 } /* - first and second op names for the third constraint for branches 1 and 2 - each case is stored in the order of the pi assignments, which is also the order of the cmp instruction operands */ std::string firstOpNameBr1, secondOpNameBr1, firstOpNameBr2, secondOpNameBr2; if (size1 == 1) 
{ /* the 2nd constraint in the table will not exist in this case, but there will be a 3rd constraint that was missed by the above, e.g. if (x1 <= 10) 1. x2 <= x1 2. nothing 3. x2 <= 10 <---- we need to add this here */ //prune the int from the CmpInst if (cmpInst->getNumOperands() != 2) { errs() << "ERROR: cmpInst->getNumOperands() != 2 in CGGraph::constructGraph()\n"; return; } if ((cInt = dyn_cast<ConstantInt>(cmpInst->getOperand(0)))) { //int is first op firstOpNameBr1 = getNameFromValue(cInt, PP); secondOpNameBr1 = curConstraint->piAssignments[0]->getAssignedName(); firstOpNameBr2 = firstOpNameBr1; secondOpNameBr2 = curConstraint->piAssignments2[0]->getAssignedName(); } else if ((cInt = dyn_cast<ConstantInt>(cmpInst->getOperand(1)))) { //int is second op firstOpNameBr1 = curConstraint->piAssignments[0]->getAssignedName(); secondOpNameBr1 = getNameFromValue(cInt, PP); firstOpNameBr2 = curConstraint->piAssignments2[0]->getAssignedName(); secondOpNameBr2 = secondOpNameBr1; } else { errs() << "ERROR: int not found in cmpInstr in CGGraph::constructGraph()\n"; return; } } else if (size1 == 2) { //store in order of pi assignments firstOpNameBr1 = curConstraint->piAssignments[0]->getAssignedName(); secondOpNameBr1 = curConstraint->piAssignments[1]->getAssignedName(); firstOpNameBr2 = curConstraint->piAssignments2[0]->getAssignedName(); secondOpNameBr2 = curConstraint->piAssignments2[1]->getAssignedName(); } CGNode* firstNodeBr1 = getNode(firstOpNameBr1); CGNode* secondNodeBr1 = getNode(secondOpNameBr1); CGNode* firstNodeBr2 = getNode(firstOpNameBr2); CGNode* secondNodeBr2 = getNode(secondOpNameBr2); switch(cmpInst->getPredicate()) { case CmpInst::ICMP_SGT: // > firstNodeBr1->connectTo(secondNodeBr1, -1); //vj -> ws -1 secondNodeBr2->connectTo(firstNodeBr2, 0); //wt -> vk 0 break; case CmpInst::ICMP_SLT: // < secondNodeBr1->connectTo(firstNodeBr1, -1); //ws -> vj -1 firstNodeBr2->connectTo(secondNodeBr2, 0); //vk -> wt 0 break; case CmpInst::ICMP_SGE: // >= 
firstNodeBr1->connectTo(secondNodeBr1, 0); //vj -> ws 0 secondNodeBr2->connectTo(firstNodeBr2, -1); //wt -> vk -1 break; case CmpInst::ICMP_SLE: // <= secondNodeBr1->connectTo(firstNodeBr1, 0); //ws -> vj 0 firstNodeBr2->connectTo(secondNodeBr2, -1); //vk -> wt -1 break; default: break; } break; } case CGConstraint::C5: { //operand 1 = array length firstNode = getNode(getNameFromValue(PP->getOperand(1), PP)); secondNode = getNode(curConstraint->piAssignments[0]->getAssignedName()); firstNode->connectTo(secondNode, -1); break; } case CGConstraint::CONTROL_FLOW: { //Done and ready to test firstNode = getNode(getNameFromValue(curConstraint->programPoint->getOperand(0), PP)); secondNode = getNode(getNameFromValue(curConstraint->programPoint, PP)); firstNode->connectTo(secondNode, 0); firstNode = getNode(getNameFromValue(curConstraint->programPoint->getOperand(1), PP)); firstNode->connectTo(secondNode, 0); break; } } //end switch } //end for } //end constructGraph()
// Compute the unlikely successors to the block BB in the loop L, specifically // those that are unlikely because this is a loop, and add them to the // UnlikelyBlocks set. static void computeUnlikelySuccessors(const BasicBlock *BB, Loop *L, SmallPtrSetImpl<const BasicBlock*> &UnlikelyBlocks) { // Sometimes in a loop we have a branch whose condition is made false by // taking it. This is typically something like // int n = 0; // while (...) { // if (++n >= MAX) { // n = 0; // } // } // In this sort of situation taking the branch means that at the very least it // won't be taken again in the next iteration of the loop, so we should // consider it less likely than a typical branch. // // We detect this by looking back through the graph of PHI nodes that sets the // value that the condition depends on, and seeing if we can reach a successor // block which can be determined to make the condition false. // // FIXME: We currently consider unlikely blocks to be half as likely as other // blocks, but if we consider the example above the likelyhood is actually // 1/MAX. We could therefore be more precise in how unlikely we consider // blocks to be, but it would require more careful examination of the form // of the comparison expression. const BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()); if (!BI || !BI->isConditional()) return; // Check if the branch is based on an instruction compared with a constant CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition()); if (!CI || !isa<Instruction>(CI->getOperand(0)) || !isa<Constant>(CI->getOperand(1))) return; // Either the instruction must be a PHI, or a chain of operations involving // constants that ends in a PHI which we can then collapse into a single value // if the PHI value is known. 
Instruction *CmpLHS = dyn_cast<Instruction>(CI->getOperand(0)); PHINode *CmpPHI = dyn_cast<PHINode>(CmpLHS); Constant *CmpConst = dyn_cast<Constant>(CI->getOperand(1)); // Collect the instructions until we hit a PHI SmallVector<BinaryOperator *, 1> InstChain; while (!CmpPHI && CmpLHS && isa<BinaryOperator>(CmpLHS) && isa<Constant>(CmpLHS->getOperand(1))) { // Stop if the chain extends outside of the loop if (!L->contains(CmpLHS)) return; InstChain.push_back(cast<BinaryOperator>(CmpLHS)); CmpLHS = dyn_cast<Instruction>(CmpLHS->getOperand(0)); if (CmpLHS) CmpPHI = dyn_cast<PHINode>(CmpLHS); } if (!CmpPHI || !L->contains(CmpPHI)) return; // Trace the phi node to find all values that come from successors of BB SmallPtrSet<PHINode*, 8> VisitedInsts; SmallVector<PHINode*, 8> WorkList; WorkList.push_back(CmpPHI); VisitedInsts.insert(CmpPHI); while (!WorkList.empty()) { PHINode *P = WorkList.back(); WorkList.pop_back(); for (BasicBlock *B : P->blocks()) { // Skip blocks that aren't part of the loop if (!L->contains(B)) continue; Value *V = P->getIncomingValueForBlock(B); // If the source is a PHI add it to the work list if we haven't // already visited it. if (PHINode *PN = dyn_cast<PHINode>(V)) { if (VisitedInsts.insert(PN).second) WorkList.push_back(PN); continue; } // If this incoming value is a constant and B is a successor of BB, then // we can constant-evaluate the compare to see if it makes the branch be // taken or not. 
Constant *CmpLHSConst = dyn_cast<Constant>(V); if (!CmpLHSConst || std::find(succ_begin(BB), succ_end(BB), B) == succ_end(BB)) continue; // First collapse InstChain for (Instruction *I : llvm::reverse(InstChain)) { CmpLHSConst = ConstantExpr::get(I->getOpcode(), CmpLHSConst, cast<Constant>(I->getOperand(1)), true); if (!CmpLHSConst) break; } if (!CmpLHSConst) continue; // Now constant-evaluate the compare Constant *Result = ConstantExpr::getCompare(CI->getPredicate(), CmpLHSConst, CmpConst, true); // If the result means we don't branch to the block then that block is // unlikely. if (Result && ((Result->isZeroValue() && B == BI->getSuccessor(0)) || (Result->isOneValue() && B == BI->getSuccessor(1)))) UnlikelyBlocks.insert(B); } } }
/// MatchGuardHeuristic - Predict that a comparison in which a register is /// an operand, the register is used before being defined in a successor /// block, and the successor block does not post-dominate will reach the /// successor block. /// @returns a Prediction that is a pair in which the first element is the /// successor taken, and the second the successor not taken. Prediction BranchHeuristicsInfo::MatchGuardHeuristic(BasicBlock *root) const { bool matched = false; Prediction pred; // Last instruction of basic block. TerminatorInst *TI = root->getTerminator(); // Basic block successors. True and False branches. BasicBlock *trueSuccessor = TI->getSuccessor(0); BasicBlock *falseSuccessor = TI->getSuccessor(1); // Is the last instruction a Branch Instruction? BranchInst *BI = dyn_cast<BranchInst>(TI); if (!BI || !BI->isConditional()) return empty; // Conditional instruction. Value *cond = BI->getCondition(); // Find if the variable used in the branch instruction is // in fact a comparison instruction. CmpInst *CI = dyn_cast<CmpInst>(cond); if (!CI) return empty; // Seek over all of the operands of this comparison instruction. for (unsigned ops = 0; ops < CI->getNumOperands(); ++ops) { // Find the operand. Value *operand = CI->getOperand(ops); // Check if the operand is neither a function argument or a value. if (!isa<Argument>(operand) && !isa<User>(operand)) continue; // Check if this variable was used in the true successor and // does not post dominate. // Since LLVM is in SSA form, it's impossible for a variable being used // before being defined, so that statement is skipped. if (operand->isUsedInBasicBlock(trueSuccessor) && !PDT->dominates(trueSuccessor, root)) { // If a heuristic was already matched, predict none and abort immediately. if (matched) return empty; matched = true; pred = std::make_pair(trueSuccessor, falseSuccessor); } // Check if this variable was used in the false successor and // does not post dominate. 
if (operand->isUsedInBasicBlock(falseSuccessor) && !PDT->dominates(falseSuccessor, root)) { // If a heuristic was already matched, predict none and abort immediately. if (matched) return empty; matched = true; pred = std::make_pair(falseSuccessor, trueSuccessor); } } return (matched ? pred : empty); }