/// Rebuild the part of UserChain that ends at \p ChainIndex.
///
/// Walking from UserChain[ChainIndex] down to UserChain[0]:
///  - the ConstantInt at index 0 is replaced by applyExts() of itself;
///  - every sext/zext is recorded in ExtInsts and its UserChain slot is set
///    to nullptr;
///  - every BinaryOperator is replaced by a freshly created clone (inserted
///    at IP) whose off-chain operand has been passed through applyExts().
///
/// \returns the value now standing in for UserChain[ChainIndex].
Value *
ConstantOffsetExtractor::distributeExtsAndCloneChain(unsigned ChainIndex) {
  User *Current = UserChain[ChainIndex];

  // Bottom of the chain: the constant offset itself.
  if (ChainIndex == 0) {
    assert(isa<ConstantInt>(Current));
    // applyExts on a ConstantInt gives back a ConstantInt.
    ConstantInt *Extended = cast<ConstantInt>(applyExts(Current));
    UserChain[ChainIndex] = Extended;
    return Extended;
  }

  // Extensions are not cloned; they are remembered and the walk continues
  // with the next element down the chain.
  if (CastInst *Ext = dyn_cast<CastInst>(Current)) {
    assert((isa<SExtInst>(Ext) || isa<ZExtInst>(Ext)) &&
           "We only traced into two types of CastInst: sext and zext");
    ExtInsts.push_back(Ext);
    UserChain[ChainIndex] = nullptr;
    return distributeExtsAndCloneChain(ChainIndex - 1);
  }

  // The chain only ever contains BinaryOperators and CastInsts, so whatever
  // is left must be a binary operator.
  BinaryOperator *BinOp = cast<BinaryOperator>(Current);

  // Which side of BinOp holds the previous chain element?  This has to be
  // decided before recursing, because the recursion rewrites that slot.
  const bool ChainIsLHS = BinOp->getOperand(0) == UserChain[ChainIndex - 1];

  // The off-chain operand is extended first, then the rest of the chain is
  // rebuilt; the order is kept exactly as in the original sequence.
  Value *OffChain = applyExts(BinOp->getOperand(ChainIsLHS ? 1 : 0));
  Value *OnChain = distributeExtsAndCloneChain(ChainIndex - 1);

  Value *NewLHS = ChainIsLHS ? OnChain : OffChain;
  Value *NewRHS = ChainIsLHS ? OffChain : OnChain;
  BinaryOperator *Clone = BinaryOperator::Create(BinOp->getOpcode(), NewLHS,
                                                 NewRHS, BinOp->getName(), IP);
  UserChain[ChainIndex] = Clone;
  return Clone;
}
/* FindRoots() for each instruction I = ’R <- op, Ra, Rb’ if op(I) not associative or commutative continue // I is a root unless R is a temporary // (temporaries are only used once and by an instruction with the same operator) if NumUses(R) > 1 or op(Use(R)) != op(I) mark I as root, processed(root) = false order roots such that precedence of op(r$_i$) $\leq$ precedence of op(r$_{i+1}$) while roots not empty I = ’R <- op, Ra, Rb’ = Def(Pop(root)) BalanceTree(I) */ bool findRoots(Function* f) { bool changed = false; assert(f); std::vector<BinaryOperator*> roots; for(Function::iterator BB = f->begin(); BB != f->end(); ++BB) { for(BasicBlock::iterator II = BB->begin(); II != BB->end(); ++II) { BinaryOperator* BO = dynamic_cast<BinaryOperator*>(&*II); if( BO and isCommutativeOperation(BO) and isAssociativeOperation(BO) ) { if( getRealNumUses(BO) > 1 ) { roots.push_back(BO); INTERNAL_MESSAGE("Root " << BO->getName() << " added for numUses > 1.\n"); } else { for(Value::use_iterator UI = BO->use_begin(); UI != BO->use_end(); ++UI) { if( isDifferentOperation(BO, *UI) ) { roots.push_back(BO); INTERNAL_MESSAGE("Root " << BO->getName() << " added because it is different operation than " << (*UI)->getName() << "\n"); } } } } } } std::sort(roots.begin(), roots.end(), precedence_less_than); std::list<BinaryOperator*> root_queue; root_queue.resize(roots.size()); std::copy(roots.begin(), roots.end(), root_queue.begin()); std::map<Instruction*,bool> visitMap; int roots_balanced = 0; while( !root_queue.empty() ) { BinaryOperator* BO = root_queue.front(); root_queue.pop_front(); bool root_changed = balanceTree(BO, visitMap, roots); if( root_changed ) ++roots_balanced; changed = root_changed or changed; } std::stringstream ss; ss << "Attempted to balance " << roots.size() << " roots ("; for(std::vector<BinaryOperator*>::iterator RI = roots.begin(); RI != roots.end(); ++RI) { if( RI != roots.begin() ) ss << ", "; ss << getValueName((*RI)); } ss << "), " << roots_balanced << " needed 
balancing.\n"; LOG_MESSAGE1("Balancing", ss.str()); return changed; }
/// Rewrite the expression rooted at UserChain[ChainIndex] so that the
/// constant at the bottom of the chain is replaced by zero.
///
/// Index 0 must hold the ConstantInt; every higher index visited here must
/// hold a BinaryOperator one of whose operands is the previous chain element.
///
/// \returns the value that represents the expression without the constant.
Value *ConstantOffsetExtractor::removeConstOffset(unsigned ChainIndex) {
  // Bottom of the chain: the constant turns into a zero of the same type.
  if (ChainIndex == 0) {
    assert(isa<ConstantInt>(UserChain[ChainIndex]));
    return ConstantInt::getNullValue(UserChain[ChainIndex]->getType());
  }

  BinaryOperator *BinOp = cast<BinaryOperator>(UserChain[ChainIndex]);
  unsigned ChainOpIdx =
      (BinOp->getOperand(0) == UserChain[ChainIndex - 1] ? 0 : 1);
  assert(BinOp->getOperand(ChainOpIdx) == UserChain[ChainIndex - 1]);

  Value *Stripped = removeConstOffset(ChainIndex - 1);
  Value *OffChain = BinOp->getOperand(1 - ChainOpIdx);

  // "x op 0" (or "0 op x") collapses to x, except for "0 - x", where the
  // zero sits on the left-hand side of a subtraction.
  const bool ZeroWouldBeSubLHS =
      BinOp->getOpcode() == Instruction::Sub && ChainOpIdx == 0;
  ConstantInt *StrippedConst = dyn_cast<ConstantInt>(Stripped);
  if (StrippedConst && StrippedConst->isZero() && !ZeroWouldBeSubLHS)
    return OffChain;

  if (BinOp->getOpcode() == Instruction::Or) {
    // An "or" has to be rebuilt as an "add", because the "or" may be invalid
    // for the new expression.
    //
    // Example: in
    //   a | (b + 5)   where a and b + 5 have no common bits,
    // 5 can be extracted as the constant offset.  Keeping the "or" in the new
    // index would yield
    //   (a | b) + 5
    // which is not the same value as a | (b + 5).
    //
    // Switching to "add" is sound, since
    //   a | (b + 5) = a + (b + 5) = (a + b) + 5
    Value *AddLHS = ChainOpIdx == 0 ? Stripped : OffChain;
    Value *AddRHS = ChainOpIdx == 0 ? OffChain : Stripped;
    return BinaryOperator::CreateAdd(AddLHS, AddRHS, BinOp->getName(), IP);
  }

  // Otherwise BinOp itself is recycled: the new expression has the same
  // instruction type, and BinOp has at most one use.
  assert(BinOp->getNumUses() <= 1 &&
         "distributeExtsAndCloneChain clones each BinaryOperator in "
         "UserChain, so no one should be used more than "
         "once");
  BinOp->setOperand(ChainOpIdx, Stripped);
  // The wrap flags are cleared on the rewritten instruction.
  BinOp->setHasNoSignedWrap(false);
  BinOp->setHasNoUnsignedWrap(false);
  // Place it after everything that has been inserted so far.
  BinOp->moveBefore(IP);
  return BinOp;
}
/// HandleFloatingPointIV - If the loop has floating induction variable /// then insert corresponding integer induction variable if possible. /// For example, /// for(double i = 0; i < 10000; ++i) /// bar(i) /// is converted into /// for(int i = 0; i < 10000; ++i) /// bar((double)i); /// void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0)); unsigned BackEdge = IncomingEdge^1; // Check incoming value. ConstantFP *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge)); int64_t InitValue; if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue)) return; // Check IV increment. Reject this PN if increment operation is not // an add or increment value can not be represented by an integer. BinaryOperator *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge)); if (Incr == 0 || Incr->getOpcode() != Instruction::FAdd) return; // If this is not an add of the PHI with a constantfp, or if the constant fp // is not an integer, bail out. ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1)); int64_t IncValue; if (IncValueVal == 0 || Incr->getOperand(0) != PN || !ConvertToSInt(IncValueVal->getValueAPF(), IncValue)) return; // Check Incr uses. One user is PN and the other user is an exit condition // used by the conditional terminator. Value::use_iterator IncrUse = Incr->use_begin(); Instruction *U1 = cast<Instruction>(IncrUse++); if (IncrUse == Incr->use_end()) return; Instruction *U2 = cast<Instruction>(IncrUse++); if (IncrUse != Incr->use_end()) return; // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't // only used by a branch, we can't transform it. 
FCmpInst *Compare = dyn_cast<FCmpInst>(U1); if (!Compare) Compare = dyn_cast<FCmpInst>(U2); if (Compare == 0 || !Compare->hasOneUse() || !isa<BranchInst>(Compare->use_back())) return; BranchInst *TheBr = cast<BranchInst>(Compare->use_back()); // We need to verify that the branch actually controls the iteration count // of the loop. If not, the new IV can overflow and no one will notice. // The branch block must be in the loop and one of the successors must be out // of the loop. assert(TheBr->isConditional() && "Can't use fcmp if not conditional"); if (!L->contains(TheBr->getParent()) || (L->contains(TheBr->getSuccessor(0)) && L->contains(TheBr->getSuccessor(1)))) return; // If it isn't a comparison with an integer-as-fp (the exit value), we can't // transform it. ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1)); int64_t ExitValue; if (ExitValueVal == 0 || !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue)) return; // Find new predicate for integer comparison. CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE; switch (Compare->getPredicate()) { default: return; // Unknown comparison. case CmpInst::FCMP_OEQ: case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break; case CmpInst::FCMP_ONE: case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break; case CmpInst::FCMP_OGT: case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break; case CmpInst::FCMP_OGE: case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break; case CmpInst::FCMP_OLT: case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break; case CmpInst::FCMP_OLE: case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break; } // We convert the floating point induction variable to a signed i32 value if // we can. This is only safe if the comparison will not overflow in a way // that won't be trapped by the integer equivalent operations. Check for this // now. // TODO: We could use i64 if it is native and the range requires it. 
// The start/stride/exit values must all fit in signed i32. if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue)) return; // If not actually striding (add x, 0.0), avoid touching the code. if (IncValue == 0) return; // Positive and negative strides have different safety conditions. if (IncValue > 0) { // If we have a positive stride, we require the init to be less than the // exit value and an equality or less than comparison. if (InitValue >= ExitValue || NewPred == CmpInst::ICMP_SGT || NewPred == CmpInst::ICMP_SGE) return; uint32_t Range = uint32_t(ExitValue-InitValue); if (NewPred == CmpInst::ICMP_SLE) { // Normalize SLE -> SLT, check for infinite loop. if (++Range == 0) return; // Range overflows. } unsigned Leftover = Range % uint32_t(IncValue); // If this is an equality comparison, we require that the strided value // exactly land on the exit value, otherwise the IV condition will wrap // around and do things the fp IV wouldn't. if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) && Leftover != 0) return; // If the stride would wrap around the i32 before exiting, we can't // transform the IV. if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue) return; } else { // If we have a negative stride, we require the init to be greater than the // exit value and an equality or greater than comparison. if (InitValue >= ExitValue || NewPred == CmpInst::ICMP_SLT || NewPred == CmpInst::ICMP_SLE) return; uint32_t Range = uint32_t(InitValue-ExitValue); if (NewPred == CmpInst::ICMP_SGE) { // Normalize SGE -> SGT, check for infinite loop. if (++Range == 0) return; // Range overflows. } unsigned Leftover = Range % uint32_t(-IncValue); // If this is an equality comparison, we require that the strided value // exactly land on the exit value, otherwise the IV condition will wrap // around and do things the fp IV wouldn't. 
if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) && Leftover != 0) return; // If the stride would wrap around the i32 before exiting, we can't // transform the IV. if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue) return; } const IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext()); // Insert new integer induction variable. PHINode *NewPHI = PHINode::Create(Int32Ty, PN->getName()+".int", PN); NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue), PN->getIncomingBlock(IncomingEdge)); Value *NewAdd = BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue), Incr->getName()+".int", Incr); NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge)); ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd, ConstantInt::get(Int32Ty, ExitValue), Compare->getName()); // In the following deletions, PN may become dead and may be deleted. // Use a WeakVH to observe whether this happens. WeakVH WeakPH = PN; // Delete the old floating point exit comparison. The branch starts using the // new comparison. NewCompare->takeName(Compare); Compare->replaceAllUsesWith(NewCompare); RecursivelyDeleteTriviallyDeadInstructions(Compare); // Delete the old floating point increment. Incr->replaceAllUsesWith(UndefValue::get(Incr->getType())); RecursivelyDeleteTriviallyDeadInstructions(Incr); // If the FP induction variable still has uses, this is because something else // in the loop uses its value. In order to canonicalize the induction // variable, we chose to eliminate the IV and rewrite it in terms of an // int->fp cast. // // We give preference to sitofp over uitofp because it is faster on most // platforms. if (WeakPH) { Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv", PN->getParent()->getFirstNonPHI()); PN->replaceAllUsesWith(Conv); RecursivelyDeleteTriviallyDeadInstructions(PN); } // Add a new IVUsers entry for the newly-created integer PHI. IU->AddUsersIfInteresting(NewPHI); }
// Peephole optimize the following instructions: // %t1 = cast ? to x * // %t2 = add x * %SP, %t1 ;; Constant must be 2nd operand // // Into: %t3 = getelementptr {<...>} * %SP, <element indices> // %t2 = cast <eltype> * %t3 to {<...>}* // static bool HandleCastToPointer(BasicBlock::iterator BI, const PointerType *DestPTy, const TargetData &TD) { CastInst &CI = cast<CastInst>(*BI); if (CI.use_empty()) return false; // Scan all of the uses, looking for any uses that are not add or sub // instructions. If we have non-adds, do not make this transformation. // bool HasSubUse = false; // Keep track of any subtracts... for (Value::use_iterator I = CI.use_begin(), E = CI.use_end(); I != E; ++I) if (BinaryOperator *BO = dyn_cast<BinaryOperator>(*I)) { if ((BO->getOpcode() != Instruction::Add && BO->getOpcode() != Instruction::Sub) || // Avoid add sbyte* %X, %X cases... BO->getOperand(0) == BO->getOperand(1)) return false; else HasSubUse |= BO->getOpcode() == Instruction::Sub; } else { return false; } std::vector<Value*> Indices; Value *Src = CI.getOperand(0); const Type *Result = ConvertibleToGEP(DestPTy, Src, Indices, TD, &BI); if (Result == 0) return false; // Not convertible... // Cannot handle subtracts if there is more than one index required... if (HasSubUse && Indices.size() != 1) return false; PRINT_PEEPHOLE2("cast-add-to-gep:in", *Src, CI); // If we have a getelementptr capability... transform all of the // add instruction uses into getelementptr's. while (!CI.use_empty()) { BinaryOperator *I = cast<BinaryOperator>(*CI.use_begin()); assert((I->getOpcode() == Instruction::Add || I->getOpcode() == Instruction::Sub) && "Use is not a valid add instruction!"); // Get the value added to the cast result pointer... Value *OtherPtr = I->getOperand((I->getOperand(0) == &CI) ? 
1 : 0); Instruction *GEP = new GetElementPtrInst(OtherPtr, Indices, I->getName()); PRINT_PEEPHOLE1("cast-add-to-gep:i", *I); // If the instruction is actually a subtract, we are guaranteed to only have // one index (from code above), so we just need to negate the pointer index // long value. if (I->getOpcode() == Instruction::Sub) { Instruction *Neg = BinaryOperator::createNeg(GEP->getOperand(1), GEP->getOperand(1)->getName()+".neg", I); GEP->setOperand(1, Neg); } if (GEP->getType() == I->getType()) { // Replace the old add instruction with the shiny new GEP inst ReplaceInstWithInst(I, GEP); } else { // If the type produced by the gep instruction differs from the original // add instruction type, insert a cast now. // // Insert the GEP instruction before the old add instruction... I->getParent()->getInstList().insert(I, GEP); PRINT_PEEPHOLE1("cast-add-to-gep:o", *GEP); GEP = new CastInst(GEP, I->getType()); // Replace the old add instruction with the shiny new GEP inst ReplaceInstWithInst(I, GEP); } PRINT_PEEPHOLE1("cast-add-to-gep:o", *GEP); } return true; }
/// HandleFloatingPointIV - If the loop has floating induction variable /// then insert corresponding integer induction variable if possible. /// For example, /// for(double i = 0; i < 10000; ++i) /// bar(i) /// is converted into /// for(int i = 0; i < 10000; ++i) /// bar((double)i); /// void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PH) { unsigned IncomingEdge = L->contains(PH->getIncomingBlock(0)); unsigned BackEdge = IncomingEdge^1; // Check incoming value. ConstantFP *InitValue = dyn_cast<ConstantFP>(PH->getIncomingValue(IncomingEdge)); if (!InitValue) return; uint64_t newInitValue = Type::getInt32Ty(PH->getContext())->getPrimitiveSizeInBits(); if (!convertToInt(InitValue->getValueAPF(), &newInitValue)) return; // Check IV increment. Reject this PH if increment operation is not // an add or increment value can not be represented by an integer. BinaryOperator *Incr = dyn_cast<BinaryOperator>(PH->getIncomingValue(BackEdge)); if (!Incr) return; if (Incr->getOpcode() != Instruction::FAdd) return; ConstantFP *IncrValue = NULL; unsigned IncrVIndex = 1; if (Incr->getOperand(1) == PH) IncrVIndex = 0; IncrValue = dyn_cast<ConstantFP>(Incr->getOperand(IncrVIndex)); if (!IncrValue) return; uint64_t newIncrValue = Type::getInt32Ty(PH->getContext())->getPrimitiveSizeInBits(); if (!convertToInt(IncrValue->getValueAPF(), &newIncrValue)) return; // Check Incr uses. One user is PH and the other users is exit condition used // by the conditional terminator. Value::use_iterator IncrUse = Incr->use_begin(); Instruction *U1 = cast<Instruction>(IncrUse++); if (IncrUse == Incr->use_end()) return; Instruction *U2 = cast<Instruction>(IncrUse++); if (IncrUse != Incr->use_end()) return; // Find exit condition. FCmpInst *EC = dyn_cast<FCmpInst>(U1); if (!EC) EC = dyn_cast<FCmpInst>(U2); if (!EC) return; if (BranchInst *BI = dyn_cast<BranchInst>(EC->getParent()->getTerminator())) { if (!BI->isConditional()) return; if (BI->getCondition() != EC) return; } // Find exit value. 
If exit value can not be represented as an integer then // do not handle this floating point PH. ConstantFP *EV = NULL; unsigned EVIndex = 1; if (EC->getOperand(1) == Incr) EVIndex = 0; EV = dyn_cast<ConstantFP>(EC->getOperand(EVIndex)); if (!EV) return; uint64_t intEV = Type::getInt32Ty(PH->getContext())->getPrimitiveSizeInBits(); if (!convertToInt(EV->getValueAPF(), &intEV)) return; // Find new predicate for integer comparison. CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE; switch (EC->getPredicate()) { case CmpInst::FCMP_OEQ: case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break; case CmpInst::FCMP_OGT: case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_UGT; break; case CmpInst::FCMP_OGE: case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_UGE; break; case CmpInst::FCMP_OLT: case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_ULT; break; case CmpInst::FCMP_OLE: case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_ULE; break; default: break; } if (NewPred == CmpInst::BAD_ICMP_PREDICATE) return; // Insert new integer induction variable. PHINode *NewPHI = PHINode::Create(Type::getInt32Ty(PH->getContext()), PH->getName()+".int", PH); NewPHI->addIncoming(ConstantInt::get(Type::getInt32Ty(PH->getContext()), newInitValue), PH->getIncomingBlock(IncomingEdge)); Value *NewAdd = BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Type::getInt32Ty(PH->getContext()), newIncrValue), Incr->getName()+".int", Incr); NewPHI->addIncoming(NewAdd, PH->getIncomingBlock(BackEdge)); // The back edge is edge 1 of newPHI, whatever it may have been in the // original PHI. ConstantInt *NewEV = ConstantInt::get(Type::getInt32Ty(PH->getContext()), intEV); Value *LHS = (EVIndex == 1 ? NewPHI->getIncomingValue(1) : NewEV); Value *RHS = (EVIndex == 1 ? NewEV : NewPHI->getIncomingValue(1)); ICmpInst *NewEC = new ICmpInst(EC->getParent()->getTerminator(), NewPred, LHS, RHS, EC->getName()); // In the following deletions, PH may become dead and may be deleted. 
// Use a WeakVH to observe whether this happens. WeakVH WeakPH = PH; // Delete old, floating point, exit comparison instruction. NewEC->takeName(EC); EC->replaceAllUsesWith(NewEC); RecursivelyDeleteTriviallyDeadInstructions(EC); // Delete old, floating point, increment instruction. Incr->replaceAllUsesWith(UndefValue::get(Incr->getType())); RecursivelyDeleteTriviallyDeadInstructions(Incr); // Replace floating induction variable, if it isn't already deleted. // Give SIToFPInst preference over UIToFPInst because it is faster on // platforms that are widely used. if (WeakPH && !PH->use_empty()) { if (useSIToFPInst(*InitValue, *EV, newInitValue, intEV)) { SIToFPInst *Conv = new SIToFPInst(NewPHI, PH->getType(), "indvar.conv", PH->getParent()->getFirstNonPHI()); PH->replaceAllUsesWith(Conv); } else { UIToFPInst *Conv = new UIToFPInst(NewPHI, PH->getType(), "indvar.conv", PH->getParent()->getFirstNonPHI()); PH->replaceAllUsesWith(Conv); } RecursivelyDeleteTriviallyDeadInstructions(PH); } // Add a new IVUsers entry for the newly-created integer PHI. IU->AddUsersIfInteresting(NewPHI); }