/// lowerAcrossUnwindEdges - Find all variables which are alive across an
/// unwind edge and spill them.
void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F,
                                           ArrayRef<InvokeInst *> Invokes) {
  // Scan the code looking for instructions with bad live ranges.
  for (Function::iterator BB = F.begin(), BBE = F.end(); BB != BBE; ++BB) {
    for (BasicBlock::iterator II = BB->begin(), IIE = BB->end(); II != IIE;
         ++II) {
      // Ignore obvious cases we don't have to handle. In particular, most
      // instructions either have no uses or only have a single use inside the
      // current block. Ignore them quickly.
      Instruction *Inst = II;
      if (Inst->use_empty())
        continue;
      if (Inst->hasOneUse() &&
          cast<Instruction>(Inst->user_back())->getParent() == BB &&
          !isa<PHINode>(Inst->user_back()))
        continue;

      // If this is an alloca in the entry block, it's not a real register
      // value.
      if (AllocaInst *AI = dyn_cast<AllocaInst>(Inst))
        if (isa<ConstantInt>(AI->getArraySize()) && BB == F.begin())
          continue;

      // Avoid iterator invalidation by copying users to a temporary vector.
      SmallVector<Instruction *, 16> Users;
      for (User *U : Inst->users()) {
        Instruction *UI = cast<Instruction>(U);
        if (UI->getParent() != BB || isa<PHINode>(UI))
          Users.push_back(UI);
      }

      // Find all of the blocks that this value is live in.
      SmallPtrSet<BasicBlock *, 64> LiveBBs;
      LiveBBs.insert(Inst->getParent());
      while (!Users.empty()) {
        Instruction *U = Users.back();
        Users.pop_back();

        if (!isa<PHINode>(U)) {
          MarkBlocksLiveIn(U->getParent(), LiveBBs);
        } else {
          // Uses for a PHI node occur in their predecessor block.
          PHINode *PN = cast<PHINode>(U);
          for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
            if (PN->getIncomingValue(i) == Inst)
              MarkBlocksLiveIn(PN->getIncomingBlock(i), LiveBBs);
        }
      }

      // Now that we know all of the blocks that this thing is live in, see if
      // it includes any of the unwind locations.
      bool NeedsSpill = false;
      for (unsigned i = 0, e = Invokes.size(); i != e; ++i) {
        BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest();
        if (UnwindBlock != BB && LiveBBs.count(UnwindBlock)) {
          DEBUG(dbgs() << "SJLJ Spill: " << *Inst << " around "
                       << UnwindBlock->getName() << "\n");
          NeedsSpill = true;
          break;
        }
      }

      // If we decided we need a spill, do it.
      // FIXME: Spilling this way is overkill, as it forces all uses of
      // the value to be reloaded from the stack slot, even those that aren't
      // in the unwind blocks. We should be more selective.
      if (NeedsSpill) {
        DemoteRegToStack(*Inst, true);
        ++NumSpilled;
      }
    }
  }

  // Go through the landing pads and remove any PHIs there.
  for (unsigned i = 0, e = Invokes.size(); i != e; ++i) {
    BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest();
    LandingPadInst *LPI = UnwindBlock->getLandingPadInst();

    // Place PHIs into a set to avoid invalidating the iterator.
    SmallPtrSet<PHINode *, 8> PHIsToDemote;
    for (BasicBlock::iterator PN = UnwindBlock->begin(); isa<PHINode>(PN); ++PN)
      PHIsToDemote.insert(cast<PHINode>(PN));
    if (PHIsToDemote.empty())
      continue;

    // Demote the PHIs to the stack.
    for (SmallPtrSet<PHINode *, 8>::iterator I = PHIsToDemote.begin(),
                                             E = PHIsToDemote.end();
         I != E; ++I)
      DemotePHIToStack(*I);

    // Move the landingpad instruction back to the top of the landing pad
    // block.
    LPI->moveBefore(UnwindBlock->begin());
  }
}
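// MarkBlocksLiveIn is used above but not defined in this excerpt. A minimal
// sketch of a typical implementation (an assumption, not the verbatim
// helper): flood-fill the live set backwards through predecessors, stopping
// as soon as a block has already been marked.
static void MarkBlocksLiveIn(BasicBlock *BB,
                             SmallPtrSet<BasicBlock *, 64> &LiveBBs) {
  if (!LiveBBs.insert(BB).second)
    return; // Block is already known to be live; don't revisit it.
  for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
    MarkBlocksLiveIn(*PI, LiveBBs);
}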
/// GetShiftedValue - When CanEvaluateShifted returned true for an expression,
/// this function inserts the new computation that produces the shifted value.
static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
                              InstCombiner &IC) {
  // We can always evaluate constants shifted.
  if (Constant *C = dyn_cast<Constant>(V)) {
    if (isLeftShift)
      V = IC.Builder->CreateShl(C, NumBits);
    else
      V = IC.Builder->CreateLShr(C, NumBits);
    // If we got a constantexpr back, try to simplify it with TD info.
    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
      V = ConstantFoldConstantExpression(CE, IC.getDataLayout(),
                                         IC.getTargetLibraryInfo());
    return V;
  }

  Instruction *I = cast<Instruction>(V);
  IC.Worklist.Add(I);

  switch (I->getOpcode()) {
  default: llvm_unreachable("Inconsistency with CanEvaluateShifted");
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
    // Bitwise operators can all be evaluated shifted.
    I->setOperand(0, GetShiftedValue(I->getOperand(0), NumBits,
                                     isLeftShift, IC));
    I->setOperand(1, GetShiftedValue(I->getOperand(1), NumBits,
                                     isLeftShift, IC));
    return I;

  case Instruction::Shl: {
    BinaryOperator *BO = cast<BinaryOperator>(I);
    unsigned TypeWidth = BO->getType()->getScalarSizeInBits();

    // We only accept shifts-by-a-constant in CanEvaluateShifted.
    ConstantInt *CI = cast<ConstantInt>(BO->getOperand(1));

    // We can always fold shl(c1)+shl(c2) -> shl(c1+c2).
    if (isLeftShift) {
      // If this is an oversized composite shift, then unsigned shifts get 0.
      unsigned NewShAmt = NumBits + CI->getZExtValue();
      if (NewShAmt >= TypeWidth)
        return Constant::getNullValue(I->getType());

      BO->setOperand(1, ConstantInt::get(BO->getType(), NewShAmt));
      BO->setHasNoUnsignedWrap(false);
      BO->setHasNoSignedWrap(false);
      return I;
    }

    // We turn shl(c)+lshr(c) -> and(c2) if the input doesn't already have
    // zeros.
    if (CI->getValue() == NumBits) {
      APInt Mask(APInt::getLowBitsSet(TypeWidth, TypeWidth - NumBits));
      V = IC.Builder->CreateAnd(BO->getOperand(0),
                                ConstantInt::get(BO->getContext(), Mask));
      if (Instruction *VI = dyn_cast<Instruction>(V)) {
        VI->moveBefore(BO);
        VI->takeName(BO);
      }
      return V;
    }

    // We turn shl(c1)+shr(c2) -> shl(c3)+and(c4), but only when we know that
    // the and won't be needed.
    assert(CI->getZExtValue() > NumBits);
    BO->setOperand(1, ConstantInt::get(BO->getType(),
                                       CI->getZExtValue() - NumBits));
    BO->setHasNoUnsignedWrap(false);
    BO->setHasNoSignedWrap(false);
    return BO;
  }

  case Instruction::LShr: {
    BinaryOperator *BO = cast<BinaryOperator>(I);
    unsigned TypeWidth = BO->getType()->getScalarSizeInBits();
    // We only accept shifts-by-a-constant in CanEvaluateShifted.
    ConstantInt *CI = cast<ConstantInt>(BO->getOperand(1));

    // We can always fold lshr(c1)+lshr(c2) -> lshr(c1+c2).
    if (!isLeftShift) {
      // If this is an oversized composite shift, then unsigned shifts get 0.
      unsigned NewShAmt = NumBits + CI->getZExtValue();
      if (NewShAmt >= TypeWidth)
        return Constant::getNullValue(BO->getType());

      BO->setOperand(1, ConstantInt::get(BO->getType(), NewShAmt));
      BO->setIsExact(false);
      return I;
    }

    // We turn lshr(c)+shl(c) -> and(c2) if the input doesn't already have
    // zeros.
    if (CI->getValue() == NumBits) {
      APInt Mask(APInt::getHighBitsSet(TypeWidth, TypeWidth - NumBits));
      V = IC.Builder->CreateAnd(I->getOperand(0),
                                ConstantInt::get(BO->getContext(), Mask));
      if (Instruction *VI = dyn_cast<Instruction>(V)) {
        VI->moveBefore(I);
        VI->takeName(I);
      }
      return V;
    }

    // We turn lshr(c1)+shl(c2) -> lshr(c3)+and(c4), but only when we know that
    // the and won't be needed.
    assert(CI->getZExtValue() > NumBits);
    BO->setOperand(1, ConstantInt::get(BO->getType(),
                                       CI->getZExtValue() - NumBits));
    BO->setIsExact(false);
    return BO;
  }

  case Instruction::Select:
    I->setOperand(1, GetShiftedValue(I->getOperand(1), NumBits,
                                     isLeftShift, IC));
    I->setOperand(2, GetShiftedValue(I->getOperand(2), NumBits,
                                     isLeftShift, IC));
    return I;

  case Instruction::PHI: {
    // We can change a phi if we can change all operands. Note that we never
    // get into trouble with cyclic PHIs here because we only consider
    // instructions with a single use.
    PHINode *PN = cast<PHINode>(I);
    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
      PN->setIncomingValue(i, GetShiftedValue(PN->getIncomingValue(i), NumBits,
                                              isLeftShift, IC));
    return PN;
  }
  }
}
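// A quick plain-C++ illustration (not part of the pass) of the rewrites
// GetShiftedValue performs on constant shift amounts, checked on uint32_t:
// shl(c1)+shl(c2) combines into shl(c1+c2) and becomes zero once oversized,
// and shl(c)+lshr(c) is the same as masking off the high c bits.
#include <cassert>
#include <cstdint>

int main() {
  uint32_t x = 0xDEADBEEF;
  // shl(c1)+shl(c2) -> shl(c1+c2)
  assert(((x << 3) << 5) == (x << 8));
  // Oversized composite shift (17+17 >= 32) -> 0.
  assert(((x << 17) << 17) == 0u);
  // shl(c)+lshr(c) -> and with a low-bits mask.
  assert(((x << 12) >> 12) == (x & 0x000FFFFFu));
  return 0;
}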
/// CanEvaluateShifted - See if we can compute the specified value, but shifted
/// logically to the left or right by some number of bits. This should return
/// true if the expression can be computed for the same cost as the current
/// expression tree. This is used to eliminate extraneous shifting from things
/// like:
///   %C = shl i128 %A, 64
///   %D = shl i128 %B, 96
///   %E = or i128 %C, %D
///   %F = lshr i128 %E, 64
/// where the client will ask if E can be computed shifted right by 64-bits. If
/// this succeeds, the GetShiftedValue function will be called to produce the
/// value.
static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift,
                               InstCombiner &IC) {
  // We can always evaluate constants shifted.
  if (isa<Constant>(V))
    return true;

  Instruction *I = dyn_cast<Instruction>(V);
  if (!I) return false;

  // If this is the opposite shift, we can directly reuse the input of the
  // shift if the needed bits are already zero in the input. This allows us to
  // reuse the value which means that we don't care if the shift has multiple
  // uses.
  // TODO: Handle opposite shift by exact value.
  ConstantInt *CI = 0;
  if ((isLeftShift && match(I, m_LShr(m_Value(), m_ConstantInt(CI)))) ||
      (!isLeftShift && match(I, m_Shl(m_Value(), m_ConstantInt(CI))))) {
    if (CI->getZExtValue() == NumBits) {
      // TODO: Check that the input bits are already zero with
      // MaskedValueIsZero.
#if 0
      // If this is a truncate of a logical shr, we can truncate it to a
      // smaller lshr iff we know that the bits we would otherwise be shifting
      // in are already zeros.
      uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
      uint32_t BitWidth = Ty->getScalarSizeInBits();
      if (MaskedValueIsZero(I->getOperand(0),
            APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth)) &&
          CI->getLimitedValue(BitWidth) < BitWidth) {
        return CanEvaluateTruncated(I->getOperand(0), Ty);
      }
#endif
    }
  }

  // We can't mutate something that has multiple uses: doing so would
  // require duplicating the instruction in general, which isn't profitable.
  if (!I->hasOneUse()) return false;

  switch (I->getOpcode()) {
  default: return false;
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
    // Bitwise operators can all be evaluated shifted.
    return CanEvaluateShifted(I->getOperand(0), NumBits, isLeftShift, IC) &&
           CanEvaluateShifted(I->getOperand(1), NumBits, isLeftShift, IC);

  case Instruction::Shl: {
    // We can often fold the shift into shifts-by-a-constant.
    CI = dyn_cast<ConstantInt>(I->getOperand(1));
    if (CI == 0) return false;

    // We can always fold shl(c1)+shl(c2) -> shl(c1+c2).
    if (isLeftShift) return true;

    // We can always turn shl(c)+shr(c) -> and(c2).
    if (CI->getValue() == NumBits) return true;

    unsigned TypeWidth = I->getType()->getScalarSizeInBits();

    // We can turn shl(c1)+shr(c2) -> shl(c3)+and(c4), but it isn't
    // profitable unless we know the and'd out bits are already zero.
    if (CI->getZExtValue() > NumBits) {
      unsigned LowBits = TypeWidth - CI->getZExtValue();
      if (MaskedValueIsZero(I->getOperand(0),
                       APInt::getLowBitsSet(TypeWidth, NumBits) << LowBits))
        return true;
    }

    return false;
  }
  case Instruction::LShr: {
    // We can often fold the shift into shifts-by-a-constant.
    CI = dyn_cast<ConstantInt>(I->getOperand(1));
    if (CI == 0) return false;

    // We can always fold lshr(c1)+lshr(c2) -> lshr(c1+c2).
    if (!isLeftShift) return true;

    // We can always turn lshr(c)+shl(c) -> and(c2).
    if (CI->getValue() == NumBits) return true;

    unsigned TypeWidth = I->getType()->getScalarSizeInBits();

    // We can turn lshr(c1)+shl(c2) -> lshr(c3)+and(c4), but it isn't
    // profitable unless we know the and'd out bits are already zero.
    if (CI->getValue().ult(TypeWidth) && CI->getZExtValue() > NumBits) {
      unsigned LowBits = CI->getZExtValue() - NumBits;
      if (MaskedValueIsZero(I->getOperand(0),
                       APInt::getLowBitsSet(TypeWidth, NumBits) << LowBits))
        return true;
    }

    return false;
  }
  case Instruction::Select: {
    SelectInst *SI = cast<SelectInst>(I);
    return CanEvaluateShifted(SI->getTrueValue(), NumBits, isLeftShift, IC) &&
           CanEvaluateShifted(SI->getFalseValue(), NumBits, isLeftShift, IC);
  }
  case Instruction::PHI: {
    // We can change a phi if we can change all operands. Note that we never
    // get into trouble with cyclic PHIs here because we only consider
    // instructions with a single use.
    PHINode *PN = cast<PHINode>(I);
    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
      if (!CanEvaluateShifted(PN->getIncomingValue(i), NumBits,
                              isLeftShift, IC))
        return false;
    return true;
  }
  }
}
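// A scaled-down, plain-C++ check of the motivating example in the doc comment
// above (uint32_t standing in for i128, shift amounts 16/24/16 standing in
// for 64/96/64): lshr(or(shl(A,16), shl(B,24)), 16) can be evaluated with the
// outer shift folded through each operand, in the shl(c3)+and(c4) form the
// comments describe.
#include <cassert>
#include <cstdint>

int main() {
  uint32_t A = 0x12345678, B = 0x9ABCDEF0;
  uint32_t E = (A << 16) | (B << 24); // original expression tree
  uint32_t F = E >> 16;               // the extraneous outer shift
  // Evaluate the tree "shifted right by 16" directly:
  uint32_t G = (A & 0xFFFFu)          // shl(16) then lshr(16) -> low-bit mask
             | ((B << 8) & 0xFF00u);  // shl(24) then lshr(16) -> shl(8)+mask
  assert(F == G);
  return 0;
}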
// If we have a PHI node with a vector type that has only 2 uses: feed
// itself and be an operand of extractelement at a constant location,
// try to replace the PHI of the vector type with a PHI of a scalar type.
Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) {
  // Verify that the PHI node has exactly 2 uses. Otherwise return nullptr.
  if (!PN->hasNUses(2))
    return nullptr;

  // If so, it's known at this point that one operand is PHI and the other is
  // an extractelement node. Find the PHI user that is not the extractelement
  // node.
  auto iu = PN->user_begin();
  Instruction *PHIUser = dyn_cast<Instruction>(*iu);
  if (PHIUser == cast<Instruction>(&EI))
    PHIUser = cast<Instruction>(*(++iu));

  // Verify that this PHI user has one use, which is the PHI itself,
  // and that it is a binary operation which is cheap to scalarize.
  // Otherwise return nullptr.
  if (!PHIUser->hasOneUse() || !(PHIUser->user_back() == PN) ||
      !(isa<BinaryOperator>(PHIUser)) || !cheapToScalarize(PHIUser, true))
    return nullptr;

  // Create a scalar PHI node that will replace the vector PHI node
  // just before the current PHI node.
  PHINode *scalarPHI = cast<PHINode>(InsertNewInstWith(
      PHINode::Create(EI.getType(), PN->getNumIncomingValues(), ""), *PN));

  // Scalarize each PHI operand.
  for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
    Value *PHIInVal = PN->getIncomingValue(i);
    BasicBlock *inBB = PN->getIncomingBlock(i);
    Value *Elt = EI.getIndexOperand();
    // If the operand is the PHI induction variable:
    if (PHIInVal == PHIUser) {
      // Scalarize the binary operation. Its first operand is the
      // scalar PHI, and the second operand is extracted from the other
      // vector operand.
      BinaryOperator *B0 = cast<BinaryOperator>(PHIUser);
      unsigned opId = (B0->getOperand(0) == PN) ? 1 : 0;
      Value *Op = InsertNewInstWith(
          ExtractElementInst::Create(B0->getOperand(opId), Elt,
                                     B0->getOperand(opId)->getName() + ".Elt"),
          *B0);
      Value *newPHIUser = InsertNewInstWith(
          BinaryOperator::Create(B0->getOpcode(), scalarPHI, Op), *B0);
      scalarPHI->addIncoming(newPHIUser, inBB);
    } else {
      // Scalarize PHI input:
      Instruction *newEI = ExtractElementInst::Create(PHIInVal, Elt, "");
      // Insert the new instruction into the predecessor basic block.
      Instruction *pos = dyn_cast<Instruction>(PHIInVal);
      BasicBlock::iterator InsertPos;
      if (pos && !isa<PHINode>(pos)) {
        InsertPos = ++pos->getIterator();
      } else {
        InsertPos = inBB->getFirstInsertionPt();
      }

      InsertNewInstWith(newEI, *InsertPos);

      scalarPHI->addIncoming(newEI, inBB);
    }
  }

  return replaceInstUsesWith(EI, scalarPHI);
}
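// Illustrative before/after sketch (hypothetical IR, not taken from the
// source): the vector PHI below has exactly two uses -- the feeding add and a
// constant-index extractelement -- so it qualifies for scalarization.
//
//   loop:
//     %v = phi <4 x i32> [ %init, %entry ], [ %v.next, %loop ]
//     %v.next = add <4 x i32> %v, %step
//     ...
//   exit:
//     %r = extractelement <4 x i32> %v, i32 0
//
// scalarizePHI rewrites this to iterate on the element instead:
//
//   loop:
//     %s = phi i32 [ %init.elt, %entry ], [ %s.next, %loop ]
//     %step.elt = extractelement <4 x i32> %step, i32 0
//     %s.next = add i32 %s, %step.elt
//
// where %init.elt is an extractelement of %init placed in the predecessor
// block, and the uses of %r are replaced by the scalar PHI.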
void DebriefDlg::Show() {
  FormWindow::Show();
  Game::SetTimeCompression(1);

  mission = 0;
  campaign = Campaign::GetCampaign();
  sim = Sim::GetSim();

  if (sim)
    ship = sim->GetPlayerShip();

  if (campaign)
    mission = campaign->GetMission();

  if (mission_name) {
    if (mission)
      mission_name->SetText(mission->Name());
    else
      mission_name->SetText(Game::GetText("DebriefDlg.mission-name"));
  }

  if (mission_system) {
    mission_system->SetText("");
    if (mission) {
      StarSystem* sys = mission->GetStarSystem();
      if (sys)
        mission_system->SetText(sys->Name());
    }
  }

  if (mission_sector) {
    mission_sector->SetText("");
    if (mission) {
      MissionElement* elem = mission->GetElements()[0];
      if (elem)
        mission_sector->SetText(elem->Region());
    }
  }

  if (mission_time_start) {
    if (mission) {
      char txt[32];
      FormatDayTime(txt, mission->Start());
      mission_time_start->SetText(txt);
    }
  }

  if (objectives) {
    bool found_objectives = false;

    if (sim && sim->GetPlayerElement()) {
      Text text;
      Element* elem = sim->GetPlayerElement();

      for (int i = 0; i < elem->NumObjectives(); i++) {
        Instruction* obj = elem->GetObjective(i);
        text += Text("* ") + obj->GetDescription() + Text("\n");
        found_objectives = true;
      }

      objectives->SetText(text);
    }

    if (!found_objectives) {
      if (mission)
        objectives->SetText(mission->Objective());
      else
        objectives->SetText(Game::GetText("DebriefDlg.unspecified"));
    }
  }

  if (situation) {
    if (mission)
      situation->SetText(mission->Situation());
    else
      situation->SetText(Game::GetText("DebriefDlg.unknown"));
  }

  if (mission_score) {
    mission_score->SetText(Game::GetText("DebriefDlg.no-stats"));

    if (ship) {
      for (int i = 0; i < ShipStats::NumStats(); i++) {
        ShipStats* stats = ShipStats::GetStats(i);
        if (stats && !strcmp(ship->Name(), stats->GetName())) {
          stats->Summarize();

          Player* player = Player::GetCurrentPlayer();
          int points = stats->GetPoints() + stats->GetCommandPoints();
          if (player && sim)
            points = player->GetMissionPoints(stats, sim->StartTime()) +
                     stats->GetCommandPoints();

          char score[32];
          sprintf_s(score, "%d %s", points,
                    Game::GetText("DebriefDlg.points").data());
          mission_score->SetText(score);
          break;
        }
      }
    }
  }

  DrawUnits();
}
/// \brief Assign DWARF discriminators.
///
/// To assign discriminators, we examine the boundaries of every
/// basic block and its successors. Suppose there is a basic block B1
/// with successor B2. The last instruction I1 in B1 and the first
/// instruction I2 in B2 are located at the same file and line number.
/// This situation is illustrated in the following code snippet:
///
///       if (i < 10) x = i;
///
///     entry:
///       br i1 %cmp, label %if.then, label %if.end, !dbg !10
///     if.then:
///       %1 = load i32* %i.addr, align 4, !dbg !10
///       store i32 %1, i32* %x, align 4, !dbg !10
///       br label %if.end, !dbg !10
///     if.end:
///       ret void, !dbg !12
///
/// Notice how the branch instruction in block 'entry' and all the
/// instructions in block 'if.then' have the exact same debug location
/// information (!dbg !10).
///
/// To distinguish instructions in block 'entry' from instructions in
/// block 'if.then', we generate a new lexical block for all the
/// instructions in block 'if.then' that share the same file and line
/// location with the last instruction of block 'entry'.
///
/// This new lexical block will have the same location information as
/// the previous one, but with a new DWARF discriminator value.
///
/// One of the main uses of this discriminator value is in runtime
/// sample profilers. It allows the profiler to distinguish instructions
/// at location !dbg !10 that execute on different basic blocks. This is
/// important because while the predicate 'if (x < 10)' may have been
/// executed millions of times, the assignment 'x = i' may have only
/// executed a handful of times (meaning that the entry->if.then edge is
/// seldom taken).
///
/// If we did not have discriminator information, the profiler would
/// assign the same weight to both blocks 'entry' and 'if.then', which
/// in turn will make it conclude that the entry->if.then edge is very
/// hot.
///
/// To decide where to create new discriminator values, this function
/// traverses the CFG and examines instructions at basic block boundaries.
/// If the last instruction I1 of a block B1 is at the same file and line
/// location as instruction I2 of successor B2, then it creates a new
/// lexical block for I2 and all the instructions in B2 that share the same
/// file and line location as I2. This new lexical block will have a
/// different discriminator number than I1.
bool AddDiscriminators::runOnFunction(Function &F) {
  // If the function has debug information, but the user has disabled
  // discriminators, do nothing.
  // Similarly, if the function has no debug info, do nothing.
  // Finally, if this module is built with dwarf versions earlier than 4,
  // do nothing (discriminator support is a DWARF 4 feature).
  if (NoDiscriminators || !hasDebugInfo(F) ||
      F.getParent()->getDwarfVersion() < 4)
    return false;

  bool Changed = false;
  Module *M = F.getParent();
  LLVMContext &Ctx = M->getContext();
  DIBuilder Builder(*M, /*AllowUnresolved*/ false);

  // Traverse all the blocks looking for instructions in different
  // blocks that are at the same file:line location.
  for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
    BasicBlock *B = I;
    TerminatorInst *Last = B->getTerminator();
    DILocation LastDIL = Last->getDebugLoc().get();
    if (!LastDIL)
      continue;

    for (unsigned I = 0; I < Last->getNumSuccessors(); ++I) {
      BasicBlock *Succ = Last->getSuccessor(I);
      Instruction *First = Succ->getFirstNonPHIOrDbgOrLifetime();
      DILocation FirstDIL = First->getDebugLoc().get();
      if (!FirstDIL)
        continue;

      // If the first instruction (First) of Succ is at the same file
      // location as B's last instruction (Last), add a new
      // discriminator for First's location and all the instructions
      // in Succ that share the same location with First.
      if (!FirstDIL->canDiscriminate(*LastDIL)) {
        // Create a new lexical scope and compute a new discriminator
        // number for it.
        StringRef Filename = FirstDIL->getFilename();
        auto *Scope = FirstDIL->getScope();
        auto *File = Builder.createFile(Filename, Scope->getDirectory());

        // FIXME: Calculate the discriminator here, based on local information,
        // and delete MDLocation::computeNewDiscriminator(). The current
        // solution gives different results depending on other modules in the
        // same context. All we really need is to discriminate between
        // FirstDIL and LastDIL -- a local map would suffice.
        unsigned Discriminator = FirstDIL->computeNewDiscriminator();
        auto *NewScope =
            Builder.createLexicalBlockFile(Scope, File, Discriminator);
        auto *NewDIL =
            MDLocation::get(Ctx, FirstDIL->getLine(), FirstDIL->getColumn(),
                            NewScope, FirstDIL->getInlinedAt());
        DebugLoc newDebugLoc = NewDIL;

        // Attach this new debug location to First and every
        // instruction following First that shares the same location.
        for (BasicBlock::iterator I1(*First), E1 = Succ->end(); I1 != E1;
             ++I1) {
          if (I1->getDebugLoc().get() != FirstDIL)
            break;
          I1->setDebugLoc(newDebugLoc);
          DEBUG(dbgs() << NewDIL->getFilename() << ":" << NewDIL->getLine()
                       << ":" << NewDIL->getColumn() << ":"
                       << NewDIL->getDiscriminator() << *I1 << "\n");
        }

        DEBUG(dbgs() << "\n");
        Changed = true;
      }
    }
  }
  return Changed;
}
/// The specified block is found to be reachable, clone it and
/// anything that it can reach.
void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
                                       BasicBlock::const_iterator StartingInst,
                                       std::vector<const BasicBlock*> &ToClone){
  WeakVH &BBEntry = VMap[BB];

  // Have we already cloned this block?
  if (BBEntry) return;

  // Nope, clone it now.
  BasicBlock *NewBB;
  BBEntry = NewBB = BasicBlock::Create(BB->getContext());
  if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix);

  // It is only legal to clone a function if a block address within that
  // function is never referenced outside of the function. Given that, we
  // want to map block addresses from the old function to block addresses in
  // the clone. (This is different from the generic ValueMapper
  // implementation, which generates an invalid blockaddress when
  // cloning a function.)
  //
  // Note that we don't need to fix the mapping for unreachable blocks;
  // the default mapping there is safe.
  if (BB->hasAddressTaken()) {
    Constant *OldBBAddr = BlockAddress::get(const_cast<Function*>(OldFunc),
                                            const_cast<BasicBlock*>(BB));
    VMap[OldBBAddr] = BlockAddress::get(NewFunc, NewBB);
  }

  bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false;

  // Loop over all instructions, and copy them over, DCE'ing as we go. This
  // loop doesn't include the terminator.
  for (BasicBlock::const_iterator II = StartingInst, IE = --BB->end();
       II != IE; ++II) {
    Instruction *NewInst = II->clone();

    // Eagerly remap operands to the newly cloned instruction, except for PHI
    // nodes for which we defer processing until we update the CFG.
    if (!isa<PHINode>(NewInst)) {
      RemapInstruction(NewInst, VMap,
                       ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);

      // If we can simplify this instruction to some other value, simply add
      // a mapping to that value rather than inserting a new instruction into
      // the basic block.
      if (Value *V =
              SimplifyInstruction(NewInst, BB->getModule()->getDataLayout())) {
        // On the off-chance that this simplifies to an instruction in the old
        // function, map it back into the new function.
        if (Value *MappedV = VMap.lookup(V))
          V = MappedV;

        VMap[&*II] = V;
        delete NewInst;
        continue;
      }
    }

    if (II->hasName())
      NewInst->setName(II->getName()+NameSuffix);
    VMap[&*II] = NewInst; // Add instruction map to value.
    NewBB->getInstList().push_back(NewInst);
    hasCalls |= (isa<CallInst>(II) && !isa<DbgInfoIntrinsic>(II));

    if (CodeInfo)
      if (auto CS = ImmutableCallSite(&*II))
        if (CS.hasOperandBundles())
          CodeInfo->OperandBundleCallSites.push_back(NewInst);

    if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
      if (isa<ConstantInt>(AI->getArraySize()))
        hasStaticAllocas = true;
      else
        hasDynamicAllocas = true;
    }
  }

  // Finally, clone over the terminator.
  const TerminatorInst *OldTI = BB->getTerminator();
  bool TerminatorDone = false;
  if (const BranchInst *BI = dyn_cast<BranchInst>(OldTI)) {
    if (BI->isConditional()) {
      // If the condition was a known constant in the callee...
      ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition());
      // Or is a known constant in the caller...
      if (!Cond) {
        Value *V = VMap.lookup(BI->getCondition());
        Cond = dyn_cast_or_null<ConstantInt>(V);
      }

      // Constant fold to uncond branch!
      if (Cond) {
        BasicBlock *Dest = BI->getSuccessor(!Cond->getZExtValue());
        VMap[OldTI] = BranchInst::Create(Dest, NewBB);
        ToClone.push_back(Dest);
        TerminatorDone = true;
      }
    }
  } else if (const SwitchInst *SI = dyn_cast<SwitchInst>(OldTI)) {
    // If switching on a value known constant in the caller.
    ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition());
    if (!Cond) { // Or known constant after constant prop in the callee...
      Value *V = VMap.lookup(SI->getCondition());
      Cond = dyn_cast_or_null<ConstantInt>(V);
    }
    if (Cond) { // Constant fold to uncond branch!
      SwitchInst::ConstCaseIt Case = SI->findCaseValue(Cond);
      BasicBlock *Dest = const_cast<BasicBlock*>(Case.getCaseSuccessor());
      VMap[OldTI] = BranchInst::Create(Dest, NewBB);
      ToClone.push_back(Dest);
      TerminatorDone = true;
    }
  }

  if (!TerminatorDone) {
    Instruction *NewInst = OldTI->clone();
    if (OldTI->hasName())
      NewInst->setName(OldTI->getName()+NameSuffix);
    NewBB->getInstList().push_back(NewInst);
    VMap[OldTI] = NewInst; // Add instruction map to value.

    if (CodeInfo)
      if (auto CS = ImmutableCallSite(OldTI))
        if (CS.hasOperandBundles())
          CodeInfo->OperandBundleCallSites.push_back(NewInst);

    // Recursively clone any reachable successor blocks.
    const TerminatorInst *TI = BB->getTerminator();
    for (const BasicBlock *Succ : TI->successors())
      ToClone.push_back(Succ);
  }

  if (CodeInfo) {
    CodeInfo->ContainsCalls |= hasCalls;
    CodeInfo->ContainsDynamicAllocas |= hasDynamicAllocas;
    CodeInfo->ContainsDynamicAllocas |= hasStaticAllocas &&
                                        BB != &BB->getParent()->front();
  }
}
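// A plain-C++ note (illustrative only) on the successor selection above: for
// a conditional branch, successor 0 is the "true" edge and successor 1 the
// "false" edge, so a known constant condition C selects getSuccessor(!C).
#include <cassert>

int main() {
  const char *succ[2] = {"true.dest", "false.dest"};
  bool cond = true;
  assert(succ[!cond] == succ[0]); // condition true  -> successor 0
  cond = false;
  assert(succ[!cond] == succ[1]); // condition false -> successor 1
  return 0;
}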
void StatsTracker::computeReachableUncovered() {
  KModule *km = executor.kmodule;
  Module *m = km->module;
  static bool init = true;
  const InstructionInfoTable &infos = *km->infos;
  StatisticManager &sm = *theStatisticManager;

  if (init) {
    init = false;

    // Compute call targets. It would be nice to use alias information
    // instead of assuming all indirect calls hit all escaping
    // functions, eh?
    for (Module::iterator fnIt = m->begin(), fn_ie = m->end();
         fnIt != fn_ie; ++fnIt) {
      for (Function::iterator bbIt = fnIt->begin(), bb_ie = fnIt->end();
           bbIt != bb_ie; ++bbIt) {
        for (BasicBlock::iterator it = bbIt->begin(), ie = bbIt->end();
             it != ie; ++it) {
          if (isa<CallInst>(it) || isa<InvokeInst>(it)) {
            CallSite cs(it);
            if (isa<InlineAsm>(cs.getCalledValue())) {
              // We can never call through here so assume no targets
              // (which should be correct anyhow).
              callTargets.insert(std::make_pair(it,
                                                std::vector<Function*>()));
            } else if (Function *target = getDirectCallTarget(cs)) {
              callTargets[it].push_back(target);
            } else {
              callTargets[it] =
                std::vector<Function*>(km->escapingFunctions.begin(),
                                       km->escapingFunctions.end());
            }
          }
        }
      }
    }

    // Compute function callers as the inverse of callTargets.
    for (calltargets_ty::iterator it = callTargets.begin(),
           ie = callTargets.end(); it != ie; ++it)
      for (std::vector<Function*>::iterator fit = it->second.begin(),
             fie = it->second.end(); fit != fie; ++fit)
        functionCallers[*fit].push_back(it->first);

    // Initialize minDistToReturn to shortest paths through
    // functions. 0 is unreachable.
    std::vector<Instruction *> instructions;
    for (Module::iterator fnIt = m->begin(), fn_ie = m->end();
         fnIt != fn_ie; ++fnIt) {
      if (fnIt->isDeclaration()) {
        if (fnIt->doesNotReturn()) {
          functionShortestPath[fnIt] = 0;
        } else {
          functionShortestPath[fnIt] = 1; // whatever
        }
      } else {
        functionShortestPath[fnIt] = 0;
      }

      // Not sure if I should bother to preorder here. XXX I should.
      for (Function::iterator bbIt = fnIt->begin(), bb_ie = fnIt->end();
           bbIt != bb_ie; ++bbIt) {
        for (BasicBlock::iterator it = bbIt->begin(), ie = bbIt->end();
             it != ie; ++it) {
          instructions.push_back(it);
          unsigned id = infos.getInfo(it).id;
          sm.setIndexedValue(stats::minDistToReturn,
                             id,
                             isa<ReturnInst>(it)
#if LLVM_VERSION_CODE < LLVM_VERSION(3, 1)
                             || isa<UnwindInst>(it)
#endif
                             );
        }
      }
    }

    std::reverse(instructions.begin(), instructions.end());

    // I'm so lazy it's not even worklisted.
    bool changed;
    do {
      changed = false;
      for (std::vector<Instruction*>::iterator it = instructions.begin(),
             ie = instructions.end(); it != ie; ++it) {
        Instruction *inst = *it;
        unsigned bestThrough = 0;

        if (isa<CallInst>(inst) || isa<InvokeInst>(inst)) {
          std::vector<Function*> &targets = callTargets[inst];
          for (std::vector<Function*>::iterator fnIt = targets.begin(),
                 ie = targets.end(); fnIt != ie; ++fnIt) {
            uint64_t dist = functionShortestPath[*fnIt];
            if (dist) {
              dist = 1+dist; // count instruction itself
              if (bestThrough==0 || dist<bestThrough)
                bestThrough = dist;
            }
          }
        } else {
          bestThrough = 1;
        }

        if (bestThrough) {
          unsigned id = infos.getInfo(*it).id;
          uint64_t best, cur = best = sm.getIndexedValue(stats::minDistToReturn,
                                                         id);
          std::vector<Instruction*> succs = getSuccs(*it);
          for (std::vector<Instruction*>::iterator it2 = succs.begin(),
                 ie = succs.end(); it2 != ie; ++it2) {
            uint64_t dist = sm.getIndexedValue(stats::minDistToReturn,
                                               infos.getInfo(*it2).id);
            if (dist) {
              uint64_t val = bestThrough + dist;
              if (best==0 || val<best)
                best = val;
            }
          }
          // There's a corner case here when a function only includes a single
          // instruction (a ret). In that case, we MUST update
          // functionShortestPath, or it will remain 0 (erroneously indicating
          // that no return instructions are reachable).
          Function *f = inst->getParent()->getParent();
          if (best != cur
              || (inst == f->begin()->begin()
                  && functionShortestPath[f] != best)) {
            sm.setIndexedValue(stats::minDistToReturn, id, best);
            changed = true;

            // Update shortest path if this is the entry point.
            if (inst==f->begin()->begin())
              functionShortestPath[f] = best;
          }
        }
      }
    } while (changed);
  }

  // compute minDistToUncovered, 0 is unreachable
  std::vector<Instruction *> instructions;
  for (Module::iterator fnIt = m->begin(), fn_ie = m->end();
       fnIt != fn_ie; ++fnIt) {
    // Not sure if I should bother to preorder here.
    for (Function::iterator bbIt = fnIt->begin(), bb_ie = fnIt->end();
         bbIt != bb_ie; ++bbIt) {
      for (BasicBlock::iterator it = bbIt->begin(), ie = bbIt->end();
           it != ie; ++it) {
        unsigned id = infos.getInfo(it).id;
        instructions.push_back(&*it);
        sm.setIndexedValue(stats::minDistToUncovered,
                           id,
                           sm.getIndexedValue(stats::uncoveredInstructions,
                                              id));
      }
    }
  }

  std::reverse(instructions.begin(), instructions.end());

  // I'm so lazy it's not even worklisted.
  bool changed;
  do {
    changed = false;
    for (std::vector<Instruction*>::iterator it = instructions.begin(),
           ie = instructions.end(); it != ie; ++it) {
      Instruction *inst = *it;
      uint64_t best, cur = best = sm.getIndexedValue(stats::minDistToUncovered,
                                                     infos.getInfo(inst).id);
      unsigned bestThrough = 0;

      if (isa<CallInst>(inst) || isa<InvokeInst>(inst)) {
        std::vector<Function*> &targets = callTargets[inst];
        for (std::vector<Function*>::iterator fnIt = targets.begin(),
               ie = targets.end(); fnIt != ie; ++fnIt) {
          uint64_t dist = functionShortestPath[*fnIt];
          if (dist) {
            dist = 1+dist; // count instruction itself
            if (bestThrough==0 || dist<bestThrough)
              bestThrough = dist;
          }

          if (!(*fnIt)->isDeclaration()) {
            uint64_t calleeDist = sm.getIndexedValue(stats::minDistToUncovered,
                                                     infos.getFunctionInfo(*fnIt).id);
            if (calleeDist) {
              calleeDist = 1+calleeDist; // count instruction itself
              if (best==0 || calleeDist<best)
                best = calleeDist;
            }
          }
        }
      } else {
        bestThrough = 1;
      }

      if (bestThrough) {
        std::vector<Instruction*> succs = getSuccs(inst);
        for (std::vector<Instruction*>::iterator it2 = succs.begin(),
               ie = succs.end(); it2 != ie; ++it2) {
          uint64_t dist = sm.getIndexedValue(stats::minDistToUncovered,
                                             infos.getInfo(*it2).id);
          if (dist) {
            uint64_t val = bestThrough + dist;
            if (best==0 || val<best)
              best = val;
          }
        }
      }

      if (best != cur) {
        sm.setIndexedValue(stats::minDistToUncovered,
                           infos.getInfo(inst).id,
                           best);
        changed = true;
      }
    }
  } while (changed);

  for (std::set<ExecutionState*>::iterator it = executor.states.begin(),
         ie = executor.states.end(); it != ie; ++it) {
    ExecutionState *es = *it;
    uint64_t currentFrameMinDist = 0;
#if MULTITHREAD
    for (Thread::stack_ty::iterator sfIt = es->stack().begin(),
           sf_ie = es->stack().end(); sfIt != sf_ie; ++sfIt) {
      Thread::stack_ty::iterator next = sfIt + 1;
      KInstIterator kii;

      if (next==es->stack().end()) {
        kii = es->pc();
#else
    for (ExecutionState::stack_ty::iterator sfIt = es->stack.begin(),
           sf_ie = es->stack.end(); sfIt != sf_ie; ++sfIt) {
      ExecutionState::stack_ty::iterator next = sfIt + 1;
      KInstIterator kii;

      if (next==es->stack.end()) {
        kii = es->pc;
#endif
      } else {
        kii = next->caller;
        ++kii;
      }

      sfIt->minDistToUncoveredOnReturn = currentFrameMinDist;

      currentFrameMinDist = computeMinDistToUncovered(kii,
                                                      currentFrameMinDist);
    }
  }
}
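// A toy, self-contained illustration (hypothetical data, plain C++) of the
// fixed-point relaxation used twice above: each instruction's distance is the
// cheapest step cost through it plus the best successor distance, with 0
// meaning "no return reachable yet", iterated until nothing changes. (The
// real code reverses the instruction order first so it converges faster.)
#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  // Instruction i's successors; instruction 3 is a 'ret' (distance 1).
  std::vector<std::vector<int>> succs = {{1}, {2}, {3}, {}};
  std::vector<uint64_t> dist = {0, 0, 0, 1}; // 0 = unreachable so far

  bool changed;
  do {
    changed = false;
    for (int i = 0; i < 4; ++i) {
      uint64_t best = dist[i];
      for (int s : succs[i])
        if (dist[s] && (best == 0 || 1 + dist[s] < best))
          best = 1 + dist[s]; // step cost 1 through instruction i
      if (best != dist[i]) { dist[i] = best; changed = true; }
    }
  } while (changed);

  for (int i = 0; i < 4; ++i)
    std::cout << "minDistToReturn[" << i << "] = " << dist[i] << "\n";
  // Prints 4, 3, 2, 1: each instruction's shortest path to the ret.
  return 0;
}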
/// AddReadAttrs - Deduce readonly/readnone attributes for the SCC.
bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) {
  SmallPtrSet<Function*, 8> SCCNodes;

  // Fill SCCNodes with the elements of the SCC. Used for quickly
  // looking up whether a given CallGraphNode is in this SCC.
  for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I)
    SCCNodes.insert((*I)->getFunction());

  // Check if any of the functions in the SCC read or write memory. If they
  // write memory then they can't be marked readnone or readonly.
  bool ReadsMemory = false;
  for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
    Function *F = (*I)->getFunction();
    if (F == 0)
      // External node - may write memory. Just give up.
      return false;

    AliasAnalysis::ModRefBehavior MRB = AA->getModRefBehavior(F);
    if (MRB == AliasAnalysis::DoesNotAccessMemory)
      // Already perfect!
      continue;

    // Definitions with weak linkage may be overridden at linktime with
    // something that writes memory, so treat them like declarations.
    if (F->isDeclaration() || F->mayBeOverridden()) {
      if (!AliasAnalysis::onlyReadsMemory(MRB))
        // May write memory. Just give up.
        return false;

      ReadsMemory = true;
      continue;
    }

    // Scan the function body for instructions that may read or write memory.
    for (inst_iterator II = inst_begin(F), E = inst_end(F); II != E; ++II) {
      Instruction *I = &*II;

      // Some instructions can be ignored even if they read or write memory.
      // Detect these now, skipping to the next instruction if one is found.
      CallSite CS(cast<Value>(I));
      if (CS) {
        // Ignore calls to functions in the same SCC.
        if (CS.getCalledFunction() && SCCNodes.count(CS.getCalledFunction()))
          continue;
        AliasAnalysis::ModRefBehavior MRB = AA->getModRefBehavior(CS);
        // If the call doesn't access arbitrary memory, we may be able to
        // figure out something.
        if (AliasAnalysis::onlyAccessesArgPointees(MRB)) {
          // If the call does access argument pointees, check each argument.
          if (AliasAnalysis::doesAccessArgPointees(MRB))
            // Check whether all pointer arguments point to local memory, and
            // ignore calls that only access local memory.
            for (CallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end();
                 CI != CE; ++CI) {
              Value *Arg = *CI;
              if (Arg->getType()->isPointerTy()) {
                AliasAnalysis::Location Loc(Arg,
                                            AliasAnalysis::UnknownSize,
                                            I->getMetadata(LLVMContext::MD_tbaa));
                if (!AA->pointsToConstantMemory(Loc, /*OrLocal=*/true)) {
                  if (MRB & AliasAnalysis::Mod)
                    // Writes non-local memory. Give up.
                    return false;
                  if (MRB & AliasAnalysis::Ref)
                    // Ok, it reads non-local memory.
                    ReadsMemory = true;
                }
              }
            }
          continue;
        }
        // The call could access any memory. If that includes writes, give up.
        if (MRB & AliasAnalysis::Mod)
          return false;
        // If it reads, note it.
        if (MRB & AliasAnalysis::Ref)
          ReadsMemory = true;
        continue;
      } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
        // Ignore non-volatile loads from local memory. (Atomic is okay here.)
        if (!LI->isVolatile()) {
          AliasAnalysis::Location Loc = AA->getLocation(LI);
          if (AA->pointsToConstantMemory(Loc, /*OrLocal=*/true))
            continue;
        }
      } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
        // Ignore non-volatile stores to local memory. (Atomic is okay here.)
        if (!SI->isVolatile()) {
          AliasAnalysis::Location Loc = AA->getLocation(SI);
          if (AA->pointsToConstantMemory(Loc, /*OrLocal=*/true))
            continue;
        }
      } else if (VAArgInst *VI = dyn_cast<VAArgInst>(I)) {
        // Ignore vaargs on local memory.
        AliasAnalysis::Location Loc = AA->getLocation(VI);
        if (AA->pointsToConstantMemory(Loc, /*OrLocal=*/true))
          continue;
      }

      // Any remaining instructions need to be taken seriously! Check if they
      // read or write memory.
      if (I->mayWriteToMemory())
        // Writes memory. Just give up.
        return false;

      // If this instruction may read memory, remember that.
      ReadsMemory |= I->mayReadFromMemory();
    }
  }

  // Success! Functions in this SCC do not access memory, or only read memory.
  // Give them the appropriate attribute.
  bool MadeChange = false;
  for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
    Function *F = (*I)->getFunction();
    if (F->doesNotAccessMemory())
      // Already perfect!
      continue;

    if (F->onlyReadsMemory() && ReadsMemory)
      // No change.
      continue;

    MadeChange = true;

    // Clear out any existing attributes.
    AttrBuilder B;
    B.addAttribute(Attributes::ReadOnly)
      .addAttribute(Attributes::ReadNone);
    F->removeAttribute(AttributeSet::FunctionIndex,
                       Attributes::get(F->getContext(), B));

    // Add in the new attribute.
    B.clear();
    B.addAttribute(ReadsMemory ? Attributes::ReadOnly : Attributes::ReadNone);
    F->addAttribute(AttributeSet::FunctionIndex,
                    Attributes::get(F->getContext(), B));

    if (ReadsMemory)
      ++NumReadOnly;
    else
      ++NumReadNone;
  }

  return MadeChange;
}
MapVector<Instruction *, uint64_t>
llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
                               const TargetTransformInfo *TTI) {

  // DemandedBits will give us every value's live-out bits. But we want
  // to ensure no extra casts would need to be inserted, so every DAG
  // of connected values must have the same minimum bitwidth.
  EquivalenceClasses<Value *> ECs;
  SmallVector<Value *, 16> Worklist;
  SmallPtrSet<Value *, 4> Roots;
  SmallPtrSet<Value *, 16> Visited;
  DenseMap<Value *, uint64_t> DBits;
  SmallPtrSet<Instruction *, 4> InstructionSet;
  MapVector<Instruction *, uint64_t> MinBWs;

  // Determine the roots. We work bottom-up, from truncs or icmps.
  bool SeenExtFromIllegalType = false;
  for (auto *BB : Blocks)
    for (auto &I : *BB) {
      InstructionSet.insert(&I);

      if (TTI && (isa<ZExtInst>(&I) || isa<SExtInst>(&I)) &&
          !TTI->isTypeLegal(I.getOperand(0)->getType()))
        SeenExtFromIllegalType = true;

      // Only deal with non-vector integers up to 64-bits wide.
      if ((isa<TruncInst>(&I) || isa<ICmpInst>(&I)) &&
          !I.getType()->isVectorTy() &&
          I.getOperand(0)->getType()->getScalarSizeInBits() <= 64) {
        // Don't make work for ourselves. If we know the loaded type is legal,
        // don't add it to the worklist.
        if (TTI && isa<TruncInst>(&I) && TTI->isTypeLegal(I.getType()))
          continue;

        Worklist.push_back(&I);
        Roots.insert(&I);
      }
    }
  // Early exit.
  if (Worklist.empty() || (TTI && !SeenExtFromIllegalType))
    return MinBWs;

  // Now proceed breadth-first, unioning values together.
  while (!Worklist.empty()) {
    Value *Val = Worklist.pop_back_val();
    Value *Leader = ECs.getOrInsertLeaderValue(Val);

    if (Visited.count(Val))
      continue;
    Visited.insert(Val);

    // Non-instructions terminate a chain successfully.
    if (!isa<Instruction>(Val))
      continue;
    Instruction *I = cast<Instruction>(Val);

    // If we encounter a type that is larger than 64 bits, we can't represent
    // it so bail out.
    if (DB.getDemandedBits(I).getBitWidth() > 64)
      return MapVector<Instruction *, uint64_t>();

    uint64_t V = DB.getDemandedBits(I).getZExtValue();
    DBits[Leader] |= V;
    DBits[I] = V;

    // Casts, loads and instructions outside of our range terminate a chain
    // successfully.
    if (isa<SExtInst>(I) || isa<ZExtInst>(I) || isa<LoadInst>(I) ||
        !InstructionSet.count(I))
      continue;

    // Unsafe casts terminate a chain unsuccessfully. We can't do anything
    // useful with bitcasts, ptrtoints or inttoptrs and it'd be unsafe to
    // transform anything that relies on them.
    if (isa<BitCastInst>(I) || isa<PtrToIntInst>(I) || isa<IntToPtrInst>(I) ||
        !I->getType()->isIntegerTy()) {
      DBits[Leader] |= ~0ULL;
      continue;
    }

    // We don't modify the types of PHIs. Reductions will already have been
    // truncated if possible, and inductions' sizes will have been chosen by
    // indvars.
    if (isa<PHINode>(I))
      continue;

    if (DBits[Leader] == ~0ULL)
      // All bits demanded, no point continuing.
      continue;

    for (Value *O : cast<User>(I)->operands()) {
      ECs.unionSets(Leader, O);
      Worklist.push_back(O);
    }
  }

  // Now we've discovered all values, walk them to see if there are
  // any users we didn't see. If there are, we can't optimize that
  // chain.
  for (auto &I : DBits)
    for (auto *U : I.first->users())
      if (U->getType()->isIntegerTy() && DBits.count(U) == 0)
        DBits[ECs.getOrInsertLeaderValue(I.first)] |= ~0ULL;

  for (auto I = ECs.begin(), E = ECs.end(); I != E; ++I) {
    uint64_t LeaderDemandedBits = 0;
    for (auto MI = ECs.member_begin(I), ME = ECs.member_end(); MI != ME; ++MI)
      LeaderDemandedBits |= DBits[*MI];

    uint64_t MinBW = (sizeof(LeaderDemandedBits) * 8) -
                     llvm::countLeadingZeros(LeaderDemandedBits);
    // Round up to a power of 2.
    if (!isPowerOf2_64((uint64_t)MinBW))
      MinBW = NextPowerOf2(MinBW);

    // We don't modify the types of PHIs. Reductions will already have been
    // truncated if possible, and inductions' sizes will have been chosen by
    // indvars.
    // If we are required to shrink a PHI, abandon this entire equivalence
    // class.
    bool Abort = false;
    for (auto MI = ECs.member_begin(I), ME = ECs.member_end(); MI != ME; ++MI)
      if (isa<PHINode>(*MI) &&
          MinBW < (*MI)->getType()->getScalarSizeInBits()) {
        Abort = true;
        break;
      }
    if (Abort)
      continue;

    for (auto MI = ECs.member_begin(I), ME = ECs.member_end(); MI != ME; ++MI) {
      if (!isa<Instruction>(*MI))
        continue;
      Type *Ty = (*MI)->getType();
      if (Roots.count(*MI))
        Ty = cast<Instruction>(*MI)->getOperand(0)->getType();
      if (MinBW < Ty->getScalarSizeInBits())
        MinBWs[cast<Instruction>(*MI)] = MinBW;
    }
  }

  return MinBWs;
}
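// A small stand-alone check (plain C++, assuming C++20 <bit>) of the minimum
// bitwidth computation above: the width is the position of the highest
// demanded bit, rounded up to a power of two.
#include <bit>
#include <cassert>
#include <cstdint>

static uint64_t minBitWidth(uint64_t DemandedBits) {
  uint64_t MinBW = 64 - std::countl_zero(DemandedBits);
  if (!std::has_single_bit(MinBW)) // round up to a power of 2
    MinBW = std::bit_ceil(MinBW);
  return MinBW;
}

int main() {
  assert(minBitWidth(0x00FF) == 8);  // bits 0..7 demanded -> i8
  assert(minBitWidth(0x01FF) == 16); // 9 bits demanded -> round up to i16
  assert(minBitWidth(0x1) == 1);     // one bit -> i1
  assert(minBitWidth(~0ULL) == 64);  // all bits demanded -> i64
  return 0;
}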
// Analyze interleaved accesses and collect them into interleaved load and
// store groups.
//
// When generating code for an interleaved load group, we effectively hoist all
// loads in the group to the location of the first load in program order. When
// generating code for an interleaved store group, we sink all stores to the
// location of the last store. This code motion can change the order of load
// and store instructions and may break dependences.
//
// The code generation strategy mentioned above ensures that we won't violate
// any write-after-read (WAR) dependences.
//
// E.g., for the WAR dependence:  a = A[i];  // (1)
//                                A[i] = b;  // (2)
//
// The store group of (2) is always inserted at or below (2), and the load
// group of (1) is always inserted at or above (1). Thus, the instructions will
// never be reordered. All other dependences are checked to ensure the
// correctness of the instruction reordering.
//
// The algorithm visits all memory accesses in the loop in bottom-up program
// order. Program order is established by traversing the blocks in the loop in
// reverse postorder when collecting the accesses.
//
// We visit the memory accesses in bottom-up order because it can simplify the
// construction of store groups in the presence of write-after-write (WAW)
// dependences.
//
// E.g., for the WAW dependence:  A[i] = a;      // (1)
//                                A[i] = b;      // (2)
//                                A[i + 1] = c;  // (3)
//
// We will first create a store group with (3) and (2). (1) can't be added to
// this group because it and (2) are dependent. However, (1) can be grouped
// with other accesses that may precede it in program order. Note that a
// bottom-up order does not imply that WAW dependences should not be checked.
void InterleavedAccessInfo::analyzeInterleaving(
    bool EnablePredicatedInterleavedMemAccesses) {
  LLVM_DEBUG(dbgs() << "LV: Analyzing interleaved accesses...\n");
  const ValueToValueMap &Strides = LAI->getSymbolicStrides();

  // Holds all accesses with a constant stride.
  MapVector<Instruction *, StrideDescriptor> AccessStrideInfo;
  collectConstStrideAccesses(AccessStrideInfo, Strides);

  if (AccessStrideInfo.empty())
    return;

  // Collect the dependences in the loop.
  collectDependences();

  // Holds all interleaved store groups temporarily.
  SmallSetVector<InterleaveGroup *, 4> StoreGroups;
  // Holds all interleaved load groups temporarily.
  SmallSetVector<InterleaveGroup *, 4> LoadGroups;

  // Search in bottom-up program order for pairs of accesses (A and B) that can
  // form interleaved load or store groups. In the algorithm below, access A
  // precedes access B in program order. We initialize a group for B in the
  // outer loop of the algorithm, and then in the inner loop, we attempt to
  // insert each A into B's group if:
  //
  //  1. A and B have the same stride,
  //  2. A and B have the same memory object size, and
  //  3. A belongs in B's group according to its distance from B.
  //
  // Special care is taken to ensure group formation will not break any
  // dependences.
  for (auto BI = AccessStrideInfo.rbegin(), E = AccessStrideInfo.rend();
       BI != E; ++BI) {
    Instruction *B = BI->first;
    StrideDescriptor DesB = BI->second;

    // Initialize a group for B if it has an allowable stride. Even if we don't
    // create a group for B, we continue with the bottom-up algorithm to ensure
    // we don't break any of B's dependences.
    InterleaveGroup *Group = nullptr;
    if (isStrided(DesB.Stride) &&
        (!isPredicated(B->getParent()) ||
         EnablePredicatedInterleavedMemAccesses)) {
      Group = getInterleaveGroup(B);
      if (!Group) {
        LLVM_DEBUG(dbgs() << "LV: Creating an interleave group with:" << *B
                          << '\n');
        Group = createInterleaveGroup(B, DesB.Stride, DesB.Align);
      }
      if (B->mayWriteToMemory())
        StoreGroups.insert(Group);
      else
        LoadGroups.insert(Group);
    }

    for (auto AI = std::next(BI); AI != E; ++AI) {
      Instruction *A = AI->first;
      StrideDescriptor DesA = AI->second;

      // Our code motion strategy implies that we can't have dependences
      // between accesses in an interleaved group and other accesses located
      // between the first and last member of the group. Note that this also
      // means that a group can't have more than one member at a given offset.
      // The accesses in a group can have dependences with other accesses, but
      // we must ensure we don't extend the boundaries of the group such that
      // we encompass those dependent accesses.
      //
      // For example, assume we have the sequence of accesses shown below in a
      // stride-2 loop:
      //
      //  (1, 2) is a group | A[i]   = a;  // (1)
      //                    | A[i-1] = b;  // (2) |
      //                      A[i-3] = c;  // (3)
      //                      A[i]   = d;  // (4) | (2, 4) is not a group
      //
      // Because accesses (2) and (3) are dependent, we can group (2) with (1)
      // but not with (4). If we did, the dependent access (3) would be within
      // the boundaries of the (2, 4) group.
      if (!canReorderMemAccessesForInterleavedGroups(&*AI, &*BI)) {
        // If a dependence exists and A is already in a group, we know that A
        // must be a store since A precedes B and WAR dependences are allowed.
        // Thus, A would be sunk below B. We release A's group to prevent this
        // illegal code motion. A will then be free to form another group with
        // instructions that precede it.
        if (isInterleaved(A)) {
          InterleaveGroup *StoreGroup = getInterleaveGroup(A);
          StoreGroups.remove(StoreGroup);
          releaseGroup(StoreGroup);
        }

        // If a dependence exists and A is not already in a group (or it was
        // and we just released it), B might be hoisted above A (if B is a
        // load) or another store might be sunk below A (if B is a store). In
        // either case, we can't add additional instructions to B's group. B
        // will only form a group with instructions that it precedes.
        break;
      }

      // At this point, we've checked for illegal code motion. If either A or B
      // isn't strided, there's nothing left to do.
      if (!isStrided(DesA.Stride) || !isStrided(DesB.Stride))
        continue;

      // Ignore A if it's already in a group or isn't the same kind of memory
      // operation as B.
      // Note that mayReadFromMemory() isn't mutually exclusive to
      // mayWriteToMemory in the case of atomic loads. We shouldn't see those
      // here, canVectorizeMemory() should have returned false - except for the
      // case we asked for optimization remarks.
      if (isInterleaved(A) ||
          (A->mayReadFromMemory() != B->mayReadFromMemory()) ||
          (A->mayWriteToMemory() != B->mayWriteToMemory()))
        continue;

      // Check rules 1 and 2. Ignore A if its stride or size is different from
      // that of B.
      if (DesA.Stride != DesB.Stride || DesA.Size != DesB.Size)
        continue;

      // Ignore A if the memory object of A and B don't belong to the same
      // address space.
      if (getLoadStoreAddressSpace(A) != getLoadStoreAddressSpace(B))
        continue;

      // Calculate the distance from A to B.
      const SCEVConstant *DistToB = dyn_cast<SCEVConstant>(
          PSE.getSE()->getMinusSCEV(DesA.Scev, DesB.Scev));
      if (!DistToB)
        continue;
      int64_t DistanceToB = DistToB->getAPInt().getSExtValue();

      // Check rule 3. Ignore A if its distance to B is not a multiple of the
      // size.
      if (DistanceToB % static_cast<int64_t>(DesB.Size))
        continue;

      // All members of a predicated interleave-group must have the same
      // predicate, and currently must reside in the same BB.
      BasicBlock *BlockA = A->getParent();
      BasicBlock *BlockB = B->getParent();
      if ((isPredicated(BlockA) || isPredicated(BlockB)) &&
          (!EnablePredicatedInterleavedMemAccesses || BlockA != BlockB))
        continue;

      // The index of A is the index of B plus A's distance to B in multiples
      // of the size.
      int IndexA =
          Group->getIndex(B) + DistanceToB / static_cast<int64_t>(DesB.Size);

      // Try to insert A into B's group.
      if (Group->insertMember(A, IndexA, DesA.Align)) {
        LLVM_DEBUG(dbgs() << "LV: Inserted:" << *A << '\n'
                          << "    into the interleave group with" << *B
                          << '\n');
        InterleaveGroupMap[A] = Group;

        // Set the first load in program order as the insert position.
        if (A->mayReadFromMemory())
          Group->setInsertPos(A);
      }
    } // Iteration over A accesses.
  }   // Iteration over B accesses.

  // Remove interleaved store groups with gaps.
  for (InterleaveGroup *Group : StoreGroups)
    if (Group->getNumMembers() != Group->getFactor()) {
      LLVM_DEBUG(
          dbgs() << "LV: Invalidate candidate interleaved store group due "
                    "to gaps.\n");
      releaseGroup(Group);
    }
  // Remove interleaved groups with gaps (currently only loads) whose memory
  // accesses may wrap around. We have to revisit the getPtrStride analysis,
  // this time with ShouldCheckWrap=true, since collectConstStrideAccesses does
  // not check wrapping (see documentation there).
  // FORNOW we use Assume=false;
  // TODO: Change to Assume=true but making sure we don't exceed the threshold
  // of runtime SCEV assumptions checks (thereby potentially failing to
  // vectorize altogether).
  // Additional optional optimizations:
  // TODO: If we are peeling the loop and we know that the first pointer
  // doesn't wrap, then we can deduce that all pointers in the group don't
  // wrap. This means that we can forcefully peel the loop in order to only
  // have to check the first pointer for no-wrap. When we'll change to use
  // Assume=true we'll only need at most one runtime check per interleaved
  // group.
  for (InterleaveGroup *Group : LoadGroups) {
    // Case 1: A full group. Can skip the checks; for full groups, if the wide
    // load would wrap around the address space we would do a memory access at
    // nullptr even without the transformation.
    if (Group->getNumMembers() == Group->getFactor())
      continue;

    // Case 2: If the first and last members of the group don't wrap, this
    // implies that all the pointers in the group don't wrap.
    // So we check only group member 0 (which is always guaranteed to exist),
    // and group member Factor - 1; if the latter doesn't exist we rely on
    // peeling (if it is a non-reversed access -- see Case 3).
    Value *FirstMemberPtr = getLoadStorePointerOperand(Group->getMember(0));
    if (!getPtrStride(PSE, FirstMemberPtr, TheLoop, Strides, /*Assume=*/false,
                      /*ShouldCheckWrap=*/true)) {
      LLVM_DEBUG(
          dbgs() << "LV: Invalidate candidate interleaved group due to "
                    "first group member potentially pointer-wrapping.\n");
      releaseGroup(Group);
      continue;
    }
    Instruction *LastMember = Group->getMember(Group->getFactor() - 1);
    if (LastMember) {
      Value *LastMemberPtr = getLoadStorePointerOperand(LastMember);
      if (!getPtrStride(PSE, LastMemberPtr, TheLoop, Strides, /*Assume=*/false,
                        /*ShouldCheckWrap=*/true)) {
        LLVM_DEBUG(
            dbgs() << "LV: Invalidate candidate interleaved group due to "
                      "last group member potentially pointer-wrapping.\n");
        releaseGroup(Group);
      }
    } else {
      // Case 3: A non-reversed interleaved load group with gaps: we need
      // to execute at least one scalar epilogue iteration. This will ensure
      // we don't speculatively access memory out-of-bounds. We only need
      // to look for a member at index factor - 1, since every group must have
      // a member at index zero.
      if (Group->isReverse()) {
        LLVM_DEBUG(
            dbgs() << "LV: Invalidate candidate interleaved group due to "
                      "a reverse access with gaps.\n");
        releaseGroup(Group);
        continue;
      }
      LLVM_DEBUG(
          dbgs() << "LV: Interleaved group requires epilogue iteration.\n");
      RequiresScalarEpilogue = true;
    }
  }
}
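// A small plain-C++ sanity check (hypothetical numbers; the real code derives
// the distance via SCEV) of the member-index arithmetic above: rule 3 demands
// the A-to-B distance be a whole number of elements, and A's index is then
// B's index plus that element count.
#include <cassert>
#include <cstdint>

int main() {
  const int64_t Size = 4;   // memory object size in bytes
  int IndexB = 0;           // B's index inside its group
  int64_t DistanceToB = -8; // A's address minus B's address, in bytes
  assert(DistanceToB % Size == 0); // rule 3: a multiple of the size
  int IndexA = IndexB + DistanceToB / Size;
  assert(IndexA == -2);     // A sits two elements below B
  return 0;
}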
bool AlignmentFromAssumptions::processAssumption(CallInst *ACall) { Value *AAPtr; const SCEV *AlignSCEV, *OffSCEV; if (!extractAlignmentInfo(ACall, AAPtr, AlignSCEV, OffSCEV)) return false; const SCEV *AASCEV = SE->getSCEV(AAPtr); // Apply the assumption to all other users of the specified pointer. SmallPtrSet<Instruction *, 32> Visited; SmallVector<Instruction*, 16> WorkList; for (User *J : AAPtr->users()) { if (J == ACall) continue; if (Instruction *K = dyn_cast<Instruction>(J)) if (isValidAssumeForContext(ACall, K, DL, DT)) WorkList.push_back(K); } while (!WorkList.empty()) { Instruction *J = WorkList.pop_back_val(); if (LoadInst *LI = dyn_cast<LoadInst>(J)) { unsigned NewAlignment = getNewAlignment(AASCEV, AlignSCEV, OffSCEV, LI->getPointerOperand(), SE); if (NewAlignment > LI->getAlignment()) { LI->setAlignment(NewAlignment); ++NumLoadAlignChanged; } } else if (StoreInst *SI = dyn_cast<StoreInst>(J)) { unsigned NewAlignment = getNewAlignment(AASCEV, AlignSCEV, OffSCEV, SI->getPointerOperand(), SE); if (NewAlignment > SI->getAlignment()) { SI->setAlignment(NewAlignment); ++NumStoreAlignChanged; } } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(J)) { unsigned NewDestAlignment = getNewAlignment(AASCEV, AlignSCEV, OffSCEV, MI->getDest(), SE); // For memory transfers, we need a common alignment for both the // source and destination. If we have a new alignment for this // instruction, but only for one operand, save it. If we reach the // other operand through another assumption later, then we may // change the alignment at that point. if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) { unsigned NewSrcAlignment = getNewAlignment(AASCEV, AlignSCEV, OffSCEV, MTI->getSource(), SE); DenseMap<MemTransferInst *, unsigned>::iterator DI = NewDestAlignments.find(MTI); unsigned AltDestAlignment = (DI == NewDestAlignments.end()) ? 0 : DI->second; DenseMap<MemTransferInst *, unsigned>::iterator SI = NewSrcAlignments.find(MTI); unsigned AltSrcAlignment = (SI == NewSrcAlignments.end()) ? 0 : SI->second; DEBUG(dbgs() << "\tmem trans: " << NewDestAlignment << " " << AltDestAlignment << " " << NewSrcAlignment << " " << AltSrcAlignment << "\n"); // Of these four alignments, pick the largest possible... unsigned NewAlignment = 0; if (NewDestAlignment <= std::max(NewSrcAlignment, AltSrcAlignment)) NewAlignment = std::max(NewAlignment, NewDestAlignment); if (AltDestAlignment <= std::max(NewSrcAlignment, AltSrcAlignment)) NewAlignment = std::max(NewAlignment, AltDestAlignment); if (NewSrcAlignment <= std::max(NewDestAlignment, AltDestAlignment)) NewAlignment = std::max(NewAlignment, NewSrcAlignment); if (AltSrcAlignment <= std::max(NewDestAlignment, AltDestAlignment)) NewAlignment = std::max(NewAlignment, AltSrcAlignment); if (NewAlignment > MI->getAlignment()) { MI->setAlignment(ConstantInt::get(Type::getInt32Ty( MI->getParent()->getContext()), NewAlignment)); ++NumMemIntAlignChanged; } NewDestAlignments.insert(std::make_pair(MTI, NewDestAlignment)); NewSrcAlignments.insert(std::make_pair(MTI, NewSrcAlignment)); } else if (NewDestAlignment > MI->getAlignment()) { assert((!isa<MemIntrinsic>(MI) || isa<MemSetInst>(MI)) && "Unknown memory intrinsic"); MI->setAlignment(ConstantInt::get(Type::getInt32Ty( MI->getParent()->getContext()), NewDestAlignment)); ++NumMemIntAlignChanged; } } // Now that we've updated that use of the pointer, look for other uses of // the pointer to update. 
Visited.insert(J); for (User *UJ : J->users()) { Instruction *K = cast<Instruction>(UJ); if (!Visited.count(K) && isValidAssumeForContext(ACall, K, DL, DT)) WorkList.push_back(K); } } return true; }
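The four-way maximum inside processAssumption is easy to misread, so here is the same selection as a free-standing function: given the newly derived and previously recorded ("alternate") alignments for both operands of a memory transfer, pick the largest alignment that is simultaneously achievable for source and destination. A candidate for one operand only counts if some known alignment of the other operand is at least as large. Names are illustrative, not the pass's API:

#include <algorithm>

unsigned pickTransferAlignment(unsigned NewDest, unsigned AltDest,
                               unsigned NewSrc, unsigned AltSrc) {
  unsigned Best = 0;
  if (NewDest <= std::max(NewSrc, AltSrc)) Best = std::max(Best, NewDest);
  if (AltDest <= std::max(NewSrc, AltSrc)) Best = std::max(Best, AltDest);
  if (NewSrc <= std::max(NewDest, AltDest)) Best = std::max(Best, NewSrc);
  if (AltSrc <= std::max(NewDest, AltDest)) Best = std::max(Best, AltSrc);
  return Best;
}

This mirrors the pass's conservative rule: a mem-transfer intrinsic carries a single alignment, so it can only be raised to a value both operands are known to satisfy.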
virtual bool runOnModule(Module &M) { LLVMContext &C = M.getContext(); Function *printError_func = (Function*)M.getOrInsertFunction("printErrorMessage", Type::getVoidTy(C), NULL); BasicBlock* entryBlock = BasicBlock::Create(C, "", printError_func); IRBuilder<> builder(entryBlock); Constant *msg = ConstantArray::get(C, "ERROR! Array Index Out of Bounds", true); Constant *zero_32 = Constant::getNullValue(IntegerType::getInt32Ty(C)); Constant *gep_params[] = {zero_32, zero_32}; GlobalVariable *errorMsg = new GlobalVariable(M, msg->getType(), true, GlobalValue::InternalLinkage, msg, "errorMsg"); Function *puts_func = (Function*)(M.getOrInsertFunction("puts", IntegerType::getInt32Ty(C), PointerType::getUnqual(IntegerType::getInt8Ty(C)), NULL)); Constant *msgptr = ConstantExpr::getGetElementPtr(errorMsg, gep_params); Value *puts_params[] = {msgptr}; CallInst *puts_call = builder.CreateCall(puts_func, puts_params); puts_call->setTailCall(false); Function *exit_func = cast<Function>(M.getOrInsertFunction("exit", Type::getVoidTy(C), Type::getInt32Ty(C), NULL)); Value *exit_val = ConstantInt::get(IntegerType::getInt32Ty(C), 1); //create exit block; it calls the exit library function (the message itself is printed in the entry block) BasicBlock* exitBlock = BasicBlock::Create(C, "exitBlock", printError_func); builder.CreateBr(exitBlock); builder.SetInsertPoint(exitBlock); builder.CreateCall(exit_func, exit_val); builder.CreateBr(exitBlock); //self-branch merely terminates the block; exit never returns int checksInserted = 0; for (Module::iterator MI = M.begin(), ME = M.end(); MI != ME; ++MI) { //instrument only function definitions; leave declarations alone if (!MI->isDeclaration()) { for (inst_iterator I = inst_begin(*MI), E = inst_end(*MI); I != E; ++I) { Instruction *inst = &*I; if(GetElementPtrInst *gep = dyn_cast<GetElementPtrInst>(inst)) { if (const ArrayType *ar = dyn_cast<ArrayType>(gep->getPointerOperandType()->getElementType())) { //increment checks inserted counter checksInserted++; //create split in basic block for function call insertion (branch) Instruction* next = inst->getNextNode(); BasicBlock* oldBlock = inst->getParent(); BasicBlock* newBlock = SplitBlock(oldBlock, next, this); //get upper limit and index used unsigned upperLim = ar->getNumElements(); int index = gep->getNumOperands() - 1; Value *vIndexUsed = gep->getOperand(index); Value *vUpperLimit = ConstantInt::get(vIndexUsed->getType(), upperLim); BasicBlock* checkUpperBlock = BasicBlock::Create(C, "checkUpperBlock", MI, newBlock); BasicBlock* checkLowerBlock = BasicBlock::Create(C, "checkLowerBlock", MI, checkUpperBlock); builder.SetInsertPoint(oldBlock); //remove old terminator TerminatorInst* term = oldBlock->getTerminator(); term->eraseFromParent(); //insert new one builder.CreateBr(checkUpperBlock); //configure upper bound test builder.SetInsertPoint(checkUpperBlock); Value* condUpperInst = builder.CreateICmpSLT(vIndexUsed, vUpperLimit, "checkUpperBounds"); BasicBlock* errorBlock = BasicBlock::Create(C, "errorBlock", MI, newBlock); builder.CreateCondBr(condUpperInst, checkLowerBlock, errorBlock); //configure lower bound test builder.SetInsertPoint(checkLowerBlock); Value *vLowerLimit = ConstantInt::get(vIndexUsed->getType(), -1); Value *condLowerInst = builder.CreateICmpSGT(vIndexUsed, vLowerLimit, "checkLowerBounds"); builder.CreateCondBr(condLowerInst, newBlock, errorBlock); //setup error block. All it does is call printErrorMessage, which prints and exits builder.SetInsertPoint(errorBlock); builder.CreateCall(printError_func); builder.CreateBr(errorBlock); //self-branch merely terminates the block; printErrorMessage never returns } } } } } errs() << "This pass has inserted " << checksInserted << " checks\n"; return true; }
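At the source level, the instrumentation above makes every constant-size array access dominated by a two-sided guard. A sketch of the guard's semantics, not code the pass emits (guardedLoad is a hypothetical name):

#include <cstdio>
#include <cstdlib>

// Semantic shape of the inserted checkUpper/checkLower/error blocks for an
// access a[idx] where the array type has n elements.
int guardedLoad(const int *a, long idx, long n) {
  if (!(idx < n))   // checkUpperBlock: idx must be < n
    goto error;
  if (!(idx > -1))  // checkLowerBlock: idx must be > -1
    goto error;
  return a[idx];    // original access, now in the split-off block
error:              // errorBlock: print and exit
  std::puts("ERROR! Array Index Out of Bounds");
  std::exit(1);
}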
/// setupEntryBlockAndCallSites - Setup the entry block by creating and filling /// the function context and marking the call sites with the appropriate /// values. These values are used by the DWARF EH emitter. bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) { SmallVector<ReturnInst *, 16> Returns; SmallVector<InvokeInst *, 16> Invokes; SmallSetVector<LandingPadInst *, 16> LPads; // Look through the terminators of the basic blocks to find invokes. for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) { if (Function *Callee = II->getCalledFunction()) if (Callee->isIntrinsic() && Callee->getIntrinsicID() == Intrinsic::donothing) { // Remove the NOP invoke. BranchInst::Create(II->getNormalDest(), II); II->eraseFromParent(); continue; } Invokes.push_back(II); LPads.insert(II->getUnwindDest()->getLandingPadInst()); } else if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) { Returns.push_back(RI); } if (Invokes.empty()) return false; NumInvokes += Invokes.size(); lowerIncomingArguments(F); lowerAcrossUnwindEdges(F, Invokes); Value *FuncCtx = setupFunctionContext(F, makeArrayRef(LPads.begin(), LPads.end())); BasicBlock *EntryBB = F.begin(); IRBuilder<> Builder(EntryBB->getTerminator()); // Get a reference to the jump buffer. Value *JBufPtr = Builder.CreateConstGEP2_32(FuncCtx, 0, 5, "jbuf_gep"); // Save the frame pointer. Value *FramePtr = Builder.CreateConstGEP2_32(JBufPtr, 0, 0, "jbuf_fp_gep"); Value *Val = Builder.CreateCall(FrameAddrFn, Builder.getInt32(0), "fp"); Builder.CreateStore(Val, FramePtr, /*isVolatile=*/true); // Save the stack pointer. Value *StackPtr = Builder.CreateConstGEP2_32(JBufPtr, 0, 2, "jbuf_sp_gep"); Val = Builder.CreateCall(StackAddrFn, "sp"); Builder.CreateStore(Val, StackPtr, /*isVolatile=*/true); // Call the setjmp intrinsic. It fills in the rest of the jmpbuf. Value *SetjmpArg = Builder.CreateBitCast(JBufPtr, Builder.getInt8PtrTy()); Builder.CreateCall(BuiltinSetjmpFn, SetjmpArg); // Store a pointer to the function context so that the back-end will know // where to look for it. Value *FuncCtxArg = Builder.CreateBitCast(FuncCtx, Builder.getInt8PtrTy()); Builder.CreateCall(FuncCtxFn, FuncCtxArg); // At this point, we are all set up; update the invoke instructions to mark // their call_site values. for (unsigned I = 0, E = Invokes.size(); I != E; ++I) { insertCallSiteStore(Invokes[I], I + 1); ConstantInt *CallSiteNum = ConstantInt::get(Type::getInt32Ty(F.getContext()), I + 1); // Record the call site value for the back end so it stays associated with // the invoke. CallInst::Create(CallSiteFn, CallSiteNum, "", Invokes[I]); } // Mark call instructions that aren't nounwind as no-action (call_site == // -1). Skip the entry block: before it, no function context has been // created for this function, and any unexpected exceptions thrown will go // directly to the caller's context, which is what we want anyway, so no need // to do anything here. 
for (Function::iterator BB = F.begin(), E = F.end(); ++BB != E;) for (BasicBlock::iterator I = BB->begin(), end = BB->end(); I != end; ++I) if (CallInst *CI = dyn_cast<CallInst>(I)) { if (!CI->doesNotThrow()) insertCallSiteStore(CI, -1); } else if (ResumeInst *RI = dyn_cast<ResumeInst>(I)) { insertCallSiteStore(RI, -1); } // Register the function context and make sure it's known to not throw CallInst *Register = CallInst::Create(RegisterFn, FuncCtx, "", EntryBB->getTerminator()); Register->setDoesNotThrow(); // Following any allocas not in the entry block, update the saved SP in the // jmpbuf to the new value. for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { if (BB == F.begin()) continue; for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { if (CallInst *CI = dyn_cast<CallInst>(I)) { if (CI->getCalledFunction() != StackRestoreFn) continue; } else if (!isa<AllocaInst>(I)) { continue; } Instruction *StackAddr = CallInst::Create(StackAddrFn, "sp"); StackAddr->insertAfter(I); Instruction *StoreStackAddr = new StoreInst(StackAddr, StackPtr, true); StoreStackAddr->insertAfter(StackAddr); } } // Finally, for any returns from this function, if this function contains an // invoke, add a call to unregister the function context. for (unsigned I = 0, E = Returns.size(); I != E; ++I) CallInst::Create(UnregisterFn, FuncCtx, "", Returns[I]); return true; }
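For readers unfamiliar with builtin-setjmp exception handling, this deliberately simplified sketch shows the mechanism the pass sets up: the entry block performs one setjmp into a per-function context, every potentially-unwinding call records its call-site number first, and the dispatch after the longjmp uses that number to pick a handler. The real pass works with intrinsics and a richer context record; <csetjmp> here is only an analogy:

#include <csetjmp>
#include <cstdio>

static std::jmp_buf FuncCtx; // stands in for the function context / jmpbuf
static int CallSite = -1;    // stands in for the call_site slot

static void mayThrow(int Site) {
  CallSite = Site;           // analogue of insertCallSiteStore(..., Site)
  std::longjmp(FuncCtx, 1);  // the "unwind"
}

int main() {
  if (setjmp(FuncCtx) == 0) { // entry block: one setjmp fills the context
    mayThrow(1);              // invoke with call_site == 1
  } else {
    std::printf("dispatch to landing pad for call site %d\n", CallSite);
  }
  return 0;
}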
static bool runImpl(Function &F, LazyValueInfo *LVI, DominatorTree *DT, const SimplifyQuery &SQ) { bool FnChanged = false; // Visiting in a pre-order depth-first traversal causes us to simplify early // blocks before querying later blocks (which require us to analyze early // blocks). Eagerly simplifying shallow blocks means there is strictly less // work to do for deep blocks. This also means we don't visit unreachable // blocks. for (BasicBlock *BB : depth_first(&F.getEntryBlock())) { bool BBChanged = false; for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) { Instruction *II = &*BI++; switch (II->getOpcode()) { case Instruction::Select: BBChanged |= processSelect(cast<SelectInst>(II), LVI); break; case Instruction::PHI: BBChanged |= processPHI(cast<PHINode>(II), LVI, DT, SQ); break; case Instruction::ICmp: case Instruction::FCmp: BBChanged |= processCmp(cast<CmpInst>(II), LVI); break; case Instruction::Load: case Instruction::Store: BBChanged |= processMemAccess(II, LVI); break; case Instruction::Call: case Instruction::Invoke: BBChanged |= processCallSite(CallSite(II), LVI); break; case Instruction::SRem: BBChanged |= processSRem(cast<BinaryOperator>(II), LVI); break; case Instruction::SDiv: BBChanged |= processSDiv(cast<BinaryOperator>(II), LVI); break; case Instruction::UDiv: case Instruction::URem: BBChanged |= processUDivOrURem(cast<BinaryOperator>(II), LVI); break; case Instruction::AShr: BBChanged |= processAShr(cast<BinaryOperator>(II), LVI); break; case Instruction::Add: BBChanged |= processAdd(cast<BinaryOperator>(II), LVI); break; } } Instruction *Term = BB->getTerminator(); switch (Term->getOpcode()) { case Instruction::Switch: BBChanged |= processSwitch(cast<SwitchInst>(Term), LVI, DT); break; case Instruction::Ret: { auto *RI = cast<ReturnInst>(Term); // Try to determine the return value if we can. This is mainly here to // simplify the writing of unit tests, but also helps to enable IPO by // constant folding the return values of callees. auto *RetVal = RI->getReturnValue(); if (!RetVal) break; // handle "ret void" if (isa<Constant>(RetVal)) break; // nothing to do if (auto *C = getConstantAt(RetVal, RI, LVI)) { ++NumReturns; RI->replaceUsesOfWith(RetVal, C); BBChanged = true; } } } FnChanged |= BBChanged; } return FnChanged; }
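The traversal choice above matters: visiting a block before the blocks reachable from it lets early simplifications reduce the work done for deeper blocks, and anything unreachable from the entry is never visited at all. A standalone sketch of such a walk over an adjacency list, mirroring depth_first(&F.getEntryBlock()) in spirit only:

#include <functional>
#include <vector>

// Visit each node reachable from Entry exactly once, before any of its
// not-yet-seen successors; unreachable nodes are simply skipped.
void preOrder(int Entry, const std::vector<std::vector<int>> &Succs,
              const std::function<void(int)> &Visit) {
  std::vector<char> Seen(Succs.size(), 0);
  std::vector<int> Stack{Entry};
  Seen[Entry] = 1;
  while (!Stack.empty()) {
    int BB = Stack.back();
    Stack.pop_back();
    Visit(BB);               // simplify this block first...
    for (int S : Succs[BB])  // ...then queue its successors
      if (!Seen[S]) { Seen[S] = 1; Stack.push_back(S); }
  }
}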
Value *llvm::FindAvailablePtrLoadStore(Value *Ptr, Type *AccessTy, bool AtLeastAtomic, BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan, AliasAnalysis *AA, bool *IsLoadCSE, unsigned *NumScanedInst) { if (MaxInstsToScan == 0) MaxInstsToScan = ~0U; const DataLayout &DL = ScanBB->getModule()->getDataLayout(); // Try to get the store size for the type. auto AccessSize = LocationSize::precise(DL.getTypeStoreSize(AccessTy)); Value *StrippedPtr = Ptr->stripPointerCasts(); while (ScanFrom != ScanBB->begin()) { // We must ignore debug info directives when counting (otherwise they // would affect codegen). Instruction *Inst = &*--ScanFrom; if (isa<DbgInfoIntrinsic>(Inst)) continue; // Restore ScanFrom to the expected value in case the next test succeeds. ScanFrom++; if (NumScanedInst) ++(*NumScanedInst); // Don't scan huge blocks. if (MaxInstsToScan-- == 0) return nullptr; --ScanFrom; // If this is a load of Ptr, the loaded value is available. // (This is true even if the load is volatile or atomic, although // those cases are unlikely.) if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) if (AreEquivalentAddressValues( LI->getPointerOperand()->stripPointerCasts(), StrippedPtr) && CastInst::isBitOrNoopPointerCastable(LI->getType(), AccessTy, DL)) { // We can value forward from an atomic to a non-atomic, but not the // other way around. if (LI->isAtomic() < AtLeastAtomic) return nullptr; if (IsLoadCSE) *IsLoadCSE = true; return LI; } if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { Value *StorePtr = SI->getPointerOperand()->stripPointerCasts(); // If this is a store through Ptr, the value is available! // (This is true even if the store is volatile or atomic, although // those cases are unlikely.) if (AreEquivalentAddressValues(StorePtr, StrippedPtr) && CastInst::isBitOrNoopPointerCastable(SI->getValueOperand()->getType(), AccessTy, DL)) { // We can value forward from an atomic to a non-atomic, but not the // other way around. if (SI->isAtomic() < AtLeastAtomic) return nullptr; if (IsLoadCSE) *IsLoadCSE = false; return SI->getOperand(0); } // If both StrippedPtr and StorePtr reach all the way to an alloca or // global and they are different, ignore the store. This is a trivial form // of alias analysis that is important for reg2mem'd code. if ((isa<AllocaInst>(StrippedPtr) || isa<GlobalVariable>(StrippedPtr)) && (isa<AllocaInst>(StorePtr) || isa<GlobalVariable>(StorePtr)) && StrippedPtr != StorePtr) continue; // If we have alias analysis and it says the store won't modify the loaded // value, ignore the store. if (AA && !isModSet(AA->getModRefInfo(SI, StrippedPtr, AccessSize))) continue; // Otherwise this store may or may not alias the pointer; bail out. ++ScanFrom; return nullptr; } // If this is some other instruction that may clobber Ptr, bail out. if (Inst->mayWriteToMemory()) { // If alias analysis claims that it really won't modify the load, // ignore it. if (AA && !isModSet(AA->getModRefInfo(Inst, StrippedPtr, AccessSize))) continue; // May modify the pointer, bail out. ++ScanFrom; return nullptr; } } // We got to the start of the block without finding the value; we're done // for this block. return nullptr; }
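Stripped of alias analysis and atomicity, the scan above is a bounded backward walk: step from the scan point toward the block start, return the stored or loaded value on an address match, and bail out at the first instruction that might clobber the location. A toy model over simplified instructions (Instr and Kind are invented for the sketch):

#include <optional>
#include <vector>

enum class Kind { Load, Store, MayWrite, Other };
struct Instr { Kind K; int Addr; int Val; }; // toy: one address, one value

// Walk backwards from index 'From' looking for a load or store of 'Addr';
// give up after MaxInsts steps or at a potentially-clobbering instruction.
std::optional<int> findAvailableValue(const std::vector<Instr> &BB,
                                      int From, int Addr, unsigned MaxInsts) {
  for (int i = From - 1; i >= 0; --i) {
    if (MaxInsts-- == 0) return std::nullopt; // don't scan huge blocks
    const Instr &I = BB[i];
    if ((I.K == Kind::Load || I.K == Kind::Store) && I.Addr == Addr)
      return I.Val;                           // the value is available
    if (I.K == Kind::Store || I.K == Kind::MayWrite)
      return std::nullopt;                    // possible clobber: bail out
  }
  return std::nullopt; // reached the block start without finding it
}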
/// tryMergingIntoMemset - When scanning forward over instructions, we look for /// some other patterns to fold away. In particular, this looks for stores to /// neighboring locations of memory. If it sees enough consecutive ones, it /// attempts to merge them together into a memcpy/memset. Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst, Value *StartPtr, Value *ByteVal) { if (TD == 0) return 0; // Okay, so we now have a single store whose value is byte-splattable. Scan to // find all subsequent stores of the same value to offsets from the same // pointer. Join these together into ranges, so we can decide whether // contiguous blocks are stored. MemsetRanges Ranges(*TD); BasicBlock::iterator BI = StartInst; for (++BI; !isa<TerminatorInst>(BI); ++BI) { if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) { // If the instruction is readnone, ignore it, otherwise bail out. We // don't even allow readonly here because we don't want something like: // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A). if (BI->mayWriteToMemory() || BI->mayReadFromMemory()) break; continue; } if (StoreInst *NextStore = dyn_cast<StoreInst>(BI)) { // If this is a store, see if we can merge it in. if (!NextStore->isSimple()) break; // Check to see if this stored value is of the same byte-splattable value. if (ByteVal != isBytewiseValue(NextStore->getOperand(0))) break; // Check to see if this store is to a constant offset from the start ptr. int64_t Offset; if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(), Offset, *TD)) break; Ranges.addStore(Offset, NextStore); } else { MemSetInst *MSI = cast<MemSetInst>(BI); if (MSI->isVolatile() || ByteVal != MSI->getValue() || !isa<ConstantInt>(MSI->getLength())) break; // Check to see if this store is to a constant offset from the start ptr. int64_t Offset; if (!IsPointerOffset(StartPtr, MSI->getDest(), Offset, *TD)) break; Ranges.addMemSet(Offset, MSI); } } // If we have no ranges, then we just had a single store with nothing that // could be merged in. This is a very common case of course. if (Ranges.empty()) return 0; // If we had at least one store that could be merged in, add the starting // store as well. We try to avoid this unless there is at least something // interesting as a small compile-time optimization. Ranges.addInst(0, StartInst); // If we create any memsets, we put them right before the first instruction // that isn't part of the memset block. This ensures that the memset is // dominated by any addressing instruction needed by the start of the block. IRBuilder<> Builder(BI); // Now that we have full information about ranges, loop over the ranges and // emit memsets for anything big enough to be worthwhile. Instruction *AMemSet = 0; for (MemsetRanges::const_iterator I = Ranges.begin(), E = Ranges.end(); I != E; ++I) { const MemsetRange &Range = *I; if (Range.TheStores.size() == 1) continue; // If it is profitable to lower this range to memset, do so now. if (!Range.isProfitableToUseMemset(*TD)) continue; // Otherwise, we do want to transform this! Create a new memset. // Get the starting pointer of the block. 
StartPtr = Range.StartPtr; // Determine alignment unsigned Alignment = Range.Alignment; if (Alignment == 0) { Type *EltType = cast<PointerType>(StartPtr->getType())->getElementType(); Alignment = TD->getABITypeAlignment(EltType); } AMemSet = Builder.CreateMemSet(StartPtr, ByteVal, Range.End-Range.Start, Alignment); DEBUG(dbgs() << "Replace stores:\n"; for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i) dbgs() << *Range.TheStores[i] << '\n'; dbgs() << "With: " << *AMemSet << '\n'); if (!Range.TheStores.empty()) AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc()); // Zap all the stores. for (SmallVector<Instruction*, 16>::const_iterator SI = Range.TheStores.begin(), SE = Range.TheStores.end(); SI != SE; ++SI) { MD->removeInstruction(*SI); (*SI)->eraseFromParent(); } ++NumMemSetInfer; } return AMemSet; }
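At its core, the MemsetRanges bookkeeping is interval coalescing: collect (start, end) byte ranges for byte-splat stores, merge ranges that touch or overlap, and treat any merged range covering more than one original store as a memset candidate. A self-contained sketch of the coalescing step (ByteRange is illustrative; the real structure also tracks the originating stores and alignment):

#include <algorithm>
#include <vector>

struct ByteRange { long Start, End; unsigned NumStores; };

// Sort by start offset and merge touching/overlapping ranges, matching the
// "neighboring stores" idea above.
std::vector<ByteRange> coalesce(std::vector<ByteRange> Rs) {
  std::sort(Rs.begin(), Rs.end(),
            [](const ByteRange &A, const ByteRange &B) {
              return A.Start < B.Start;
            });
  std::vector<ByteRange> Out;
  for (const ByteRange &R : Rs) {
    if (!Out.empty() && R.Start <= Out.back().End) {
      Out.back().End = std::max(Out.back().End, R.End);
      Out.back().NumStores += R.NumStores;
    } else
      Out.push_back(R);
  }
  return Out; // ranges with NumStores > 1 are memset candidates
}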
// Sinks \p I from the loop \p L's preheader to its uses. Returns true if // sinking is successful. // \p LoopBlockNumber is used to sort the insertion blocks to ensure // determinism. static bool sinkInstruction(Loop &L, Instruction &I, const SmallVectorImpl<BasicBlock *> &ColdLoopBBs, const SmallDenseMap<BasicBlock *, int, 16> &LoopBlockNumber, LoopInfo &LI, DominatorTree &DT, BlockFrequencyInfo &BFI) { // Compute the set of blocks in loop L which contain a use of I. SmallPtrSet<BasicBlock *, 2> BBs; for (auto &U : I.uses()) { Instruction *UI = cast<Instruction>(U.getUser()); // We cannot sink I to PHI-uses. if (dyn_cast<PHINode>(UI)) return false; // We cannot sink I if it has uses outside of the loop. if (!L.contains(LI.getLoopFor(UI->getParent()))) return false; BBs.insert(UI->getParent()); } // findBBsToSinkInto is O(BBs.size() * ColdLoopBBs.size()). We cap the max // BBs.size() to avoid expensive computation. // FIXME: Handle code size growth for min_size and opt_size. if (BBs.size() > MaxNumberOfUseBBsForSinking) return false; // Find the set of BBs that we should insert a copy of I. SmallPtrSet<BasicBlock *, 2> BBsToSinkInto = findBBsToSinkInto(L, BBs, ColdLoopBBs, DT, BFI); if (BBsToSinkInto.empty()) return false; // Copy the final BBs into a vector and sort them using the total ordering // of the loop block numbers as iterating the set doesn't give a useful // order. No need to stable sort as the block numbers are a total ordering. SmallVector<BasicBlock *, 2> SortedBBsToSinkInto; SortedBBsToSinkInto.insert(SortedBBsToSinkInto.begin(), BBsToSinkInto.begin(), BBsToSinkInto.end()); llvm::sort(SortedBBsToSinkInto.begin(), SortedBBsToSinkInto.end(), [&](BasicBlock *A, BasicBlock *B) { return LoopBlockNumber.find(A)->second < LoopBlockNumber.find(B)->second; }); BasicBlock *MoveBB = *SortedBBsToSinkInto.begin(); // FIXME: Optimize the efficiency for cloned value replacement. The current // implementation is O(SortedBBsToSinkInto.size() * I.num_uses()). for (BasicBlock *N : makeArrayRef(SortedBBsToSinkInto).drop_front(1)) { assert(LoopBlockNumber.find(N)->second > LoopBlockNumber.find(MoveBB)->second && "BBs not sorted!"); // Clone I and replace its uses. Instruction *IC = I.clone(); IC->setName(I.getName()); IC->insertBefore(&*N->getFirstInsertionPt()); // Replaces uses of I with IC in N for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); UI != UE;) { Use &U = *UI++; auto *I = cast<Instruction>(U.getUser()); if (I->getParent() == N) U.set(IC); } // Replaces uses of I with IC in blocks dominated by N replaceDominatedUsesWith(&I, IC, DT, N); LLVM_DEBUG(dbgs() << "Sinking a clone of " << I << " To: " << N->getName() << '\n'); NumLoopSunkCloned++; } LLVM_DEBUG(dbgs() << "Sinking " << I << " To: " << MoveBB->getName() << '\n'); NumLoopSunk++; I.moveBefore(&*MoveBB->getFirstInsertionPt()); return true; }
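Because pointer-set iteration order is not deterministic run to run, the code above first materializes the set into a vector and then sorts by a precomputed block numbering; no stable sort is needed since the numbers form a total order. The same idiom in miniature, with plain standard containers standing in for the LLVM types:

#include <algorithm>
#include <map>
#include <set>
#include <vector>

// Deterministically order an unordered collection of blocks using a
// precomputed, unique numbering.
std::vector<const char *> orderBlocks(const std::set<const char *> &BBs,
                                      const std::map<const char *, int> &Num) {
  std::vector<const char *> Sorted(BBs.begin(), BBs.end());
  std::sort(Sorted.begin(), Sorted.end(),
            [&](const char *A, const char *B) {
              return Num.at(A) < Num.at(B);
            });
  return Sorted;
}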
void WorklessInstrument::CloneInnerLoop(Loop * pLoop, vector<BasicBlock *> & vecAdd, ValueToValueMapTy & VMap, set<BasicBlock *> & setCloned) { Function * pFunction = pLoop->getHeader()->getParent(); BasicBlock * pPreHeader = vecAdd[0]; SmallVector<BasicBlock *, 4> ExitBlocks; pLoop->getExitBlocks(ExitBlocks); set<BasicBlock *> setExitBlocks; for(unsigned long i = 0; i < ExitBlocks.size(); i++) { setExitBlocks.insert(ExitBlocks[i]); } for(unsigned long i = 0; i < ExitBlocks.size(); i++) { VMap[ExitBlocks[i]] = ExitBlocks[i]; } vector<BasicBlock *> ToClone; vector<BasicBlock *> BeenCloned; //clone loop ToClone.push_back(pLoop->getHeader()); while(ToClone.size() > 0) { BasicBlock * pCurrent = ToClone.back(); ToClone.pop_back(); WeakVH & BBEntry = VMap[pCurrent]; if (BBEntry) { continue; } BasicBlock * NewBB; BBEntry = NewBB = BasicBlock::Create(pCurrent->getContext(), "", pFunction); if(pCurrent->hasName()) { NewBB->setName(pCurrent->getName() + ".CPI"); } if(pCurrent->hasAddressTaken()) { errs() << "hasAddressTaken branch\n"; exit(0); } for(BasicBlock::const_iterator II = pCurrent->begin(); II != pCurrent->end(); ++II) { Instruction * NewInst = II->clone(); if(II->hasName()) { NewInst->setName(II->getName() + ".CPI"); } VMap[II] = NewInst; NewBB->getInstList().push_back(NewInst); } const TerminatorInst *TI = pCurrent->getTerminator(); for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) { ToClone.push_back(TI->getSuccessor(i)); } setCloned.insert(NewBB); BeenCloned.push_back(NewBB); } //remap values used inside the loop vector<BasicBlock *>::iterator itVecBegin = BeenCloned.begin(); vector<BasicBlock *>::iterator itVecEnd = BeenCloned.end(); for(; itVecBegin != itVecEnd; itVecBegin++) { for(BasicBlock::iterator II = (*itVecBegin)->begin(); II != (*itVecBegin)->end(); II++) { //II->dump(); RemapInstruction(II, VMap); } } //add the cloned loop to the else body BasicBlock * pElseBody = vecAdd[1]; BasicBlock * pClonedHeader = cast<BasicBlock>(VMap[pLoop->getHeader()]); BranchInst::Create(pClonedHeader, pElseBody); //errs() << pPreHeader->getName() << "\n"; for(BasicBlock::iterator II = pClonedHeader->begin(); II != pClonedHeader->end(); II++) { if(PHINode * pPHI = dyn_cast<PHINode>(II)) { for (unsigned i = 0, e = pPHI->getNumIncomingValues(); i != e; ++i) { if(pPHI->getIncomingBlock(i) == pPreHeader) { pPHI->setIncomingBlock(i, pElseBody); } } } } set<BasicBlock *> setProcessedBlock; for(unsigned long i = 0; i < ExitBlocks.size(); i++) { if(setProcessedBlock.find(ExitBlocks[i]) != setProcessedBlock.end()) { continue; } else { setProcessedBlock.insert(ExitBlocks[i]); } for(BasicBlock::iterator II = ExitBlocks[i]->begin(); II != ExitBlocks[i]->end(); II++) { if(PHINode * pPHI = dyn_cast<PHINode>(II)) { unsigned numIncoming = pPHI->getNumIncomingValues(); for(unsigned j = 0; j < numIncoming; j++) { BasicBlock * incomingBlock = pPHI->getIncomingBlock(j); if(VMap.find(incomingBlock) != VMap.end()) { Value * incomingValue = pPHI->getIncomingValue(j); if(VMap.find(incomingValue) != VMap.end()) { incomingValue = VMap[incomingValue]; } pPHI->addIncoming(incomingValue, cast<BasicBlock>(VMap[incomingBlock])); } } } } } }
Value* ModuloSchedulerDriverPass::copyLoopBodyToHeader(Instruction* inst, Instruction* induction, BasicBlock* header, int offset){ // Holds the body of the interesting loop BasicBlock *body = inst->getParent(); assert(header && "Header is null"); assert(header->getTerminator() && "Header has no terminator"); // Maps the old instructions to the new instructions DenseMap<const Value *, Value *> ValueMap; // Do the actual clone stringstream iname; iname<<"___"<<offset<<"___"; BasicBlock* newBB = CloneBasicBlock(body, ValueMap, iname.str().c_str()); // Fix the dependencies for each of the instructions in the cloned BB: // they now depend on themselves rather than on the old cloned BB. for (BasicBlock::iterator it = newBB->begin(); it != newBB->end(); ++it) { for (Instruction::op_iterator ops = (it)->op_begin(); ops != (it)->op_end(); ++ops) { if (ValueMap.end() != ValueMap.find(*ops)) { //*ops = ValueMap[*ops]; it->replaceUsesOfWith(*ops, ValueMap[*ops]); } } } // Fix the PHI nodes since they are no longer needed for (BasicBlock::iterator it = newBB->begin(); it != newBB->end(); ++it) { if (PHINode *phi = dyn_cast<PHINode>(it)) { // Take the preheader entry from the PHI node Value* prevalue = phi->getIncomingValue(phi->getBasicBlockIndex(header)); assert(prevalue && "no prevalue. Don't know what to do"); // If we are handling a PHI node which is the induction index, e.g. // A[PHI(i,0)], turn it into A[i + offset] if (ValueMap[induction] == phi) { Instruction *add = subscripts::incrementValue(prevalue, offset); //add->insertBefore(phi); This is the same as the next line (compiles on LLVM 2.1) phi->getParent()->getInstList().insert(phi, add); phi->replaceAllUsesWith(add); } else { // Eliminate the PHI node altogether; this is just a regular variable or // constant, so there is no need to increment the index. phi->replaceAllUsesWith(prevalue); } } } // Move all non-PHI and non-terminator instructions into the header. while (!newBB->getFirstNonPHI()->isTerminator()) { Instruction* cur = newBB->getFirstNonPHI(); if (dyn_cast<StoreInst>(cur)) { cur->eraseFromParent(); } else { cur->moveBefore(header->getTerminator()); } } newBB->dropAllReferences(); return ValueMap[inst]; }
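Both cloning routines above (CloneInnerLoop and copyLoopBodyToHeader) share a two-phase shape: copy every instruction while recording an old-to-new value map, then run a second pass that rewrites each cloned operand through the map so clones reference other clones instead of the originals. A minimal standalone analogue using integer value IDs, purely illustrative:

#include <unordered_map>
#include <vector>

struct Inst { std::vector<int> Ops; }; // toy instruction: operand value IDs

// Assumes the result of Body[i] is value ID i; clones get IDs starting at
// FirstNewId. Operands defined outside the cloned region are left alone.
std::vector<Inst> cloneAndRemap(const std::vector<Inst> &Body, int FirstNewId) {
  std::unordered_map<int, int> VMap;     // old value ID -> new value ID
  std::vector<Inst> Clones = Body;       // phase 1: raw copies
  for (int i = 0; i < (int)Body.size(); ++i)
    VMap[i] = FirstNewId + i;
  for (Inst &I : Clones)                 // phase 2: remap operands
    for (int &Op : I.Ops) {
      auto It = VMap.find(Op);
      if (It != VMap.end())
        Op = It->second;                 // defined in the body: remap
    }
  return Clones;
}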
/// Return true if we can evaluate the specified expression tree if the vector /// elements were shuffled in a different order. static bool CanEvaluateShuffled(Value *V, ArrayRef<int> Mask, unsigned Depth = 5) { // We can always reorder the elements of a constant. if (isa<Constant>(V)) return true; // We won't reorder vector arguments. No IPO here. Instruction *I = dyn_cast<Instruction>(V); if (!I) return false; // Two users may expect different orders of the elements. Don't try it. if (!I->hasOneUse()) return false; if (Depth == 0) return false; switch (I->getOpcode()) { case Instruction::Add: case Instruction::FAdd: case Instruction::Sub: case Instruction::FSub: case Instruction::Mul: case Instruction::FMul: case Instruction::UDiv: case Instruction::SDiv: case Instruction::FDiv: case Instruction::URem: case Instruction::SRem: case Instruction::FRem: case Instruction::Shl: case Instruction::LShr: case Instruction::AShr: case Instruction::And: case Instruction::Or: case Instruction::Xor: case Instruction::ICmp: case Instruction::FCmp: case Instruction::Trunc: case Instruction::ZExt: case Instruction::SExt: case Instruction::FPToUI: case Instruction::FPToSI: case Instruction::UIToFP: case Instruction::SIToFP: case Instruction::FPTrunc: case Instruction::FPExt: case Instruction::GetElementPtr: { for (Value *Operand : I->operands()) { if (!CanEvaluateShuffled(Operand, Mask, Depth-1)) return false; } return true; } case Instruction::InsertElement: { ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(2)); if (!CI) return false; int ElementNumber = CI->getLimitedValue(); // Verify that 'CI' does not occur twice in Mask. A single 'insertelement' // can't put an element into multiple indices. bool SeenOnce = false; for (int i = 0, e = Mask.size(); i != e; ++i) { if (Mask[i] == ElementNumber) { if (SeenOnce) return false; SeenOnce = true; } } return CanEvaluateShuffled(I->getOperand(0), Mask, Depth-1); } } return false; }
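The insertelement case hinges on a single property: the inserted lane's index may appear at most once in the shuffle mask, since one insertelement cannot populate two result lanes. That check in isolation:

#include <vector>

// True iff 'Element' occurs at most once in 'Mask' -- the precondition that
// lets a shuffled insertelement remain a single insertelement.
bool appearsAtMostOnce(const std::vector<int> &Mask, int Element) {
  bool SeenOnce = false;
  for (int M : Mask) {
    if (M != Element) continue;
    if (SeenOnce) return false;
    SeenOnce = true;
  }
  return true;
}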
bool ObjCARCContract::runOnFunction(Function &F) { if (!EnableARCOpts) return false; // If nothing in the Module uses ARC, don't do anything. if (!Run) return false; Changed = false; AA = &getAnalysis<AliasAnalysis>(); DT = &getAnalysis<DominatorTree>(); PA.setAA(&getAnalysis<AliasAnalysis>()); // Track whether it's ok to mark objc_storeStrong calls with the "tail" // keyword. Be conservative if the function has variadic arguments. // It seems that functions which "return twice" are also unsafe for the // "tail" argument, because they are setjmp-like, and could need to // return to an earlier stack state. bool TailOkForStoreStrongs = !F.isVarArg() && !F.callsFunctionThatReturnsTwice(); // For ObjC library calls which return their argument, replace uses of the // argument with uses of the call return value, if it dominates the use. This // reduces register pressure. SmallPtrSet<Instruction *, 4> DependingInstructions; SmallPtrSet<const BasicBlock *, 4> Visited; for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) { Instruction *Inst = &*I++; DEBUG(dbgs() << "ObjCARCContract: Visiting: " << *Inst << "\n"); // Only these library routines return their argument. In particular, // objc_retainBlock does not necessarily return its argument. InstructionClass Class = GetBasicInstructionClass(Inst); switch (Class) { case IC_FusedRetainAutorelease: case IC_FusedRetainAutoreleaseRV: break; case IC_Autorelease: case IC_AutoreleaseRV: if (ContractAutorelease(F, Inst, Class, DependingInstructions, Visited)) continue; break; case IC_Retain: // Attempt to convert retains to retainrvs if they are next to function // calls. if (!OptimizeRetainCall(F, Inst)) break; // If we succeed in our optimization, fall through. // FALLTHROUGH case IC_RetainRV: { // If we're compiling for a target which needs a special inline-asm // marker to do the retainAutoreleasedReturnValue optimization, // insert it now. if (!RetainRVMarker) break; BasicBlock::iterator BBI = Inst; BasicBlock *InstParent = Inst->getParent(); // Step up to see if the call immediately precedes the RetainRV call. // If it's an invoke, we have to cross a block boundary. And we have // to carefully dodge no-op instructions. do { if (&*BBI == InstParent->begin()) { BasicBlock *Pred = InstParent->getSinglePredecessor(); if (!Pred) goto decline_rv_optimization; BBI = Pred->getTerminator(); break; } --BBI; } while (IsNoopInstruction(BBI)); if (&*BBI == GetObjCArg(Inst)) { DEBUG(dbgs() << "ObjCARCContract: Adding inline asm marker for " "retainAutoreleasedReturnValue optimization.\n"); Changed = true; InlineAsm *IA = InlineAsm::get(FunctionType::get(Type::getVoidTy(Inst->getContext()), /*isVarArg=*/false), RetainRVMarker->getString(), /*Constraints=*/"", /*hasSideEffects=*/true); CallInst::Create(IA, "", Inst); } decline_rv_optimization: break; } case IC_InitWeak: { // objc_initWeak(p, null) => *p = null CallInst *CI = cast<CallInst>(Inst); if (IsNullOrUndef(CI->getArgOperand(1))) { Value *Null = ConstantPointerNull::get(cast<PointerType>(CI->getType())); Changed = true; new StoreInst(Null, CI->getArgOperand(0), CI); DEBUG(dbgs() << "ObjCARCContract: Old = " << *CI << "\n" << " New = " << *Null << "\n"); CI->replaceAllUsesWith(Null); CI->eraseFromParent(); } continue; } case IC_Release: ContractRelease(Inst, I); continue; case IC_User: // Be conservative if the function has any alloca instructions. // Technically we only care about escaping alloca instructions, // but this is sufficient to handle some interesting cases. 
if (isa<AllocaInst>(Inst)) TailOkForStoreStrongs = false; continue; case IC_IntrinsicUser: // Remove calls to @clang.arc.use(...). Inst->eraseFromParent(); continue; default: continue; } DEBUG(dbgs() << "ObjCARCContract: Finished List.\n\n"); // Don't use GetObjCArg because we don't want to look through bitcasts // and such; to do the replacement, the argument must have type i8*. const Value *Arg = cast<CallInst>(Inst)->getArgOperand(0); for (;;) { // If we're compiling bugpointed code, don't get in trouble. if (!isa<Instruction>(Arg) && !isa<Argument>(Arg)) break; // Look through the uses of the pointer. for (Value::const_use_iterator UI = Arg->use_begin(), UE = Arg->use_end(); UI != UE; ) { Use &U = UI.getUse(); unsigned OperandNo = UI.getOperandNo(); ++UI; // Increment UI now, because we may unlink its element. // If the call's return value dominates a use of the call's argument // value, rewrite the use to use the return value. We check for // reachability here because an unreachable call is considered to // trivially dominate itself, which would lead us to rewriting its // argument in terms of its return value, which would lead to // infinite loops in GetObjCArg. if (DT->isReachableFromEntry(U) && DT->dominates(Inst, U)) { Changed = true; Instruction *Replacement = Inst; Type *UseTy = U.get()->getType(); if (PHINode *PHI = dyn_cast<PHINode>(U.getUser())) { // For PHI nodes, insert the bitcast in the predecessor block. unsigned ValNo = PHINode::getIncomingValueNumForOperand(OperandNo); BasicBlock *BB = PHI->getIncomingBlock(ValNo); if (Replacement->getType() != UseTy) Replacement = new BitCastInst(Replacement, UseTy, "", &BB->back()); // While we're here, rewrite all edges for this PHI, rather // than just one use at a time, to minimize the number of // bitcasts we emit. for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) if (PHI->getIncomingBlock(i) == BB) { // Keep the UI iterator valid. if (&PHI->getOperandUse( PHINode::getOperandNumForIncomingValue(i)) == &UI.getUse()) ++UI; PHI->setIncomingValue(i, Replacement); } } else { if (Replacement->getType() != UseTy) Replacement = new BitCastInst(Replacement, UseTy, "", cast<Instruction>(U.getUser())); U.set(Replacement); } } } // If Arg is a no-op casted pointer, strip one level of casts and iterate. if (const BitCastInst *BI = dyn_cast<BitCastInst>(Arg)) Arg = BI->getOperand(0); else if (isa<GEPOperator>(Arg) && cast<GEPOperator>(Arg)->hasAllZeroIndices()) Arg = cast<GEPOperator>(Arg)->getPointerOperand(); else if (isa<GlobalAlias>(Arg) && !cast<GlobalAlias>(Arg)->mayBeOverridden()) Arg = cast<GlobalAlias>(Arg)->getAliasee(); else break; } } // If this function has no escaping allocas or suspicious vararg usage, // objc_storeStrong calls can be marked with the "tail" keyword. if (TailOkForStoreStrongs) for (SmallPtrSet<CallInst *, 8>::iterator I = StoreStrongCalls.begin(), E = StoreStrongCalls.end(); I != E; ++I) (*I)->setTailCall(); StoreStrongCalls.clear(); return Changed; }
Value * InstCombiner::EvaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) { // Mask.size() does not need to be equal to the number of vector elements. assert(V->getType()->isVectorTy() && "can't reorder non-vector elements"); if (isa<UndefValue>(V)) { return UndefValue::get(VectorType::get(V->getType()->getScalarType(), Mask.size())); } if (isa<ConstantAggregateZero>(V)) { return ConstantAggregateZero::get( VectorType::get(V->getType()->getScalarType(), Mask.size())); } if (Constant *C = dyn_cast<Constant>(V)) { SmallVector<Constant *, 16> MaskValues; for (int i = 0, e = Mask.size(); i != e; ++i) { if (Mask[i] == -1) MaskValues.push_back(UndefValue::get(Builder->getInt32Ty())); else MaskValues.push_back(Builder->getInt32(Mask[i])); } return ConstantExpr::getShuffleVector(C, UndefValue::get(C->getType()), ConstantVector::get(MaskValues)); } Instruction *I = cast<Instruction>(V); switch (I->getOpcode()) { case Instruction::Add: case Instruction::FAdd: case Instruction::Sub: case Instruction::FSub: case Instruction::Mul: case Instruction::FMul: case Instruction::UDiv: case Instruction::SDiv: case Instruction::FDiv: case Instruction::URem: case Instruction::SRem: case Instruction::FRem: case Instruction::Shl: case Instruction::LShr: case Instruction::AShr: case Instruction::And: case Instruction::Or: case Instruction::Xor: case Instruction::ICmp: case Instruction::FCmp: case Instruction::Trunc: case Instruction::ZExt: case Instruction::SExt: case Instruction::FPToUI: case Instruction::FPToSI: case Instruction::UIToFP: case Instruction::SIToFP: case Instruction::FPTrunc: case Instruction::FPExt: case Instruction::Select: case Instruction::GetElementPtr: { SmallVector<Value*, 8> NewOps; bool NeedsRebuild = (Mask.size() != I->getType()->getVectorNumElements()); for (int i = 0, e = I->getNumOperands(); i != e; ++i) { Value *V = EvaluateInDifferentElementOrder(I->getOperand(i), Mask); NewOps.push_back(V); NeedsRebuild |= (V != I->getOperand(i)); } if (NeedsRebuild) { return buildNew(I, NewOps); } return I; } case Instruction::InsertElement: { int Element = cast<ConstantInt>(I->getOperand(2))->getLimitedValue(); // The insertelement was inserting at Element. Figure out which element // that becomes after shuffling. The answer is guaranteed to be unique // by CanEvaluateShuffled. bool Found = false; int Index = 0; for (int e = Mask.size(); Index != e; ++Index) { if (Mask[Index] == Element) { Found = true; break; } } // If element is not in Mask, no need to handle the operand 1 (element to // be inserted). Just evaluate values in operand 0 according to Mask. if (!Found) return EvaluateInDifferentElementOrder(I->getOperand(0), Mask); Value *V = EvaluateInDifferentElementOrder(I->getOperand(0), Mask); return InsertElementInst::Create(V, I->getOperand(1), Builder->getInt32(Index), "", I); } } llvm_unreachable("failed to reorder elements of vector instruction!"); }
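For the constant path above, evaluating in a different element order is plain indexing: lane i of the result is lane Mask[i] of the input, and -1 yields an undefined lane. A plain-vector sketch, with 0 standing in for undef:

#include <vector>

// result[i] = Mask[i] == -1 ? <undef> : V[Mask[i]]; the result has
// Mask.size() lanes, which need not equal V.size(). Every non-negative
// mask entry must be a valid index into V.
std::vector<int> shuffleConstant(const std::vector<int> &V,
                                 const std::vector<int> &Mask) {
  std::vector<int> Result;
  Result.reserve(Mask.size());
  for (int M : Mask)
    Result.push_back(M == -1 ? 0 /* stand-in for undef */ : V[M]);
  return Result;
}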
// Add the remainder of the reduction-variable chain to the instruction vector // (the initial PHINode has already been added). If successful, the object is // marked as valid. void LoopReroll::SimpleLoopReduction::add(Loop *L) { assert(!Valid && "Cannot add to an already-valid chain"); // The reduction variable must be a chain of single-use instructions // (including the PHI), except for the last value (which is used by the PHI // and also outside the loop). Instruction *C = Instructions.front(); do { C = cast<Instruction>(*C->use_begin()); if (C->hasOneUse()) { if (!C->isBinaryOp()) return; if (!(isa<PHINode>(Instructions.back()) || C->isSameOperationAs(Instructions.back()))) return; Instructions.push_back(C); } } while (C->hasOneUse()); if (Instructions.size() < 2 || !C->isSameOperationAs(Instructions.back()) || C->use_begin() == C->use_end()) return; // C is now the (potential) last instruction in the reduction chain. for (Value::use_iterator UI = C->use_begin(), UIE = C->use_end(); UI != UIE; ++UI) { // The only in-loop user can be the initial PHI. if (L->contains(cast<Instruction>(*UI))) if (cast<Instruction>(*UI ) != Instructions.front()) return; } Instructions.push_back(C); Valid = true; }
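The chain collection above is a walk down unique-user links: starting at the reduction PHI, repeatedly step to the sole user while one exists; only the final value in the chain is allowed extra (out-of-loop) uses. A skeleton of that walk over a toy user graph, where Users[v] lists the users of value v (illustrative only; the real code also checks that each step is a compatible binary operation):

#include <vector>

// Follow unique-user links from 'Start'; stop when a value has zero or
// multiple users. The last element of the chain may have many users
// (e.g. the PHI plus a live-out use).
std::vector<int> singleUseChain(int Start,
                                const std::vector<std::vector<int>> &Users) {
  std::vector<int> Chain{Start};
  int C = Start;
  while (Users[C].size() == 1) {
    C = Users[C][0];
    Chain.push_back(C);
  }
  return Chain;
}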
bool PPCLoopDataPrefetch::runOnLoop(Loop *L) { bool MadeChange = false; // Only prefetch in the inner-most loop if (!L->empty()) return MadeChange; SmallPtrSet<const Value *, 32> EphValues; CodeMetrics::collectEphemeralValues(L, AC, EphValues); // Calculate the number of iterations ahead to prefetch CodeMetrics Metrics; for (Loop::block_iterator I = L->block_begin(), IE = L->block_end(); I != IE; ++I) { // If the loop already has prefetches, then assume that the user knows // what he or she is doing and don't add any more. for (BasicBlock::iterator J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J) if (CallInst *CI = dyn_cast<CallInst>(J)) if (Function *F = CI->getCalledFunction()) if (F->getIntrinsicID() == Intrinsic::prefetch) return MadeChange; Metrics.analyzeBasicBlock(*I, *TTI, EphValues); } unsigned LoopSize = Metrics.NumInsts; if (!LoopSize) LoopSize = 1; unsigned ItersAhead = PrefDist/LoopSize; if (!ItersAhead) ItersAhead = 1; SmallVector<std::pair<Instruction *, const SCEVAddRecExpr *>, 16> PrefLoads; for (Loop::block_iterator I = L->block_begin(), IE = L->block_end(); I != IE; ++I) { for (BasicBlock::iterator J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J) { Value *PtrValue; Instruction *MemI; if (LoadInst *LMemI = dyn_cast<LoadInst>(J)) { MemI = LMemI; PtrValue = LMemI->getPointerOperand(); } else if (StoreInst *SMemI = dyn_cast<StoreInst>(J)) { if (!PrefetchWrites) continue; MemI = SMemI; PtrValue = SMemI->getPointerOperand(); } else continue; unsigned PtrAddrSpace = PtrValue->getType()->getPointerAddressSpace(); if (PtrAddrSpace) continue; if (L->isLoopInvariant(PtrValue)) continue; const SCEV *LSCEV = SE->getSCEV(PtrValue); const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV); if (!LSCEVAddRec) continue; // We don't want to double prefetch individual cache lines. If this load // is known to be within one cache line of some other load that has // already been prefetched, then don't prefetch this one as well. bool DupPref = false; for (SmallVector<std::pair<Instruction *, const SCEVAddRecExpr *>, 16>::iterator K = PrefLoads.begin(), KE = PrefLoads.end(); K != KE; ++K) { const SCEV *PtrDiff = SE->getMinusSCEV(LSCEVAddRec, K->second); if (const SCEVConstant *ConstPtrDiff = dyn_cast<SCEVConstant>(PtrDiff)) { int64_t PD = std::abs(ConstPtrDiff->getValue()->getSExtValue()); if (PD < (int64_t) CacheLineSize) { DupPref = true; break; } } } if (DupPref) continue; const SCEV *NextLSCEV = SE->getAddExpr(LSCEVAddRec, SE->getMulExpr( SE->getConstant(LSCEVAddRec->getType(), ItersAhead), LSCEVAddRec->getStepRecurrence(*SE))); if (!isSafeToExpand(NextLSCEV, *SE)) continue; PrefLoads.push_back(std::make_pair(MemI, LSCEVAddRec)); Type *I8Ptr = Type::getInt8PtrTy((*I)->getContext(), PtrAddrSpace); SCEVExpander SCEVE(*SE, J->getModule()->getDataLayout(), "prefaddr"); Value *PrefPtrValue = SCEVE.expandCodeFor(NextLSCEV, I8Ptr, MemI); IRBuilder<> Builder(MemI); Module *M = (*I)->getParent()->getParent(); Type *I32 = Type::getInt32Ty((*I)->getContext()); Value *PrefetchFunc = Intrinsic::getDeclaration(M, Intrinsic::prefetch); Builder.CreateCall( PrefetchFunc, {PrefPtrValue, ConstantInt::get(I32, MemI->mayReadFromMemory() ? 0 : 1), ConstantInt::get(I32, 3), ConstantInt::get(I32, 1)}); MadeChange = true; } } return MadeChange; }
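The two heuristics above are plain arithmetic: the number of iterations to prefetch ahead is the configured prefetch distance divided by the loop body size (both in instructions, clamped to at least 1), and a candidate is dropped when its address recurrence starts within one cache line of an already-selected prefetch. Numerically, under those same assumptions:

#include <cstdint>
#include <cstdlib>
#include <vector>

// Iterations ahead to prefetch: distance over loop size, minimum 1.
unsigned itersAhead(unsigned PrefDist, unsigned LoopSizeInsts) {
  unsigned Size = LoopSizeInsts ? LoopSizeInsts : 1;
  unsigned Iters = PrefDist / Size;
  return Iters ? Iters : 1;
}

// Skip a candidate whose address is within one cache line of an
// already-chosen prefetch (addresses reduced to constant byte offsets).
bool isDuplicatePrefetch(std::int64_t Addr,
                         const std::vector<std::int64_t> &Chosen,
                         std::int64_t CacheLineSize) {
  for (std::int64_t C : Chosen)
    if (std::llabs(Addr - C) < CacheLineSize)
      return true;
  return false;
}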
// Reroll the provided loop with respect to the provided induction variable. // Generally, we're looking for a loop like this: // // %iv = phi [ (preheader, ...), (body, %iv.next) ] // f(%iv) // %iv.1 = add %iv, 1 <-- a root increment // f(%iv.1) // %iv.2 = add %iv, 2 <-- a root increment // f(%iv.2) // %iv.scale_m_1 = add %iv, scale-1 <-- a root increment // f(%iv.scale_m_1) // ... // %iv.next = add %iv, scale // %cmp = icmp(%iv, ...) // br %cmp, header, exit // // Notably, we do not require that f(%iv), f(%iv.1), etc. be isolated groups of // instructions. In other words, the instructions in f(%iv), f(%iv.1), etc. can // be intermixed with each other. The restriction imposed by this algorithm is // that the relative order of the isomorphic instructions in f(%iv), f(%iv.1), // etc. be the same. // // First, we collect the use set of %iv, excluding the other increment roots. // This gives us f(%iv). Then we iterate over the loop instructions (scale-1) // times, having collected the use set of f(%iv.(i+1)), during which we: // - Ensure that the next unmatched instruction in f(%iv) is isomorphic to // the next unmatched instruction in f(%iv.(i+1)). // - Ensure that neither matched instruction has any external users // (with the exception of last-in-chain reduction instructions). // - Track the (aliasing) write set, and other side effects, of all // instructions that belong to future iterations that come before the matched // instructions. If the matched instructions read from that write set, then // f(%iv) or f(%iv.(i+1)) has some dependency on instructions in // f(%iv.(j+1)) for some j > i, and we cannot reroll the loop. Similarly, // if any of these future instructions have side effects (cannot be // speculatively executed), and so do the matched instructions, then we // cannot reorder those side-effect-producing instructions, and rerolling // fails. // // Finally, we make sure that all loop instructions are either loop increment // roots, simple latch code, parts of validated reductions, part of f(%iv), // or part of some f(%iv.i). If all of that is true (and all reductions have // been validated), then we reroll the loop. bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header, const SCEV *IterCount, ReductionTracker &Reductions) { const SCEVAddRecExpr *RealIVSCEV = cast<SCEVAddRecExpr>(SE->getSCEV(IV)); uint64_t Inc = cast<SCEVConstant>(RealIVSCEV->getOperand(1))-> getValue()->getZExtValue(); // The collection of loop increment instructions. SmallInstructionVector LoopIncs; uint64_t Scale = Inc; // The effective induction variable, IV, is normally also the real induction // variable. When we're dealing with a loop like: // for (int i = 0; i < 500; ++i) // x[3*i] = ...; // x[3*i+1] = ...; // x[3*i+2] = ...; // then the real IV is still i, but the effective IV is (3*i). Instruction *RealIV = IV; if (Inc == 1 && !findScaleFromMul(RealIV, Scale, IV, LoopIncs)) return false; assert(Scale <= MaxInc && "Scale is too large"); assert(Scale > 1 && "Scale must be at least 2"); // The set of increment instructions for each increment value. SmallVector<SmallInstructionVector, 32> Roots(Scale-1); SmallInstructionSet AllRoots; if (!collectAllRoots(L, Inc, Scale, IV, Roots, AllRoots, LoopIncs)) return false; DEBUG(dbgs() << "LRR: Found all root induction increments for: " << *RealIV << "\n"); // An array of just the possible reductions for this scale factor. 
When we // collect the set of all users of some root instructions, these reduction // instructions are treated as 'final' (their uses are not considered). // This is important because we don't want the root use set to search down // the reduction chain. SmallInstructionSet PossibleRedSet; SmallInstructionSet PossibleRedLastSet, PossibleRedPHISet; Reductions.restrictToScale(Scale, PossibleRedSet, PossibleRedPHISet, PossibleRedLastSet); // We now need to check for equivalence of the use graph of each root with // that of the primary induction variable (excluding the roots). Our goal // here is not to solve the full graph isomorphism problem, but rather to // catch common cases without a lot of work. As a result, we will assume // that the relative order of the instructions in each unrolled iteration // is the same (although we will not make an assumption about how the // different iterations are intermixed). Note that while the order must be // the same, the instructions may not be in the same basic block. SmallInstructionSet Exclude(AllRoots); Exclude.insert(LoopIncs.begin(), LoopIncs.end()); DenseSet<Instruction *> BaseUseSet; collectInLoopUserSet(L, IV, Exclude, PossibleRedSet, BaseUseSet); DenseSet<Instruction *> AllRootUses; std::vector<DenseSet<Instruction *> > RootUseSets(Scale-1); bool MatchFailed = false; for (unsigned i = 0; i < Scale-1 && !MatchFailed; ++i) { DenseSet<Instruction *> &RootUseSet = RootUseSets[i]; collectInLoopUserSet(L, Roots[i], SmallInstructionSet(), PossibleRedSet, RootUseSet); DEBUG(dbgs() << "LRR: base use set size: " << BaseUseSet.size() << " vs. iteration increment " << (i+1) << " use set size: " << RootUseSet.size() << "\n"); if (BaseUseSet.size() != RootUseSet.size()) { MatchFailed = true; break; } // In addition to regular aliasing information, we need to look for // instructions from later (future) iterations that have side effects // preventing us from reordering them past other instructions with side // effects. bool FutureSideEffects = false; AliasSetTracker AST(*AA); // The map between instructions in f(%iv.(i+1)) and f(%iv). DenseMap<Value *, Value *> BaseMap; assert(L->getNumBlocks() == 1 && "Cannot handle multi-block loops"); for (BasicBlock::iterator J1 = Header->begin(), J2 = Header->begin(), JE = Header->end(); J1 != JE && !MatchFailed; ++J1) { if (cast<Instruction>(J1) == RealIV) continue; if (cast<Instruction>(J1) == IV) continue; if (!BaseUseSet.count(J1)) continue; if (PossibleRedPHISet.count(J1)) // Skip reduction PHIs. continue; while (J2 != JE && (!RootUseSet.count(J2) || std::find(Roots[i].begin(), Roots[i].end(), J2) != Roots[i].end())) { // As we iterate through the instructions, instructions that don't // belong to previous iterations (or the base case) must belong to // future iterations. We want to track the alias set of writes from // future iterations. if (!isa<PHINode>(J2) && !BaseUseSet.count(J2) && !AllRootUses.count(J2)) { if (J2->mayWriteToMemory()) AST.add(J2); // Note: This is specifically guarded by a check on isa<PHINode>, // which, while a valid (somewhat arbitrary) micro-optimization, is // needed because otherwise isSafeToSpeculativelyExecute returns // false on PHI nodes. if (!isSimpleLoadStore(J2) && !isSafeToSpeculativelyExecute(J2, DL)) FutureSideEffects = true; } ++J2; } if (!J1->isSameOperationAs(J2)) { DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 << " vs. " << *J2 << "\n"); MatchFailed = true; break; } // Make sure that this instruction, which is in the use set of this // root instruction, does not also belong to the base set or the set of // some previous root instruction. if (BaseUseSet.count(J2) || AllRootUses.count(J2)) { DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 << " vs. " << *J2 << " (prev. case overlap)\n"); MatchFailed = true; break; } // Make sure that we don't alias with any instruction in the alias set // tracker. If we do, then we depend on a future iteration, and we // can't reroll. if (J2->mayReadFromMemory()) { for (AliasSetTracker::iterator K = AST.begin(), KE = AST.end(); K != KE && !MatchFailed; ++K) { if (K->aliasesUnknownInst(J2, *AA)) { DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 << " vs. " << *J2 << " (depends on future store)\n"); MatchFailed = true; break; } } } // If we've passed an instruction from a future iteration that may have // side effects, and this instruction might also, then we can't reorder // them, and this matching fails. As an exception, we allow the alias // set tracker to handle regular (simple) load/store dependencies. if (FutureSideEffects && ((!isSimpleLoadStore(J1) && !isSafeToSpeculativelyExecute(J1)) || (!isSimpleLoadStore(J2) && !isSafeToSpeculativelyExecute(J2)))) { DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 << " vs. " << *J2 << " (side effects prevent reordering)\n"); MatchFailed = true; break; } // For instructions that are part of a reduction, if the operation is // associative, then don't bother matching the operands (because we // already know that the instructions are isomorphic, and the order // within the iteration does not matter). For non-associative reductions, // we do need to match the operands, because we need to reject // out-of-order instructions within an iteration! // For example (assume floating-point addition), we need to reject this: // x += a[i]; x += b[i]; // x += a[i+1]; x += b[i+1]; // x += b[i+2]; x += a[i+2]; bool InReduction = Reductions.isPairInSame(J1, J2); if (!(InReduction && J1->isAssociative())) { bool Swapped = false, SomeOpMatched = false; for (unsigned j = 0; j < J1->getNumOperands() && !MatchFailed; ++j) { Value *Op2 = J2->getOperand(j); // If this is part of a reduction (and the operation is not // associative), then we match all operands, but not those that are // part of the reduction. if (InReduction) if (Instruction *Op2I = dyn_cast<Instruction>(Op2)) if (Reductions.isPairInSame(J2, Op2I)) continue; DenseMap<Value *, Value *>::iterator BMI = BaseMap.find(Op2); if (BMI != BaseMap.end()) Op2 = BMI->second; else if (std::find(Roots[i].begin(), Roots[i].end(), (Instruction*) Op2) != Roots[i].end()) Op2 = IV; if (J1->getOperand(Swapped ? unsigned(!j) : j) != Op2) { // If we've not already decided to swap the matched operands, and // we've not already matched our first operand (note that we could // have skipped matching the first operand because it is part of a // reduction above), and the instruction is commutative, then try // the swapped match. if (!Swapped && J1->isCommutative() && !SomeOpMatched && J1->getOperand(!j) == Op2) { Swapped = true; } else { DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 << " vs. 
" << *J2 << " (operand " << j << ")\n"); MatchFailed = true; break; } } SomeOpMatched = true; } } if ((!PossibleRedLastSet.count(J1) && hasUsesOutsideLoop(J1, L)) || (!PossibleRedLastSet.count(J2) && hasUsesOutsideLoop(J2, L))) { DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 << " vs. " << *J2 << " (uses outside loop)\n"); MatchFailed = true; break; } if (!MatchFailed) BaseMap.insert(std::pair<Value *, Value *>(J2, J1)); AllRootUses.insert(J2); Reductions.recordPair(J1, J2, i+1); ++J2; } } if (MatchFailed) return false; DEBUG(dbgs() << "LRR: Matched all iteration increments for " << *RealIV << "\n"); DenseSet<Instruction *> LoopIncUseSet; collectInLoopUserSet(L, LoopIncs, SmallInstructionSet(), SmallInstructionSet(), LoopIncUseSet); DEBUG(dbgs() << "LRR: Loop increment set size: " << LoopIncUseSet.size() << "\n"); // Make sure that all instructions in the loop have been included in some // use set. for (BasicBlock::iterator J = Header->begin(), JE = Header->end(); J != JE; ++J) { if (isa<DbgInfoIntrinsic>(J)) continue; if (cast<Instruction>(J) == RealIV) continue; if (cast<Instruction>(J) == IV) continue; if (BaseUseSet.count(J) || AllRootUses.count(J) || (LoopIncUseSet.count(J) && (J->isTerminator() || isSafeToSpeculativelyExecute(J, DL)))) continue; if (AllRoots.count(J)) continue; if (Reductions.isSelectedPHI(J)) continue; DEBUG(dbgs() << "LRR: aborting reroll based on " << *RealIV << " unprocessed instruction found: " << *J << "\n"); MatchFailed = true; break; } if (MatchFailed) return false; DEBUG(dbgs() << "LRR: all instructions processed from " << *RealIV << "\n"); if (!Reductions.validateSelected()) return false; // At this point, we've validated the rerolling, and we're committed to // making changes! Reductions.replaceSelected(); // Remove instructions associated with non-base iterations. for (BasicBlock::reverse_iterator J = Header->rbegin(); J != Header->rend();) { if (AllRootUses.count(&*J)) { Instruction *D = &*J; DEBUG(dbgs() << "LRR: removing: " << *D << "\n"); D->eraseFromParent(); continue; } ++J; } // Insert the new induction variable. const SCEV *Start = RealIVSCEV->getStart(); if (Inc == 1) Start = SE->getMulExpr(Start, SE->getConstant(Start->getType(), Scale)); const SCEVAddRecExpr *H = cast<SCEVAddRecExpr>(SE->getAddRecExpr(Start, SE->getConstant(RealIVSCEV->getType(), 1), L, SCEV::FlagAnyWrap)); { // Limit the lifetime of SCEVExpander. 
SCEVExpander Expander(*SE, "reroll"); Value *NewIV = Expander.expandCodeFor(H, IV->getType(), Header->begin()); for (DenseSet<Instruction *>::iterator J = BaseUseSet.begin(), JE = BaseUseSet.end(); J != JE; ++J) (*J)->replaceUsesOfWith(IV, NewIV); if (BranchInst *BI = dyn_cast<BranchInst>(Header->getTerminator())) { if (LoopIncUseSet.count(BI)) { const SCEV *ICSCEV = RealIVSCEV->evaluateAtIteration(IterCount, *SE); if (Inc == 1) ICSCEV = SE->getMulExpr(ICSCEV, SE->getConstant(ICSCEV->getType(), Scale)); // Iteration count SCEV minus 1 const SCEV *ICMinus1SCEV = SE->getMinusSCEV(ICSCEV, SE->getConstant(ICSCEV->getType(), 1)); Value *ICMinus1; // Iteration count minus 1 if (isa<SCEVConstant>(ICMinus1SCEV)) { ICMinus1 = Expander.expandCodeFor(ICMinus1SCEV, NewIV->getType(), BI); } else { BasicBlock *Preheader = L->getLoopPreheader(); if (!Preheader) Preheader = InsertPreheaderForLoop(L, this); ICMinus1 = Expander.expandCodeFor(ICMinus1SCEV, NewIV->getType(), Preheader->getTerminator()); } Value *Cond = new ICmpInst(BI, CmpInst::ICMP_EQ, NewIV, ICMinus1, "exitcond"); BI->setCondition(Cond); if (BI->getSuccessor(1) != Header) BI->swapSuccessors(); } } } SimplifyInstructionsInBlock(Header, DL, TLI); DeleteDeadPHIs(Header, TLI); ++NumRerolledLoops; return true; }
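End to end, the rerolling validated above turns a manually scale-unrolled body back into one operation per iteration with a scaled trip count. As compilable C++ for scale 3 (illustrative only, not generated code):

// Before rerolling (scale == 3): each iteration performs three isomorphic
// strided operations rooted at %iv, %iv+1, %iv+2.
void unrolled(int *x, int n) {
  for (int i = 0; i < n; ++i) {
    x[3 * i + 0] = 3 * i + 0;
    x[3 * i + 1] = 3 * i + 1; // root increment: add %iv, 1
    x[3 * i + 2] = 3 * i + 2; // root increment: add %iv, 2
  }
}

// After rerolling: one operation per iteration, trip count scaled by 3.
void rerolled(int *x, int n) {
  for (int i = 0; i < 3 * n; ++i)
    x[i] = i;
}

The exit compare is rewritten against (scaled iteration count - 1), which is why the code above expands ICMinus1 and, when it is not a constant, materializes it in the preheader.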
Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, BinaryOperator &I) { bool isLeftShift = I.getOpcode() == Instruction::Shl; // See if we can propagate this shift into the input, this covers the trivial // cast of lshr(shl(x,c1),c2) as well as other more complex cases. if (I.getOpcode() != Instruction::AShr && CanEvaluateShifted(Op0, Op1->getZExtValue(), isLeftShift, *this)) { DEBUG(dbgs() << "ICE: GetShiftedValue propagating shift through expression" " to eliminate shift:\n IN: " << *Op0 << "\n SH: " << I <<"\n"); return ReplaceInstUsesWith(I, GetShiftedValue(Op0, Op1->getZExtValue(), isLeftShift, *this)); } // See if we can simplify any instructions used by the instruction whose sole // purpose is to compute bits we don't care about. uint32_t TypeBits = Op0->getType()->getScalarSizeInBits(); // shl i32 X, 32 = 0 and srl i8 Y, 9 = 0, ... just don't eliminate // a signed shift. // if (Op1->uge(TypeBits)) { if (I.getOpcode() != Instruction::AShr) return ReplaceInstUsesWith(I, Constant::getNullValue(Op0->getType())); // ashr i32 X, 32 --> ashr i32 X, 31 I.setOperand(1, ConstantInt::get(I.getType(), TypeBits-1)); return &I; } // ((X*C1) << C2) == (X * (C1 << C2)) if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0)) if (BO->getOpcode() == Instruction::Mul && isLeftShift) if (Constant *BOOp = dyn_cast<Constant>(BO->getOperand(1))) return BinaryOperator::CreateMul(BO->getOperand(0), ConstantExpr::getShl(BOOp, Op1)); // Try to fold constant and into select arguments. if (SelectInst *SI = dyn_cast<SelectInst>(Op0)) if (Instruction *R = FoldOpIntoSelect(I, SI)) return R; if (isa<PHINode>(Op0)) if (Instruction *NV = FoldOpIntoPhi(I)) return NV; // Fold shift2(trunc(shift1(x,c1)), c2) -> trunc(shift2(shift1(x,c1),c2)) if (TruncInst *TI = dyn_cast<TruncInst>(Op0)) { Instruction *TrOp = dyn_cast<Instruction>(TI->getOperand(0)); // If 'shift2' is an ashr, we would have to get the sign bit into a funny // place. Don't try to do this transformation in this case. Also, we // require that the input operand is a shift-by-constant so that we have // confidence that the shifts will get folded together. We could do this // xform in more cases, but it is unlikely to be profitable. if (TrOp && I.isLogicalShift() && TrOp->isShift() && isa<ConstantInt>(TrOp->getOperand(1))) { // Okay, we'll do this xform. Make the shift of shift. Constant *ShAmt = ConstantExpr::getZExt(Op1, TrOp->getType()); // (shift2 (shift1 & 0x00FF), c2) Value *NSh = Builder->CreateBinOp(I.getOpcode(), TrOp, ShAmt,I.getName()); // For logical shifts, the truncation has the effect of making the high // part of the register be zeros. Emulate this by inserting an AND to // clear the top bits as needed. This 'and' will usually be zapped by // other xforms later if dead. unsigned SrcSize = TrOp->getType()->getScalarSizeInBits(); unsigned DstSize = TI->getType()->getScalarSizeInBits(); APInt MaskV(APInt::getLowBitsSet(SrcSize, DstSize)); // The mask we constructed says what the trunc would do if occurring // between the shifts. We want to know the effect *after* the second // shift. We know that it is a logical shift by a constant, so adjust the // mask as appropriate. if (I.getOpcode() == Instruction::Shl) MaskV <<= Op1->getZExtValue(); else { assert(I.getOpcode() == Instruction::LShr && "Unknown logical shift"); MaskV = MaskV.lshr(Op1->getZExtValue()); } // shift1 & 0x00FF Value *And = Builder->CreateAnd(NSh, ConstantInt::get(I.getContext(), MaskV), TI->getName()); // Return the value truncated to the interesting size. 
      // Return the value truncated to the interesting size.
      return new TruncInst(And, I.getType());
    }
  }

  if (Op0->hasOneUse()) {
    if (BinaryOperator *Op0BO = dyn_cast<BinaryOperator>(Op0)) {
      // Turn ((X >> C) + Y) << C  ->  (X + (Y << C)) & (~0 << C)
      Value *V1, *V2;
      ConstantInt *CC;
      switch (Op0BO->getOpcode()) {
      default: break;
      case Instruction::Add:
      case Instruction::And:
      case Instruction::Or:
      case Instruction::Xor: {
        // These operators commute.
        // Turn (Y + (X >> C)) << C  ->  (X + (Y << C)) & (~0 << C)
        if (isLeftShift && Op0BO->getOperand(1)->hasOneUse() &&
            match(Op0BO->getOperand(1), m_Shr(m_Value(V1),
                  m_Specific(Op1)))) {
          Value *YS =         // (Y << C)
            Builder->CreateShl(Op0BO->getOperand(0), Op1, Op0BO->getName());
          // (X + (Y << C))
          Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), YS, V1,
                                          Op0BO->getOperand(1)->getName());
          uint32_t Op1Val = Op1->getLimitedValue(TypeBits);
          return BinaryOperator::CreateAnd(X, ConstantInt::get(I.getContext(),
                     APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val)));
        }

        // Turn (Y + ((X >> C) & CC)) << C  ->  ((X & (CC << C)) + (Y << C))
        Value *Op0BOOp1 = Op0BO->getOperand(1);
        if (isLeftShift && Op0BOOp1->hasOneUse() &&
            match(Op0BOOp1,
                  m_And(m_OneUse(m_Shr(m_Value(V1), m_Specific(Op1))),
                        m_ConstantInt(CC)))) {
          Value *YS =   // (Y << C)
            Builder->CreateShl(Op0BO->getOperand(0), Op1, Op0BO->getName());
          // X & (CC << C)
          Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1),
                                         V1->getName()+".mask");
          return BinaryOperator::Create(Op0BO->getOpcode(), YS, XM);
        }
      }
      // FALL THROUGH.
      case Instruction::Sub: {
        // Turn ((X >> C) + Y) << C  ->  (X + (Y << C)) & (~0 << C)
        if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() &&
            match(Op0BO->getOperand(0), m_Shr(m_Value(V1),
                  m_Specific(Op1)))) {
          Value *YS =  // (Y << C)
            Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName());
          // (X + (Y << C))
          Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), V1, YS,
                                          Op0BO->getOperand(0)->getName());
          uint32_t Op1Val = Op1->getLimitedValue(TypeBits);
          return BinaryOperator::CreateAnd(X, ConstantInt::get(I.getContext(),
                     APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val)));
        }

        // Turn (((X >> C)&CC) + Y) << C  ->  (X + (Y << C)) & (CC << C)
        if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() &&
            match(Op0BO->getOperand(0),
                  m_And(m_OneUse(m_Shr(m_Value(V1), m_Value(V2))),
                        m_ConstantInt(CC))) && V2 == Op1) {
          Value *YS = // (Y << C)
            Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName());
          // X & (CC << C)
          Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1),
                                         V1->getName()+".mask");

          return BinaryOperator::Create(Op0BO->getOpcode(), XM, YS);
        }

        break;
      }
      }

      // If the operand is a bitwise operator with a constant RHS, and the
      // shift is the only use, we can pull it out of the shift.
      if (ConstantInt *Op0C = dyn_cast<ConstantInt>(Op0BO->getOperand(1))) {
        bool isValid = true;     // Valid only for And, Or, Xor
        bool highBitSet = false; // Transform if high bit of constant set?

        switch (Op0BO->getOpcode()) {
        default: isValid = false; break;   // Do not perform transform!
        case Instruction::Add:
          isValid = isLeftShift;
          break;
        case Instruction::Or:
        case Instruction::Xor:
          highBitSet = false;
          break;
        case Instruction::And:
          highBitSet = true;
          break;
        }

        // If this is a signed shift right, and the high bit is modified
        // by the logical operation, do not perform the transformation.
        // The highBitSet boolean indicates the value of the high bit of
        // the constant which would cause it to be modified for this
        // operation.
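        // For example (editorial, hypothetical i32 values): with And,
        // highBitSet is true, so (X & 0x80000000) >>s C may become
        // (X >>s C) & (0x80000000 >>s C) -- the mask keeps the sign bit, so
        // the arithmetic shift still smears in the original sign.  By
        // contrast (X & 0x7FFFFFFF) >>s C is rejected, since clearing the
        // sign bit before the shift changes which bits get shifted in.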
        //
        if (isValid && I.getOpcode() == Instruction::AShr)
          isValid = Op0C->getValue()[TypeBits-1] == highBitSet;

        if (isValid) {
          Constant *NewRHS = ConstantExpr::get(I.getOpcode(), Op0C, Op1);

          Value *NewShift =
            Builder->CreateBinOp(I.getOpcode(), Op0BO->getOperand(0), Op1);
          NewShift->takeName(Op0BO);

          return BinaryOperator::Create(Op0BO->getOpcode(), NewShift,
                                        NewRHS);
        }
      }
    }
  }

  // Find out if this is a shift of a shift by a constant.
  BinaryOperator *ShiftOp = dyn_cast<BinaryOperator>(Op0);
  if (ShiftOp && !ShiftOp->isShift())
    ShiftOp = 0;

  if (ShiftOp && isa<ConstantInt>(ShiftOp->getOperand(1))) {

    // This is a constant shift of a constant shift. Be careful about hiding
    // shl instructions behind bit masks. They are used to represent multiplies
    // by a constant, and it is important that simple arithmetic expressions
    // are still recognizable by scalar evolution.
    //
    // The transforms applied to shl are very similar to the transforms applied
    // to mul by constant. We can be more aggressive about optimizing right
    // shifts.
    //
    // Combinations of right and left shifts will still be optimized in
    // DAGCombine where scalar evolution no longer applies.

    ConstantInt *ShiftAmt1C = cast<ConstantInt>(ShiftOp->getOperand(1));
    uint32_t ShiftAmt1 = ShiftAmt1C->getLimitedValue(TypeBits);
    uint32_t ShiftAmt2 = Op1->getLimitedValue(TypeBits);
    assert(ShiftAmt2 != 0 && "Should have been simplified earlier");
    if (ShiftAmt1 == 0) return 0;  // Will be simplified in the future.
    Value *X = ShiftOp->getOperand(0);

    IntegerType *Ty = cast<IntegerType>(I.getType());

    // Check for (X << c1) << c2  and  (X >> c1) >> c2
    if (I.getOpcode() == ShiftOp->getOpcode()) {
      uint32_t AmtSum = ShiftAmt1+ShiftAmt2;   // Fold into one big shift.
      // If this is an oversized composite shift, then unsigned shifts get 0,
      // ashr saturates.
      if (AmtSum >= TypeBits) {
        if (I.getOpcode() != Instruction::AShr)
          return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
        AmtSum = TypeBits-1;  // Saturate to 31 for i32 ashr.
      }

      return BinaryOperator::Create(I.getOpcode(), X,
                                    ConstantInt::get(Ty, AmtSum));
    }

    if (ShiftAmt1 == ShiftAmt2) {
      // If we have ((X << C) >>u C), turn this into X & (-1 >>u C).
      if (I.getOpcode() == Instruction::LShr &&
          ShiftOp->getOpcode() == Instruction::Shl) {
        APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt1));
        return BinaryOperator::CreateAnd(X,
                                        ConstantInt::get(I.getContext(), Mask));
      }
    } else if (ShiftAmt1 < ShiftAmt2) {
      uint32_t ShiftDiff = ShiftAmt2-ShiftAmt1;

      // (X >>?,exact C1) << C2 --> X << (C2-C1)
      // The inexact version is deferred to DAGCombine so we don't hide shl
      // behind a bit mask.
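      // For instance (editorial, hypothetical i32 values):
      //   (lshr exact i32 %x, 3) << 5  -->  shl i32 %x, 2
      // since the exact flag guarantees the low three bits of %x are zero.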
      if (I.getOpcode() == Instruction::Shl &&
          ShiftOp->getOpcode() != Instruction::Shl &&
          ShiftOp->isExact()) {
        assert(ShiftOp->getOpcode() == Instruction::LShr ||
               ShiftOp->getOpcode() == Instruction::AShr);
        ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff);
        BinaryOperator *NewShl = BinaryOperator::Create(Instruction::Shl, X,
                                                        ShiftDiffCst);
        NewShl->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
        NewShl->setHasNoSignedWrap(I.hasNoSignedWrap());
        return NewShl;
      }

      // (X << C1) >>u C2  --> X >>u (C2-C1) & (-1 >> C2)
      if (I.getOpcode() == Instruction::LShr &&
          ShiftOp->getOpcode() == Instruction::Shl) {
        ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff);
        // (X <<nuw C1) >>u C2 --> X >>u (C2-C1)
        if (ShiftOp->hasNoUnsignedWrap()) {
          BinaryOperator *NewLShr = BinaryOperator::Create(Instruction::LShr,
                                                           X, ShiftDiffCst);
          NewLShr->setIsExact(I.isExact());
          return NewLShr;
        }
        Value *Shift = Builder->CreateLShr(X, ShiftDiffCst);

        APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
        return BinaryOperator::CreateAnd(Shift,
                                         ConstantInt::get(I.getContext(),Mask));
      }

      // We can't handle (X << C1) >>s C2, it shifts arbitrary bits in. However,
      // we can handle (X <<nsw C1) >>s C2 since it only shifts in sign bits.
      if (I.getOpcode() == Instruction::AShr &&
          ShiftOp->getOpcode() == Instruction::Shl) {
        if (ShiftOp->hasNoSignedWrap()) {
          // (X <<nsw C1) >>s C2 --> X >>s (C2-C1)
          ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff);
          BinaryOperator *NewAShr = BinaryOperator::Create(Instruction::AShr,
                                                           X, ShiftDiffCst);
          NewAShr->setIsExact(I.isExact());
          return NewAShr;
        }
      }
    } else {
      assert(ShiftAmt2 < ShiftAmt1);
      uint32_t ShiftDiff = ShiftAmt1-ShiftAmt2;

      // (X >>?exact C1) << C2 --> X >>?exact (C1-C2)
      // The inexact version is deferred to DAGCombine so we don't hide shl
      // behind a bit mask.
      if (I.getOpcode() == Instruction::Shl &&
          ShiftOp->getOpcode() != Instruction::Shl &&
          ShiftOp->isExact()) {
        ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff);
        BinaryOperator *NewShr = BinaryOperator::Create(ShiftOp->getOpcode(),
                                                        X, ShiftDiffCst);
        NewShr->setIsExact(true);
        return NewShr;
      }

      // (X << C1) >>u C2  --> X << (C1-C2) & (-1 >> C2)
      if (I.getOpcode() == Instruction::LShr &&
          ShiftOp->getOpcode() == Instruction::Shl) {
        ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff);
        if (ShiftOp->hasNoUnsignedWrap()) {
          // (X <<nuw C1) >>u C2 --> X <<nuw (C1-C2)
          BinaryOperator *NewShl = BinaryOperator::Create(Instruction::Shl,
                                                          X, ShiftDiffCst);
          NewShl->setHasNoUnsignedWrap(true);
          return NewShl;
        }
        Value *Shift = Builder->CreateShl(X, ShiftDiffCst);

        APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
        return BinaryOperator::CreateAnd(Shift,
                                         ConstantInt::get(I.getContext(),Mask));
      }

      // We can't handle (X << C1) >>s C2, it shifts arbitrary bits in. However,
      // we can handle (X <<nsw C1) >>s C2 since it only shifts in sign bits.
      if (I.getOpcode() == Instruction::AShr &&
          ShiftOp->getOpcode() == Instruction::Shl) {
        if (ShiftOp->hasNoSignedWrap()) {
          // (X <<nsw C1) >>s C2 --> X <<nsw (C1-C2)
          ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff);
          BinaryOperator *NewShl = BinaryOperator::Create(Instruction::Shl,
                                                          X, ShiftDiffCst);
          NewShl->setHasNoSignedWrap(true);
          return NewShl;
        }
      }
    }
  }
  return 0;
}
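// The sketch below is editorial, not part of InstCombine: a minimal,
// self-contained check of two of the shift-of-shift identities above on
// concrete 32-bit values. All names in it are hypothetical.
#include <cassert>
#include <cstdint>

// ((X << C) >>u C) == X & (-1 >>u C), the ShiftAmt1 == ShiftAmt2 case.
static uint32_t foldShlThenLshr(uint32_t X, unsigned C) {
  return X & (UINT32_MAX >> C);
}

// (X >>u,exact C1) << C2 == X << (C2 - C1) for C1 < C2; the "exact"
// precondition is that the low C1 bits of X are zero.
static uint32_t foldExactLshrThenShl(uint32_t X, unsigned C1, unsigned C2) {
  assert(C1 < C2 && (X & ((1u << C1) - 1)) == 0);
  return X << (C2 - C1);
}

int main() {
  assert(foldShlThenLshr(0xDEADBEEF, 8) == ((0xDEADBEEFu << 8) >> 8));
  assert(foldExactLshrThenShl(0xF0, 4, 6) == ((0xF0u >> 4) << 6));
  return 0;
}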
Formula* EncoderPass::makeTraceFormula() {

    MSTimer timer1;
    if(options->printDuration()) {
        timer1.start();
    }
    // Create an empty trace formula
    Formula *formula = new Formula();
    // Prepare the CFG variables
    encoder->prepareControlFlow(targetFun);
    // Pre-process the global variables
    initGlobalVariables();
    // Save the line numbers of the calls to assert
    initAssertCalls();

    bool isWeighted = false;
    unsigned oldLine = 0;
    Instruction *lastInstruction = NULL;
    std::vector<ExprPtr> currentConstraints;

    // Iterate through the function in topological order and
    // encode each instruction as SMT constraints.
    // @See: eli.thegreenplace.net/2013/09/16/analyzing-function-cfgs-with-llvm/
    ReversePostOrderTraversal<Function*> RPOT(this->targetFun);
    ReversePostOrderTraversal<Function*>::rpo_iterator itb;
    for (itb=RPOT.begin(); itb!=RPOT.end(); ++itb) {
        BasicBlock *bb = *itb;

        // Propagate pointers when we enter a new basic block
        ExprPtr e = ctx->propagatePointers(bb);
        if (e) {
            e->setHard();
            formula->add(e);
        }

        // HFTF: Encode bug-free blocks as hard
        bool doEncodeBB = true;
        if (options->htfUsed()) {
            if(profile->isBugFreeBlock(bb)) {
                doEncodeBB = false;
            }
        }

        // Iterate through the instructions of the basic block
        for (BasicBlock::iterator iti=bb->begin(), eti=bb->end();
             iti!=eti; ++iti) {
            Instruction *i = iti;
            bool doEncodeInst = doEncodeBB;

            // Check the line number
            unsigned line = 0;
            if (MDNode *N = i->getMetadata("dbg")) {
                DILocation Loc(N);
                line = Loc.getLineNumber();
                // Instruction related to an assert
                if (ctx->isAssertCall(line)) {
                    isWeighted = false;
                    doEncodeInst = true;
                } else {
                    // Instruction with a line number
                    // (not related to an assert)
                    isWeighted = true;
                }
            } else {
                // Instruction with no line number
                isWeighted = false;
            }

            // Hardened Trace Formula
            if (options->htfUsed() && !doEncodeInst) {
                isWeighted = false;
            }

            // Encode the instruction in an SMT formula
            ExprPtr expr = NULL;
            switch (i->getOpcode()) {
                case Instruction::Add:
                case Instruction::FAdd:
                case Instruction::Sub:
                case Instruction::FSub:
                case Instruction::Mul:
                case Instruction::FMul:
                case Instruction::UDiv:
                case Instruction::SDiv:
                case Instruction::FDiv:
                case Instruction::URem:
                case Instruction::SRem:
                case Instruction::FRem:
                case Instruction::And:
                case Instruction::Or:
                case Instruction::Xor:
                case Instruction::Shl:
                case Instruction::LShr:
                case Instruction::AShr:
                    expr = encoder->encode(cast<BinaryOperator>(i));
                    break;
                case Instruction::Select:
                    expr = encoder->encode(cast<SelectInst>(i));
                    break;
                case Instruction::PHI:
                    expr = encoder->encode(cast<PHINode>(i));
                    isWeighted = false;
                    break;
                case Instruction::Br:
                    expr = encoder->encode(cast<BranchInst>(i), loops);
                    isWeighted = false;
                    break;
                case Instruction::Switch:
                    expr = encoder->encode(cast<SwitchInst>(i));
                    break;
                case Instruction::ICmp:
                    expr = encoder->encode(cast<ICmpInst>(i));
                    break;
                case Instruction::Call:
                    expr = encoder->encode(cast<CallInst>(i), preCond, postCond);
                    if (!expr) {
                        // Do not encode sniper_x functions.
                        continue;
                    }
                    isWeighted = false;
                    break;
                case Instruction::Alloca:
                    expr = encoder->encode(cast<AllocaInst>(i));
                    if (!expr) {
                        continue;
                    }
                    isWeighted = false;
                    break;
                case Instruction::Store:
                    expr = encoder->encode(cast<StoreInst>(i));
                    break;
                case Instruction::Load:
                    expr = encoder->encode(cast<LoadInst>(i), postCond);
                    break;
                case Instruction::GetElementPtr:
                    expr = encoder->encode(cast<GetElementPtrInst>(i));
                    isWeighted = false;
                    break;
                case Instruction::SExt:
                    expr = encoder->encode(cast<SExtInst>(i));
                    isWeighted = false;
                    break;
                case Instruction::ZExt:
                    expr = encoder->encode(cast<ZExtInst>(i));
                    isWeighted = false;
                    break;
                case Instruction::Ret:
                    expr = encoder->encode(cast<ReturnInst>(i));
                    isWeighted = false;
                    break;
                case Instruction::Unreachable:
                    // Do not encode.
                    continue;
                case Instruction::PtrToInt: {
                    // Instruction added by SNIPER
                    std::string instName = i->getName().str();
                    std::string prefix("sniper_ptrVal");
                    if (!instName.compare(0, prefix.size(), prefix)) {
                        continue;
                    }
                    // NO BREAK!
                }
                case Instruction::VAArg:
                case Instruction::Invoke:
                case Instruction::Trunc:
                case Instruction::FPTrunc:
                case Instruction::FPExt:
                case Instruction::UIToFP:
                case Instruction::SIToFP:
                case Instruction::FPToUI:
                case Instruction::FPToSI:
                case Instruction::IntToPtr:
                case Instruction::BitCast:
                case Instruction::FCmp:
                case Instruction::ExtractElement:
                case Instruction::InsertElement:
                case Instruction::ShuffleVector:
                case Instruction::ExtractValue:
                case Instruction::InsertValue:
                    i->dump();
                    assert(false && "unsupported LLVM instruction!");
                    break;
                default:
                    llvm_unreachable("Illegal opcode!");
            }
            assert(expr && "Expression is null!");

            // Atoi checking
            if (isAtoiFunction(i)) {
                isWeighted = false;
            }

            // Instruction with a line number
            if (isWeighted) {
                // Add each instruction separately
                if (options->instructionGranularityLevel()) {
                    expr->setInstruction(i);
                    expr->setSoft();
                    formula->add(expr);
                }
                // Pack and add all instructions from
                // the same line number
                else if (options->lineGranularityLevel()) {
                    assert(line>0 && "Illegal line number!");
                    // New line: add the collected constraints to the formula
                    if (line!=oldLine && oldLine!=0) {
                        assert(!currentConstraints.empty() && "No constraints!");
                        assert(lastInstruction && "Instruction is null!");
                        ExprPtr e = Expression::mkAnd(currentConstraints);
                        e->setInstruction(lastInstruction);
                        e->setSoft();
                        formula->add(e);
                        currentConstraints.clear();
                    }
                    currentConstraints.push_back(expr);
                    oldLine = line;
                    lastInstruction = i;
                }
                // Block level
                else if (options->blockGranularityLevel()) {
                    currentConstraints.push_back(expr);
                } else {
                    assert(false && "Unknown granularity level!");
                }
            }
            // Instruction with no line number
            else {
                expr->setHard();
                formula->add(expr);
            }
        }
        // End of basic block iteration
        if (options->blockGranularityLevel()) {
            if (!currentConstraints.empty()) {
                ExprPtr e = Expression::mkAnd(currentConstraints);
                e->setInstruction(lastInstruction);
                e->setSoft();
                formula->add(e);
                currentConstraints.clear();
            }
        }
    }
    // Add the remaining soft constraints to the formula
    if (!currentConstraints.empty()) {
        assert(options->lineGranularityLevel() &&
               "Some instructions could not be encoded!");
        assert(lastInstruction && "Instruction is null!");
        ExprPtr e = Expression::mkAnd(currentConstraints);
        e->setInstruction(lastInstruction);
        e->setSoft();
        formula->add(e);
        currentConstraints.clear();
    }
    if(options->printDuration()) {
        timer1.stop("Trace Formula Encoding Time");
    }
    return formula;
}
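// The block below is an editorial sketch of the line-granularity grouping
// used above, with hypothetical stand-in types (the real Formula/Expression
// API is not shown): constraints that share a source line end up AND-ed into
// one soft constraint, and seeing a new line starts a new group. It is an
// equivalent reformulation of the oldLine/currentConstraints bookkeeping,
// not the project's actual code.
#include <cassert>
#include <string>
#include <utility>
#include <vector>

struct SoftGroup {
    unsigned line;                   // Source line shared by the group
    std::vector<std::string> exprs;  // Per-instruction constraints
};

static std::vector<SoftGroup>
groupByLine(const std::vector<std::pair<unsigned, std::string>> &constraints) {
    std::vector<SoftGroup> groups;
    unsigned oldLine = 0;
    for (const auto &c : constraints) {
        if (c.first != oldLine) {   // New line: start a new soft group
            groups.push_back({c.first, {}});
            oldLine = c.first;
        }
        groups.back().exprs.push_back(c.second);
    }
    return groups;
}

int main() {
    auto gs = groupByLine({{3, "a"}, {3, "b"}, {5, "c"}});
    assert(gs.size() == 2 && gs[0].exprs.size() == 2 && gs[1].line == 5);
    return 0;
}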
/// SimplifyWithOpReplaced - See if V simplifies when its operand Op is
/// replaced with RepOp.
static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
                                     const DataLayout *TD,
                                     const TargetLibraryInfo *TLI) {
  // Trivial replacement.
  if (V == Op)
    return RepOp;

  Instruction *I = dyn_cast<Instruction>(V);
  if (!I)
    return nullptr;

  // If this is a binary operator, try to simplify it with the replaced op.
  if (BinaryOperator *B = dyn_cast<BinaryOperator>(I)) {
    if (B->getOperand(0) == Op)
      return SimplifyBinOp(B->getOpcode(), RepOp, B->getOperand(1), TD, TLI);
    if (B->getOperand(1) == Op)
      return SimplifyBinOp(B->getOpcode(), B->getOperand(0), RepOp, TD, TLI);
  }

  // Same for CmpInsts.
  if (CmpInst *C = dyn_cast<CmpInst>(I)) {
    if (C->getOperand(0) == Op)
      return SimplifyCmpInst(C->getPredicate(), RepOp, C->getOperand(1), TD,
                             TLI);
    if (C->getOperand(1) == Op)
      return SimplifyCmpInst(C->getPredicate(), C->getOperand(0), RepOp, TD,
                             TLI);
  }

  // TODO: We could hand off more cases to instsimplify here.

  // If all operands are constant after substituting Op for RepOp then we can
  // constant fold the instruction.
  if (Constant *CRepOp = dyn_cast<Constant>(RepOp)) {
    // Build a list of all constant operands.
    SmallVector<Constant*, 8> ConstOps;
    for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
      if (I->getOperand(i) == Op)
        ConstOps.push_back(CRepOp);
      else if (Constant *COp = dyn_cast<Constant>(I->getOperand(i)))
        ConstOps.push_back(COp);
      else
        break;
    }

    // All operands were constants, fold it.
    if (ConstOps.size() == I->getNumOperands()) {
      if (CmpInst *C = dyn_cast<CmpInst>(I))
        return ConstantFoldCompareInstOperands(C->getPredicate(), ConstOps[0],
                                               ConstOps[1], TD, TLI);
      if (LoadInst *LI = dyn_cast<LoadInst>(I))
        if (!LI->isVolatile())
          return ConstantFoldLoadFromConstPtr(ConstOps[0], TD);
      return ConstantFoldInstOperands(I->getOpcode(), I->getType(), ConstOps,
                                      TD, TLI);
    }
  }

  return nullptr;
}
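// An editorial illustration of the substitution (hypothetical IR, not from
// the source): given
//   %cmp = icmp eq i32 %x, 0
//   %sub = sub i32 %x, 1
//   %sel = select i1 %cmp, i32 %sub, i32 %y
// the true arm can be simplified with %x replaced by 0: SimplifyBinOp folds
// `sub 0, 1` to -1, so a caller may rewrite the select as
// `select i1 %cmp, i32 -1, i32 %y` without touching %sub's other uses.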
static MemoryAccessKind checkFunctionMemoryAccess(Function &F, AAResults &AAR,
                                                  const SCCNodeSet &SCCNodes) {
  FunctionModRefBehavior MRB = AAR.getModRefBehavior(&F);
  if (MRB == FMRB_DoesNotAccessMemory)
    // Already perfect!
    return MAK_ReadNone;

  // Definitions with weak linkage may be overridden at linktime with
  // something that writes memory, so treat them like declarations.
  if (F.isDeclaration() || F.mayBeOverridden()) {
    if (AliasAnalysis::onlyReadsMemory(MRB))
      return MAK_ReadOnly;

    // Conservatively assume it writes to memory.
    return MAK_MayWrite;
  }

  // Scan the function body for instructions that may read or write memory.
  bool ReadsMemory = false;
  for (inst_iterator II = inst_begin(F), E = inst_end(F); II != E; ++II) {
    Instruction *I = &*II;

    // Some instructions can be ignored even if they read or write memory.
    // Detect these now, skipping to the next instruction if one is found.
    CallSite CS(cast<Value>(I));
    if (CS) {
      // Ignore calls to functions in the same SCC, as long as the call sites
      // don't have operand bundles.  Calls with operand bundles are allowed to
      // have memory effects not described by the memory effects of the call
      // target.
      if (!CS.hasOperandBundles() && CS.getCalledFunction() &&
          SCCNodes.count(CS.getCalledFunction()))
        continue;
      FunctionModRefBehavior MRB = AAR.getModRefBehavior(CS);

      // If the call doesn't access memory, we're done.
      if (!(MRB & MRI_ModRef))
        continue;

      if (!AliasAnalysis::onlyAccessesArgPointees(MRB)) {
        // The call could access any memory. If that includes writes, give up.
        if (MRB & MRI_Mod)
          return MAK_MayWrite;
        // If it reads, note it.
        if (MRB & MRI_Ref)
          ReadsMemory = true;
        continue;
      }

      // Check whether all pointer arguments point to local memory, and
      // ignore calls that only access local memory.
      for (CallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end();
           CI != CE; ++CI) {
        Value *Arg = *CI;
        if (!Arg->getType()->isPtrOrPtrVectorTy())
          continue;

        AAMDNodes AAInfo;
        I->getAAMetadata(AAInfo);
        MemoryLocation Loc(Arg, MemoryLocation::UnknownSize, AAInfo);

        // Skip accesses to local or constant memory as they don't impact the
        // externally visible mod/ref behavior.
        if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
          continue;

        if (MRB & MRI_Mod)
          // Writes non-local memory.  Give up.
          return MAK_MayWrite;
        if (MRB & MRI_Ref)
          // Ok, it reads non-local memory.
          ReadsMemory = true;
      }
      continue;
    } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
      // Ignore non-volatile loads from local memory. (Atomic is okay here.)
      if (!LI->isVolatile()) {
        MemoryLocation Loc = MemoryLocation::get(LI);
        if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
          continue;
      }
    } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
      // Ignore non-volatile stores to local memory. (Atomic is okay here.)
      if (!SI->isVolatile()) {
        MemoryLocation Loc = MemoryLocation::get(SI);
        if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
          continue;
      }
    } else if (VAArgInst *VI = dyn_cast<VAArgInst>(I)) {
      // Ignore vaargs on local memory.
      MemoryLocation Loc = MemoryLocation::get(VI);
      if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
        continue;
    }

    // Any remaining instructions need to be taken seriously!  Check if they
    // read or write memory.
    if (I->mayWriteToMemory())
      // Writes memory.  Just give up.
      return MAK_MayWrite;

    // If this instruction may read memory, remember that.
    ReadsMemory |= I->mayReadFromMemory();
  }

  return ReadsMemory ? MAK_ReadOnly : MAK_ReadNone;
}
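// An editorial sketch (hypothetical names, not the real AAResults API) of the
// mod/ref lattice walked above: a function starts as "read none", any
// non-ignorable read demotes it to "read only", and any possible write gives
// up immediately with "may write".
#include <cassert>

enum MemoryAccessKindSketch { ReadNone, ReadOnly, MayWrite };

struct InstEffect { bool Reads, Writes; };

static MemoryAccessKindSketch summarize(const InstEffect *Insts, unsigned N) {
  bool ReadsMemory = false;
  for (unsigned i = 0; i != N; ++i) {
    if (Insts[i].Writes)
      return MayWrite;             // Writes memory. Just give up.
    ReadsMemory |= Insts[i].Reads; // Remember any read.
  }
  return ReadsMemory ? ReadOnly : ReadNone;
}

int main() {
  InstEffect Pure[]  = {{false, false}};
  InstEffect Load[]  = {{true, false}};
  InstEffect Store[] = {{true, false}, {false, true}};
  assert(summarize(Pure, 1) == ReadNone);
  assert(summarize(Load, 1) == ReadOnly);
  assert(summarize(Store, 2) == MayWrite);
  return 0;
}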