/// Runs bit-tracking dead code elimination over \p F, driven by the
/// demanded-bits results in \p DB.
///
/// Integer-typed instructions for which no bit is demanded get their
/// non-metadata uses replaced with zero. Instructions that \p DB reports as
/// dead are detached from their operands during the scan and erased once the
/// scan is complete.
///
/// \returns true if anything in \p F was changed.
static bool bitTrackingDCE(Function &F, DemandedBits &DB) {
  bool MadeChange = false;
  SmallVector<Instruction*, 128> DeadInsts;

  for (Instruction &Inst : instructions(F)) {
    // An instruction with side effects and no uses is skipped outright. This
    // way we avoid computing known bits on an instruction that will not help
    // us.
    if (Inst.use_empty() && Inst.mayHaveSideEffects())
      continue;

    const bool AllBitsDead = Inst.getType()->isIntegerTy() &&
                             !DB.getDemandedBits(&Inst).getBoolValue();
    if (AllBitsDead) {
      // For live instructions that have all dead bits, first make them dead
      // by replacing all uses with something else. Then, if they don't need
      // to remain live (because they have side effects, etc.) we can remove
      // them.
      DEBUG(dbgs() << "BDCE: Trivializing: " << Inst << " (all bits dead)\n");
      // FIXME: In theory we could substitute undef here instead of zero.
      // This should be reconsidered once we settle on the semantics of
      // undef, poison, etc.
      ++NumSimplified;
      Inst.replaceNonMetadataUsesWith(ConstantInt::get(Inst.getType(), 0));
      MadeChange = true;
    }

    if (DB.isInstructionDead(&Inst)) {
      // Only drop the operand references here; the instruction itself is
      // erased after the scan, so the instruction range being iterated is
      // left intact.
      DeadInsts.push_back(&Inst);
      Inst.dropAllReferences();
      MadeChange = true;
    }
  }

  for (Instruction *Dead : DeadInsts) {
    ++NumRemoved;
    Dead->eraseFromParent();
  }

  return MadeChange;
}
/// Computes, for instructions in \p Blocks, the smallest power-of-two bit
/// width they could be evaluated in, based on the demanded bits in \p DB.
///
/// The walk starts from trunc and icmp instructions ("roots") that have a
/// non-vector result type and a first operand of at most 64 bits, and then
/// follows operands upwards. Every value reached along the way is unioned
/// into the equivalence class of the value it was reached from, so that a
/// whole connected chain ends up with one common width.
///
/// \param Blocks The basic blocks whose instructions are considered.
/// \param DB     Demanded-bits analysis, queried for every visited instruction.
/// \param TTI    Optional. When non-null, truncs producing a legal type are
///               not used as roots, and nothing is reported unless a zext or
///               sext from an illegal type was seen in \p Blocks.
///
/// \returns A map from instruction to its minimum bit width. It only contains
/// instructions whose minimum width is smaller than the scalar width of their
/// type (for roots: of the type of their first operand). The map is empty if
/// no roots were found, if \p TTI is given and no extension from an illegal
/// type was seen, or if any visited instruction has demanded bits wider than
/// 64 bits.
MapVector<Instruction *, uint64_t>
llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
                               const TargetTransformInfo *TTI) {
  // DemandedBits will give us every value's live-out bits. But we want
  // to ensure no extra casts would need to be inserted, so every DAG
  // of connected values must have the same minimum bitwidth.
  EquivalenceClasses<Value *> ECs;
  SmallVector<Value *, 16> Worklist;
  SmallPtrSet<Value *, 4> Roots;
  SmallPtrSet<Value *, 16> Visited;
  DenseMap<Value *, uint64_t> DBits;
  SmallPtrSet<Instruction *, 4> InstructionSet;
  MapVector<Instruction *, uint64_t> MinBWs;

  // Determine the roots. We work bottom-up, from truncs or icmps.
  bool SeenExtFromIllegalType = false;
  for (auto *BB : Blocks)
    for (auto &I : *BB) {
      InstructionSet.insert(&I);

      if (TTI && (isa<ZExtInst>(&I) || isa<SExtInst>(&I)) &&
          !TTI->isTypeLegal(I.getOperand(0)->getType()))
        SeenExtFromIllegalType = true;

      // Only deal with non-vector integers up to 64-bits wide.
      if ((isa<TruncInst>(&I) || isa<ICmpInst>(&I)) &&
          !I.getType()->isVectorTy() &&
          I.getOperand(0)->getType()->getScalarSizeInBits() <= 64) {
        // Don't make work for ourselves. If the trunc already produces a
        // legal type, don't add it to the worklist.
        if (TTI && isa<TruncInst>(&I) && TTI->isTypeLegal(I.getType()))
          continue;

        Worklist.push_back(&I);
        Roots.insert(&I);
      }
    }

  // Early exit.
  if (Worklist.empty() || (TTI && !SeenExtFromIllegalType))
    return MinBWs;

  // Now drain the worklist (last in, first out), unioning values together.
  while (!Worklist.empty()) {
    Value *Val = Worklist.pop_back_val();
    // Register the value with the equivalence classes before the visited
    // check, exactly as the insertion order below depends on it.
    Value *Leader = ECs.getOrInsertLeaderValue(Val);

    // insert() reports whether the value was newly added, so one lookup
    // both tests and records the visit.
    if (!Visited.insert(Val).second)
      continue;

    // Non-instructions terminate a chain successfully.
    auto *I = dyn_cast<Instruction>(Val);
    if (!I)
      continue;

    // Query the analysis once and reuse the result for both the width check
    // and the value extraction.
    const auto Demanded = DB.getDemandedBits(I);

    // If we encounter a type that is larger than 64 bits, we can't represent
    // it so bail out.
    if (Demanded.getBitWidth() > 64)
      return MapVector<Instruction *, uint64_t>();

    uint64_t V = Demanded.getZExtValue();
    DBits[Leader] |= V;
    DBits[I] = V;

    // Casts, loads and instructions outside of our range terminate a chain
    // successfully.
    if (isa<SExtInst>(I) || isa<ZExtInst>(I) || isa<LoadInst>(I) ||
        !InstructionSet.count(I))
      continue;

    // Unsafe casts terminate a chain unsuccessfully. We can't do anything
    // useful with bitcasts, ptrtoints or inttoptrs and it'd be unsafe to
    // transform anything that relies on them.
    if (isa<BitCastInst>(I) || isa<PtrToIntInst>(I) || isa<IntToPtrInst>(I) ||
        !I->getType()->isIntegerTy()) {
      DBits[Leader] |= ~0ULL;
      continue;
    }

    // We don't modify the types of PHIs. Reductions will already have been
    // truncated if possible, and inductions' sizes will have been chosen by
    // indvars.
    if (isa<PHINode>(I))
      continue;

    if (DBits[Leader] == ~0ULL)
      // All bits demanded, no point continuing.
      continue;

    for (Value *O : cast<User>(I)->operands()) {
      ECs.unionSets(Leader, O);
      Worklist.push_back(O);
    }
  }

  // Now we've discovered all values, walk them to see if there are
  // any users we didn't see. If there are, we can't optimize that
  // chain.
  // NOTE(review): this writes to DBits while iterating over it. That is only
  // safe if the leader already has an entry, so that operator[] never inserts
  // -- presumably guaranteed by the walk above; confirm.
  for (auto &I : DBits)
    for (auto *U : I.first->users())
      if (U->getType()->isIntegerTy() && DBits.count(U) == 0)
        DBits[ECs.getOrInsertLeaderValue(I.first)] |= ~0ULL;

  for (auto I = ECs.begin(), E = ECs.end(); I != E; ++I) {
    // Union of the demanded bits of every member of this class.
    uint64_t LeaderDemandedBits = 0;
    for (auto MI = ECs.member_begin(I), ME = ECs.member_end(); MI != ME; ++MI)
      LeaderDemandedBits |= DBits[*MI];

    // Number of bits needed to hold the highest demanded bit.
    uint64_t MinBW = (sizeof(LeaderDemandedBits) * 8) -
                     llvm::countLeadingZeros(LeaderDemandedBits);
    // Round up to a power of 2
    if (!isPowerOf2_64(MinBW))
      MinBW = NextPowerOf2(MinBW);

    // We don't modify the types of PHIs. Reductions will already have been
    // truncated if possible, and inductions' sizes will have been chosen by
    // indvars.
    // If we are required to shrink a PHI, abandon this entire equivalence
    // class.
    bool Abort = false;
    for (auto MI = ECs.member_begin(I), ME = ECs.member_end(); MI != ME; ++MI)
      if (isa<PHINode>(*MI) &&
          MinBW < (*MI)->getType()->getScalarSizeInBits()) {
        Abort = true;
        break;
      }
    if (Abort)
      continue;

    for (auto MI = ECs.member_begin(I), ME = ECs.member_end(); MI != ME; ++MI) {
      auto *Member = dyn_cast<Instruction>(*MI);
      if (!Member)
        continue;

      // For a root, the width is compared against the type of its first
      // operand rather than against its own result type.
      Type *Ty = (*MI)->getType();
      if (Roots.count(*MI))
        Ty = Member->getOperand(0)->getType();

      if (MinBW < Ty->getScalarSizeInBits())
        MinBWs[Member] = MinBW;
    }
  }

  return MinBWs;
}