Example 1
bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
  if (DisablePeephole)
    return false;

  TM  = &MF.getTarget();
  TII = TM->getInstrInfo();
  MRI = &MF.getRegInfo();
  DT  = Aggressive ? &getAnalysis<MachineDominatorTree>() : 0;

  bool Changed = false;

  SmallPtrSet<MachineInstr*, 8> LocalMIs;
  SmallSet<unsigned, 4> ImmDefRegs;
  DenseMap<unsigned, MachineInstr*> ImmDefMIs;
  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
    MachineBasicBlock *MBB = &*I;

    bool SeenMoveImm = false;
    LocalMIs.clear();
    ImmDefRegs.clear();
    ImmDefMIs.clear();

    bool First = true;
    MachineBasicBlock::iterator PMII;
    for (MachineBasicBlock::iterator
           MII = I->begin(), MIE = I->end(); MII != MIE; ) {
      MachineInstr *MI = &*MII;
      LocalMIs.insert(MI);

      if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() ||
          MI->isKill() || MI->isInlineAsm() || MI->isDebugValue() ||
          MI->hasUnmodeledSideEffects()) {
        ++MII;
        continue;
      }

      if (MI->isBitcast()) {
        if (optimizeBitcastInstr(MI, MBB)) {
          // MI is deleted.
          LocalMIs.erase(MI);
          Changed = true;
          MII = First ? I->begin() : llvm::next(PMII);
          continue;
        }
      } else if (MI->isCompare()) {
        if (optimizeCmpInstr(MI, MBB)) {
          // MI is deleted.
          LocalMIs.erase(MI);
          Changed = true;
          MII = First ? I->begin() : llvm::next(PMII);
          continue;
        }
      }

      if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) {
        SeenMoveImm = true;
      } else {
        Changed |= optimizeExtInstr(MI, MBB, LocalMIs);
        if (SeenMoveImm)
          Changed |= foldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs);
      }

      First = false;
      PMII = MII;
      ++MII;
    }
  }

  return Changed;
}
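
The subtle part of this driver is deletion-safe iteration: PMII remembers the previously visited instruction, so when an optimization deletes MI the scan resumes just past the last survivor (or at the block start when nothing precedes it). A minimal sketch of the same idiom, with std::list<int> standing in for the instruction list and a hypothetical shouldDelete predicate in place of a successful peephole optimization:

#include <iostream>
#include <iterator>
#include <list>

static bool shouldDelete(int v) { return v % 2 == 0; } // hypothetical

int main() {
  std::list<int> insts = {1, 2, 3, 4, 5};
  bool First = true;
  std::list<int>::iterator PrevI;
  for (auto It = insts.begin(); It != insts.end();) {
    if (shouldDelete(*It)) {
      insts.erase(It);
      // Resume just past the last survivor, mirroring
      // MII = First ? I->begin() : llvm::next(PMII);
      It = First ? insts.begin() : std::next(PrevI);
      continue;
    }
    First = false;
    PrevI = It;
    ++It;
  }
  for (int v : insts)
    std::cout << v << ' '; // prints: 1 3 5
  std::cout << '\n';
}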
Example 2
Value *PropagateJuliaAddrspaces::LiftPointer(Value *V, Type *LocTy, Instruction *InsertPt) {
    SmallVector<Value *, 4> Stack;
    Value *CurrentV = V;
    // Follow pointer casts back, see if we're based on a pointer in
    // an untracked address space, in which case we're allowed to drop
    // intermediate addrspace casts.
    while (true) {
        Stack.push_back(CurrentV);
        if (isa<BitCastInst>(CurrentV))
            CurrentV = cast<BitCastInst>(CurrentV)->getOperand(0);
        else if (isa<AddrSpaceCastInst>(CurrentV)) {
            CurrentV = cast<AddrSpaceCastInst>(CurrentV)->getOperand(0);
            if (!isSpecialAS(getValueAddrSpace(CurrentV)))
                break;
        }
        else if (isa<GetElementPtrInst>(CurrentV)) {
            if (LiftingMap.count(CurrentV)) {
                CurrentV = LiftingMap[CurrentV];
                break;
            } else if (Visited.count(CurrentV)) {
                return nullptr;
            }
            Visited.insert(CurrentV);
            CurrentV = cast<GetElementPtrInst>(CurrentV)->getOperand(0);
        } else
            break;
    }
    if (!CurrentV->getType()->isPointerTy())
        return nullptr;
    if (isSpecialAS(getValueAddrSpace(CurrentV)))
        return nullptr;
    // Ok, we're allowed to change the address space of this load, go back and
    // reconstitute any GEPs in the new address space.
    for (Value *V : llvm::reverse(Stack)) {
        GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V);
        if (!GEP)
            continue;
        if (LiftingMap.count(GEP)) {
            CurrentV = LiftingMap[GEP];
            continue;
        }
        GetElementPtrInst *NewGEP = cast<GetElementPtrInst>(GEP->clone());
        ToInsert.push_back(std::make_pair(NewGEP, GEP));
        Type *GEPTy = GEP->getSourceElementType();
        Type *NewRetTy = cast<PointerType>(GEP->getType())->getElementType()->getPointerTo(getValueAddrSpace(CurrentV));
        NewGEP->mutateType(NewRetTy);
        if (cast<PointerType>(CurrentV->getType())->getElementType() != GEPTy) {
            auto *BCI = new BitCastInst(CurrentV, GEPTy->getPointerTo());
            ToInsert.push_back(std::make_pair(BCI, NewGEP));
            CurrentV = BCI;
        }
        NewGEP->setOperand(GetElementPtrInst::getPointerOperandIndex(), CurrentV);
        LiftingMap[GEP] = NewGEP;
        CurrentV = NewGEP;
    }
    if (LocTy && cast<PointerType>(CurrentV->getType())->getElementType() != LocTy) {
        auto *BCI = new BitCastInst(CurrentV, LocTy->getPointerTo());
        ToInsert.push_back(std::make_pair(BCI, InsertPt));
        CurrentV = BCI;
    }
    return CurrentV;
}
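
LiftPointer is a two-phase walk: first peel casts and GEPs down to the underlying pointer while recording each step on a stack, then replay the recorded GEPs on top of the new base via llvm::reverse(Stack). A minimal sketch of that peel-then-rebuild pattern, using a toy Node chain rather than Julia's actual IR types:

#include <iostream>
#include <string>
#include <vector>

struct Node {
  std::string Kind; // "cast", "gep", or "base"
  const Node *Src = nullptr;
};

int main() {
  Node Base{"base"}, Gep{"gep", &Base}, Cast{"cast", &Gep};
  // Phase 1: follow the chain down to the base, remembering each step.
  std::vector<const Node *> Stack;
  const Node *Cur = &Cast;
  while (Cur->Src) {
    Stack.push_back(Cur);
    Cur = Cur->Src;
  }
  // Phase 2: walk the stack in reverse and replay only the GEP steps on
  // a replacement base, as the llvm::reverse(Stack) loop above does.
  std::string Rebuilt = "newbase";
  for (auto It = Stack.rbegin(); It != Stack.rend(); ++It)
    if ((*It)->Kind == "gep")
      Rebuilt = "gep(" + Rebuilt + ")";
  std::cout << Rebuilt << '\n'; // prints: gep(newbase)
}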
Example 3
void StackColoring::calculateLocalLiveness() {
  // Perform a standard reverse dataflow computation to solve for
  // global liveness.  The BEGIN set here is equivalent to KILL in the standard
  // formulation, and END is equivalent to GEN.  The result of this computation
  // is a map from blocks to bitvectors where the bitvectors represent which
  // allocas are live in/out of that block.
  SmallPtrSet<const MachineBasicBlock*, 8> BBSet(BasicBlockNumbering.begin(),
                                                 BasicBlockNumbering.end());
  unsigned NumSSMIters = 0;
  bool changed = true;
  while (changed) {
    changed = false;
    ++NumSSMIters;

    SmallPtrSet<const MachineBasicBlock*, 8> NextBBSet;

    for (const MachineBasicBlock *BB : BasicBlockNumbering) {
      if (!BBSet.count(BB)) continue;

      // Use an iterator to avoid repeated lookups.
      LivenessMap::iterator BI = BlockLiveness.find(BB);
      assert(BI != BlockLiveness.end() && "Block not found");
      BlockLifetimeInfo &BlockInfo = BI->second;

      BitVector LocalLiveIn;
      BitVector LocalLiveOut;

      // Forward propagation from begins to ends.
      for (MachineBasicBlock::const_pred_iterator PI = BB->pred_begin(),
           PE = BB->pred_end(); PI != PE; ++PI) {
        LivenessMap::const_iterator I = BlockLiveness.find(*PI);
        assert(I != BlockLiveness.end() && "Predecessor not found");
        LocalLiveIn |= I->second.LiveOut;
      }
      LocalLiveIn |= BlockInfo.End;
      LocalLiveIn.reset(BlockInfo.Begin);

      // Reverse propagation from ends to begins.
      for (MachineBasicBlock::const_succ_iterator SI = BB->succ_begin(),
           SE = BB->succ_end(); SI != SE; ++SI) {
        LivenessMap::const_iterator I = BlockLiveness.find(*SI);
        assert(I != BlockLiveness.end() && "Successor not found");
        LocalLiveOut |= I->second.LiveIn;
      }
      LocalLiveOut |= BlockInfo.Begin;
      LocalLiveOut.reset(BlockInfo.End);

      LocalLiveIn |= LocalLiveOut;
      LocalLiveOut |= LocalLiveIn;

      // After adopting the live bits, we need to turn off the bits that are
      // deactivated in this block.
      LocalLiveOut.reset(BlockInfo.End);
      LocalLiveIn.reset(BlockInfo.Begin);

      // If we have both BEGIN and END markers in the same basic block then
      // we know that the BEGIN marker comes after the END, because we already
      // handle the case where the BEGIN comes before the END when collecting
      // the markers (and building the BEGIN/END vectors).
      // We want to enable the LIVE_IN and LIVE_OUT of slots that have both
      // BEGIN and END because it means that the value lives before and after
      // this basic block.
      BitVector LocalEndBegin = BlockInfo.End;
      LocalEndBegin &= BlockInfo.Begin;
      LocalLiveIn |= LocalEndBegin;
      LocalLiveOut |= LocalEndBegin;

      if (LocalLiveIn.test(BlockInfo.LiveIn)) {
        changed = true;
        BlockInfo.LiveIn |= LocalLiveIn;

        NextBBSet.insert(BB->pred_begin(), BB->pred_end());
      }

      if (LocalLiveOut.test(BlockInfo.LiveOut)) {
        changed = true;
        BlockInfo.LiveOut |= LocalLiveOut;

        NextBBSet.insert(BB->succ_begin(), BB->succ_end());
      }
    }

    BBSet = std::move(NextBBSet);
  }// while changed.
}
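
The loop above is a classic iterate-to-a-fixed-point bitvector dataflow. A simplified sketch on a hypothetical two-block loop B0 <-> B1, with Begin playing KILL and End playing GEN as the opening comment describes (it omits the BEGIN-and-END-in-one-block special case that the pass handles explicitly):

#include <bitset>
#include <iostream>
#include <vector>

struct Block {
  std::bitset<4> Begin, End, LiveIn, LiveOut;
  std::vector<int> Preds, Succs;
};

int main() {
  std::vector<Block> BB(2);
  BB[0].Preds = {1}; BB[0].Succs = {1};
  BB[1].Preds = {0}; BB[1].Succs = {0};
  BB[0].End.set(0);   // slot 0's lifetime ends in B0 (GEN)
  BB[1].Begin.set(0); // slot 0's lifetime begins in B1 (KILL)
  bool Changed = true;
  while (Changed) {
    Changed = false;
    for (Block &B : BB) {
      std::bitset<4> In, Out;
      for (int P : B.Preds)
        In |= BB[P].LiveOut;
      In |= B.End;
      In &= ~B.Begin;
      for (int S : B.Succs)
        Out |= BB[S].LiveIn;
      Out |= B.Begin;
      Out &= ~B.End;
      if ((In & ~B.LiveIn).any())   { B.LiveIn |= In;   Changed = true; }
      if ((Out & ~B.LiveOut).any()) { B.LiveOut |= Out; Changed = true; }
    }
  }
  // Slot 0 flows around the back edge: out of B1 and into B0.
  std::cout << BB[0].LiveIn[0] << BB[1].LiveOut[0] << '\n'; // prints: 11
}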
Example 4
/// At this point, we're committed to promoting the alloca using IDF's, and the
/// standard SSA construction algorithm.  Determine which blocks need phi nodes
/// and see if we can optimize out some work by avoiding insertion of dead phi
/// nodes.
void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
                                             AllocaInfo &Info) {
  // Unique the set of defining blocks for efficient lookup.
  SmallPtrSet<BasicBlock *, 32> DefBlocks;
  DefBlocks.insert(Info.DefiningBlocks.begin(), Info.DefiningBlocks.end());

  // Determine which blocks the value is live in.  These are blocks which lead
  // to uses.
  SmallPtrSet<BasicBlock *, 32> LiveInBlocks;
  ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks);

  // Use a priority queue keyed on dominator tree level so that inserted nodes
  // are handled from the bottom of the dominator tree upwards.
  typedef std::pair<DomTreeNode *, unsigned> DomTreeNodePair;
  typedef std::priority_queue<DomTreeNodePair, SmallVector<DomTreeNodePair, 32>,
                              less_second> IDFPriorityQueue;
  IDFPriorityQueue PQ;

  for (SmallPtrSet<BasicBlock *, 32>::const_iterator I = DefBlocks.begin(),
                                                     E = DefBlocks.end();
       I != E; ++I) {
    if (DomTreeNode *Node = DT.getNode(*I))
      PQ.push(std::make_pair(Node, DomLevels[Node]));
  }

  SmallVector<std::pair<unsigned, BasicBlock *>, 32> DFBlocks;
  SmallPtrSet<DomTreeNode *, 32> Visited;
  SmallVector<DomTreeNode *, 32> Worklist;
  while (!PQ.empty()) {
    DomTreeNodePair RootPair = PQ.top();
    PQ.pop();
    DomTreeNode *Root = RootPair.first;
    unsigned RootLevel = RootPair.second;

    // Walk all dominator tree children of Root, inspecting their CFG edges with
    // targets elsewhere on the dominator tree. Only targets whose level is at
    // most Root's level are added to the iterated dominance frontier of the
    // definition set.

    Worklist.clear();
    Worklist.push_back(Root);

    while (!Worklist.empty()) {
      DomTreeNode *Node = Worklist.pop_back_val();
      BasicBlock *BB = Node->getBlock();

      for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE;
           ++SI) {
        DomTreeNode *SuccNode = DT.getNode(*SI);

        // Quickly skip all CFG edges that are also dominator tree edges instead
        // of catching them below.
        if (SuccNode->getIDom() == Node)
          continue;

        unsigned SuccLevel = DomLevels[SuccNode];
        if (SuccLevel > RootLevel)
          continue;

        if (!Visited.insert(SuccNode))
          continue;

        BasicBlock *SuccBB = SuccNode->getBlock();
        if (!LiveInBlocks.count(SuccBB))
          continue;

        DFBlocks.push_back(std::make_pair(BBNumbers[SuccBB], SuccBB));
        if (!DefBlocks.count(SuccBB))
          PQ.push(std::make_pair(SuccNode, SuccLevel));
      }

      for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end(); CI != CE;
           ++CI) {
        if (!Visited.count(*CI))
          Worklist.push_back(*CI);
      }
    }
  }

  if (DFBlocks.size() > 1)
    std::sort(DFBlocks.begin(), DFBlocks.end());

  unsigned CurrentVersion = 0;
  for (unsigned i = 0, e = DFBlocks.size(); i != e; ++i)
    QueuePhiNode(DFBlocks[i].second, AllocaNum, CurrentVersion);
}
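
The correctness of the walk hinges on the priority queue popping the deepest dominator-tree nodes first. A minimal sketch of that ordering; the less_second comparator here is a local stand-in mirroring the one the code above uses:

#include <iostream>
#include <queue>
#include <string>
#include <utility>
#include <vector>

// Stand-in comparator: order pairs by their second member (the level).
struct less_second {
  template <typename T>
  bool operator()(const T &L, const T &R) const {
    return L.second < R.second;
  }
};

int main() {
  using NodeLevel = std::pair<std::string, unsigned>;
  std::priority_queue<NodeLevel, std::vector<NodeLevel>, less_second> PQ;
  PQ.push({"entry", 0});
  PQ.push({"loop.body", 2});
  PQ.push({"loop.header", 1});
  while (!PQ.empty()) {
    std::cout << PQ.top().first << " (level " << PQ.top().second << ")\n";
    PQ.pop();
  }
  // Prints loop.body, loop.header, entry: deepest dominator level first,
  // i.e. nodes are handled from the bottom of the dominator tree upwards.
}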
Example 5
/// optimizeExtInstr - If instruction is a copy-like instruction, i.e. it reads
/// a single register and writes a single register and it does not modify the
/// source, and if the source value is preserved as a sub-register of the
/// result, then replace all reachable uses of the source with the subreg of the
/// result.
///
/// Do not generate an EXTRACT that is used only in a debug use, as this changes
/// the code. Since this code does not currently share EXTRACTs, just ignore all
/// debug uses.
bool PeepholeOptimizer::
optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
                 SmallPtrSet<MachineInstr*, 8> &LocalMIs) {
  unsigned SrcReg, DstReg, SubIdx;
  if (!TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx))
    return false;

  if (TargetRegisterInfo::isPhysicalRegister(DstReg) ||
      TargetRegisterInfo::isPhysicalRegister(SrcReg))
    return false;

  if (MRI->hasOneNonDBGUse(SrcReg))
    // No other uses.
    return false;

  // Ensure DstReg can get a register class that actually supports
  // sub-registers. Don't change the class until we commit.
  const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg);
  DstRC = TM->getRegisterInfo()->getSubClassWithSubReg(DstRC, SubIdx);
  if (!DstRC)
    return false;

  // The ext instr may be operating on a sub-register of SrcReg as well.
  // PPC::EXTSW is a 32 -> 64-bit sign extension, but it reads a 64-bit
  // register.
  // If UseSrcSubIdx is Set, SubIdx also applies to SrcReg, and only uses of
  // SrcReg:SubIdx should be replaced.
  bool UseSrcSubIdx = TM->getRegisterInfo()->
    getSubClassWithSubReg(MRI->getRegClass(SrcReg), SubIdx) != 0;

  // The source has other uses. See if we can replace the other uses with use of
  // the result of the extension.
  SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs;
  for (MachineRegisterInfo::use_nodbg_iterator
       UI = MRI->use_nodbg_begin(DstReg), UE = MRI->use_nodbg_end();
       UI != UE; ++UI)
    ReachedBBs.insert(UI->getParent());

  // Uses that are in the same BB as uses of the result of the instruction.
  SmallVector<MachineOperand*, 8> Uses;

  // Uses that the result of the instruction can reach.
  SmallVector<MachineOperand*, 8> ExtendedUses;

  bool ExtendLife = true;
  for (MachineRegisterInfo::use_nodbg_iterator
       UI = MRI->use_nodbg_begin(SrcReg), UE = MRI->use_nodbg_end();
       UI != UE; ++UI) {
    MachineOperand &UseMO = UI.getOperand();
    MachineInstr *UseMI = &*UI;
    if (UseMI == MI)
      continue;

    if (UseMI->isPHI()) {
      ExtendLife = false;
      continue;
    }

    // Only accept uses of SrcReg:SubIdx.
    if (UseSrcSubIdx && UseMO.getSubReg() != SubIdx)
      continue;

    // It's an error to translate this:
    //
    //    %reg1025 = <sext> %reg1024
    //     ...
    //    %reg1026 = SUBREG_TO_REG 0, %reg1024, 4
    //
    // into this:
    //
    //    %reg1025 = <sext> %reg1024
    //     ...
    //    %reg1027 = COPY %reg1025:4
    //    %reg1026 = SUBREG_TO_REG 0, %reg1027, 4
    //
    // The problem here is that SUBREG_TO_REG is there to assert that an
    // implicit zext occurs. It doesn't insert a zext instruction. If we allow
    // the COPY here, it will give us the value after the <sext>, not the
    // original value of %reg1024 before <sext>.
    if (UseMI->getOpcode() == TargetOpcode::SUBREG_TO_REG)
      continue;

    MachineBasicBlock *UseMBB = UseMI->getParent();
    if (UseMBB == MBB) {
      // Local uses that come after the extension.
      if (!LocalMIs.count(UseMI))
        Uses.push_back(&UseMO);
    } else if (ReachedBBs.count(UseMBB)) {
      // Non-local uses where the result of the extension is used. Always
      // replace these unless it's a PHI.
      Uses.push_back(&UseMO);
    } else if (Aggressive && DT->dominates(MBB, UseMBB)) {
      // We may want to extend the live range of the extension result in order
      // to replace these uses.
      ExtendedUses.push_back(&UseMO);
    } else {
      // Both will be live out of the def MBB anyway. Don't extend live range of
      // the extension result.
      ExtendLife = false;
      break;
    }
  }

  if (ExtendLife && !ExtendedUses.empty())
    // Extend the liveness of the extension result.
    std::copy(ExtendedUses.begin(), ExtendedUses.end(),
              std::back_inserter(Uses));

  // Now replace all uses.
  bool Changed = false;
  if (!Uses.empty()) {
    SmallPtrSet<MachineBasicBlock*, 4> PHIBBs;

    // Look for PHI uses of the extended result; we don't want to extend the
    // liveness of a PHI input. It breaks all kinds of assumptions downstream.
    // A PHI use is expected to be the kill of its source values.
    for (MachineRegisterInfo::use_nodbg_iterator
         UI = MRI->use_nodbg_begin(DstReg), UE = MRI->use_nodbg_end();
         UI != UE; ++UI)
      if (UI->isPHI())
        PHIBBs.insert(UI->getParent());

    const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
    for (unsigned i = 0, e = Uses.size(); i != e; ++i) {
      MachineOperand *UseMO = Uses[i];
      MachineInstr *UseMI = UseMO->getParent();
      MachineBasicBlock *UseMBB = UseMI->getParent();
      if (PHIBBs.count(UseMBB))
        continue;

      // About to add uses of DstReg, clear DstReg's kill flags.
      if (!Changed) {
        MRI->clearKillFlags(DstReg);
        MRI->constrainRegClass(DstReg, DstRC);
      }

      unsigned NewVR = MRI->createVirtualRegister(RC);
      MachineInstr *Copy = BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(),
                                   TII->get(TargetOpcode::COPY), NewVR)
        .addReg(DstReg, 0, SubIdx);
      // SubIdx applies to both SrcReg and DstReg when UseSrcSubIdx is set.
      if (UseSrcSubIdx) {
        Copy->getOperand(0).setSubReg(SubIdx);
        Copy->getOperand(0).setIsUndef();
      }
      UseMO->setReg(NewVR);
      ++NumReuse;
      Changed = true;
    }
  }

  return Changed;
}
Example 6
/// InlineCallIfPossible - If it is possible to inline the specified call site,
/// do so and update the CallGraph for this operation.
///
/// This function also does some basic book-keeping to update the IR.  The
/// InlinedArrayAllocas map keeps track of any allocas that are already
/// available from other functions inlined into the caller.  If we are able to
/// inline this call site we attempt to reuse already available allocas or add
/// any new allocas to the set if not possible.
static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI,
                                 InlinedArrayAllocasTy &InlinedArrayAllocas,
                                 int InlineHistory) {
  Function *Callee = CS.getCalledFunction();
  Function *Caller = CS.getCaller();

  // Try to inline the function.  Get the list of static allocas that were
  // inlined.
  if (!InlineFunction(CS, IFI))
    return false;

  // If the inlined function had a higher stack protection level than the
  // calling function, then bump up the caller's stack protection level.
  if (Callee->hasFnAttr(Attribute::StackProtectReq))
    Caller->addFnAttr(Attribute::StackProtectReq);
  else if (Callee->hasFnAttr(Attribute::StackProtect) &&
           !Caller->hasFnAttr(Attribute::StackProtectReq))
    Caller->addFnAttr(Attribute::StackProtect);

  // Look at all of the allocas that we inlined through this call site.  If we
  // have already inlined other allocas through other calls into this function,
  // then we know that they have disjoint lifetimes and that we can merge them.
  //
  // There are many heuristics possible for merging these allocas, and the
  // different options have different tradeoffs.  One thing that we *really*
  // don't want to hurt is SRoA: once inlining happens, often allocas are no
  // longer address taken and so they can be promoted.
  //
  // Our "solution" for that is to only merge allocas whose outermost type is an
  // array type.  These are usually not promoted because someone is using a
  // variable index into them.  These are also often the most important ones to
  // merge.
  //
  // A better solution would be to have real memory lifetime markers in the IR
  // and not have the inliner do any merging of allocas at all.  This would
  // allow the backend to do proper stack slot coloring of all allocas that
  // *actually make it to the backend*, which is really what we want.
  //
  // Because we don't have this information, we do this simple and useful hack.
  //
  SmallPtrSet<AllocaInst*, 16> UsedAllocas;
  
  // When processing our SCC, check to see if CS was inlined from some other
  // call site.  For example, if we're processing "A" in this code:
  //   A() { B() }
  //   B() { x = alloca ... C() }
  //   C() { y = alloca ... }
  // Assume that C was not inlined into B initially, and so we're processing A
  // and decide to inline B into A.  Doing this makes an alloca available for
  // reuse and makes a callsite (C) available for inlining.  When we process
  // the C call site we don't want to do any alloca merging between X and Y
  // because their scopes are not disjoint.  We could make this smarter by
  // keeping track of the inline history for each alloca in the
  // InlinedArrayAllocas but this isn't likely to be a significant win.
  if (InlineHistory != -1)  // Only do merging for top-level call sites in SCC.
    return true;
  
  // Loop over all the allocas we have so far and see if they can be merged with
  // a previously inlined alloca.  If not, remember that we had it.
  for (unsigned AllocaNo = 0, e = IFI.StaticAllocas.size();
       AllocaNo != e; ++AllocaNo) {
    AllocaInst *AI = IFI.StaticAllocas[AllocaNo];
    
    // Don't bother trying to merge array allocations (they will usually be
    // canonicalized to be an allocation *of* an array), or allocations whose
    // type is not itself an array (because we're afraid of pessimizing SRoA).
    const ArrayType *ATy = dyn_cast<ArrayType>(AI->getAllocatedType());
    if (ATy == 0 || AI->isArrayAllocation())
      continue;
    
    // Get the list of all available allocas for this array type.
    std::vector<AllocaInst*> &AllocasForType = InlinedArrayAllocas[ATy];
    
    // Loop over the allocas in AllocasForType to see if we can reuse one.  Note
    // that we have to be careful not to reuse the same "available" alloca for
    // multiple different allocas that we just inlined; we use the 'UsedAllocas'
    // set to keep track of which "available" allocas are being used by this
    // function.  Also, AllocasForType can be empty of course!
    bool MergedAwayAlloca = false;
    for (unsigned i = 0, e = AllocasForType.size(); i != e; ++i) {
      AllocaInst *AvailableAlloca = AllocasForType[i];
      
      // The available alloca has to be in the right function, not in some other
      // function in this SCC.
      if (AvailableAlloca->getParent() != AI->getParent())
        continue;
      
      // If the inlined function already uses this alloca then we can't reuse
      // it.
      if (!UsedAllocas.insert(AvailableAlloca))
        continue;
      
      // Otherwise, we *can* reuse it, RAUW AI into AvailableAlloca and declare
      // success!
      DEBUG(dbgs() << "    ***MERGED ALLOCA: " << *AI << "\n\t\tINTO: "
                   << *AvailableAlloca << '\n');
      
      AI->replaceAllUsesWith(AvailableAlloca);
      AI->eraseFromParent();
      MergedAwayAlloca = true;
      ++NumMergedAllocas;
      IFI.StaticAllocas[AllocaNo] = 0;
      break;
    }

    // If we already nuked the alloca, we're done with it.
    if (MergedAwayAlloca)
      continue;
    
    // If we were unable to merge away the alloca either because there are no
    // allocas of the right type available or because we reused them all
    // already, remember that this alloca came from an inlined function and mark
    // it used so we don't reuse it for other allocas from this inline
    // operation.
    AllocasForType.push_back(AI);
    UsedAllocas.insert(AI);
  }
  
  return true;
}
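
A minimal sketch of the merging discipline above: allocas are pooled by type, and a used set guarantees that two allocas coming from the same inline step never share a slot. String type keys and integer slot ids are stand-ins for ArrayType* and AllocaInst*:

#include <iostream>
#include <map>
#include <set>
#include <string>
#include <utility>
#include <vector>

int main() {
  std::map<std::string, std::vector<int>> Pool; // type -> available slots
  std::set<int> UsedAllocas;                    // slots taken by this inline
  Pool["[4 x i32]"] = {1};                      // from an earlier inline
  std::vector<std::pair<std::string, int>> NewAllocas = {
      {"[4 x i32]", 2}, {"[4 x i32]", 3}};      // slots from this inline
  for (const auto &[Ty, Slot] : NewAllocas) {
    bool Merged = false;
    for (int Avail : Pool[Ty]) {
      if (!UsedAllocas.insert(Avail).second)
        continue;                               // already reused once here
      std::cout << "merge slot " << Slot << " into " << Avail << '\n';
      Merged = true;
      break;
    }
    if (!Merged) {
      Pool[Ty].push_back(Slot);  // available for future inlines...
      UsedAllocas.insert(Slot);  // ...but not for this batch
    }
  }
  // Prints: merge slot 2 into 1.  Slot 3 stays, since slot 1 is now used.
}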
Example 7
/// handleEndBlock - Remove dead stores to stack-allocated locations in the
/// function end block.  Ex:
/// %A = alloca i32
/// ...
/// store i32 1, i32* %A
/// ret void
bool DSE::handleEndBlock(BasicBlock &BB) {
  bool MadeChange = false;
  
  // Keep track of all of the stack objects that are dead at the end of the
  // function.
  SmallPtrSet<Value*, 16> DeadStackObjects;
  
  // Find all of the alloca'd pointers in the entry block.
  BasicBlock *Entry = BB.getParent()->begin();
  for (BasicBlock::iterator I = Entry->begin(), E = Entry->end(); I != E; ++I)
    if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
      DeadStackObjects.insert(AI);
  
  // Treat byval arguments the same, stores to them are dead at the end of the
  // function.
  for (Function::arg_iterator AI = BB.getParent()->arg_begin(),
       AE = BB.getParent()->arg_end(); AI != AE; ++AI)
    if (AI->hasByValAttr())
      DeadStackObjects.insert(AI);
  
  // Scan the basic block backwards
  for (BasicBlock::iterator BBI = BB.end(); BBI != BB.begin(); ){
    --BBI;
    
    // If we find a store, check to see if it points into a dead stack value.
    if (hasMemoryWrite(BBI) && isRemovable(BBI)) {
      // See through pointer-to-pointer bitcasts
      Value *Pointer = GetUnderlyingObject(getStoredPointerOperand(BBI));

      // Stores to stack values are valid candidates for removal.
      if (DeadStackObjects.count(Pointer)) {
        Instruction *Dead = BBI++;
        
        DEBUG(dbgs() << "DSE: Dead Store at End of Block:\n  DEAD: "
                     << *Dead << "\n  Object: " << *Pointer << '\n');
        
        // DCE instructions only used to calculate that store.
        DeleteDeadInstruction(Dead, *MD, &DeadStackObjects);
        ++NumFastStores;
        MadeChange = true;
        continue;
      }
    }
    
    // Remove any dead non-memory-mutating instructions.
    if (isInstructionTriviallyDead(BBI)) {
      Instruction *Inst = BBI++;
      DeleteDeadInstruction(Inst, *MD, &DeadStackObjects);
      ++NumFastOther;
      MadeChange = true;
      continue;
    }
    
    if (AllocaInst *A = dyn_cast<AllocaInst>(BBI)) {
      DeadStackObjects.erase(A);
      continue;
    }
    
    if (CallSite CS = cast<Value>(BBI)) {
      // If this call does not access memory, it can't be loading any of our
      // pointers.
      if (AA->doesNotAccessMemory(CS))
        continue;
      
      unsigned NumModRef = 0, NumOther = 0;
      
      // If the call might load from any of our allocas, then any store above
      // the call is live.
      SmallVector<Value*, 8> LiveAllocas;
      for (SmallPtrSet<Value*, 16>::iterator I = DeadStackObjects.begin(),
           E = DeadStackObjects.end(); I != E; ++I) {
        // If we detect that our AA is imprecise, it's not worth it to scan the
        // rest of the DeadStackObjects set.  Just assume that the AA will
        // return ModRef for everything, and go ahead and bail out.
        if (NumModRef >= 16 && NumOther == 0)
          return MadeChange;

        // See if the call site touches it.
        AliasAnalysis::ModRefResult A = 
          AA->getModRefInfo(CS, *I, getPointerSize(*I, *AA));
        
        if (A == AliasAnalysis::ModRef)
          ++NumModRef;
        else
          ++NumOther;
        
        if (A == AliasAnalysis::ModRef || A == AliasAnalysis::Ref)
          LiveAllocas.push_back(*I);
      }
      
      for (SmallVector<Value*, 8>::iterator I = LiveAllocas.begin(),
           E = LiveAllocas.end(); I != E; ++I)
        DeadStackObjects.erase(*I);
      
      // If all of the allocas were clobbered by the call then we're not going
      // to find anything else to process.
      if (DeadStackObjects.empty())
        return MadeChange;
      
      continue;
    }
    
    AliasAnalysis::Location LoadedLoc;
    
    // If we encounter a use of the pointer, it is no longer considered dead
    if (LoadInst *L = dyn_cast<LoadInst>(BBI)) {
      LoadedLoc = AA->getLocation(L);
    } else if (VAArgInst *V = dyn_cast<VAArgInst>(BBI)) {
      LoadedLoc = AA->getLocation(V);
    } else if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(BBI)) {
      LoadedLoc = AA->getLocationForSource(MTI);
    } else {
      // Not a loading instruction.
      continue;
    }

    // Remove any allocas from the DeadStackObjects set that are loaded, as
    // this makes any stores above the access live.
    RemoveAccessedObjects(LoadedLoc, DeadStackObjects);

    // If all of the allocas were clobbered by the access then we're not going
    // to find anything else to process.
    if (DeadStackObjects.empty())
      break;
  }
  
  return MadeChange;
}
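
A minimal sketch of the backward scan: walking from the end of the block, a store into an object still in the dead set is removable, while a read revives its object so that earlier stores stay. Toy string "instructions" replace the alias-analysis queries of the real code:

#include <iostream>
#include <set>
#include <string>
#include <vector>

struct Inst {
  std::string Op;  // "store" or "load"
  std::string Obj; // the stack object it touches
};

int main() {
  std::set<std::string> DeadStackObjects = {"A", "B"};
  std::vector<Inst> Block = {
      {"store", "B"}, {"load", "B"}, {"store", "A"}, {"store", "B"}};
  for (auto It = Block.rbegin(); It != Block.rend(); ++It) {
    if (It->Op == "store" && DeadStackObjects.count(It->Obj)) {
      std::cout << "remove dead store to " << It->Obj << '\n';
      continue;
    }
    if (It->Op == "load")
      DeadStackObjects.erase(It->Obj); // earlier stores are live again
  }
  // Prints two removals: the final store to B and the store to A are dead
  // at the end of the block; the first store to B survives because the
  // load reads it first.
}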
Example 8
/// CanMergeBlocks - Return true if we can merge BB into DestBB if there is a
/// single uncond branch between them, and BB contains no other non-phi
/// instructions.
bool CodeGenPrepare::CanMergeBlocks(const BasicBlock *BB,
                                    const BasicBlock *DestBB) const {
  // We only want to eliminate blocks whose phi nodes are used by phi nodes in
  // the successor.  If there are more complex conditions (e.g. preheaders),
  // don't mess around with them.
  BasicBlock::const_iterator BBI = BB->begin();
  while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
    for (Value::const_use_iterator UI = PN->use_begin(), E = PN->use_end();
         UI != E; ++UI) {
      const Instruction *User = cast<Instruction>(*UI);
      if (User->getParent() != DestBB || !isa<PHINode>(User))
        return false;
      // If User is inside DestBB block and it is a PHINode then check
      // incoming value. If incoming value is not from BB then this is
      // a complex condition (e.g. preheaders) we want to avoid here.
      if (User->getParent() == DestBB) {
        if (const PHINode *UPN = dyn_cast<PHINode>(User))
          for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) {
            Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I));
            if (Insn && Insn->getParent() == BB &&
                Insn->getParent() != UPN->getIncomingBlock(I))
              return false;
          }
      }
    }
  }

  // If BB and DestBB contain any common predecessors, then the phi nodes in BB
  // and DestBB may have conflicting incoming values for the block.  If so, we
  // can't merge the block.
  const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin());
  if (!DestBBPN) return true;  // no conflict.

  // Collect the preds of BB.
  SmallPtrSet<const BasicBlock*, 16> BBPreds;
  if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
    // It is faster to get preds from a PHI than with pred_iterator.
    for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
      BBPreds.insert(BBPN->getIncomingBlock(i));
  } else {
    BBPreds.insert(pred_begin(BB), pred_end(BB));
  }

  // Walk the preds of DestBB.
  for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
    BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
    if (BBPreds.count(Pred)) {   // Common predecessor?
      BBI = DestBB->begin();
      while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
        const Value *V1 = PN->getIncomingValueForBlock(Pred);
        const Value *V2 = PN->getIncomingValueForBlock(BB);

        // If V2 is a phi node in BB, look up what the mapped value will be.
        if (const PHINode *V2PN = dyn_cast<PHINode>(V2))
          if (V2PN->getParent() == BB)
            V2 = V2PN->getIncomingValueForBlock(Pred);

        // If there is a conflict, bail out.
        if (V1 != V2) return false;
      }
    }
  }

  return true;
}
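
The common-predecessor check reduces to a small value comparison per shared predecessor: what DestBB's PHI receives from Pred directly must equal what it would receive once BB's PHI is folded away. A minimal sketch with toy PHIs represented as maps from predecessor name to value name:

#include <iostream>
#include <map>
#include <string>

int main() {
  // DestBB's PHI: one incoming value directly from Pred, one via BB.
  std::map<std::string, std::string> DestPhi = {{"Pred", "v1"},
                                                {"BB", "bbphi"}};
  // BB's own PHI ("bbphi"), which would be folded away by the merge.
  std::map<std::string, std::string> BBPhi = {{"Pred", "v1"}};
  std::string V1 = DestPhi["Pred"];
  std::string V2 = DestPhi["BB"];
  if (BBPhi.count("Pred"))
    V2 = BBPhi["Pred"]; // V2 is BB's PHI: look up the mapped value
  std::cout << (V1 == V2 ? "mergeable" : "conflict") << '\n'; // mergeable
}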
Example 9
bool PruneEH::runOnSCC(CallGraphSCC &SCC) {
  SmallPtrSet<CallGraphNode *, 8> SCCNodes;
  CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
  bool MadeChange = false;

  // Fill SCCNodes with the elements of the SCC.  Used for quickly
  // looking up whether a given CallGraphNode is in this SCC.
  for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I)
    SCCNodes.insert(*I);

  // First pass, scan all of the functions in the SCC, simplifying them
  // according to what we know.
  for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I)
    if (Function *F = (*I)->getFunction())
      MadeChange |= SimplifyFunction(F);

  // Next, check to see if any callees might throw or if there are any external
  // functions in this SCC: if so, we cannot prune any functions in this SCC.
  // Definitions that are weak and not declared non-throwing might be 
  // overridden at linktime with something that throws, so assume that.
  // If this SCC includes the unwind instruction, we KNOW it throws, so
  // obviously the SCC might throw.
  //
  bool SCCMightUnwind = false, SCCMightReturn = false;
  for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); 
       (!SCCMightUnwind || !SCCMightReturn) && I != E; ++I) {
    Function *F = (*I)->getFunction();
    if (!F) {
      SCCMightUnwind = true;
      SCCMightReturn = true;
    } else if (F->isDeclaration() || F->mayBeOverridden()) {
      SCCMightUnwind |= !F->doesNotThrow();
      SCCMightReturn |= !F->doesNotReturn();
    } else {
      bool CheckUnwind = !SCCMightUnwind && !F->doesNotThrow();
      bool CheckReturn = !SCCMightReturn && !F->doesNotReturn();
      // Determine if we should scan for InlineAsm in a naked function as it
      // is the only way to return without a ReturnInst.  Only do this for
      // no-inline functions as functions which may be inlined cannot
      // meaningfully return via assembly.
      bool CheckReturnViaAsm = CheckReturn &&
                               F->hasFnAttribute(Attribute::Naked) &&
                               F->hasFnAttribute(Attribute::NoInline);

      if (!CheckUnwind && !CheckReturn)
        continue;

      for (const BasicBlock &BB : *F) {
        const TerminatorInst *TI = BB.getTerminator();
        if (CheckUnwind && TI->mayThrow()) {
          SCCMightUnwind = true;
        } else if (CheckReturn && isa<ReturnInst>(TI)) {
          SCCMightReturn = true;
        }

        for (const Instruction &I : BB) {
          if ((!CheckUnwind || SCCMightUnwind) &&
              (!CheckReturnViaAsm || SCCMightReturn))
            break;

          // Check to see if this function performs an unwind or calls an
          // unwinding function.
          if (CheckUnwind && !SCCMightUnwind && I.mayThrow()) {
            bool InstMightUnwind = true;
            if (const auto *CI = dyn_cast<CallInst>(&I)) {
              if (Function *Callee = CI->getCalledFunction()) {
                CallGraphNode *CalleeNode = CG[Callee];
                // If the callee is outside our current SCC then we may throw
                // because it might.  If it is inside, do nothing.
                if (SCCNodes.count(CalleeNode) > 0)
                  InstMightUnwind = false;
              }
            }
            SCCMightUnwind |= InstMightUnwind;
          }
          if (CheckReturnViaAsm && !SCCMightReturn)
            if (auto ICS = ImmutableCallSite(&I))
              if (const auto *IA = dyn_cast<InlineAsm>(ICS.getCalledValue()))
                if (IA->hasSideEffects())
                  SCCMightReturn = true;
        }

        if (SCCMightUnwind && SCCMightReturn)
          break;
      }
    }
  }

  // If the SCC doesn't unwind or doesn't return, note this fact.
  if (!SCCMightUnwind || !SCCMightReturn)
    for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
      AttrBuilder NewAttributes;

      if (!SCCMightUnwind)
        NewAttributes.addAttribute(Attribute::NoUnwind);
      if (!SCCMightReturn)
        NewAttributes.addAttribute(Attribute::NoReturn);

      Function *F = (*I)->getFunction();
      const AttributeSet &PAL = F->getAttributes().getFnAttributes();
      const AttributeSet &NPAL = AttributeSet::get(
          F->getContext(), AttributeSet::FunctionIndex, NewAttributes);

      if (PAL != NPAL) {
        MadeChange = true;
        F->addAttributes(AttributeSet::FunctionIndex, NPAL);
      }
    }

  for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
    // Convert any invoke instructions to non-throwing functions in this node
    // into call instructions with a branch.  This makes the exception blocks
    // dead.
    if (Function *F = (*I)->getFunction())
      MadeChange |= SimplifyFunction(F);
  }

  return MadeChange;
}
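
A minimal sketch of the SCC-level reasoning: a call whose callee is inside the same SCC is ignored (the fixed point over the component already accounts for it), while any call that escapes the SCC conservatively makes the whole component "might unwind":

#include <iostream>
#include <set>
#include <string>
#include <utility>
#include <vector>

int main() {
  std::set<std::string> SCCNodes = {"f", "g"}; // one SCC: f <-> g
  std::vector<std::pair<std::string, std::string>> Calls = {
      {"f", "g"}, {"g", "f"}, {"g", "extern_fn"}}; // caller -> callee
  bool SCCMightUnwind = false;
  for (const auto &[Caller, Callee] : Calls)
    if (!SCCNodes.count(Callee)) // escapes the SCC: assume it may throw
      SCCMightUnwind = true;
  std::cout << (SCCMightUnwind ? "cannot mark nounwind" : "mark nounwind")
            << '\n'; // prints: cannot mark nounwind
}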
Example 10
bool AMDGPUAlwaysInline::runOnModule(Module &M) {
  AMDGPUAS AMDGPUAS = AMDGPU::getAMDGPUAS(M);

  std::vector<GlobalAlias*> AliasesToRemove;

  SmallPtrSet<Function *, 8> FuncsToAlwaysInline;
  SmallPtrSet<Function *, 8> FuncsToNoInline;

  for (GlobalAlias &A : M.aliases()) {
    if (Function* F = dyn_cast<Function>(A.getAliasee())) {
      A.replaceAllUsesWith(F);
      AliasesToRemove.push_back(&A);
    }

    // FIXME: If the aliasee isn't a function, it's some kind of constant expr
    // cast that won't be inlined through.
  }

  if (GlobalOpt) {
    for (GlobalAlias* A : AliasesToRemove) {
      A->eraseFromParent();
    }
  }

  // Always force inlining of any function that uses an LDS global address. This
  // is something of a workaround because we don't have a way of supporting LDS
  // objects defined in functions. LDS is always allocated by a kernel, and it
  // is difficult to manage LDS usage if a function may be used by multiple
  // kernels.
  //
  // OpenCL doesn't allow declaring LDS in non-kernels, so in practice this
  // should only appear when IPO passes manage to move LDS defined in a kernel
  // into a single user function.

  for (GlobalVariable &GV : M.globals()) {
    // TODO: Region address
    unsigned AS = GV.getType()->getAddressSpace();
    if (AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS.REGION_ADDRESS)
      continue;

    recursivelyVisitUsers(GV, FuncsToAlwaysInline);
  }

  if (!AMDGPUTargetMachine::EnableFunctionCalls || StressCalls) {
    auto IncompatAttr
      = StressCalls ? Attribute::AlwaysInline : Attribute::NoInline;

    for (Function &F : M) {
      if (!F.isDeclaration() && !F.use_empty() &&
          !F.hasFnAttribute(IncompatAttr)) {
        if (StressCalls) {
          if (!FuncsToAlwaysInline.count(&F))
            FuncsToNoInline.insert(&F);
        } else
          FuncsToAlwaysInline.insert(&F);
      }
    }
  }

  for (Function *F : FuncsToAlwaysInline)
    F->addFnAttr(Attribute::AlwaysInline);

  for (Function *F : FuncsToNoInline)
    F->addFnAttr(Attribute::NoInline);

  return !FuncsToAlwaysInline.empty() || !FuncsToNoInline.empty();
}
Example 11
bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
        Loop *TheLoop, bool HasFunNoNaNAttr,
        RecurrenceDescriptor &RedDes) {
    if (Phi->getNumIncomingValues() != 2)
        return false;

    // Reduction variables are only found in the loop header block.
    if (Phi->getParent() != TheLoop->getHeader())
        return false;

    // Obtain the reduction start value from the value that comes from the loop
    // preheader.
    Value *RdxStart = Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader());

    // ExitInstruction is the single value which is used outside the loop.
    // We only allow for a single reduction value to be used outside the loop.
    // This includes users of the reduction and the reduction variables
    // themselves (the cycle that ends in the phi node).
    Instruction *ExitInstruction = nullptr;
    // Indicates that we found a reduction operation in our scan.
    bool FoundReduxOp = false;

    // We start with the PHI node and scan for all of the users of this
    // instruction. All users must be instructions that can be used as reduction
    // variables (such as ADD). We must have a single out-of-loop user. The cycle
    // must include the original PHI.
    bool FoundStartPHI = false;

    // To recognize min/max patterns formed by an icmp/select sequence, we
    // store the number of instructions we saw from the recognized min/max
    // pattern, to make sure we see exactly those two instructions.
    unsigned NumCmpSelectPatternInst = 0;
    InstDesc ReduxDesc(false, nullptr);

    // Data used for determining if the recurrence has been type-promoted.
    Type *RecurrenceType = Phi->getType();
    SmallPtrSet<Instruction *, 4> CastInsts;
    Instruction *Start = Phi;
    bool IsSigned = false;

    SmallPtrSet<Instruction *, 8> VisitedInsts;
    SmallVector<Instruction *, 8> Worklist;

    // Return early if the recurrence kind does not match the type of Phi. If the
    // recurrence kind is arithmetic, we attempt to look through AND operations
    // resulting from the type promotion performed by InstCombine.  Vector
    // operations are not limited to the legal integer widths, so we may be able
    // to evaluate the reduction in the narrower width.
    if (RecurrenceType->isFloatingPointTy()) {
        if (!isFloatingPointRecurrenceKind(Kind))
            return false;
    } else {
        if (!isIntegerRecurrenceKind(Kind))
            return false;
        if (isArithmeticRecurrenceKind(Kind))
            Start = lookThroughAnd(Phi, RecurrenceType, VisitedInsts, CastInsts);
    }

    Worklist.push_back(Start);
    VisitedInsts.insert(Start);

    // A value in the reduction can be used:
    //  - By the reduction:
    //      - Reduction operation:
    //        - One use of reduction value (safe).
    //        - Multiple use of reduction value (not safe).
    //      - PHI:
    //        - All uses of the PHI must be the reduction (safe).
    //        - Otherwise, not safe.
    //  - By one instruction outside of the loop (safe).
    //  - By further instructions outside of the loop (not safe).
    //  - By an instruction that is not part of the reduction (not safe).
    //    This is either:
    //      * An instruction type other than PHI or the reduction operation.
    //      * A PHI in the header other than the initial PHI.
    while (!Worklist.empty()) {
        Instruction *Cur = Worklist.back();
        Worklist.pop_back();

        // No Users.
        // If the instruction has no users then this is a broken chain and can't be
        // a reduction variable.
        if (Cur->use_empty())
            return false;

        bool IsAPhi = isa<PHINode>(Cur);

        // A header PHI use other than the original PHI.
        if (Cur != Phi && IsAPhi && Cur->getParent() == Phi->getParent())
            return false;

        // Reductions of instructions such as Div and Sub are only possible if
        // the LHS is the reduction variable.
        if (!Cur->isCommutative() && !IsAPhi && !isa<SelectInst>(Cur) &&
                !isa<ICmpInst>(Cur) && !isa<FCmpInst>(Cur) &&
                !VisitedInsts.count(dyn_cast<Instruction>(Cur->getOperand(0))))
            return false;

        // Any reduction instruction must be of one of the allowed kinds. We ignore
        // the starting value (the Phi or an AND instruction if the Phi has been
        // type-promoted).
        if (Cur != Start) {
            ReduxDesc = isRecurrenceInstr(Cur, Kind, ReduxDesc, HasFunNoNaNAttr);
            if (!ReduxDesc.isRecurrence())
                return false;
        }

        // A reduction operation must only have one use of the reduction value.
        if (!IsAPhi && Kind != RK_IntegerMinMax && Kind != RK_FloatMinMax &&
                hasMultipleUsesOf(Cur, VisitedInsts))
            return false;

        // All inputs to a PHI node must be a reduction value.
        if (IsAPhi && Cur != Phi && !areAllUsesIn(Cur, VisitedInsts))
            return false;

        if (Kind == RK_IntegerMinMax &&
                (isa<ICmpInst>(Cur) || isa<SelectInst>(Cur)))
            ++NumCmpSelectPatternInst;
        if (Kind == RK_FloatMinMax && (isa<FCmpInst>(Cur) || isa<SelectInst>(Cur)))
            ++NumCmpSelectPatternInst;

        // Check whether we found a reduction operator.
        FoundReduxOp |= !IsAPhi && Cur != Start;

        // Process users of current instruction. Push non-PHI nodes after PHI nodes
        // onto the stack. This way we are going to have seen all inputs to PHI
        // nodes once we get to them.
        SmallVector<Instruction *, 8> NonPHIs;
        SmallVector<Instruction *, 8> PHIs;
        for (User *U : Cur->users()) {
            Instruction *UI = cast<Instruction>(U);

            // Check if we found the exit user.
            BasicBlock *Parent = UI->getParent();
            if (!TheLoop->contains(Parent)) {
                // Exit if you find multiple outside users or if the header phi node is
                // being used. In this case the user uses the value of the previous
                // iteration, in which case we would lose "VF-1" iterations of the
                // reduction operation if we vectorize.
                if (ExitInstruction != nullptr || Cur == Phi)
                    return false;

                // The instruction used by an outside user must be the last instruction
                // before we feed back to the reduction phi. Otherwise, we lose VF-1
                // operations on the value.
                if (!is_contained(Phi->operands(), Cur))
                    return false;

                ExitInstruction = Cur;
                continue;
            }

            // Process instructions only once (termination). Each reduction cycle
            // value must only be used once, except by phi nodes and min/max
            // reductions which are represented as a cmp followed by a select.
            InstDesc IgnoredVal(false, nullptr);
            if (VisitedInsts.insert(UI).second) {
                if (isa<PHINode>(UI))
                    PHIs.push_back(UI);
                else
                    NonPHIs.push_back(UI);
            } else if (!isa<PHINode>(UI) &&
                       ((!isa<FCmpInst>(UI) && !isa<ICmpInst>(UI) &&
                         !isa<SelectInst>(UI)) ||
                        !isMinMaxSelectCmpPattern(UI, IgnoredVal).isRecurrence()))
                return false;

            // Remember that we completed the cycle.
            if (UI == Phi)
                FoundStartPHI = true;
        }
        Worklist.append(PHIs.begin(), PHIs.end());
        Worklist.append(NonPHIs.begin(), NonPHIs.end());
    }

    // This means we have seen one but not the other instruction of the
    // pattern or more than just a select and cmp.
    if ((Kind == RK_IntegerMinMax || Kind == RK_FloatMinMax) &&
            NumCmpSelectPatternInst != 2)
        return false;

    if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction)
        return false;

    // If we think Phi may have been type-promoted, we also need to ensure that
    // all source operands of the reduction are either SExtInsts or ZExtInsts. If
    // so, we will be able to evaluate the reduction in the narrower bit width.
    if (Start != Phi)
        if (!getSourceExtensionKind(Start, ExitInstruction, RecurrenceType,
                                    IsSigned, VisitedInsts, CastInsts))
            return false;

    // We found a reduction var if we have reached the original phi node and we
    // only have a single instruction with out-of-loop users.

    // The ExitInstruction(Instruction which is allowed to have out-of-loop users)
    // is saved as part of the RecurrenceDescriptor.

    // Save the description of this reduction variable.
    RecurrenceDescriptor RD(
        RdxStart, ExitInstruction, Kind, ReduxDesc.getMinMaxKind(),
        ReduxDesc.getUnsafeAlgebraInst(), RecurrenceType, IsSigned, CastInsts);
    RedDes = RD;

    return true;
}
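
A minimal sketch of the cycle check at the heart of this scan: starting from the header PHI, in-loop users are explored through a visited-set worklist, the walk must return to the PHI, and at most one value may escape the loop. This uses a toy def-use graph; the real code additionally validates each opcode against the recurrence kind:

#include <iostream>
#include <map>
#include <set>
#include <string>
#include <vector>

int main() {
  // Users[V] = instructions using V: phi -> add -> {phi, exit}.
  std::map<std::string, std::vector<std::string>> Users = {
      {"phi", {"add"}}, {"add", {"phi", "exit"}}};
  std::set<std::string> InLoop = {"phi", "add"};
  std::set<std::string> Visited = {"phi"};
  std::vector<std::string> Worklist = {"phi"};
  bool FoundStartPHI = false;
  int OutsideUsers = 0;
  while (!Worklist.empty()) {
    std::string Cur = Worklist.back();
    Worklist.pop_back();
    for (const std::string &U : Users[Cur]) {
      if (!InLoop.count(U)) {
        ++OutsideUsers;       // candidate ExitInstruction user
        continue;
      }
      if (U == "phi") {
        FoundStartPHI = true; // the cycle closed back on the header PHI
        continue;
      }
      if (Visited.insert(U).second)
        Worklist.push_back(U);
    }
  }
  std::cout << (FoundStartPHI && OutsideUsers == 1 ? "reduction"
                                                   : "not a reduction")
            << '\n'; // prints: reduction
}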
Example 12
/// findValueImpl - Implementation helper for findValue.
Value *Lint::findValueImpl(Value *V, bool OffsetOk,
                           SmallPtrSetImpl<Value *> &Visited) const {
  // Detect self-referential values.
  if (!Visited.insert(V).second)
    return UndefValue::get(V->getType());

  // TODO: Look through sext or zext cast, when the result is known to
  // be interpreted as signed or unsigned, respectively.
  // TODO: Look through eliminable cast pairs.
  // TODO: Look through calls with unique return values.
  // TODO: Look through vector insert/extract/shuffle.
  V = OffsetOk ? GetUnderlyingObject(V, *DL) : V->stripPointerCasts();
  if (LoadInst *L = dyn_cast<LoadInst>(V)) {
    BasicBlock::iterator BBI = L->getIterator();
    BasicBlock *BB = L->getParent();
    SmallPtrSet<BasicBlock *, 4> VisitedBlocks;
    for (;;) {
      if (!VisitedBlocks.insert(BB).second)
        break;
      if (Value *U =
          FindAvailableLoadedValue(L, BB, BBI, DefMaxInstsToScan, AA))
        return findValueImpl(U, OffsetOk, Visited);
      if (BBI != BB->begin()) break;
      BB = BB->getUniquePredecessor();
      if (!BB) break;
      BBI = BB->end();
    }
  } else if (PHINode *PN = dyn_cast<PHINode>(V)) {
    if (Value *W = PN->hasConstantValue())
      if (W != V)
        return findValueImpl(W, OffsetOk, Visited);
  } else if (CastInst *CI = dyn_cast<CastInst>(V)) {
    if (CI->isNoopCast(*DL))
      return findValueImpl(CI->getOperand(0), OffsetOk, Visited);
  } else if (ExtractValueInst *Ex = dyn_cast<ExtractValueInst>(V)) {
    if (Value *W = FindInsertedValue(Ex->getAggregateOperand(),
                                     Ex->getIndices()))
      if (W != V)
        return findValueImpl(W, OffsetOk, Visited);
  } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
    // Same as above, but for ConstantExpr instead of Instruction.
    if (Instruction::isCast(CE->getOpcode())) {
      if (CastInst::isNoopCast(Instruction::CastOps(CE->getOpcode()),
                               CE->getOperand(0)->getType(), CE->getType(),
                               DL->getIntPtrType(V->getType())))
        return findValueImpl(CE->getOperand(0), OffsetOk, Visited);
    } else if (CE->getOpcode() == Instruction::ExtractValue) {
      ArrayRef<unsigned> Indices = CE->getIndices();
      if (Value *W = FindInsertedValue(CE->getOperand(0), Indices))
        if (W != V)
          return findValueImpl(W, OffsetOk, Visited);
    }
  }

  // As a last resort, try SimplifyInstruction or constant folding.
  if (Instruction *Inst = dyn_cast<Instruction>(V)) {
    if (Value *W = SimplifyInstruction(Inst, *DL, TLI, DT, AC))
      return findValueImpl(W, OffsetOk, Visited);
  } else if (auto *C = dyn_cast<Constant>(V)) {
    if (Value *W = ConstantFoldConstant(C, *DL, TLI))
      if (W && W != V)
        return findValueImpl(W, OffsetOk, Visited);
  }

  return V;
}
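
A minimal sketch of the visited-set guard: a recursive look-through that maps each value to whatever it forwards to, returning an "undef" sentinel when it re-enters itself, just as findValueImpl returns UndefValue for self-referential values. The forwarding map is a toy; the real code looks through loads, casts, PHIs, and so on:

#include <iostream>
#include <map>
#include <set>
#include <string>

static const std::map<std::string, std::string> Forwards = {
    {"a", "b"}, {"b", "c"}, {"x", "y"}, {"y", "x"}}; // x <-> y is a cycle

static std::string findValue(const std::string &V,
                             std::set<std::string> &Visited) {
  if (!Visited.insert(V).second)
    return "undef"; // self-referential, like the UndefValue above
  auto It = Forwards.find(V);
  if (It == Forwards.end())
    return V;       // nothing left to look through
  return findValue(It->second, Visited);
}

int main() {
  std::set<std::string> V1, V2;
  std::cout << findValue("a", V1) << '\n'; // prints: c
  std::cout << findValue("x", V2) << '\n'; // prints: undef
}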
Example 13
static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS,
                             SmallPtrSet<const PHINode *, 16> &PhiUsers) {
  for (const Use &U : V->uses()) {
    const User *UR = U.getUser();
    if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(UR)) {
      GS.HasNonInstructionUser = true;

      // If the result of the constantexpr isn't pointer type, then we won't
      // know to expect it in various places.  Just reject early.
      if (!isa<PointerType>(CE->getType()))
        return true;

      if (analyzeGlobalAux(CE, GS, PhiUsers))
        return true;
    } else if (const Instruction *I = dyn_cast<Instruction>(UR)) {
      if (!GS.HasMultipleAccessingFunctions) {
        if (I->getParent() && I->getParent()->getParent()) {
          const Function *F = I->getParent()->getParent();

          if (!GS.AccessingFunction)
            GS.AccessingFunction = F;
          else if (GS.AccessingFunction != F)
            GS.HasMultipleAccessingFunctions = true;
        }
      }
      if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
        GS.IsLoaded = true;
        // Don't hack on volatile loads.
        if (LI->isVolatile())
          return true;
        GS.Ordering = strongerOrdering(GS.Ordering, LI->getOrdering());
      } else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) {
        // Don't allow a store OF the address, only stores TO the address.
        if (SI->getOperand(0) == V)
          return true;

        // Don't hack on volatile stores.
        if (SI->isVolatile())
          return true;

        GS.Ordering = strongerOrdering(GS.Ordering, SI->getOrdering());

        // If this is a direct store to the global (i.e., the global is a scalar
        // value, not an aggregate), keep more specific information about
        // stores.
        if (GS.StoredType != GlobalStatus::Stored) {
          if (const GlobalVariable *GV =
                  dyn_cast<GlobalVariable>(SI->getOperand(1))) {
            Value *StoredVal = SI->getOperand(0);

            if (Constant *C = dyn_cast<Constant>(StoredVal)) {
              if (C->isThreadDependent()) {
                // The stored value changes between threads; don't track it.
                return true;
              }
            }

            if (StoredVal == GV->getInitializer()) {
              if (GS.StoredType < GlobalStatus::InitializerStored)
                GS.StoredType = GlobalStatus::InitializerStored;
            } else if (isa<LoadInst>(StoredVal) &&
                       cast<LoadInst>(StoredVal)->getOperand(0) == GV) {
              if (GS.StoredType < GlobalStatus::InitializerStored)
                GS.StoredType = GlobalStatus::InitializerStored;
            } else if (GS.StoredType < GlobalStatus::StoredOnce) {
              GS.StoredType = GlobalStatus::StoredOnce;
              GS.StoredOnceValue = StoredVal;
            } else if (GS.StoredType == GlobalStatus::StoredOnce &&
                       GS.StoredOnceValue == StoredVal) {
              // noop.
            } else {
              GS.StoredType = GlobalStatus::Stored;
            }
          } else {
            GS.StoredType = GlobalStatus::Stored;
          }
        }
      } else if (isa<BitCastInst>(I)) {
        if (analyzeGlobalAux(I, GS, PhiUsers))
          return true;
      } else if (isa<GetElementPtrInst>(I)) {
        if (analyzeGlobalAux(I, GS, PhiUsers))
          return true;
      } else if (isa<SelectInst>(I)) {
        if (analyzeGlobalAux(I, GS, PhiUsers))
          return true;
      } else if (const PHINode *PN = dyn_cast<PHINode>(I)) {
        // PHI nodes we can check just like select or GEP instructions, but we
        // have to be careful about infinite recursion.
        if (PhiUsers.insert(PN)) // Not already visited.
          if (analyzeGlobalAux(I, GS, PhiUsers))
            return true;
      } else if (isa<CmpInst>(I)) {
        GS.IsCompared = true;
      } else if (const MemTransferInst *MTI = dyn_cast<MemTransferInst>(I)) {
        if (MTI->isVolatile())
          return true;
        if (MTI->getArgOperand(0) == V)
          GS.StoredType = GlobalStatus::Stored;
        if (MTI->getArgOperand(1) == V)
          GS.IsLoaded = true;
      } else if (const MemSetInst *MSI = dyn_cast<MemSetInst>(I)) {
        assert(MSI->getArgOperand(0) == V && "Memset only takes one pointer!");
        if (MSI->isVolatile())
          return true;
        GS.StoredType = GlobalStatus::Stored;
      } else if (ImmutableCallSite C = I) {
        if (!C.isCallee(&U))
          return true;
        GS.IsLoaded = true;
      } else {
        return true; // Any other non-load instruction might take address!
      }
    } else if (const Constant *C = dyn_cast<Constant>(UR)) {
      GS.HasNonInstructionUser = true;
      // We might have a dead and dangling constant hanging off of here.
      if (!isSafeToDestroyConstant(C))
        return true;
    } else {
      GS.HasNonInstructionUser = true;
      // Otherwise must be some other user.
      return true;
    }
  }

  return false;
}
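
The store classification above is a small lattice that only moves upward: InitializerStored, then StoredOnce, then Stored, with a repeated store of the same single value leaving StoredOnce intact. A minimal sketch of those transitions:

#include <iostream>
#include <string>
#include <vector>

enum StoredType { NotStored, InitializerStored, StoredOnce, Stored };

int main() {
  StoredType ST = NotStored;
  std::string StoredOnceValue;
  std::vector<std::string> Stores = {"init", "v1", "v1", "v2"};
  for (const std::string &V : Stores) {
    if (V == "init") {
      if (ST < InitializerStored)
        ST = InitializerStored; // re-storing the initializer
    } else if (ST < StoredOnce) {
      ST = StoredOnce;          // first store of a real value
      StoredOnceValue = V;
    } else if (ST == StoredOnce && StoredOnceValue == V) {
      // noop: the same single value stored again
    } else {
      ST = Stored;              // a second distinct value: give up
    }
  }
  std::cout << ST << '\n'; // prints: 3 (Stored), because v2 != v1
}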
Example 14
/// HandlePHINodesInSuccessorBlocks - Handle PHI nodes in successor blocks.
/// Emit code to ensure constants are copied into registers when needed.
/// Remember the virtual registers that need to be added to the Machine PHI
/// nodes as input.  We cannot just directly add them, because expansion
/// might result in multiple MBB's for one BB.  As such, the start of the
/// BB might correspond to a different MBB than the end.
bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
  const TerminatorInst *TI = LLVMBB->getTerminator();

  SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
  unsigned OrigNumPHINodesToUpdate = FuncInfo.PHINodesToUpdate.size();

  // Check successor nodes' PHI nodes that expect a constant to be available
  // from this block.
  for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
    const BasicBlock *SuccBB = TI->getSuccessor(succ);
    if (!isa<PHINode>(SuccBB->begin())) continue;
    MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB];

    // If this terminator has multiple identical successors (common for
    // switches), only handle each succ once.
    if (!SuccsHandled.insert(SuccMBB)) continue;

    MachineBasicBlock::iterator MBBI = SuccMBB->begin();

    // At this point we know that there is a 1-1 correspondence between LLVM PHI
    // nodes and Machine PHI nodes, but the incoming operands have not been
    // emitted yet.
    for (BasicBlock::const_iterator I = SuccBB->begin();
         const PHINode *PN = dyn_cast<PHINode>(I); ++I) {

      // Ignore dead PHIs.
      if (PN->use_empty()) continue;

      // Only handle legal types. Two interesting things to note here. First,
      // by bailing out early, we may leave behind some dead instructions,
      // since SelectionDAG's HandlePHINodesInSuccessorBlocks will insert its
      // own moves. Second, this check is necessary because FastISel doesn't
      // use CreateRegs to create registers, so it always creates
      // exactly one register for each non-void instruction.
      EVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true);
      if (VT == MVT::Other || !TLI.isTypeLegal(VT)) {
        // Promote MVT::i1.
        if (VT == MVT::i1)
          VT = TLI.getTypeToTransformTo(LLVMBB->getContext(), VT);
        else {
          FuncInfo.PHINodesToUpdate.resize(OrigNumPHINodesToUpdate);
          return false;
        }
      }

      const Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);

      // Set the DebugLoc for the copy. Prefer the location of the operand
      // if there is one; use the location of the PHI otherwise.
      DL = PN->getDebugLoc();
      if (const Instruction *Inst = dyn_cast<Instruction>(PHIOp))
        DL = Inst->getDebugLoc();

      unsigned Reg = getRegForValue(PHIOp);
      if (Reg == 0) {
        FuncInfo.PHINodesToUpdate.resize(OrigNumPHINodesToUpdate);
        return false;
      }
      FuncInfo.PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg));
      DL = DebugLoc();
    }
  }

  return true;
}
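Note that the function above only records (machine PHI, virtual register) pairs in FuncInfo.PHINodesToUpdate; the operands are attached later, once the final block layout is known. Below is a minimal sketch of that second step. It is not the actual SelectionDAGISel code, and the helper name ApplyPHIUpdates is hypothetical; it assumes machine PHIs take alternating <register, predecessor MBB> operand pairs, which is the standard machine PHI form.

// Hypothetical helper, not actual LLVM code: attach the recorded
// (PHI, vreg) pairs as real operands once the predecessor MBB is known.
static void ApplyPHIUpdates(FunctionLoweringInfo &FuncInfo,
                            MachineBasicBlock *PredMBB) {
  for (unsigned i = 0, e = FuncInfo.PHINodesToUpdate.size(); i != e; ++i) {
    MachineInstr *PHI = FuncInfo.PHINodesToUpdate[i].first;
    unsigned Reg = FuncInfo.PHINodesToUpdate[i].second;
    assert(PHI->isPHI() && "Recorded instruction is not a machine PHI!");
    // Machine PHIs take alternating <register, predecessor block> operands.
    PHI->addOperand(MachineOperand::CreateReg(Reg, /*isDef=*/false));
    PHI->addOperand(MachineOperand::CreateMBB(PredMBB));
  }
}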
Example no. 15
static bool markAliveBlocks(BasicBlock *BB,
                            SmallPtrSet<BasicBlock*, 128> &Reachable) {

  SmallVector<BasicBlock*, 128> Worklist;
  Worklist.push_back(BB);
  Reachable.insert(BB);
  bool Changed = false;
  do {
    BB = Worklist.pop_back_val();

    // Do a quick scan of the basic block, turning any obviously unreachable
    // instructions into LLVM unreachable insts.  The instruction combining pass
    // canonicalizes unreachable insts into stores to null or undef.
    for (BasicBlock::iterator BBI = BB->begin(), E = BB->end();
         BBI != E; ++BBI) {
      if (CallInst *CI = dyn_cast<CallInst>(BBI)) {
        if (CI->doesNotReturn()) {
          // If we found a call to a no-return function, insert an unreachable
          // instruction after it.  Make sure there isn't *already* one there
          // though.
          ++BBI;
          if (!isa<UnreachableInst>(BBI)) {
            // Don't insert a call to llvm.trap right before the unreachable.
            changeToUnreachable(BBI, false);
            Changed = true;
          }
          break;
        }
      }

      // Store to undef and store to null are undefined and used to signal that
      // they should be changed to unreachable by passes that can't modify the
      // CFG.
      if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) {
        // Don't touch volatile stores.
        if (SI->isVolatile()) continue;

        Value *Ptr = SI->getOperand(1);

        if (isa<UndefValue>(Ptr) ||
            (isa<ConstantPointerNull>(Ptr) &&
             SI->getPointerAddressSpace() == 0)) {
          changeToUnreachable(SI, true);
          Changed = true;
          break;
        }
      }
    }

    // Turn invokes that call 'nounwind' functions into ordinary calls.
    if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
      Value *Callee = II->getCalledValue();
      if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) {
        changeToUnreachable(II, true);
        Changed = true;
      } else if (II->doesNotThrow()) {
        if (II->use_empty() && II->onlyReadsMemory()) {
          // The result is unused and the callee only reads memory, so we
          // can just branch to the normal destination.
          BranchInst::Create(II->getNormalDest(), II);
          II->getUnwindDest()->removePredecessor(II->getParent());
          II->eraseFromParent();
        } else
          changeToCall(II);
        Changed = true;
      }
    }

    Changed |= ConstantFoldTerminator(BB, true);
    for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
      if (Reachable.insert(*SI))
        Worklist.push_back(*SI);
  } while (!Worklist.empty());
  return Changed;
}
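markAliveBlocks only marks; a caller still has to delete whatever was never reached. The sketch below is modeled on the general shape of LLVM's unreachable-block elimination rather than copied from it, and the function name removeDeadBlocks is hypothetical.

// Hypothetical driver: mark everything reachable from the entry block,
// then delete the rest.
static bool removeDeadBlocks(Function &F) {
  SmallPtrSet<BasicBlock*, 128> Reachable;
  bool Changed = markAliveBlocks(&F.getEntryBlock(), Reachable);

  // If every block is reachable, there is nothing more to do.
  if (Reachable.size() == F.size())
    return Changed;

  // First pass: detach dead blocks from live PHI nodes and drop all of
  // their references so that they can be erased in any order.
  for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
    BasicBlock *BB = I;
    if (Reachable.count(BB))
      continue;
    for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
      if (Reachable.count(*SI))
        (*SI)->removePredecessor(BB);
    BB->dropAllReferences();
  }

  // Second pass: erase the now-unreferenced dead blocks. The entry block
  // is always reachable, so we can start after it.
  for (Function::iterator I = ++F.begin(); I != F.end();)
    if (!Reachable.count(&*I))
      I = F.getBasicBlockList().erase(I);
    else
      ++I;

  return true;
}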
Example no. 16
/// Perform the initialization needed by the renaming phase.
///
void SSI::renameInit(SmallPtrSet<Instruction*, 4> &value) {
  for (SmallPtrSet<Instruction*, 4>::iterator I = value.begin(),
       E = value.end(); I != E; ++I)
    value_stack[*I].push_back(*I);
}
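For context, here is a hedged sketch of how this initialization combines with the recursive rename pass shown later in this listing; the driver name renameAll is hypothetical, but DT_ is the same dominator tree member the rename pass uses.

// Hypothetical driver: seed each variable's stack with itself, then
// rename starting from the root of the dominance tree.
void SSI::renameAll(SmallPtrSet<Instruction*, 4> &vars) {
  renameInit(vars);        // each variable initially names itself
  rename(DT_->getRoot());  // recurse over the dominance tree
}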
Example no. 17
bool Inliner::runOnSCC(CallGraphSCC &SCC) {
  CallGraph &CG = getAnalysis<CallGraph>();
  const TargetData *TD = getAnalysisIfAvailable<TargetData>();

  SmallPtrSet<Function*, 8> SCCFunctions;
  DEBUG(dbgs() << "Inliner visiting SCC:");
  for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
    Function *F = (*I)->getFunction();
    if (F) SCCFunctions.insert(F);
    DEBUG(dbgs() << " " << (F ? F->getName() : "INDIRECTNODE"));
  }

  // Scan through and identify all call sites ahead of time so that we only
  // inline call sites in the original functions, not call sites that result
  // from inlining other functions.
  SmallVector<std::pair<CallSite, int>, 16> CallSites;
  
  // When inlining a callee produces new call sites, we want to keep track of
  // the fact that they were inlined from the callee.  This allows us to avoid
  // infinite inlining in some obscure cases.  To represent this, we use an
  // index into the InlineHistory vector.
  SmallVector<std::pair<Function*, int>, 8> InlineHistory;

  for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
    Function *F = (*I)->getFunction();
    if (!F) continue;
    
    for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
      for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
        CallSite CS(cast<Value>(I));
        // If this isn't a call, or it is a call to an intrinsic, it can
        // never be inlined.
        if (!CS || isa<IntrinsicInst>(I))
          continue;
        
        // If this is a direct call to an external function, we can never inline
        // it.  If it is an indirect call, inlining may resolve it to be a
        // direct call, so we keep it.
        if (CS.getCalledFunction() && CS.getCalledFunction()->isDeclaration())
          continue;
        
        CallSites.push_back(std::make_pair(CS, -1));
      }
  }

  DEBUG(dbgs() << ": " << CallSites.size() << " call sites.\n");

  // If there are no calls in this function, exit early.
  if (CallSites.empty())
    return false;
  
  // Now that we have all of the call sites, move the ones to functions in the
  // current SCC to the end of the list.
  unsigned FirstCallInSCC = CallSites.size();
  for (unsigned i = 0; i < FirstCallInSCC; ++i)
    if (Function *F = CallSites[i].first.getCalledFunction())
      if (SCCFunctions.count(F))
        std::swap(CallSites[i--], CallSites[--FirstCallInSCC]);

  
  InlinedArrayAllocasTy InlinedArrayAllocas;
  InlineFunctionInfo InlineInfo(&CG, TD);
  
  // Now that we have all of the call sites, loop over them and inline them if
  // it looks profitable to do so.
  bool Changed = false;
  bool LocalChange;
  do {
    LocalChange = false;
    // Iterate over the outer loop because inlining functions can cause indirect
    // calls to become direct calls.
    for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi) {
      CallSite CS = CallSites[CSi].first;
      
      Function *Caller = CS.getCaller();
      Function *Callee = CS.getCalledFunction();

      // If this call site is dead and it is to a readonly function, we should
      // just delete the call instead of trying to inline it, regardless of
      // size.  This happens because IPSCCP propagates the result out of the
      // call and then we're left with the dead call.
      if (isInstructionTriviallyDead(CS.getInstruction())) {
        DEBUG(dbgs() << "    -> Deleting dead call: "
                     << *CS.getInstruction() << "\n");
        // Update the call graph by deleting the edge from Callee to Caller.
        CG[Caller]->removeCallEdgeFor(CS);
        CS.getInstruction()->eraseFromParent();
        ++NumCallsDeleted;
        // Update the cached cost info with the missing call
        growCachedCostInfo(Caller, NULL);
      } else {
        // We can only inline direct calls to non-declarations.
        if (Callee == 0 || Callee->isDeclaration()) continue;
      
        // If this call site was obtained by inlining another function, verify
        // that the inline history for the call site does not already include
        // the callee itself.  If it does, we would be recursively inlining
        // the same function, which would provide the same call sites again
        // and cause us to inline infinitely.
        int InlineHistoryID = CallSites[CSi].second;
        if (InlineHistoryID != -1 &&
            InlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory))
          continue;
        
        
        // If the policy determines that we should inline this function,
        // try to do so.
        if (!shouldInline(CS))
          continue;

        // Attempt to inline the function.
        if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas,
                                  InlineHistoryID))
          continue;
        ++NumInlined;
        
        // If inlining this function gave us any new call sites, throw them
        // onto our worklist to process.  They are useful inline candidates.
        if (!InlineInfo.InlinedCalls.empty()) {
          // Create a new inline history entry for this, so that we remember
          // that these new callsites came about due to inlining Callee.
          int NewHistoryID = InlineHistory.size();
          InlineHistory.push_back(std::make_pair(Callee, InlineHistoryID));

          for (unsigned i = 0, e = InlineInfo.InlinedCalls.size();
               i != e; ++i) {
            Value *Ptr = InlineInfo.InlinedCalls[i];
            CallSites.push_back(std::make_pair(CallSite(Ptr), NewHistoryID));
          }
        }
        
        // Update the cached cost info with the inlined call.
        growCachedCostInfo(Caller, Callee);
      }
      
      // If we inlined or deleted the last possible call site to the function,
      // delete the function body now.
      if (Callee && Callee->use_empty() && Callee->hasLocalLinkage() &&
          // TODO: Can remove if in SCC now.
          !SCCFunctions.count(Callee) &&
          
          // The function may be apparently dead, but if there are indirect
          // callgraph references to the node, we cannot delete it yet, this
          // could invalidate the CGSCC iterator.
          CG[Callee]->getNumReferences() == 0) {
        DEBUG(dbgs() << "    -> Deleting dead function: "
              << Callee->getName() << "\n");
        CallGraphNode *CalleeNode = CG[Callee];
        
        // Remove any call graph edges from the callee to its callees.
        CalleeNode->removeAllCalledFunctions();
        
        resetCachedCostInfo(Callee);
        
        // Remove the node for the callee from the call graph and delete it.
        delete CG.removeFunctionFromModule(CalleeNode);
        ++NumDeleted;
      }

      // Remove this call site from the list.  If possible, use 
      // swap/pop_back for efficiency, but do not use it if doing so would
      // move a call site to a function in this SCC before the
      // 'FirstCallInSCC' barrier.
      if (SCC.isSingular()) {
        CallSites[CSi] = CallSites.back();
        CallSites.pop_back();
      } else {
        CallSites.erase(CallSites.begin()+CSi);
      }
      --CSi;

      Changed = true;
      LocalChange = true;
    }
  } while (LocalChange);

  return Changed;
}
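The InlineHistoryIncludes check used above walks the parent chain encoded by the (Function, parent index) pairs in InlineHistory. The sketch below matches that data structure; the real helper in Inliner.cpp has essentially this shape, though this version is a reconstruction rather than a verbatim copy.

// Walk the inline-history chain: each entry records the index of the
// entry it was inlined from, with -1 terminating the chain.
static bool InlineHistoryIncludes(
    Function *F, int InlineHistoryID,
    const SmallVectorImpl<std::pair<Function*, int> > &InlineHistory) {
  while (InlineHistoryID != -1) {
    assert(unsigned(InlineHistoryID) < InlineHistory.size() &&
           "Invalid inline history ID");
    if (InlineHistory[InlineHistoryID].first == F)
      return true;  // F is already on this inline path: would recurse.
    InlineHistoryID = InlineHistory[InlineHistoryID].second;
  }
  return false;
}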
Example no. 18
/// Renames all variables in the specified BasicBlock.
/// Only variables that need to be renamed will be.
///
void SSI::rename(BasicBlock *BB) {
  SmallPtrSet<Instruction*, 8> defined;

  // Iterate through instructions and make the appropriate renaming.
  // For SSI_PHI (b = PHI()), store b in value_stack as a new
  // definition of the variable it represents.
  // For SSI_SIG (b = PHI(a)), substitute a with its current
  // value, present in the value_stack, then store b in the
  // value_stack as the new definition of a.
  // For all other instructions (b = OP(a, c, d, ...)), we need to substitute
  // all operands with their current values, present in value_stack.
  for (BasicBlock::iterator begin = BB->begin(), end = BB->end();
       begin != end; ++begin) {
    Instruction *I = begin;
    if (PHINode *PN = dyn_cast<PHINode>(I)) { // Treat PHI functions
      Instruction* position;

      // Treat SSI_PHI
      if ((position = getPositionPhi(PN))) {
        value_stack[position].push_back(PN);
        defined.insert(position);
      // Treat SSI_SIG
      } else if ((position = getPositionSigma(PN))) {
        substituteUse(I);
        value_stack[position].push_back(PN);
        defined.insert(position);
      }

      // Treat all other PHI functions
      else {
        substituteUse(I);
      }
    }

    // Treat all other functions
    else {
      substituteUse(I);
    }
  }

  // This loop iterates over all BasicBlocks that are successors of the
  // current BasicBlock. For each SSI_PHI instruction found, insert an
  // operand: the current value in value_stack for the variable in
  // "position", with the current BasicBlock as its incoming block.
  for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) {
    BasicBlock *BB_succ = *SI;

    for (BasicBlock::iterator begin = BB_succ->begin(),
         notPhi = BB_succ->getFirstNonPHI(); begin != *notPhi; ++begin) {
      Instruction *I = begin;
      PHINode *PN = dyn_cast<PHINode>(I);
      Instruction* position;
      if (PN && ((position = getPositionPhi(PN)))) {
        PN->addIncoming(value_stack[position].back(), BB);
      }
    }
  }

  // This loop calls rename on all children of this block, where "children"
  // means the children of this block in the dominance tree.
  DomTreeNode *DTN = DT_->getNode(BB);
  for (DomTreeNode::iterator begin = DTN->begin(), end = DTN->end();
       begin != end; ++begin) {
    DomTreeNodeBase<BasicBlock> *DTN_children = *begin;
    BasicBlock *BB_children = DTN_children->getBlock();
    rename(BB_children);
  }

  // Now we remove all inserted definitions of a variable from the top of
  // the stack leaving the previous one as the top.
  for (SmallPtrSet<Instruction*, 8>::iterator DI = defined.begin(),
       DE = defined.end(); DI != DE; ++DI)
    value_stack[*DI].pop_back();
}
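substituteUse itself is not shown in this listing. The sketch below is a simplified, hypothetical reconstruction of what it must do for the renaming to be sound; it assumes operands can be looked up directly by their defining instruction, which glosses over how already-renamed values are mapped back to their original variable.

// Simplified, hypothetical sketch of operand substitution: replace each
// operand that has pending SSI definitions with the most recent one.
void SSI::substituteUse(Instruction *I) {
  for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
    Instruction *Op = dyn_cast<Instruction>(I->getOperand(i));
    if (!Op || value_stack[Op].empty())
      continue;
    // The top of the stack is the current name for this variable.
    I->setOperand(i, value_stack[Op].back());
  }
}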
Example no. 19
static bool SimplifyLoopInst(Loop *L, DominatorTree *DT, LoopInfo *LI,
                             AssumptionCache *AC,
                             const TargetLibraryInfo *TLI) {
  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);
  array_pod_sort(ExitBlocks.begin(), ExitBlocks.end());

  SmallPtrSet<const Instruction *, 8> S1, S2, *ToSimplify = &S1, *Next = &S2;

  // The bit we are stealing from the pointer represents whether this basic
  // block is the header of a subloop, in which case we only process its phis.
  typedef PointerIntPair<BasicBlock *, 1> WorklistItem;
  SmallVector<WorklistItem, 16> VisitStack;
  SmallPtrSet<BasicBlock *, 32> Visited;

  bool Changed = false;
  bool LocalChanged;
  do {
    LocalChanged = false;

    VisitStack.clear();
    Visited.clear();

    VisitStack.push_back(WorklistItem(L->getHeader(), false));

    while (!VisitStack.empty()) {
      WorklistItem Item = VisitStack.pop_back_val();
      BasicBlock *BB = Item.getPointer();
      bool IsSubloopHeader = Item.getInt();
      const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();

      // Simplify instructions in the current basic block.
      for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) {
        Instruction *I = &*BI++;

        // The first time through the loop ToSimplify is empty and we try to
        // simplify all instructions. On later iterations ToSimplify is not
        // empty and we only bother simplifying instructions that are in it.
        if (!ToSimplify->empty() && !ToSimplify->count(I))
          continue;

        // Don't bother simplifying unused instructions.
        if (!I->use_empty()) {
          Value *V = SimplifyInstruction(I, DL, TLI, DT, AC);
          if (V && LI->replacementPreservesLCSSAForm(I, V)) {
            // Mark all uses for resimplification next time round the loop.
            for (User *U : I->users())
              Next->insert(cast<Instruction>(U));

            I->replaceAllUsesWith(V);
            LocalChanged = true;
            ++NumSimplified;
          }
        }
        if (RecursivelyDeleteTriviallyDeadInstructions(I, TLI)) {
          // RecursivelyDeleteTriviallyDeadInstruction can remove more than one
          // instruction, so simply incrementing the iterator does not work.
          // When instructions get deleted re-iterate instead.
          BI = BB->begin();
          BE = BB->end();
          LocalChanged = true;
        }

        if (IsSubloopHeader && !isa<PHINode>(I))
          break;
      }

      // Add all successors to the worklist, except for loop exit blocks and
      // the bodies of subloops. We visit the headers of loops so that we can
      // process their phis, but we contract the rest of the subloop body and
      // only follow edges leading back to the original loop.
      for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE;
           ++SI) {
        BasicBlock *SuccBB = *SI;
        if (!Visited.insert(SuccBB).second)
          continue;

        const Loop *SuccLoop = LI->getLoopFor(SuccBB);
        if (SuccLoop && SuccLoop->getHeader() == SuccBB &&
            L->contains(SuccLoop)) {
          VisitStack.push_back(WorklistItem(SuccBB, true));

          SmallVector<BasicBlock *, 8> SubLoopExitBlocks;
          SuccLoop->getExitBlocks(SubLoopExitBlocks);

          for (unsigned i = 0; i < SubLoopExitBlocks.size(); ++i) {
            BasicBlock *ExitBB = SubLoopExitBlocks[i];
            if (LI->getLoopFor(ExitBB) == L && Visited.insert(ExitBB).second)
              VisitStack.push_back(WorklistItem(ExitBB, false));
          }

          continue;
        }

        bool IsExitBlock =
            std::binary_search(ExitBlocks.begin(), ExitBlocks.end(), SuccBB);
        if (IsExitBlock)
          continue;

        VisitStack.push_back(WorklistItem(SuccBB, false));
      }
    }

    // Place the list of instructions to simplify on the next loop iteration
    // into ToSimplify.
    std::swap(ToSimplify, Next);
    Next->clear();

    Changed |= LocalChanged;
  } while (LocalChanged);

  return Changed;
}
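The bit-stealing trick mentioned in the comment above is worth seeing in isolation: PointerIntPair stores the flag in the low (alignment) bits of the pointer, so each worklist item stays pointer-sized. A minimal stand-alone illustration (the function name demo is only for this sketch):

#include "llvm/ADT/PointerIntPair.h"
#include "llvm/IR/BasicBlock.h"

// One pointer-sized word carries both the block and the "is subloop
// header" flag.
typedef llvm::PointerIntPair<llvm::BasicBlock *, 1> WorklistItem;

void demo(llvm::BasicBlock *Header) {
  WorklistItem Item(Header, /*IsSubloopHeader=*/true);
  llvm::BasicBlock *BB = Item.getPointer(); // recover the pointer
  bool IsSubloopHeader = Item.getInt();     // recover the stolen bit
  (void)BB; (void)IsSubloopHeader;
}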
Example no. 20
bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
  MF = &mf;
  MRI = &mf.getRegInfo();
  TRI = MF->getTarget().getRegisterInfo();

  ReservedRegisters = TRI->getReservedRegs(mf);

  unsigned NumRegs = TRI->getNumRegs();
  PhysRegDef  = new MachineInstr*[NumRegs];
  PhysRegUse  = new MachineInstr*[NumRegs];
  PHIVarInfo = new SmallVector<unsigned, 4>[MF->getNumBlockIDs()];
  std::fill(PhysRegDef,  PhysRegDef  + NumRegs, (MachineInstr*)0);
  std::fill(PhysRegUse,  PhysRegUse  + NumRegs, (MachineInstr*)0);
  PHIJoins.clear();

  // FIXME: LiveIntervals will be updated to remove its dependence on
  // LiveVariables to improve compilation time and eliminate bizarre pass
  // dependencies. Until then, we can't change much in -O0.
  if (!MRI->isSSA())
    report_fatal_error("regalloc=... not currently supported with -O0");

  analyzePHINodes(mf);

  // Calculate live variable information in depth first order on the CFG of the
  // function.  This guarantees that we will see the definition of a virtual
  // register before its uses due to dominance properties of SSA (except for PHI
  // nodes, which are treated as a special case).
  MachineBasicBlock *Entry = MF->begin();
  SmallPtrSet<MachineBasicBlock*,16> Visited;

  for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*,16> >
         DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited);
       DFI != E; ++DFI) {
    MachineBasicBlock *MBB = *DFI;

    // Mark live-in registers as live-in.
    SmallVector<unsigned, 4> Defs;
    for (MachineBasicBlock::livein_iterator II = MBB->livein_begin(),
           EE = MBB->livein_end(); II != EE; ++II) {
      assert(TargetRegisterInfo::isPhysicalRegister(*II) &&
             "Cannot have a live-in virtual register!");
      HandlePhysRegDef(*II, 0, Defs);
    }

    // Loop over all of the instructions, processing them.
    DistanceMap.clear();
    unsigned Dist = 0;
    for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
         I != E; ++I) {
      MachineInstr *MI = I;
      if (MI->isDebugValue())
        continue;
      DistanceMap.insert(std::make_pair(MI, Dist++));

      // Process all of the operands of the instruction...
      unsigned NumOperandsToProcess = MI->getNumOperands();

      // Unless it is a PHI node.  In this case, ONLY process the DEF, not any
      // of the uses.  They will be handled in other basic blocks.
      if (MI->isPHI())
        NumOperandsToProcess = 1;

      // Clear kill and dead markers. LV will recompute them.
      SmallVector<unsigned, 4> UseRegs;
      SmallVector<unsigned, 4> DefRegs;
      SmallVector<unsigned, 1> RegMasks;
      for (unsigned i = 0; i != NumOperandsToProcess; ++i) {
        MachineOperand &MO = MI->getOperand(i);
        if (MO.isRegMask()) {
          RegMasks.push_back(i);
          continue;
        }
        if (!MO.isReg() || MO.getReg() == 0)
          continue;
        unsigned MOReg = MO.getReg();
        if (MO.isUse()) {
          MO.setIsKill(false);
          UseRegs.push_back(MOReg);
        } else /*MO.isDef()*/ {
          MO.setIsDead(false);
          DefRegs.push_back(MOReg);
        }
      }

      // Process all uses.
      for (unsigned i = 0, e = UseRegs.size(); i != e; ++i) {
        unsigned MOReg = UseRegs[i];
        if (TargetRegisterInfo::isVirtualRegister(MOReg))
          HandleVirtRegUse(MOReg, MBB, MI);
        else if (!ReservedRegisters[MOReg])
          HandlePhysRegUse(MOReg, MI);
      }

      // Process all masked registers. (Call clobbers).
      for (unsigned i = 0, e = RegMasks.size(); i != e; ++i)
        HandleRegMask(MI->getOperand(RegMasks[i]));

      // Process all defs.
      for (unsigned i = 0, e = DefRegs.size(); i != e; ++i) {
        unsigned MOReg = DefRegs[i];
        if (TargetRegisterInfo::isVirtualRegister(MOReg))
          HandleVirtRegDef(MOReg, MI);
        else if (!ReservedRegisters[MOReg])
          HandlePhysRegDef(MOReg, MI, Defs);
      }
      UpdatePhysRegDefs(MI, Defs);
    }

    // Handle any virtual assignments from PHI nodes which might be at the
    // bottom of this basic block.  We check all of our successor blocks to see
    // if they have PHI nodes, and if so, we simulate an assignment at the end
    // of the current block.
    if (!PHIVarInfo[MBB->getNumber()].empty()) {
      SmallVector<unsigned, 4>& VarInfoVec = PHIVarInfo[MBB->getNumber()];

      for (SmallVector<unsigned, 4>::iterator I = VarInfoVec.begin(),
             E = VarInfoVec.end(); I != E; ++I)
        // Mark it alive only in the block we are representing.
        MarkVirtRegAliveInBlock(getVarInfo(*I),MRI->getVRegDef(*I)->getParent(),
                                MBB);
    }

    // Finally, if the last instruction in the block is a return, make sure to
    // mark it as using all of the live-out values in the function.
    // Things marked both call and return are tail calls; do not do this for
    // them.  The tail callee need not take the same registers as input
    // that it produces as output, and there are dependencies for its input
    // registers elsewhere.
    if (!MBB->empty() && MBB->back().isReturn()
        && !MBB->back().isCall()) {
      MachineInstr *Ret = &MBB->back();

      for (MachineRegisterInfo::liveout_iterator
           I = MF->getRegInfo().liveout_begin(),
           E = MF->getRegInfo().liveout_end(); I != E; ++I) {
        assert(TargetRegisterInfo::isPhysicalRegister(*I) &&
               "Cannot have a live-out virtual register!");
        HandlePhysRegUse(*I, Ret);

        // Add live-out registers as implicit uses.
        if (!Ret->readsRegister(*I))
          Ret->addOperand(MachineOperand::CreateReg(*I, false, true));
      }
    }

    // MachineCSE may CSE instructions which write to non-allocatable physical
    // registers across MBBs. Remember if any reserved register is liveout.
    SmallSet<unsigned, 4> LiveOuts;
    for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(),
           SE = MBB->succ_end(); SI != SE; ++SI) {
      MachineBasicBlock *SuccMBB = *SI;
      if (SuccMBB->isLandingPad())
        continue;
      for (MachineBasicBlock::livein_iterator LI = SuccMBB->livein_begin(),
             LE = SuccMBB->livein_end(); LI != LE; ++LI) {
        unsigned LReg = *LI;
        if (!TRI->isInAllocatableClass(LReg))
          // Remember any non-allocatable (reserved) register that is live
          // into a successor block.
          LiveOuts.insert(LReg);
      }
    }

    // Loop over PhysRegDef / PhysRegUse, killing any registers that are
    // available at the end of the basic block.
    for (unsigned i = 0; i != NumRegs; ++i)
      if ((PhysRegDef[i] || PhysRegUse[i]) && !LiveOuts.count(i))
        HandlePhysRegDef(i, 0, Defs);

    std::fill(PhysRegDef,  PhysRegDef  + NumRegs, (MachineInstr*)0);
    std::fill(PhysRegUse,  PhysRegUse  + NumRegs, (MachineInstr*)0);
  }

  // Convert and transfer the dead / killed information we have gathered into
  // VirtRegInfo onto MI's.
  for (unsigned i = 0, e1 = VirtRegInfo.size(); i != e1; ++i) {
    const unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
    for (unsigned j = 0, e2 = VirtRegInfo[Reg].Kills.size(); j != e2; ++j)
      if (VirtRegInfo[Reg].Kills[j] == MRI->getVRegDef(Reg))
        VirtRegInfo[Reg].Kills[j]->addRegisterDead(Reg, TRI);
      else
        VirtRegInfo[Reg].Kills[j]->addRegisterKilled(Reg, TRI);
  }

  // Check to make sure there are no unreachable blocks in the MC CFG for the
  // function.  If there are, it is due to a bug in the instruction selector
  // or some other part of the code generator.
#ifndef NDEBUG
  for(MachineFunction::iterator i = MF->begin(), e = MF->end(); i != e; ++i)
    assert(Visited.count(&*i) != 0 && "unreachable basic block found");
#endif

  delete[] PhysRegDef;
  delete[] PhysRegUse;
  delete[] PHIVarInfo;

  return false;
}
Example no. 21
/// \brief Determine which blocks the value is live in.
///
/// These are blocks which lead to uses.  Knowing this allows us to avoid
/// inserting PHI nodes into blocks which don't lead to uses (thus, the
/// inserted phi nodes would be dead).
void PromoteMem2Reg::ComputeLiveInBlocks(
    AllocaInst *AI, AllocaInfo &Info,
    const SmallPtrSet<BasicBlock *, 32> &DefBlocks,
    SmallPtrSet<BasicBlock *, 32> &LiveInBlocks) {

  // To determine liveness, we must iterate through the predecessors of blocks
  // where the def is live.  Blocks are added to the worklist if we need to
  // check their predecessors.  Start with all the using blocks.
  SmallVector<BasicBlock *, 64> LiveInBlockWorklist(Info.UsingBlocks.begin(),
                                                    Info.UsingBlocks.end());

  // If any of the using blocks is also a definition block, check to see if the
  // definition occurs before or after the use.  If it happens before the use,
  // the value isn't really live-in.
  for (unsigned i = 0, e = LiveInBlockWorklist.size(); i != e; ++i) {
    BasicBlock *BB = LiveInBlockWorklist[i];
    if (!DefBlocks.count(BB))
      continue;

    // Okay, this is a block that both uses and defines the value.  If the first
    // reference to the alloca is a def (store), then we know it isn't live-in.
    for (BasicBlock::iterator I = BB->begin();; ++I) {
      if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
        if (SI->getOperand(1) != AI)
          continue;

        // We found a store to the alloca before a load.  The alloca is not
        // actually live-in here.
        LiveInBlockWorklist[i] = LiveInBlockWorklist.back();
        LiveInBlockWorklist.pop_back();
        --i, --e;
        break;
      }

      if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
        if (LI->getOperand(0) != AI)
          continue;

        // Okay, we found a load before a store to the alloca.  It is actually
        // live into this block.
        break;
      }
    }
  }

  // Now that we have a set of blocks where the phi is live-in, recursively add
  // their predecessors until we find the full region the value is live.
  while (!LiveInBlockWorklist.empty()) {
    BasicBlock *BB = LiveInBlockWorklist.pop_back_val();

    // The block really is live in here, insert it into the set.  If already in
    // the set, then it has already been processed.
    if (!LiveInBlocks.insert(BB))
      continue;

    // Since the value is live into BB, it is either defined in a predecessor
    // or live into it too.  Add the preds to the worklist unless they are a
    // defining block.
    for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
      BasicBlock *P = *PI;

      // The value is not live into a predecessor if it defines the value.
      if (DefBlocks.count(P))
        continue;

      // Otherwise it is, add to the worklist.
      LiveInBlockWorklist.push_back(P);
    }
  }
}
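The point of computing this set is pruned SSA construction: mem2reg places a PHI node only at an iterated-dominance-frontier block where the alloca is actually live in, instead of at every frontier block. A hedged one-line sketch of how the set is consumed follows; the predicate name shouldInsertPhi is hypothetical (the real consumer is DetermineInsertionPoint).

// Hypothetical predicate: a PHI for the alloca is worth inserting at a
// dominance-frontier block only if the value is live into that block;
// otherwise the PHI would be dead on arrival.
static bool shouldInsertPhi(BasicBlock *FrontierBB,
                            const SmallPtrSet<BasicBlock *, 32> &LiveInBlocks) {
  return LiveInBlocks.count(FrontierBB);
}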
Example no. 22
const DominanceFrontier::DomSetType &
DominanceFrontier::calculate(const DominatorTree &DT,
                             const DomTreeNode *Node) {
  BasicBlock *BB = Node->getBlock();
  DomSetType *Result = NULL;

  std::vector<DFCalculateWorkObject> workList;
  SmallPtrSet<BasicBlock *, 32> visited;

  workList.push_back(DFCalculateWorkObject(BB, NULL, Node, NULL));
  do {
    DFCalculateWorkObject *currentW = &workList.back();
    assert (currentW && "Missing work object.");

    BasicBlock *currentBB = currentW->currentBB;
    BasicBlock *parentBB = currentW->parentBB;
    const DomTreeNode *currentNode = currentW->Node;
    const DomTreeNode *parentNode = currentW->parentNode;
    assert (currentBB && "Invalid work object. Missing current Basic Block");
    assert (currentNode && "Invalid work object. Missing current Node");
    DomSetType &S = Frontiers[currentBB];

    // Visit each block only once.
    if (visited.count(currentBB) == 0) {
      visited.insert(currentBB);

      // Loop over CFG successors to calculate DFlocal[currentNode]
      for (succ_iterator SI = succ_begin(currentBB), SE = succ_end(currentBB);
           SI != SE; ++SI) {
        // Does Node immediately dominate this successor?
        if (DT[*SI]->getIDom() != currentNode)
          S.insert(*SI);
      }
    }

    // At this point, S is DFlocal.  Now we union in DFup's of our children...
    // Loop through and visit the nodes that Node immediately dominates (Node's
    // children in the IDomTree)
    bool visitChild = false;
    for (DomTreeNode::const_iterator NI = currentNode->begin(), 
           NE = currentNode->end(); NI != NE; ++NI) {
      DomTreeNode *IDominee = *NI;
      BasicBlock *childBB = IDominee->getBlock();
      if (visited.count(childBB) == 0) {
        workList.push_back(DFCalculateWorkObject(childBB, currentBB,
                                                 IDominee, currentNode));
        visitChild = true;
      }
    }

    // If no unvisited children were found (all children are visited or
    // there are none), pop this block from the workList.
    if (!visitChild) {

      if (!parentBB) {
        Result = &S;
        break;
      }

      DomSetType::const_iterator CDFI = S.begin(), CDFE = S.end();
      DomSetType &parentSet = Frontiers[parentBB];
      for (; CDFI != CDFE; ++CDFI) {
        if (!DT.properlyDominates(parentNode, DT[*CDFI]))
          parentSet.insert(*CDFI);
      }
      workList.pop_back();
    }

  } while (!workList.empty());

  return *Result;
}
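For reference, the worklist above is an iterative rendering of the classic Cytron et al. dominance-frontier recurrence, where the children of X are its children in the dominator tree:

    DF(X)       = DF_local(X)  U  the union of DF_up(Z) over children Z of X
    DF_local(X) = { Y in succ(X) : idom(Y) != X }
    DF_up(Z)    = { Y in DF(Z)   : X does not strictly dominate Y }

The successor scan computes DF_local, and the parentSet loop at the bottom folds each child's DF_up contribution into its parent once all of the child's own children have been visited.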
Example no. 23
/// \brief Recursively traverse the CFG of the function, renaming loads and
/// stores to the allocas which we are promoting.
///
/// IncomingVals indicates what value each Alloca contains on exit from the
/// predecessor block Pred.
void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred,
                                RenamePassData::ValVector &IncomingVals,
                                std::vector<RenamePassData> &Worklist) {
NextIteration:
  // If we are inserting any phi nodes into this BB, they will already be in the
  // block.
  if (PHINode *APN = dyn_cast<PHINode>(BB->begin())) {
    // If we have PHI nodes to update, compute the number of edges from Pred to
    // BB.
    if (PhiToAllocaMap.count(APN)) {
      // We want to be able to distinguish between PHI nodes being inserted by
      // this invocation of mem2reg from those phi nodes that already existed in
      // the IR before mem2reg was run.  We determine that APN is being inserted
      // because it is missing incoming edges.  All other PHI nodes being
      // inserted by this pass of mem2reg will have the same number of incoming
      // operands so far.  Remember this count.
      unsigned NewPHINumOperands = APN->getNumOperands();

      unsigned NumEdges = std::count(succ_begin(Pred), succ_end(Pred), BB);
      assert(NumEdges && "Must be at least one edge from Pred to BB!");

      // Add entries for all the phis.
      BasicBlock::iterator PNI = BB->begin();
      do {
        unsigned AllocaNo = PhiToAllocaMap[APN];

        // Add N incoming values to the PHI node.
        for (unsigned i = 0; i != NumEdges; ++i)
          APN->addIncoming(IncomingVals[AllocaNo], Pred);

        // The currently active variable for this block is now the PHI.
        IncomingVals[AllocaNo] = APN;

        // Get the next phi node.
        ++PNI;
        APN = dyn_cast<PHINode>(PNI);
        if (APN == 0)
          break;

        // Verify that it is missing entries.  If not, it is not being inserted
        // by this mem2reg invocation so we want to ignore it.
      } while (APN->getNumOperands() == NewPHINumOperands);
    }
  }

  // Don't revisit blocks.
  if (!Visited.insert(BB))
    return;

  for (BasicBlock::iterator II = BB->begin(); !isa<TerminatorInst>(II);) {
    Instruction *I = II++; // get the instruction, increment iterator

    if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
      AllocaInst *Src = dyn_cast<AllocaInst>(LI->getPointerOperand());
      if (!Src)
        continue;

      DenseMap<AllocaInst *, unsigned>::iterator AI = AllocaLookup.find(Src);
      if (AI == AllocaLookup.end())
        continue;

      Value *V = IncomingVals[AI->second];

      // Anything using the load now uses the current value.
      LI->replaceAllUsesWith(V);
      if (AST && LI->getType()->isPointerTy())
        AST->deleteValue(LI);
      BB->getInstList().erase(LI);
    } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
      // Delete this instruction and mark the name as the current holder of the
      // value
      AllocaInst *Dest = dyn_cast<AllocaInst>(SI->getPointerOperand());
      if (!Dest)
        continue;

      DenseMap<AllocaInst *, unsigned>::iterator ai = AllocaLookup.find(Dest);
      if (ai == AllocaLookup.end())
        continue;

      // what value were we writing?
      IncomingVals[ai->second] = SI->getOperand(0);
      // Record debuginfo for the store before removing it.
      if (DbgDeclareInst *DDI = AllocaDbgDeclares[ai->second])
        ConvertDebugDeclareToDebugValue(DDI, SI, DIB);
      BB->getInstList().erase(SI);
    }
  }

  // 'Recurse' to our successors.
  succ_iterator I = succ_begin(BB), E = succ_end(BB);
  if (I == E)
    return;

  // Keep track of the successors so we don't visit the same successor twice
  SmallPtrSet<BasicBlock *, 8> VisitedSuccs;

  // Handle the first successor without using the worklist.
  VisitedSuccs.insert(*I);
  Pred = BB;
  BB = *I;
  ++I;

  for (; I != E; ++I)
    if (VisitedSuccs.insert(*I))
      Worklist.push_back(RenamePassData(*I, Pred, IncomingVals));

  goto NextIteration;
}
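The goto at the end is manual tail recursion: the first successor is processed in place and the rest go onto the explicit worklist. A hedged sketch of the matching driver loop is below, modeled on the shape of PromoteMem2Reg::run; the wrapper name runRenamePass is hypothetical, and it assumes RenamePassData exposes BB, Pred, Values, and a swap method as the code above suggests.

// Hypothetical wrapper: drain the worklist that RenamePass keeps refilling.
void PromoteMem2Reg::runRenamePass(BasicBlock *EntryBB,
                                   RenamePassData::ValVector &Values) {
  std::vector<RenamePassData> Worklist;
  Worklist.push_back(RenamePassData(EntryBB, 0, Values));
  do {
    RenamePassData RPD;
    RPD.swap(Worklist.back());
    Worklist.pop_back();
    // RenamePass may push more (BB, Pred, IncomingVals) items here.
    RenamePass(RPD.BB, RPD.Pred, RPD.Values, Worklist);
  } while (!Worklist.empty());
}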
Example no. 24
File: LICM.cpp Project: CPFL/guc
/// PromoteAliasSet - Try to promote memory values to scalars by sinking
/// stores out of the loop and moving loads to before the loop.  We do this by
/// looping over the stores in the loop, looking for stores to Must pointers
/// which are loop invariant.
///
void LICM::PromoteAliasSet(AliasSet &AS) {
  // We can promote this alias set if it has a store, if it is a "Must" alias
  // set, if the pointer is loop invariant, and if we are not eliminating any
  // volatile loads or stores.
  if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() ||
      AS.isVolatile() || !CurLoop->isLoopInvariant(AS.begin()->getValue()))
    return;
  
  assert(!AS.empty() &&
         "Must alias set should have at least one pointer element in it!");
  Value *SomePtr = AS.begin()->getValue();

  // It isn't safe to promote a load/store from the loop if the load/store is
  // conditional.  For example, turning:
  //
  //    for () { if (c) *P += 1; }
  //
  // into:
  //
  //    tmp = *P;  for () { if (c) tmp +=1; } *P = tmp;
  //
  // is not safe, because *P may only be valid to access if 'c' is true.
  // 
  // It is safe to promote P if all uses are direct load/stores and if at
  // least one is guaranteed to be executed.
  bool GuaranteedToExecute = false;
  
  SmallVector<Instruction*, 64> LoopUses;
  SmallPtrSet<Value*, 4> PointerMustAliases;

  // Check that all of the pointers in the alias set have the same type.  We
  // cannot (yet) promote a memory location that is loaded and stored in
  // different sizes.
  for (AliasSet::iterator ASI = AS.begin(), E = AS.end(); ASI != E; ++ASI) {
    Value *ASIV = ASI->getValue();
    PointerMustAliases.insert(ASIV);
    
    // Check that all of the pointers in the alias set have the same type.  We
    // cannot (yet) promote a memory location that is loaded and stored in
    // different sizes.
    if (SomePtr->getType() != ASIV->getType())
      return;
    
    for (Value::use_iterator UI = ASIV->use_begin(), UE = ASIV->use_end();
         UI != UE; ++UI) {
      // Ignore instructions that are outside the loop.
      Instruction *Use = dyn_cast<Instruction>(*UI);
      if (!Use || !CurLoop->contains(Use))
        continue;
      
      // If there is a non-load/store instruction in the loop, we can't
      // promote it.
      if (isa<LoadInst>(Use))
        assert(!cast<LoadInst>(Use)->isVolatile() && "AST broken");
      else if (isa<StoreInst>(Use)) {
        assert(!cast<StoreInst>(Use)->isVolatile() && "AST broken");
        if (Use->getOperand(0) == ASIV) return;
      } else
        return; // Not a load or store.
      
      if (!GuaranteedToExecute)
        GuaranteedToExecute = isSafeToExecuteUnconditionally(*Use);
      
      LoopUses.push_back(Use);
    }
  }
  
  // If there isn't a guaranteed-to-execute instruction, we can't promote.
  if (!GuaranteedToExecute)
    return;
  
  // Otherwise, this is safe to promote, lets do it!
  DEBUG(dbgs() << "LICM: Promoting value stored to in loop: "
               << *SomePtr << '\n');
  Changed = true;
  ++NumPromoted;

  // We use the SSAUpdater interface to insert phi nodes as required.
  SmallVector<PHINode*, 16> NewPHIs;
  SSAUpdater SSA(&NewPHIs);
  
  // SSAUpdater wants to know some value of the same type as the one we will
  // be inserting.
  Value *SomeValue;
  if (isa<LoadInst>(LoopUses[0]))
    SomeValue = LoopUses[0];
  else
    SomeValue = cast<StoreInst>(LoopUses[0])->getOperand(0);
  SSA.Initialize(SomeValue->getType(), SomeValue->getName());

  // First step: bucket up uses of the pointers by the block they occur in.
  // This is important because we have to handle multiple defs/uses in a block
  // ourselves: SSAUpdater is purely for cross-block references.
  // FIXME: Want a TinyVector<Instruction*> since there is usually 0/1 element.
  DenseMap<BasicBlock*, std::vector<Instruction*> > UsesByBlock;
  for (unsigned i = 0, e = LoopUses.size(); i != e; ++i) {
    Instruction *User = LoopUses[i];
    UsesByBlock[User->getParent()].push_back(User);
  }
  
  // Okay, now we can iterate over all the blocks in the loop with uses,
  // processing them.  Keep track of which loads are loading a live-in value.
  SmallVector<LoadInst*, 32> LiveInLoads;
  DenseMap<Value*, Value*> ReplacedLoads;
  
  for (unsigned LoopUse = 0, e = LoopUses.size(); LoopUse != e; ++LoopUse) {
    Instruction *User = LoopUses[LoopUse];
    std::vector<Instruction*> &BlockUses = UsesByBlock[User->getParent()];
    
    // If this block has already been processed, ignore this repeat use.
    if (BlockUses.empty()) continue;
    
    // Okay, this is the first use in the block.  If this block just has a
    // single user in it, we can rewrite it trivially.
    if (BlockUses.size() == 1) {
      // If it is a store, it is a trivial def of the value in the block.
      if (isa<StoreInst>(User)) {
        SSA.AddAvailableValue(User->getParent(),
                              cast<StoreInst>(User)->getOperand(0));
      } else {
        // Otherwise it is a load, queue it to rewrite as a live-in load.
        LiveInLoads.push_back(cast<LoadInst>(User));
      }
      BlockUses.clear();
      continue;
    }
    
    // Otherwise, check to see if this block is all loads.  If so, we can queue
    // them all as live in loads.
    bool HasStore = false;
    for (unsigned i = 0, e = BlockUses.size(); i != e; ++i) {
      if (isa<StoreInst>(BlockUses[i])) {
        HasStore = true;
        break;
      }
    }
    
    if (!HasStore) {
      for (unsigned i = 0, e = BlockUses.size(); i != e; ++i)
        LiveInLoads.push_back(cast<LoadInst>(BlockUses[i]));
      BlockUses.clear();
      continue;
    }

    // Otherwise, we have mixed loads and stores (or just a bunch of stores).
    // Since SSAUpdater is purely for cross-block values, we need to determine
    // the order of these instructions in the block.  If the first use in the
    // block is a load, then it uses the live in value.  The last store defines
    // the live out value.  We handle this by doing a linear scan of the block.
    BasicBlock *BB = User->getParent();
    Value *StoredValue = 0;
    for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) {
      if (LoadInst *L = dyn_cast<LoadInst>(II)) {
        // If this is a load from an unrelated pointer, ignore it.
        if (!PointerMustAliases.count(L->getOperand(0))) continue;

        // If we haven't seen a store yet, this is a live in use, otherwise
        // use the stored value.
        if (StoredValue) {
          L->replaceAllUsesWith(StoredValue);
          ReplacedLoads[L] = StoredValue;
        } else {
          LiveInLoads.push_back(L);
        }
        continue;
      }
      
      if (StoreInst *S = dyn_cast<StoreInst>(II)) {
        // If this is a store to an unrelated pointer, ignore it.
        if (!PointerMustAliases.count(S->getOperand(1))) continue;

        // Remember that this is the active value in the block.
        StoredValue = S->getOperand(0);
      }
    }
    
    // The last stored value that happened is the live-out for the block.
    assert(StoredValue && "Already checked that there is a store in block");
    SSA.AddAvailableValue(BB, StoredValue);
    BlockUses.clear();
  }
  
  // Now that all the intra-loop values are classified, set up the preheader.
  // It gets a load of the pointer we're promoting, and it is the live-out value
  // from the preheader.
  LoadInst *PreheaderLoad = new LoadInst(SomePtr,SomePtr->getName()+".promoted",
                                         Preheader->getTerminator());
  SSA.AddAvailableValue(Preheader, PreheaderLoad);

  // Now that the preheader is good to go, set up the exit blocks.  Each exit
  // block gets a store of the live-out values that feed them.  Since we've
  // already told the SSA updater about the defs in the loop and the preheader
  // definition, it is all set and we can start using it.
  SmallVector<BasicBlock*, 8> ExitBlocks;
  CurLoop->getUniqueExitBlocks(ExitBlocks);
  for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
    BasicBlock *ExitBlock = ExitBlocks[i];
    Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
    Instruction *InsertPos = ExitBlock->getFirstNonPHI();
    new StoreInst(LiveInValue, SomePtr, InsertPos);
  }

  // Okay, now we rewrite all loads that use live-in values in the loop,
  // inserting PHI nodes as necessary.
  for (unsigned i = 0, e = LiveInLoads.size(); i != e; ++i) {
    LoadInst *ALoad = LiveInLoads[i];
    Value *NewVal = SSA.GetValueInMiddleOfBlock(ALoad->getParent());
    ALoad->replaceAllUsesWith(NewVal);
    CurAST->copyValue(ALoad, NewVal);
    ReplacedLoads[ALoad] = NewVal;
  }
  
  // If the preheader load is itself a pointer, we need to tell alias analysis
  // about the new pointer we created in the preheader block and about any PHI
  // nodes that just got inserted.
  if (PreheaderLoad->getType()->isPointerTy()) {
    // Copy any value stored to or loaded from a must-alias of the pointer.
    CurAST->copyValue(SomeValue, PreheaderLoad);
    
    for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i)
      CurAST->copyValue(SomeValue, NewPHIs[i]);
  }
  
  // Now that everything is rewritten, delete the old instructions from the body
  // of the loop.  They should all be dead now.
  for (unsigned i = 0, e = LoopUses.size(); i != e; ++i) {
    Instruction *User = LoopUses[i];
    
    // If this is a load that still has uses, then the load must have been added
    // as a live value in the SSAUpdate data structure for a block (e.g. because
    // the loaded value was stored later).  In this case, we need to recursively
    // propagate the updates until we get to the real value.
    if (!User->use_empty()) {
      Value *NewVal = ReplacedLoads[User];
      assert(NewVal && "not a replaced load?");
      
      // Propagate down to the ultimate replacee.  The intermediate loads
      // could theoretically already have been deleted, so we don't want to
      // dereference the Value*'s.
      DenseMap<Value*, Value*>::iterator RLI = ReplacedLoads.find(NewVal);
      while (RLI != ReplacedLoads.end()) {
        NewVal = RLI->second;
        RLI = ReplacedLoads.find(NewVal);
      }
      
      User->replaceAllUsesWith(NewVal);
      CurAST->copyValue(User, NewVal);
    }
    
    CurAST->deleteValue(User);
    User->eraseFromParent();
  }
  
  // Phew, we're done!
}
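The SSAUpdater usage above reduces to a small, reusable pattern: declare the type and name once, register every definition you know about, then ask for the value at each use and let the updater insert PHIs. A hedged stand-alone sketch (the function name rewriteOneUse is hypothetical, and it assumes one definition in the preheader and one from a store in the loop):

// Hypothetical illustration of the SSAUpdater pattern used above.
Value *rewriteOneUse(Type *Ty, BasicBlock *Preheader, Value *Init,
                     BasicBlock *StoreBB, Value *StoredVal,
                     BasicBlock *UseBB) {
  SmallVector<PHINode*, 16> NewPHIs;
  SSAUpdater SSA(&NewPHIs);
  SSA.Initialize(Ty, "promoted");            // type and name hint
  SSA.AddAvailableValue(Preheader, Init);    // def visible on loop entry
  SSA.AddAvailableValue(StoreBB, StoredVal); // def from a store in the loop
  return SSA.GetValueInMiddleOfBlock(UseBB); // inserts PHIs as needed
}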
Example no. 25
bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
  DEBUG(dbgs() << "********** PEEPHOLE OPTIMIZER **********\n");
  DEBUG(dbgs() << "********** Function: " << MF.getName() << '\n');

  if (DisablePeephole)
    return false;

  TM  = &MF.getTarget();
  TII = TM->getInstrInfo();
  MRI = &MF.getRegInfo();
  DT  = Aggressive ? &getAnalysis<MachineDominatorTree>() : 0;

  bool Changed = false;

  SmallPtrSet<MachineInstr*, 8> LocalMIs;
  SmallSet<unsigned, 4> ImmDefRegs;
  DenseMap<unsigned, MachineInstr*> ImmDefMIs;
  unsigned FoldAsLoadDefReg;
  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
    MachineBasicBlock *MBB = &*I;

    bool SeenMoveImm = false;
    LocalMIs.clear();
    ImmDefRegs.clear();
    ImmDefMIs.clear();
    FoldAsLoadDefReg = 0;

    for (MachineBasicBlock::iterator
           MII = I->begin(), MIE = I->end(); MII != MIE; ) {
      MachineInstr *MI = &*MII;
      // We may be erasing MI below, increment MII now.
      ++MII;
      LocalMIs.insert(MI);

      // If the instruction falls into one of the following categories,
      // discard the current load-folding candidate.
      if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() ||
          MI->isKill() || MI->isInlineAsm() || MI->isDebugValue() ||
          MI->hasUnmodeledSideEffects()) {
        FoldAsLoadDefReg = 0;
        continue;
      }
      if (MI->mayStore() || MI->isCall())
        FoldAsLoadDefReg = 0;

      if (((MI->isBitcast() || MI->isCopy()) && optimizeCopyOrBitcast(MI)) ||
          (MI->isCompare() && optimizeCmpInstr(MI, MBB)) ||
          (MI->isSelect() && optimizeSelect(MI))) {
        // MI is deleted.
        LocalMIs.erase(MI);
        Changed = true;
        continue;
      }

      if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) {
        SeenMoveImm = true;
      } else {
        Changed |= optimizeExtInstr(MI, MBB, LocalMIs);
        // optimizeExtInstr might have created new instructions after MI
        // and before the already incremented MII. Adjust MII so that the
        // next iteration sees the new instructions.
        MII = MI;
        ++MII;
        if (SeenMoveImm)
          Changed |= foldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs);
      }

      // Check whether MI is a load candidate for folding into a later
      // instruction. If MI is not a candidate, check whether we can fold an
      // earlier load into MI.
      if (!isLoadFoldable(MI, FoldAsLoadDefReg) && FoldAsLoadDefReg) {
        // We need to fold the load after optimizeCmpInstr, since
        // optimizeCmpInstr can enable folding by converting SUB to CMP.
        MachineInstr *DefMI = 0;
        MachineInstr *FoldMI = TII->optimizeLoadInstr(MI, MRI,
                                                      FoldAsLoadDefReg, DefMI);
        if (FoldMI) {
          // Update LocalMIs since we replaced MI with FoldMI and deleted DefMI.
          DEBUG(dbgs() << "Replacing: " << *MI);
          DEBUG(dbgs() << "     With: " << *FoldMI);
          LocalMIs.erase(MI);
          LocalMIs.erase(DefMI);
          LocalMIs.insert(FoldMI);
          MI->eraseFromParent();
          DefMI->eraseFromParent();
          ++NumLoadFold;

          // MI is replaced with FoldMI.
          Changed = true;
          continue;
        }
      }
    }
  }

  return Changed;
}
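isLoadFoldable is not shown in this listing; the sketch below is a hedged reconstruction, not the exact function, of what such a candidate test has to establish before FoldAsLoadDefReg may be set: the load must be foldable in principle, define exactly one virtual register, and that register must have a single non-debug use, so erasing the load after folding is safe.

// Hedged reconstruction of the load-candidate test used above.
bool PeepholeOptimizer::isLoadFoldable(MachineInstr *MI,
                                       unsigned &FoldAsLoadDefReg) {
  if (!MI->canFoldAsLoad() || !MI->mayLoad())
    return false;
  if (MI->getDesc().getNumDefs() != 1)
    return false;

  unsigned Reg = MI->getOperand(0).getReg();
  // Only virtual registers with a single non-debug use are safe: after
  // folding, the load is deleted, so no other user may remain.
  if (TargetRegisterInfo::isVirtualRegister(Reg) &&
      !MI->getOperand(0).getSubReg() && MRI->hasOneNonDBGUse(Reg)) {
    FoldAsLoadDefReg = Reg;
    return true;
  }
  return false;
}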
Example no. 26
/// nonLocalHelper - Private helper used to calculate non-local dependencies
/// by doing DFS on the predecessors of a block to find its dependencies
void MemoryDependenceAnalysis::nonLocalHelper(Instruction* query,
                                              BasicBlock* block,
                                         DenseMap<BasicBlock*, Value*>& resp) {
  // Set of blocks that we've already visited in our DFS
  SmallPtrSet<BasicBlock*, 4> visited;
  // If we're updating a dirtied cache entry, we don't need to reprocess
  // already computed entries.
  for (DenseMap<BasicBlock*, Value*>::iterator I = resp.begin(), 
       E = resp.end(); I != E; ++I)
    if (I->second != Dirty)
      visited.insert(I->first);
  
  // Current stack of the DFS
  SmallVector<BasicBlock*, 4> stack;
  for (pred_iterator PI = pred_begin(block), PE = pred_end(block);
       PI != PE; ++PI)
    stack.push_back(*PI);
  
  // Do a basic DFS
  while (!stack.empty()) {
    BasicBlock* BB = stack.back();
    
    // If we've already visited this block, no need to revisit it.
    if (visited.count(BB)) {
      stack.pop_back();
      continue;
    }
    
    // If we find a new block with a local dependency for query,
    // then we insert the new dependency and backtrack.
    if (BB != block) {
      visited.insert(BB);
      
      Instruction* localDep = getDependency(query, 0, BB);
      if (localDep != NonLocal) {
        resp.insert(std::make_pair(BB, localDep));
        stack.pop_back();
        
        continue;
      }
    // If we re-encounter the starting block, we still need to search it
    // because there might be a dependency in the starting block AFTER
    // the position of the query.  This is necessary to get loops right.
    } else if (BB == block) {
      visited.insert(BB);
      
      Instruction* localDep = getDependency(query, 0, BB);
      if (localDep != query)
        resp.insert(std::make_pair(BB, localDep));
      
      stack.pop_back();
      
      continue;
    }
    
    // If we didn't find anything, recurse on the predecessors of this block.
    // Only do this for blocks with a small number of predecessors.
    bool predOnStack = false;
    bool inserted = false;
    if (std::distance(pred_begin(BB), pred_end(BB)) <= PredLimit) { 
      for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
           PI != PE; ++PI)
        if (!visited.count(*PI)) {
          stack.push_back(*PI);
          inserted = true;
        } else
          predOnStack = true;
    }
    
    // If we inserted a new predecessor, then we'll come back to this block
    if (inserted)
      continue;
    // If we didn't insert because we have no predecessors, then this
    // query has no dependency at all.
    else if (!inserted && !predOnStack) {
      resp.insert(std::make_pair(BB, None));
    // If we didn't insert because our predecessors are already on the stack,
    // then we might still have a dependency, but it will be discovered during
    // backtracking.
    } else if (!inserted && predOnStack){
      resp.insert(std::make_pair(BB, NonLocal));
    }
    
    stack.pop_back();
  }
}
Example no. 27
/// SplitCriticalEdge - If this edge is a critical edge, insert a new node to
/// split the critical edge.  This will update DominatorTree and LoopInfo
/// information if they are available, so calling this pass will not
/// invalidate either of them.
/// This returns the new block if the edge was split, null otherwise.
///
/// If MergeIdenticalEdges is true (not the default), *all* edges from TI to the
/// specified successor will be merged into the same critical edge block.  
/// This is most commonly interesting with switch instructions, which may 
/// have many edges to any one destination.  This ensures that all edges to that
/// dest go to one block instead of each going to a different block, but isn't 
/// the standard definition of a "critical edge".
///
/// It is invalid to call this function on a critical edge that starts at an
/// IndirectBrInst.  Splitting these edges will almost always create an invalid
/// program because the address of the new block won't be the one that is jumped
/// to.
///
BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
                                    Pass *P, bool MergeIdenticalEdges) {
  if (!isCriticalEdge(TI, SuccNum, MergeIdenticalEdges)) return 0;
  
  assert(!isa<IndirectBrInst>(TI) &&
         "Cannot split critical edge from IndirectBrInst");
  
  BasicBlock *TIBB = TI->getParent();
  BasicBlock *DestBB = TI->getSuccessor(SuccNum);

  // Create a new basic block, linking it into the CFG.
  BasicBlock *NewBB = BasicBlock::Create(TI->getContext(),
                      TIBB->getName() + "." + DestBB->getName() + "_crit_edge");
  // Create our unconditional branch.
  BranchInst *NewBI = BranchInst::Create(DestBB, NewBB);
  NewBI->setDebugLoc(TI->getDebugLoc());

  // Branch to the new block, breaking the edge.
  TI->setSuccessor(SuccNum, NewBB);

  // Insert the block into the function... right after the block TI lives in.
  Function &F = *TIBB->getParent();
  Function::iterator FBBI = TIBB;
  F.getBasicBlockList().insert(++FBBI, NewBB);
  
  // If there are any PHI nodes in DestBB, we need to update them so that they
  // merge incoming values from NewBB instead of from TIBB.
  if (PHINode *APHI = dyn_cast<PHINode>(DestBB->begin())) {
    // This conceptually does:
    //  foreach (PHINode *PN in DestBB)
    //    PN->setIncomingBlock(PN->getIncomingBlock(TIBB), NewBB);
    // but is optimized for two cases.
    
    if (APHI->getNumIncomingValues() <= 8) {  // Small # preds case.
      unsigned BBIdx = 0;
      for (BasicBlock::iterator I = DestBB->begin(); isa<PHINode>(I); ++I) {
        // We no longer enter through TIBB, now we come in through NewBB.
        // Revector exactly one entry in the PHI node that used to come from
        // TIBB to come from NewBB.
        PHINode *PN = cast<PHINode>(I);
        
        // Reuse the previous value of BBIdx if it lines up.  In cases where we
        // have multiple phi nodes with *lots* of predecessors, this is a speed
        // win because we don't have to scan the PHI looking for TIBB.  This
        // happens because the BB list of PHI nodes are usually in the same
        // order.
        if (PN->getIncomingBlock(BBIdx) != TIBB)
          BBIdx = PN->getBasicBlockIndex(TIBB);
        PN->setIncomingBlock(BBIdx, NewBB);
      }
    } else {
      // However, the foreach loop is slow for blocks with lots of predecessors
      // because PHINode::getIncomingBlock is O(n) in # preds.  Instead, walk
      // the user list of TIBB to find the PHI nodes.
      SmallPtrSet<PHINode*, 16> UpdatedPHIs;
    
      for (Value::use_iterator UI = TIBB->use_begin(), E = TIBB->use_end();
           UI != E; ) {
        Value::use_iterator Use = UI++;
        if (PHINode *PN = dyn_cast<PHINode>(*Use)) {
          // Remove one entry from each PHI.
          if (PN->getParent() == DestBB && UpdatedPHIs.insert(PN))
            PN->setOperand(Use.getOperandNo(), NewBB);
        }
      }
    }
  }
   
  // If there are any other edges from TIBB to DestBB, update those to go
  // through the split block, making those edges non-critical as well (and
  // reducing the number of phi entries in the DestBB if relevant).
  if (MergeIdenticalEdges) {
    for (unsigned i = SuccNum+1, e = TI->getNumSuccessors(); i != e; ++i) {
      if (TI->getSuccessor(i) != DestBB) continue;
      
      // Remove an entry for TIBB from DestBB phi nodes.
      DestBB->removePredecessor(TIBB);
      
      // We found another edge to DestBB, go to NewBB instead.
      TI->setSuccessor(i, NewBB);
    }
  }

  // If we don't have a pass object, we can't update anything...
  if (P == 0) return NewBB;
  
  DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>();
  LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>();
  ProfileInfo *PI = P->getAnalysisIfAvailable<ProfileInfo>();
  
  // If we have nothing to update, just return.
  if (DT == 0 && LI == 0 && PI == 0)
    return NewBB;

  // Now update analysis information.  Since the only predecessor of NewBB is
  // TIBB, TIBB clearly dominates NewBB.  NewBB usually doesn't dominate
  // anything else, as DestBB has other predecessors.  However, if all other
  // predecessors of DestBB are already dominated by DestBB (e.g. DestBB is a
  // loop header) then NewBB dominates DestBB.
  SmallVector<BasicBlock*, 8> OtherPreds;

  // If there is a PHI in the block, loop over predecessors with it, which is
  // faster than iterating pred_begin/end.
  if (PHINode *PN = dyn_cast<PHINode>(DestBB->begin())) {
    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
      if (PN->getIncomingBlock(i) != NewBB)
        OtherPreds.push_back(PN->getIncomingBlock(i));
  } else {
    for (pred_iterator I = pred_begin(DestBB), E = pred_end(DestBB);
         I != E; ++I) {
      BasicBlock *P = *I;
      if (P != NewBB)
        OtherPreds.push_back(P);
    }
  }

  bool NewBBDominatesDestBB = true;
  
  // Should we update DominatorTree information?
  if (DT) {
    DomTreeNode *TINode = DT->getNode(TIBB);

    // The new block is not the immediate dominator for any other nodes, but
    // TINode is the immediate dominator for the new node.
    //
    if (TINode) {       // Don't break unreachable code!
      DomTreeNode *NewBBNode = DT->addNewBlock(NewBB, TIBB);
      DomTreeNode *DestBBNode = 0;
     
      // If NewBBDominatesDestBB hasn't been computed yet, do so with DT.
      if (!OtherPreds.empty()) {
        DestBBNode = DT->getNode(DestBB);
        while (!OtherPreds.empty() && NewBBDominatesDestBB) {
          if (DomTreeNode *OPNode = DT->getNode(OtherPreds.back()))
            NewBBDominatesDestBB = DT->dominates(DestBBNode, OPNode);
          OtherPreds.pop_back();
        }
        OtherPreds.clear();
      }
      
      // If NewBBDominatesDestBB, then NewBB dominates DestBB, otherwise it
      // doesn't dominate anything.
      if (NewBBDominatesDestBB) {
        if (!DestBBNode) DestBBNode = DT->getNode(DestBB);
        DT->changeImmediateDominator(DestBBNode, NewBBNode);
      }
    }
  }

  // Update LoopInfo if it is around.
  if (LI) {
    if (Loop *TIL = LI->getLoopFor(TIBB)) {
      // If either block is not in a loop, the new block is not in a loop
      // either, and thus LI doesn't need to be updated.
      if (Loop *DestLoop = LI->getLoopFor(DestBB)) {
        if (TIL == DestLoop) {
          // Both blocks are in the same loop, so NewBB joins that loop.
          DestLoop->addBasicBlockToLoop(NewBB, LI->getBase());
        } else if (TIL->contains(DestLoop)) {
          // Edge from an outer loop to an inner loop.  Add to the outer loop.
          TIL->addBasicBlockToLoop(NewBB, LI->getBase());
        } else if (DestLoop->contains(TIL)) {
          // Edge from an inner loop to an outer loop.  Add to the outer loop.
          DestLoop->addBasicBlockToLoop(NewBB, LI->getBase());
        } else {
          // Edge between two loops with no containment relation.  Because these
          // are natural loops, we know that the destination block must be the
          // header of its loop (adding a branch into a loop elsewhere would
          // create an irreducible loop).
          assert(DestLoop->getHeader() == DestBB &&
                 "Should not create irreducible loops!");
          if (Loop *P = DestLoop->getParentLoop())
            P->addBasicBlockToLoop(NewBB, LI->getBase());
        }
      }
      // If TIBB is in a loop and DestBB is outside of that loop, split the
      // other exit blocks of the loop that also have predecessors outside
      // the loop, to maintain a LoopSimplify guarantee.
      if (!TIL->contains(DestBB) &&
          P->mustPreserveAnalysisID(LoopSimplifyID)) {
        assert(!TIL->contains(NewBB) &&
               "Split point for loop exit is contained in loop!");

        // Update LCSSA form in the newly created exit block.
        if (P->mustPreserveAnalysisID(LCSSAID)) {
          SmallVector<BasicBlock *, 1> OrigPred;
          OrigPred.push_back(TIBB);
          CreatePHIsForSplitLoopExit(OrigPred, NewBB, DestBB);
        }

        // For each unique exit block...
        SmallVector<BasicBlock *, 4> ExitBlocks;
        TIL->getExitBlocks(ExitBlocks);
        for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
          // Collect all the preds that are inside the loop, and note
          // whether there are any preds outside the loop.
          SmallVector<BasicBlock *, 4> Preds;
          bool HasPredOutsideOfLoop = false;
          BasicBlock *Exit = ExitBlocks[i];
          for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit);
               I != E; ++I) {
            BasicBlock *P = *I;
            if (TIL->contains(P))
              Preds.push_back(P);
            else
              HasPredOutsideOfLoop = true;
          }
          // If there are any preds not in the loop, we'll need to split
          // the edges. The Preds.empty() check is needed because a block
          // may appear multiple times in the list. We can't use
          // getUniqueExitBlocks above because that depends on LoopSimplify
          // form, which we're in the process of restoring!
          if (!Preds.empty() && HasPredOutsideOfLoop) {
            BasicBlock *NewExitBB =
              SplitBlockPredecessors(Exit, Preds.data(), Preds.size(),
                                     "split", P);
            if (P->mustPreserveAnalysisID(LCSSAID))
              CreatePHIsForSplitLoopExit(Preds, NewExitBB, Exit);
          }
        }
      }
      // LCSSA form was updated above for the case where LoopSimplify is
      // available, which means that all predecessors of loop exit blocks
      // are within the loop. Without LoopSimplify form, it would be
      // necessary to insert a new phi.
      assert((!P->mustPreserveAnalysisID(LCSSAID) ||
              P->mustPreserveAnalysisID(LoopSimplifyID)) &&
             "SplitCriticalEdge doesn't know how to update LCCSA form "
             "without LoopSimplify!");
    }
  }

  // Update ProfileInfo if it is around.
  if (PI)
    PI->splitEdge(TIBB, DestBB, NewBB, MergeIdenticalEdges);

  return NewBB;
}
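For orientation, here is a minimal caller sketch (not part of the example above; breakAllCriticalEdges is a hypothetical helper, and the SplitCriticalEdge signature assumed is the era-appropriate one defined above, with P and MergeIdenticalEdges defaulted):

#include "llvm/Transforms/Utils/BasicBlockUtils.h"
using namespace llvm;

// Split every critical edge leaving BB.  P may be null, in which case no
// analyses are updated (the early return above handles that case).
static bool breakAllCriticalEdges(BasicBlock &BB, Pass *P) {
  TerminatorInst *TI = BB.getTerminator();
  if (isa<IndirectBrInst>(TI))
    return false; // Splitting edges from an indirectbr is invalid (see above).

  bool Changed = false;
  for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
    if (SplitCriticalEdge(TI, i, P)) // returns 0 for non-critical edges
      Changed = true;
  return Changed;
}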
bool TypeChecker::validateGenericFuncSignature(AbstractFunctionDecl *func) {
  bool invalid = false;

  // Create the archetype builder.
  ArchetypeBuilder builder = createArchetypeBuilder(func->getParentModule());

  // Type check the function declaration, treating all generic type
  // parameters as dependent, unresolved.
  DependentGenericTypeResolver dependentResolver(builder);
  if (checkGenericFuncSignature(*this, &builder, func, dependentResolver))
    invalid = true;

  // If this triggered a recursive validation, back out: we're done.
  // FIXME: This is an awful hack.
  if (func->hasType())
    return !func->isInvalid();

  // Finalize the generic requirements.
  (void)builder.finalize(func->getLoc());

  // The archetype builder now has all of the requirements, although there might
  // still be errors that have not yet been diagnosed. Revert the generic
  // function signature and type-check it again, completely.
  revertGenericFuncSignature(func);
  CompleteGenericTypeResolver completeResolver(*this, builder);
  if (checkGenericFuncSignature(*this, nullptr, func, completeResolver))
    invalid = true;

  // The generic function signature is complete and well-formed. Determine
  // the type of the generic function.

  // Collect the complete set of generic parameter types.
  SmallVector<GenericTypeParamType *, 4> allGenericParams;
  collectGenericParamTypes(func->getGenericParams(),
                           func->getDeclContext()->getGenericSignatureOfContext(),
                           allGenericParams);

  auto sig = builder.getGenericSignature(allGenericParams);

  // Debugging of the archetype builder and generic signature generation.
  if (sig && Context.LangOpts.DebugGenericSignatures) {
    func->dumpRef(llvm::errs());
    llvm::errs() << "\n";
    builder.dump(llvm::errs());
    llvm::errs() << "Generic signature: ";
    sig->print(llvm::errs());
    llvm::errs() << "\n";
    llvm::errs() << "Canonical generic signature: ";
    sig->getCanonicalSignature()->print(llvm::errs());
    llvm::errs() << "\n";
    llvm::errs() << "Canonical generic signature for mangling: ";
    sig->getCanonicalManglingSignature(*func->getParentModule())
    ->print(llvm::errs());
    llvm::errs() << "\n";
  }

  func->setGenericSignature(sig);

  if (invalid) {
    func->overwriteType(ErrorType::get(Context));
    return true;
  }

  // Compute the function type.
  Type funcTy;
  Type initFuncTy;
  if (auto fn = dyn_cast<FuncDecl>(func)) {
    funcTy = fn->getBodyResultTypeLoc().getType();
    
    if (!funcTy) {
      funcTy = TupleType::getEmpty(Context);
    } else {
      funcTy = getResultType(*this, fn, funcTy);
    }

  } else if (auto ctor = dyn_cast<ConstructorDecl>(func)) {
    // FIXME: shouldn't this just be
    // ctor->getDeclContext()->getDeclaredInterfaceType()?
    if (ctor->getDeclContext()->getAsProtocolOrProtocolExtensionContext()) {
      funcTy = ctor->getDeclContext()->getProtocolSelf()->getDeclaredType();
    } else {
      funcTy = ctor->getExtensionType()->getAnyNominal()
                 ->getDeclaredInterfaceType();
    }
    
    // Adjust result type for failability.
    if (ctor->getFailability() != OTK_None)
      funcTy = OptionalType::get(ctor->getFailability(), funcTy);

    initFuncTy = funcTy;
  } else {
    assert(isa<DestructorDecl>(func));
    funcTy = TupleType::getEmpty(Context);
  }

  auto paramLists = func->getParameterLists();
  SmallVector<ParameterList*, 4> storedParamLists;

  // FIXME: Destructors don't have the '()' pattern in their signature, so
  // paste it here.
  if (isa<DestructorDecl>(func)) {
    assert(paramLists.size() == 1 && "Only the self paramlist");
    storedParamLists.push_back(paramLists[0]);
    storedParamLists.push_back(ParameterList::createEmpty(Context));
    paramLists = storedParamLists;
  }

  bool hasSelf = func->getDeclContext()->isTypeContext();
  for (unsigned i = 0, e = paramLists.size(); i != e; ++i) {
    Type argTy;
    Type initArgTy;

    Type selfTy;
    if (i == e-1 && hasSelf) {
      selfTy = func->computeInterfaceSelfType(/*isInitializingCtor=*/false);
      // Substitute in our own 'self' parameter.

      argTy = selfTy;
      if (initFuncTy) {
        initArgTy = func->computeInterfaceSelfType(/*isInitializingCtor=*/true);
      }
    } else {
      argTy = paramLists[e - i - 1]->getType(Context);

      // For an implicit declaration, our argument type will be in terms of
      // archetypes rather than dependent types. Replace the
      // archetypes with their corresponding dependent types.
      if (func->isImplicit()) {
        argTy = ArchetypeBuilder::mapTypeOutOfContext(func, argTy); 
      }

      if (initFuncTy)
        initArgTy = argTy;
    }

    auto info = applyFunctionTypeAttributes(func, i);

    // FIXME: We shouldn't even get here if the function isn't locally generic
    // to begin with, but fixing that requires a lot of reengineering for local
    // definitions in generic contexts.
    if (sig && i == e-1) {
      if (func->getGenericParams()) {
        // Collect all generic params referenced in parameter types,
        // return type or requirements.
        SmallPtrSet<GenericTypeParamDecl *, 4> referencedGenericParams;
        // Record every generic parameter type referenced in the given type.
        auto collectReferencedGenericParams = [&](Type ty) {
          ty.visit([&referencedGenericParams](Type t) {
            if (isa<GenericTypeParamType>(t.getCanonicalTypeOrNull())) {
              referencedGenericParams.insert(
                  t->castTo<GenericTypeParamType>()->getDecl());
            }
          });
        };
        collectReferencedGenericParams(argTy);
        collectReferencedGenericParams(funcTy);

        auto requirements = sig->getRequirements();
        for (auto req : requirements) {
          if (req.getKind() == RequirementKind::SameType) {
            // Same type requirements may allow for generic
            // inference, even if this generic parameter
            // is not mentioned in the function signature.
            // TODO: Make the test more precise.
            auto left = req.getFirstType();
            auto right = req.getSecondType();
            // For now consider any references inside requirements
            // as a possibility to infer the generic type.
            collectReferencedGenericParams(left);
            collectReferencedGenericParams(right);
          }
        }

        // Find the depth of the function's own generic parameters.
        unsigned fnGenericParamsDepth = func->getGenericParams()->getDepth();

        // Check that every generic parameter type from the signature is
        // among referencedGenericParams.
        for (auto *genParam : sig->getGenericParams()) {
          auto *paramDecl = genParam->getDecl();
          if (paramDecl->getDepth() != fnGenericParamsDepth)
            continue;
          if (!referencedGenericParams.count(paramDecl)) {
            // Produce an error that this generic parameter cannot be bound.
            diagnose(paramDecl->getLoc(), diag::unreferenced_generic_parameter,
                     paramDecl->getNameStr());
            func->setInvalid();
          }
        }
      }

      funcTy = GenericFunctionType::get(sig, argTy, funcTy, info);
      if (initFuncTy)
        initFuncTy = GenericFunctionType::get(sig, initArgTy, initFuncTy, info);
    } else {
      funcTy = FunctionType::get(argTy, funcTy, info);

      if (initFuncTy)
        initFuncTy = FunctionType::get(initArgTy, initFuncTy, info);
    }
  }

  // Record the interface type.
  func->setInterfaceType(funcTy);
  if (initFuncTy)
    cast<ConstructorDecl>(func)->setInitializerInterfaceType(initFuncTy);
  return false;
}
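The currying loop above is easy to misread because it indexes paramLists back to front. As a sanity check, here is a small self-contained analogue (illustrative only, not compiler code; the parameter-list shapes are made up for a method (T) -> T on Self) showing the same fold order, from the innermost parameter list (paramLists[e - 1]) outward:

#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

int main() {
  // Parameter lists as the declaration stores them: 'self' first, then the
  // formal parameters.
  std::vector<std::string> paramLists = {"(Self)", "(T)"};
  std::string funcTy = "T"; // result type

  // Walk i = 0..e-1, taking paramLists[e - i - 1]: innermost list first,
  // exactly like the currying loop above.
  for (std::size_t i = 0, e = paramLists.size(); i != e; ++i)
    funcTy = paramLists[e - i - 1] + " -> (" + funcTy + ")";

  std::cout << funcTy << "\n"; // prints: (Self) -> ((T) -> (T))
  return 0;
}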
Example no. 29
void PromoteMem2Reg::run() {
  Function &F = *DT.getRoot()->getParent();

  if (AST)
    PointerAllocaValues.resize(Allocas.size());
  AllocaDbgDeclares.resize(Allocas.size());

  AllocaInfo Info;
  LargeBlockInfo LBI;
  IDFCalculator IDF(DT);

  for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) {
    AllocaInst *AI = Allocas[AllocaNum];

    assert(isAllocaPromotable(AI) && "Cannot promote non-promotable alloca!");
    assert(AI->getParent()->getParent() == &F &&
           "All allocas should be in the same function as the DT root!");

    removeLifetimeIntrinsicUsers(AI);

    if (AI->use_empty()) {
      // If there are no uses of the alloca, just delete it now.
      if (AST)
        AST->deleteValue(AI);
      AI->eraseFromParent();

      // Remove the alloca from the Allocas list, since it has been processed
      RemoveFromAllocasList(AllocaNum);
      ++NumDeadAlloca;
      continue;
    }

    // Calculate the set of read and write locations for each alloca.  This is
    // analogous to finding the 'uses' and 'definitions' of each variable.
    Info.AnalyzeAlloca(AI);

    // If there is only a single store to this value, replace any loads of
    // it that are directly dominated by the definition with the value stored.
    if (Info.DefiningBlocks.size() == 1) {
      if (rewriteSingleStoreAlloca(AI, Info, LBI, DT, AST)) {
        // The alloca has been processed, move on.
        RemoveFromAllocasList(AllocaNum);
        ++NumSingleStore;
        continue;
      }
    }

    // If the alloca is only read and written in one basic block, just perform a
    // linear sweep over the block to eliminate it.
    if (Info.OnlyUsedInOneBlock &&
        promoteSingleBlockAlloca(AI, Info, LBI, AST)) {
      // The alloca has been processed, move on.
      RemoveFromAllocasList(AllocaNum);
      continue;
    }

    // If we haven't computed a numbering for the BB's in the function, do so
    // now.
    if (BBNumbers.empty()) {
      unsigned ID = 0;
      for (auto &BB : F)
        BBNumbers[&BB] = ID++;
    }

    // If we have an AST to keep updated, remember some pointer value that is
    // stored into the alloca.
    if (AST)
      PointerAllocaValues[AllocaNum] = Info.AllocaPointerVal;

    // Remember the dbg.declare intrinsic describing this alloca, if any.
    if (Info.DbgDeclare)
      AllocaDbgDeclares[AllocaNum] = Info.DbgDeclare;

    // Keep the reverse mapping of the 'Allocas' array for the rename pass.
    AllocaLookup[Allocas[AllocaNum]] = AllocaNum;

    // Unique the set of defining blocks for efficient lookup.
    SmallPtrSet<BasicBlock *, 32> DefBlocks;
    DefBlocks.insert(Info.DefiningBlocks.begin(), Info.DefiningBlocks.end());

    // Determine which blocks the value is live in.  These are blocks which lead
    // to uses.
    SmallPtrSet<BasicBlock *, 32> LiveInBlocks;
    ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks);

    // At this point, we're committed to promoting the alloca using IDF's, and
    // the standard SSA construction algorithm.  Determine which blocks need phi
    // nodes and see if we can optimize out some work by avoiding insertion of
    // dead phi nodes.
    IDF.setLiveInBlocks(LiveInBlocks);
    IDF.setDefiningBlocks(DefBlocks);
    SmallVector<BasicBlock *, 32> PHIBlocks;
    IDF.calculate(PHIBlocks);
    if (PHIBlocks.size() > 1)
      std::sort(PHIBlocks.begin(), PHIBlocks.end(),
                [this](BasicBlock *A, BasicBlock *B) {
                  return BBNumbers.lookup(A) < BBNumbers.lookup(B);
                });

    unsigned CurrentVersion = 0;
    for (unsigned i = 0, e = PHIBlocks.size(); i != e; ++i)
      QueuePhiNode(PHIBlocks[i], AllocaNum, CurrentVersion);
  }

  if (Allocas.empty())
    return; // All of the allocas must have been trivial!

  LBI.clear();

  // Set the incoming values for the entry block to be undef values for all of
  // the allocas.  We do this in case there is a load of a value that has not
  // been stored yet; such a load will see this undef value.
  //
  RenamePassData::ValVector Values(Allocas.size());
  for (unsigned i = 0, e = Allocas.size(); i != e; ++i)
    Values[i] = UndefValue::get(Allocas[i]->getAllocatedType());

  // Walk all basic blocks in the function, performing the SSA rename
  // algorithm and inserting the phi nodes we marked as necessary.
  //
  std::vector<RenamePassData> RenamePassWorkList;
  RenamePassWorkList.emplace_back(&F.front(), nullptr, std::move(Values));
  do {
    RenamePassData RPD;
    RPD.swap(RenamePassWorkList.back());
    RenamePassWorkList.pop_back();
    // RenamePass may add new worklist entries.
    RenamePass(RPD.BB, RPD.Pred, RPD.Values, RenamePassWorkList);
  } while (!RenamePassWorkList.empty());

  // The renamer uses the Visited set to avoid infinite loops.  Clear it now.
  Visited.clear();

  // Remove the allocas themselves from the function.
  for (unsigned i = 0, e = Allocas.size(); i != e; ++i) {
    Instruction *A = Allocas[i];

    // If there are any uses of the alloca instructions left, they must be in
    // unreachable basic blocks that were not processed by walking the dominator
    // tree. Just delete the users now.
    if (!A->use_empty())
      A->replaceAllUsesWith(UndefValue::get(A->getType()));
    if (AST)
      AST->deleteValue(A);
    A->eraseFromParent();
  }

  const DataLayout &DL = F.getParent()->getDataLayout();

  // Remove the allocas' dbg.declare intrinsics from the function.
  for (unsigned i = 0, e = AllocaDbgDeclares.size(); i != e; ++i)
    if (DbgDeclareInst *DDI = AllocaDbgDeclares[i])
      DDI->eraseFromParent();

  // Loop over all of the PHI nodes and see if there are any that we can get
  // rid of because they merge all of the same incoming values.  This can
  // happen due to undef values coming into the PHI nodes.  This process is
  // iterative, because eliminating one PHI node can cause others to be removed.
  bool EliminatedAPHI = true;
  while (EliminatedAPHI) {
    EliminatedAPHI = false;

    // Iterating over NewPhiNodes is deterministic, so it is safe to try to
    // simplify and RAUW them as we go.  If it was not, we could add uses to
    // the values we replace with in a non-deterministic order, thus creating
    // non-deterministic def->use chains.
    for (DenseMap<std::pair<unsigned, unsigned>, PHINode *>::iterator
             I = NewPhiNodes.begin(),
             E = NewPhiNodes.end();
         I != E;) {
      PHINode *PN = I->second;

      // If this PHI node merges one value and/or undefs, get the value.
      if (Value *V = SimplifyInstruction(PN, DL, nullptr, &DT, AC)) {
        if (AST && PN->getType()->isPointerTy())
          AST->deleteValue(PN);
        PN->replaceAllUsesWith(V);
        PN->eraseFromParent();
        NewPhiNodes.erase(I++);
        EliminatedAPHI = true;
        continue;
      }
      ++I;
    }
  }

  // At this point, the renamer has added entries to PHI nodes for all reachable
  // code.  Unfortunately, there may be unreachable blocks which the renamer
  // hasn't traversed.  If this is the case, the PHI nodes may not
  // have incoming values for all predecessors.  Loop over all PHI nodes we have
  // created, inserting undef values if they are missing any incoming values.
  //
  for (DenseMap<std::pair<unsigned, unsigned>, PHINode *>::iterator
           I = NewPhiNodes.begin(),
           E = NewPhiNodes.end();
       I != E; ++I) {
    // We want to do this once per basic block.  As such, only process a block
    // when we find the PHI that is the first entry in the block.
    PHINode *SomePHI = I->second;
    BasicBlock *BB = SomePHI->getParent();
    if (&BB->front() != SomePHI)
      continue;

    // Only do work here if the PHI nodes are missing incoming values.  We
    // know that all PHI nodes that were inserted in a block will have the same
    // number of incoming values, so we can just check any of them.
    if (SomePHI->getNumIncomingValues() == getNumPreds(BB))
      continue;

    // Get the preds for BB.
    SmallVector<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));

    // Ok, now we know that all of the PHI nodes are missing entries for some
    // basic blocks.  Start by sorting the incoming predecessors for efficient
    // access.
    std::sort(Preds.begin(), Preds.end());

    // Now we loop through all BB's which have entries in SomePHI and remove
    // them from the Preds list.
    for (unsigned i = 0, e = SomePHI->getNumIncomingValues(); i != e; ++i) {
      // Do a log(n) search of the Preds list for the entry we want.
      SmallVectorImpl<BasicBlock *>::iterator EntIt = std::lower_bound(
          Preds.begin(), Preds.end(), SomePHI->getIncomingBlock(i));
      assert(EntIt != Preds.end() && *EntIt == SomePHI->getIncomingBlock(i) &&
             "PHI node has entry for a block which is not a predecessor!");

      // Remove the entry
      Preds.erase(EntIt);
    }

    // At this point, the blocks left in the Preds list must have dummy
    // entries inserted into every PHI node in the block.  Update all the phi
    // nodes in this block that we are inserting (there could be phis before
    // mem2reg runs).
    unsigned NumBadPreds = SomePHI->getNumIncomingValues();
    BasicBlock::iterator BBI = BB->begin();
    while ((SomePHI = dyn_cast<PHINode>(BBI++)) &&
           SomePHI->getNumIncomingValues() == NumBadPreds) {
      Value *UndefVal = UndefValue::get(SomePHI->getType());
      for (unsigned pred = 0, e = Preds.size(); pred != e; ++pred)
        SomePHI->addIncoming(UndefVal, Preds[pred]);
    }
  }

  NewPhiNodes.clear();
}
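For context, a minimal driver sketch for the promotion above (it assumes the era-appropriate PromoteMemToReg entry point, whose AliasSetTracker and AssumptionCache parameters default to null as in the AST/AC usage above; promoteEntryBlockAllocas is a hypothetical helper):

#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include <vector>
using namespace llvm;

// Collect the promotable allocas from the entry block and promote them all
// to SSA registers in one shot.
static void promoteEntryBlockAllocas(Function &F, DominatorTree &DT) {
  std::vector<AllocaInst *> Allocas;
  for (Instruction &I : F.getEntryBlock())
    if (auto *AI = dyn_cast<AllocaInst>(&I))
      if (isAllocaPromotable(AI))
        Allocas.push_back(AI);

  if (!Allocas.empty())
    PromoteMemToReg(Allocas, DT); // AST/AC left at their null defaults
}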
Example no. 30
/// optimizeExtInstr - If instruction is a copy-like instruction, i.e. it reads
/// a single register and writes a single register and it does not modify the
/// source, and if the source value is preserved as a sub-register of the
/// result, then replace all reachable uses of the source with the subreg of the
/// result.
///
/// Do not generate an EXTRACT that is used only by debug instructions, as that
/// would make the generated code depend on the presence of debug info. Since
/// this code does not currently share EXTRACTs, just ignore all debug uses.
bool PeepholeOptimizer::
optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
                 SmallPtrSet<MachineInstr*, 8> &LocalMIs) {
  unsigned SrcReg, DstReg, SubIdx;
  if (!TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx))
    return false;

  if (TargetRegisterInfo::isPhysicalRegister(DstReg) ||
      TargetRegisterInfo::isPhysicalRegister(SrcReg))
    return false;

  MachineRegisterInfo::use_nodbg_iterator UI = MRI->use_nodbg_begin(SrcReg);
  if (++UI == MRI->use_nodbg_end())
    // No other uses.
    return false;

  // The source has other uses. See if we can replace the other uses with use of
  // the result of the extension.
  SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs;
  UI = MRI->use_nodbg_begin(DstReg);
  for (MachineRegisterInfo::use_nodbg_iterator UE = MRI->use_nodbg_end();
       UI != UE; ++UI)
    ReachedBBs.insert(UI->getParent());

  // Uses in the same BB as the extension, or in a BB that the extension's
  // result already reaches.
  SmallVector<MachineOperand*, 8> Uses;

  // Uses we can reach only by extending the live range of the extension
  // result.
  SmallVector<MachineOperand*, 8> ExtendedUses;

  bool ExtendLife = true;
  UI = MRI->use_nodbg_begin(SrcReg);
  for (MachineRegisterInfo::use_nodbg_iterator UE = MRI->use_nodbg_end();
       UI != UE; ++UI) {
    MachineOperand &UseMO = UI.getOperand();
    MachineInstr *UseMI = &*UI;
    if (UseMI == MI)
      continue;

    if (UseMI->isPHI()) {
      ExtendLife = false;
      continue;
    }

    // It's an error to translate this:
    //
    //    %reg1025 = <sext> %reg1024
    //     ...
    //    %reg1026 = SUBREG_TO_REG 0, %reg1024, 4
    //
    // into this:
    //
    //    %reg1025 = <sext> %reg1024
    //     ...
    //    %reg1027 = COPY %reg1025:4
    //    %reg1026 = SUBREG_TO_REG 0, %reg1027, 4
    //
    // The problem here is that SUBREG_TO_REG is there to assert that an
    // implicit zext occurs. It doesn't insert a zext instruction. If we allow
    // the COPY here, it will give us the value after the <sext>, not the
    // original value of %reg1024 before <sext>.
    if (UseMI->getOpcode() == TargetOpcode::SUBREG_TO_REG)
      continue;

    MachineBasicBlock *UseMBB = UseMI->getParent();
    if (UseMBB == MBB) {
      // Local uses that come after the extension.
      if (!LocalMIs.count(UseMI))
        Uses.push_back(&UseMO);
    } else if (ReachedBBs.count(UseMBB)) {
      // Non-local uses where the result of the extension is used. Always
      // replace these unless it's a PHI.
      Uses.push_back(&UseMO);
    } else if (Aggressive && DT->dominates(MBB, UseMBB)) {
      // We may want to extend the live range of the extension result in order
      // to replace these uses.
      ExtendedUses.push_back(&UseMO);
    } else {
      // Both will be live out of the def MBB anyway. Don't extend live range of
      // the extension result.
      ExtendLife = false;
      break;
    }
  }

  if (ExtendLife && !ExtendedUses.empty())
    // Extend the liveness of the extension result.
    std::copy(ExtendedUses.begin(), ExtendedUses.end(),
              std::back_inserter(Uses));

  // Now replace all uses.
  bool Changed = false;
  if (!Uses.empty()) {
    SmallPtrSet<MachineBasicBlock*, 4> PHIBBs;

    // Look for PHI uses of the extended result; we don't want to extend the
    // liveness of a PHI input, as that breaks all kinds of assumptions
    // downstream. A PHI use is expected to be the kill of its source values.
    UI = MRI->use_nodbg_begin(DstReg);
    for (MachineRegisterInfo::use_nodbg_iterator
           UE = MRI->use_nodbg_end(); UI != UE; ++UI)
      if (UI->isPHI())
        PHIBBs.insert(UI->getParent());

    const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
    for (unsigned i = 0, e = Uses.size(); i != e; ++i) {
      MachineOperand *UseMO = Uses[i];
      MachineInstr *UseMI = UseMO->getParent();
      MachineBasicBlock *UseMBB = UseMI->getParent();
      if (PHIBBs.count(UseMBB))
        continue;

      // About to add uses of DstReg, clear DstReg's kill flags.
      if (!Changed)
        MRI->clearKillFlags(DstReg);

      unsigned NewVR = MRI->createVirtualRegister(RC);
      BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(),
              TII->get(TargetOpcode::COPY), NewVR)
        .addReg(DstReg, 0, SubIdx);

      UseMO->setReg(NewVR);
      ++NumReuse;
      Changed = true;
    }
  }

  return Changed;
}
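To make the rewrite concrete, here is an assumed before/after sketch in the same pseudo machine IR the comments above already use (the register numbers, ADD opcode, and sub-register index are illustrative, not taken from the source):

//  Before: %reg1024 is used in a block that the extension result reaches.
//
//      %reg1025 = <sext> %reg1024
//       ...
//      %reg1030 = ADD %reg1024, ...
//
//  After: the use reads the original bits as a sub-register of the
//  extension result, through a fresh virtual register (NewVR above):
//
//      %reg1025 = <sext> %reg1024
//       ...
//      %reg1027 = COPY %reg1025:SubIdx
//      %reg1030 = ADD %reg1027, ...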