Example no. 1
0
/// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the
/// abstract stack objects.
///
void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
  const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering();

  bool StackGrowsDown =
    TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;

  // Loop over all of the stack objects, assigning sequential addresses...
  MachineFrameInfo *MFI = Fn.getFrameInfo();

  // Start at the beginning of the local area.
  // The Offset is the distance from the stack top in the direction
  // of stack growth -- so it's always nonnegative.
  int LocalAreaOffset = TFI.getOffsetOfLocalArea();
  if (StackGrowsDown)
    LocalAreaOffset = -LocalAreaOffset;
  assert(LocalAreaOffset >= 0
         && "Local area offset should be in direction of stack growth");
  int64_t Offset = LocalAreaOffset;

  // If there are fixed-size objects that are preallocated in the local area,
  // non-fixed objects can't be allocated right at the start of the local
  // area. We currently don't support filling in holes between fixed-size
  // objects, so we adjust 'Offset' to point to the end of the last
  // preallocated fixed-size object.
  for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) {
    int64_t FixedOff;
    if (StackGrowsDown) {
      // The maximum distance from the stack pointer is at the lower address
      // of the object -- which is given by the offset. For a down-growing
      // stack the offset is negative, so we negate it to get the distance.
      FixedOff = -MFI->getObjectOffset(i);
    } else {
      // The maximum distance from the start pointer is at the upper
      // address of the object.
      FixedOff = MFI->getObjectOffset(i) + MFI->getObjectSize(i);
    }
    if (FixedOff > Offset) Offset = FixedOff;
  }

  // First assign frame offsets to stack objects that are used to spill
  // callee saved registers.
  if (StackGrowsDown) {
    for (unsigned i = MinCSFrameIndex; i <= MaxCSFrameIndex; ++i) {
      // If the stack grows down, we need to add the size to find the lowest
      // address of the object.
      Offset += MFI->getObjectSize(i);

      unsigned Align = MFI->getObjectAlignment(i);
      // Adjust to alignment boundary
      Offset = (Offset+Align-1)/Align*Align;

      MFI->setObjectOffset(i, -Offset);        // Set the computed offset
    }
  } else {
    int MaxCSFI = MaxCSFrameIndex, MinCSFI = MinCSFrameIndex;
    for (int i = MaxCSFI; i >= MinCSFI ; --i) {
      unsigned Align = MFI->getObjectAlignment(i);
      // Adjust to alignment boundary
      Offset = (Offset+Align-1)/Align*Align;

      MFI->setObjectOffset(i, Offset);
      Offset += MFI->getObjectSize(i);
    }
  }

  unsigned MaxAlign = MFI->getMaxAlignment();

  // Make sure the special register scavenging spill slot is closest to the
  // frame pointer if a frame pointer is required.
  const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
  if (RS && TFI.hasFP(Fn) && RegInfo->useFPForScavengingIndex(Fn) &&
      !RegInfo->needsStackRealignment(Fn)) {
    int SFI = RS->getScavengingFrameIndex();
    if (SFI >= 0)
      AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign);
  }

  // FIXME: Once this is working, then enable flag will change to a target
  // check for whether the frame is large enough to want to use virtual
  // frame index registers. Functions which don't want/need this optimization
  // will continue to use the existing code path.
  if (MFI->getUseLocalStackAllocationBlock()) {
    unsigned Align = MFI->getLocalFrameMaxAlign();

    // Adjust to alignment boundary.
    Offset = (Offset + Align - 1) / Align * Align;

    DEBUG(dbgs() << "Local frame base offset: " << Offset << "\n");

    // Resolve offsets for objects in the local block.
    for (unsigned i = 0, e = MFI->getLocalFrameObjectCount(); i != e; ++i) {
      std::pair<int, int64_t> Entry = MFI->getLocalFrameObjectMap(i);
      int64_t FIOffset = (StackGrowsDown ? -Offset : Offset) + Entry.second;
      DEBUG(dbgs() << "alloc FI(" << Entry.first << ") at SP[" <<
            FIOffset << "]\n");
      MFI->setObjectOffset(Entry.first, FIOffset);
    }
    // Allocate the local block
    Offset += MFI->getLocalFrameSize();

    MaxAlign = std::max(Align, MaxAlign);
  }

  // Make sure that the stack protector comes before the local variables on the
  // stack.
  SmallSet<int, 16> LargeStackObjs;
  if (MFI->getStackProtectorIndex() >= 0) {
    AdjustStackOffset(MFI, MFI->getStackProtectorIndex(), StackGrowsDown,
                      Offset, MaxAlign);

    // Assign large stack objects first.
    for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
      if (MFI->isObjectPreAllocated(i) &&
          MFI->getUseLocalStackAllocationBlock())
        continue;
      if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex)
        continue;
      if (RS && (int)i == RS->getScavengingFrameIndex())
        continue;
      if (MFI->isDeadObjectIndex(i))
        continue;
      if (MFI->getStackProtectorIndex() == (int)i)
        continue;
      if (!MFI->MayNeedStackProtector(i))
        continue;

      AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign);
      LargeStackObjs.insert(i);
    }
  }

  // Then assign frame offsets to stack objects that are not used to spill
  // callee saved registers.
  for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
    if (MFI->isObjectPreAllocated(i) &&
        MFI->getUseLocalStackAllocationBlock())
      continue;
    if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex)
      continue;
    if (RS && (int)i == RS->getScavengingFrameIndex())
      continue;
    if (MFI->isDeadObjectIndex(i))
      continue;
    if (MFI->getStackProtectorIndex() == (int)i)
      continue;
    if (LargeStackObjs.count(i))
      continue;

    AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign);
  }

  // Make sure the special register scavenging spill slot is closest to the
  // stack pointer.
  if (RS && (!TFI.hasFP(Fn) || RegInfo->needsStackRealignment(Fn) ||
             !RegInfo->useFPForScavengingIndex(Fn))) {
    int SFI = RS->getScavengingFrameIndex();
    if (SFI >= 0)
      AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign);
  }

  if (!TFI.targetHandlesStackFrameRounding()) {
    // If we have reserved argument space for call sites in the function
    // immediately on entry to the current function, count it as part of the
    // overall stack size.
    if (MFI->adjustsStack() && TFI.hasReservedCallFrame(Fn))
      Offset += MFI->getMaxCallFrameSize();

    // Round up the size to a multiple of the alignment.  If the function has
    // any calls or alloca's, align to the target's StackAlignment value to
    // ensure that the callee's frame or the alloca data is suitably aligned;
    // otherwise, for leaf functions, align to the TransientStackAlignment
    // value.
    unsigned StackAlign;
    if (MFI->adjustsStack() || MFI->hasVarSizedObjects() ||
        (RegInfo->needsStackRealignment(Fn) && MFI->getObjectIndexEnd() != 0))
      StackAlign = TFI.getStackAlignment();
    else
      StackAlign = TFI.getTransientStackAlignment();

    // If the frame pointer is eliminated, all frame offsets will be relative to
    // SP not FP. Align to MaxAlign so this works.
    StackAlign = std::max(StackAlign, MaxAlign);
    unsigned AlignMask = StackAlign - 1;
    Offset = (Offset + AlignMask) & ~uint64_t(AlignMask);
  }

  // Update frame info to pretend that this is part of the stack...
  int64_t StackSize = Offset - LocalAreaOffset;
  MFI->setStackSize(StackSize);
  NumBytesStackSpace += StackSize;
}
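// A minimal standalone sketch (not LLVM code; names are illustrative) of
// the two round-up-to-alignment idioms the function above uses: the
// divide/multiply form at each "Adjust to alignment boundary" works for
// any positive alignment, while the mask form used for the final stack
// size is cheaper but assumes the alignment is a power of two, which
// stack alignments always are.
#include <cassert>
#include <cstdint>

static int64_t alignUpDivMul(int64_t Offset, int64_t Align) {
  return (Offset + Align - 1) / Align * Align; // any Align > 0
}

static int64_t alignUpMask(int64_t Offset, int64_t Align) {
  assert((Align & (Align - 1)) == 0 && "mask form needs a power of two");
  int64_t Mask = Align - 1;
  return (Offset + Mask) & ~Mask;
}

int main() {
  assert(alignUpDivMul(13, 8) == 16);
  assert(alignUpMask(13, 8) == 16);
  assert(alignUpDivMul(16, 8) == 16); // already aligned: unchanged
  return 0;
}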
Example no. 2
0
bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) {
  MachineInstr *LastDef = PhysRegDef[Reg];
  MachineInstr *LastUse = PhysRegUse[Reg];
  if (!LastDef && !LastUse)
    return false;

  MachineInstr *LastRefOrPartRef = LastUse ? LastUse : LastDef;
  unsigned LastRefOrPartRefDist = DistanceMap[LastRefOrPartRef];
  // The whole register is used.
  // AL =
  // AH =
  //
  //    = AX
  //    = AL, AX<imp-use, kill>
  // AX =
  //
  // Or whole register is defined, but not used at all.
  // AX<dead> =
  // ...
  // AX =
  //
  // Or whole register is defined, but only partly used.
  // AX<dead> = AL<imp-def>
  //    = AL<kill>
  // AX =
  MachineInstr *LastPartDef = 0;
  unsigned LastPartDefDist = 0;
  SmallSet<unsigned, 8> PartUses;
  for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg);
       unsigned SubReg = *SubRegs; ++SubRegs) {
    MachineInstr *Def = PhysRegDef[SubReg];
    if (Def && Def != LastDef) {
      // There was a def of this sub-register in between. This is a partial
      // def, keep track of the last one.
      unsigned Dist = DistanceMap[Def];
      if (Dist > LastPartDefDist) {
        LastPartDefDist = Dist;
        LastPartDef = Def;
      }
      continue;
    }
    if (MachineInstr *Use = PhysRegUse[SubReg]) {
      PartUses.insert(SubReg);
      for (const uint16_t *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
        PartUses.insert(*SS);
      unsigned Dist = DistanceMap[Use];
      if (Dist > LastRefOrPartRefDist) {
        LastRefOrPartRefDist = Dist;
        LastRefOrPartRef = Use;
      }
    }
  }

  if (!PhysRegUse[Reg]) {
    // Partial uses. Mark register def dead and add implicit def of
    // sub-registers which are used.
    // EAX<dead>  = op  AL<imp-def>
    // That is, the EAX def is dead but the AL def extends past it.
    PhysRegDef[Reg]->addRegisterDead(Reg, TRI, true);
    for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg);
         unsigned SubReg = *SubRegs; ++SubRegs) {
      if (!PartUses.count(SubReg))
        continue;
      bool NeedDef = true;
      if (PhysRegDef[Reg] == PhysRegDef[SubReg]) {
        MachineOperand *MO = PhysRegDef[Reg]->findRegisterDefOperand(SubReg);
        if (MO) {
          NeedDef = false;
          assert(!MO->isDead());
        }
      }
      if (NeedDef)
        PhysRegDef[Reg]->addOperand(MachineOperand::CreateReg(SubReg,
                                                 true/*IsDef*/, true/*IsImp*/));
      MachineInstr *LastSubRef = FindLastRefOrPartRef(SubReg);
      if (LastSubRef)
        LastSubRef->addRegisterKilled(SubReg, TRI, true);
      else {
        LastRefOrPartRef->addRegisterKilled(SubReg, TRI, true);
        PhysRegUse[SubReg] = LastRefOrPartRef;
        for (const uint16_t *SSRegs = TRI->getSubRegisters(SubReg);
             unsigned SSReg = *SSRegs; ++SSRegs)
          PhysRegUse[SSReg] = LastRefOrPartRef;
      }
      for (const uint16_t *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
        PartUses.erase(*SS);
    }
  } else if (LastRefOrPartRef == PhysRegDef[Reg] && LastRefOrPartRef != MI) {
    if (LastPartDef)
      // The last partial def kills the register.
      LastPartDef->addOperand(MachineOperand::CreateReg(Reg, false/*IsDef*/,
                                                true/*IsImp*/, true/*IsKill*/));
    else {
      MachineOperand *MO =
        LastRefOrPartRef->findRegisterDefOperand(Reg, false, TRI);
      bool NeedEC = MO->isEarlyClobber() && MO->getReg() != Reg;
      // If the last reference is the last def, then it's not used at all.
      // That is, unless we are currently processing the last reference itself.
      LastRefOrPartRef->addRegisterDead(Reg, TRI, true);
      if (NeedEC) {
        // If we are adding a subreg def and the superreg def is marked early
        // clobber, add an early clobber marker to the subreg def.
        MO = LastRefOrPartRef->findRegisterDefOperand(Reg);
        if (MO)
          MO->setIsEarlyClobber();
      }
    }
  } else
    LastRefOrPartRef->addRegisterKilled(Reg, TRI, true);
  return true;
}
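// A minimal standalone sketch (hypothetical register names, plain STL,
// not the LLVM API) of the distance-map bookkeeping above: given the
// last def/use recorded per register, the latest reference of AX or of
// any of its sub-registers AL/AH is the one with the largest distance.
#include <cassert>
#include <map>
#include <string>
#include <vector>

int main() {
  // DistanceMap analogue: position of each instruction in the block.
  std::map<std::string, unsigned> Distance = {
      {"def AL", 0}, {"def AH", 1}, {"use AX", 2}};

  std::map<std::string, std::string> LastDef = {{"AL", "def AL"},
                                                {"AH", "def AH"}};
  std::map<std::string, std::string> LastUse = {{"AX", "use AX"}};

  // Start from the whole-register reference, then scan sub-registers.
  std::string LastRef = LastUse["AX"];
  unsigned LastRefDist = Distance[LastRef];
  std::vector<std::string> SubRegs = {"AL", "AH"};
  for (const std::string &Sub : SubRegs) {
    auto It = LastDef.find(Sub);
    if (It != LastDef.end() && Distance[It->second] > LastRefDist) {
      LastRefDist = Distance[It->second];
      LastRef = It->second;
    }
  }
  assert(LastRef == "use AX"); // the whole-register use comes last
  return 0;
}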
Example no. 3
0
bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
  MF = &mf;
  MRI = &mf.getRegInfo();
  TRI = MF->getTarget().getRegisterInfo();

  ReservedRegisters = TRI->getReservedRegs(mf);

  unsigned NumRegs = TRI->getNumRegs();
  PhysRegDef  = new MachineInstr*[NumRegs];
  PhysRegUse  = new MachineInstr*[NumRegs];
  PHIVarInfo = new SmallVector<unsigned, 4>[MF->getNumBlockIDs()];
  std::fill(PhysRegDef,  PhysRegDef  + NumRegs, (MachineInstr*)0);
  std::fill(PhysRegUse,  PhysRegUse  + NumRegs, (MachineInstr*)0);
  PHIJoins.clear();

  // FIXME: LiveIntervals will be updated to remove its dependence on
  // LiveVariables to improve compilation time and eliminate bizarre pass
  // dependencies. Until then, we can't change much in -O0.
  if (!MRI->isSSA())
    report_fatal_error("regalloc=... not currently supported with -O0");

  analyzePHINodes(mf);

  // Calculate live variable information in depth first order on the CFG of the
  // function.  This guarantees that we will see the definition of a virtual
  // register before its uses due to dominance properties of SSA (except for PHI
  // nodes, which are treated as a special case).
  MachineBasicBlock *Entry = MF->begin();
  SmallPtrSet<MachineBasicBlock*,16> Visited;

  for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*,16> >
         DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited);
       DFI != E; ++DFI) {
    MachineBasicBlock *MBB = *DFI;

    // Mark live-in registers as live-in.
    SmallVector<unsigned, 4> Defs;
    for (MachineBasicBlock::livein_iterator II = MBB->livein_begin(),
           EE = MBB->livein_end(); II != EE; ++II) {
      assert(TargetRegisterInfo::isPhysicalRegister(*II) &&
             "Cannot have a live-in virtual register!");
      HandlePhysRegDef(*II, 0, Defs);
    }

    // Loop over all of the instructions, processing them.
    DistanceMap.clear();
    unsigned Dist = 0;
    for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
         I != E; ++I) {
      MachineInstr *MI = I;
      if (MI->isDebugValue())
        continue;
      DistanceMap.insert(std::make_pair(MI, Dist++));

      // Process all of the operands of the instruction...
      unsigned NumOperandsToProcess = MI->getNumOperands();

      // Unless it is a PHI node.  In this case, ONLY process the DEF, not any
      // of the uses.  They will be handled in other basic blocks.
      if (MI->isPHI())
        NumOperandsToProcess = 1;

      // Clear kill and dead markers. LV will recompute them.
      SmallVector<unsigned, 4> UseRegs;
      SmallVector<unsigned, 4> DefRegs;
      SmallVector<unsigned, 1> RegMasks;
      for (unsigned i = 0; i != NumOperandsToProcess; ++i) {
        MachineOperand &MO = MI->getOperand(i);
        if (MO.isRegMask()) {
          RegMasks.push_back(i);
          continue;
        }
        if (!MO.isReg() || MO.getReg() == 0)
          continue;
        unsigned MOReg = MO.getReg();
        if (MO.isUse()) {
          MO.setIsKill(false);
          UseRegs.push_back(MOReg);
        } else /*MO.isDef()*/ {
          MO.setIsDead(false);
          DefRegs.push_back(MOReg);
        }
      }

      // Process all uses.
      for (unsigned i = 0, e = UseRegs.size(); i != e; ++i) {
        unsigned MOReg = UseRegs[i];
        if (TargetRegisterInfo::isVirtualRegister(MOReg))
          HandleVirtRegUse(MOReg, MBB, MI);
        else if (!ReservedRegisters[MOReg])
          HandlePhysRegUse(MOReg, MI);
      }

      // Process all masked registers. (Call clobbers).
      for (unsigned i = 0, e = RegMasks.size(); i != e; ++i)
        HandleRegMask(MI->getOperand(RegMasks[i]));

      // Process all defs.
      for (unsigned i = 0, e = DefRegs.size(); i != e; ++i) {
        unsigned MOReg = DefRegs[i];
        if (TargetRegisterInfo::isVirtualRegister(MOReg))
          HandleVirtRegDef(MOReg, MI);
        else if (!ReservedRegisters[MOReg])
          HandlePhysRegDef(MOReg, MI, Defs);
      }
      UpdatePhysRegDefs(MI, Defs);
    }

    // Handle any virtual assignments from PHI nodes which might be at the
    // bottom of this basic block.  We check all of our successor blocks to see
    // if they have PHI nodes, and if so, we simulate an assignment at the end
    // of the current block.
    if (!PHIVarInfo[MBB->getNumber()].empty()) {
      SmallVector<unsigned, 4>& VarInfoVec = PHIVarInfo[MBB->getNumber()];

      for (SmallVector<unsigned, 4>::iterator I = VarInfoVec.begin(),
             E = VarInfoVec.end(); I != E; ++I)
        // Mark it alive only in the block we are representing.
        MarkVirtRegAliveInBlock(getVarInfo(*I),MRI->getVRegDef(*I)->getParent(),
                                MBB);
    }

    // Finally, if the last instruction in the block is a return, make sure to
    // mark it as using all of the live-out values in the function.
    // Things marked both call and return are tail calls; do not do this for
    // them.  The tail callee need not take the same registers as input
    // that it produces as output, and there are dependencies for its input
    // registers elsewhere.
    if (!MBB->empty() && MBB->back().isReturn()
        && !MBB->back().isCall()) {
      MachineInstr *Ret = &MBB->back();

      for (MachineRegisterInfo::liveout_iterator
           I = MF->getRegInfo().liveout_begin(),
           E = MF->getRegInfo().liveout_end(); I != E; ++I) {
        assert(TargetRegisterInfo::isPhysicalRegister(*I) &&
               "Cannot have a live-out virtual register!");
        HandlePhysRegUse(*I, Ret);

        // Add live-out registers as implicit uses.
        if (!Ret->readsRegister(*I))
          Ret->addOperand(MachineOperand::CreateReg(*I, false, true));
      }
    }

    // MachineCSE may CSE instructions which write to non-allocatable physical
    // registers across MBBs. Remember if any reserved register is liveout.
    SmallSet<unsigned, 4> LiveOuts;
    for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(),
           SE = MBB->succ_end(); SI != SE; ++SI) {
      MachineBasicBlock *SuccMBB = *SI;
      if (SuccMBB->isLandingPad())
        continue;
      for (MachineBasicBlock::livein_iterator LI = SuccMBB->livein_begin(),
             LE = SuccMBB->livein_end(); LI != LE; ++LI) {
        unsigned LReg = *LI;
        if (!TRI->isInAllocatableClass(LReg))
          // Ignore other live-ins, e.g. those that are live into landing pads.
          LiveOuts.insert(LReg);
      }
    }

    // Loop over PhysRegDef / PhysRegUse, killing any registers that are
    // available at the end of the basic block.
    for (unsigned i = 0; i != NumRegs; ++i)
      if ((PhysRegDef[i] || PhysRegUse[i]) && !LiveOuts.count(i))
        HandlePhysRegDef(i, 0, Defs);

    std::fill(PhysRegDef,  PhysRegDef  + NumRegs, (MachineInstr*)0);
    std::fill(PhysRegUse,  PhysRegUse  + NumRegs, (MachineInstr*)0);
  }

  // Convert and transfer the dead / killed information we have gathered into
  // VirtRegInfo onto MI's.
  for (unsigned i = 0, e1 = VirtRegInfo.size(); i != e1; ++i) {
    const unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
    for (unsigned j = 0, e2 = VirtRegInfo[Reg].Kills.size(); j != e2; ++j)
      if (VirtRegInfo[Reg].Kills[j] == MRI->getVRegDef(Reg))
        VirtRegInfo[Reg].Kills[j]->addRegisterDead(Reg, TRI);
      else
        VirtRegInfo[Reg].Kills[j]->addRegisterKilled(Reg, TRI);
  }

  // Check to make sure there are no unreachable blocks in the MC CFG for the
  // function.  If so, it is due to a bug in the instruction selector or some
  // other part of the code generator if this happens.
#ifndef NDEBUG
  for(MachineFunction::iterator i = MF->begin(), e = MF->end(); i != e; ++i)
    assert(Visited.count(&*i) != 0 && "unreachable basic block found");
#endif

  delete[] PhysRegDef;
  delete[] PhysRegUse;
  delete[] PHIVarInfo;

  return false;
}
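// A minimal standalone sketch (plain STL, no LLVM types) of the
// depth-first CFG walk with an external visited set -- the same shape as
// the df_ext_iterator loop above. Blocks are visited in preorder, so a
// dominating block is seen before the blocks it dominates, and
// unreachable blocks are never visited, which is what the NDEBUG check
// at the end of the function relies on.
#include <cassert>
#include <map>
#include <set>
#include <string>
#include <vector>

static void dfs(const std::string &BB,
                const std::map<std::string, std::vector<std::string>> &Succs,
                std::set<std::string> &Visited,
                std::vector<std::string> &Order) {
  if (!Visited.insert(BB).second)
    return;                    // already seen, as with df_ext_iterator
  Order.push_back(BB);         // preorder: a block before its successors
  auto It = Succs.find(BB);
  if (It != Succs.end())
    for (const std::string &S : It->second)
      dfs(S, Succs, Visited, Order);
}

int main() {
  std::map<std::string, std::vector<std::string>> Succs = {
      {"entry", {"loop"}}, {"loop", {"loop", "exit"}}, {"exit", {}}};
  std::set<std::string> Visited;
  std::vector<std::string> Order;
  dfs("entry", Succs, Visited, Order);
  assert((Order == std::vector<std::string>{"entry", "loop", "exit"}));
  assert(!Visited.count("dead")); // an unreachable block stays unvisited
  return 0;
}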
Example no. 4
0
bool AMDGPURewriteOutArguments::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  // TODO: Could probably handle variadic functions.
  if (F.isVarArg() || F.hasStructRetAttr() ||
      AMDGPU::isEntryFunctionCC(F.getCallingConv()))
    return false;

  MDA = &getAnalysis<MemoryDependenceWrapperPass>().getMemDep();

  unsigned ReturnNumRegs = 0;
  SmallSet<int, 4> OutArgIndexes;
  SmallVector<Type *, 4> ReturnTypes;
  Type *RetTy = F.getReturnType();
  if (!RetTy->isVoidTy()) {
    ReturnNumRegs = DL->getTypeStoreSize(RetTy) / 4;

    if (ReturnNumRegs >= MaxNumRetRegs)
      return false;

    ReturnTypes.push_back(RetTy);
  }

  SmallVector<Argument *, 4> OutArgs;
  for (Argument &Arg : F.args()) {
    if (isOutArgumentCandidate(Arg)) {
      LLVM_DEBUG(dbgs() << "Found possible out argument " << Arg
                        << " in function " << F.getName() << '\n');
      OutArgs.push_back(&Arg);
    }
  }

  if (OutArgs.empty())
    return false;

  using ReplacementVec = SmallVector<std::pair<Argument *, Value *>, 4>;

  DenseMap<ReturnInst *, ReplacementVec> Replacements;

  SmallVector<ReturnInst *, 4> Returns;
  for (BasicBlock &BB : F) {
    if (ReturnInst *RI = dyn_cast<ReturnInst>(&BB.back()))
      Returns.push_back(RI);
  }

  if (Returns.empty())
    return false;

  bool Changing;

  do {
    Changing = false;

    // Keep retrying as long as we successfully eliminate an argument. This
    // helps with cases where multiple arguments may alias, such as in a
    // sincos implementation. If we have 2 stores to arguments, on the first
    // attempt the MDA query will succeed for the second store but not the
    // first. On the second iteration we've removed that clobbering out
    // argument (by effectively moving it into another function) and will
    // find the second argument is OK to move.
    for (Argument *OutArg : OutArgs) {
      bool ThisReplaceable = true;
      SmallVector<std::pair<ReturnInst *, StoreInst *>, 4> ReplaceableStores;

      Type *ArgTy = OutArg->getType()->getPointerElementType();

      // Skip this argument if converting it will push us over the return
      // register count limit.

      // TODO: This is an approximation. When legalized this could be more. We
      // can ask TLI for exactly how many.
      unsigned ArgNumRegs = DL->getTypeStoreSize(ArgTy) / 4;
      if (ArgNumRegs + ReturnNumRegs > MaxNumRetRegs)
        continue;

      // An argument is convertible only if all exit blocks are able to replace
      // it.
      for (ReturnInst *RI : Returns) {
        BasicBlock *BB = RI->getParent();

        MemDepResult Q = MDA->getPointerDependencyFrom(MemoryLocation(OutArg),
                                                       true, BB->end(), BB, RI);
        StoreInst *SI = nullptr;
        if (Q.isDef())
          SI = dyn_cast<StoreInst>(Q.getInst());

        if (SI) {
          LLVM_DEBUG(dbgs() << "Found out argument store: " << *SI << '\n');
          ReplaceableStores.emplace_back(RI, SI);
        } else {
          ThisReplaceable = false;
          break;
        }
      }

      if (!ThisReplaceable)
        continue; // Try the next argument candidate.

      for (std::pair<ReturnInst *, StoreInst *> Store : ReplaceableStores) {
        Value *ReplVal = Store.second->getValueOperand();

        auto &ValVec = Replacements[Store.first];
        if (llvm::find_if(ValVec,
              [OutArg](const std::pair<Argument *, Value *> &Entry) {
                 return Entry.first == OutArg;}) != ValVec.end()) {
          LLVM_DEBUG(dbgs()
                     << "Saw multiple out arg stores" << *OutArg << '\n');
          // It is possible to see stores to the same argument multiple times,
          // but we expect these would have been optimized out already.
          ThisReplaceable = false;
          break;
        }

        ValVec.emplace_back(OutArg, ReplVal);
        Store.second->eraseFromParent();
      }

      if (ThisReplaceable) {
        ReturnTypes.push_back(ArgTy);
        OutArgIndexes.insert(OutArg->getArgNo());
        ++NumOutArgumentsReplaced;
        Changing = true;
      }
    }
  } while (Changing);

  if (Replacements.empty())
    return false;

  LLVMContext &Ctx = F.getParent()->getContext();
  StructType *NewRetTy = StructType::create(Ctx, ReturnTypes, F.getName());

  FunctionType *NewFuncTy = FunctionType::get(NewRetTy,
                                              F.getFunctionType()->params(),
                                              F.isVarArg());

  LLVM_DEBUG(dbgs() << "Computed new return type: " << *NewRetTy << '\n');

  Function *NewFunc = Function::Create(NewFuncTy, Function::PrivateLinkage,
                                       F.getName() + ".body");
  F.getParent()->getFunctionList().insert(F.getIterator(), NewFunc);
  NewFunc->copyAttributesFrom(&F);
  NewFunc->setComdat(F.getComdat());

  // We want to preserve the function and param attributes, but need to strip
  // off any return attributes, e.g. zeroext doesn't make sense with a struct.
  NewFunc->stealArgumentListFrom(F);

  AttrBuilder RetAttrs;
  RetAttrs.addAttribute(Attribute::SExt);
  RetAttrs.addAttribute(Attribute::ZExt);
  RetAttrs.addAttribute(Attribute::NoAlias);
  NewFunc->removeAttributes(AttributeList::ReturnIndex, RetAttrs);
  // TODO: How to preserve metadata?

  // Move the body of the function into the new rewritten function, and replace
  // this function with a stub.
  NewFunc->getBasicBlockList().splice(NewFunc->begin(), F.getBasicBlockList());

  for (std::pair<ReturnInst *, ReplacementVec> &Replacement : Replacements) {
    ReturnInst *RI = Replacement.first;
    IRBuilder<> B(RI);
    B.SetCurrentDebugLocation(RI->getDebugLoc());

    int RetIdx = 0;
    Value *NewRetVal = UndefValue::get(NewRetTy);

    Value *RetVal = RI->getReturnValue();
    if (RetVal)
      NewRetVal = B.CreateInsertValue(NewRetVal, RetVal, RetIdx++);

    for (std::pair<Argument *, Value *> ReturnPoint : Replacement.second) {
      Argument *Arg = ReturnPoint.first;
      Value *Val = ReturnPoint.second;
      Type *EltTy = Arg->getType()->getPointerElementType();
      if (Val->getType() != EltTy) {
        Type *EffectiveEltTy = EltTy;
        if (StructType *CT = dyn_cast<StructType>(EltTy)) {
          assert(CT->getNumElements() == 1);
          EffectiveEltTy = CT->getElementType(0);
        }

        if (DL->getTypeSizeInBits(EffectiveEltTy) !=
            DL->getTypeSizeInBits(Val->getType())) {
          assert(isVec3ToVec4Shuffle(EffectiveEltTy, Val->getType()));
          Val = B.CreateShuffleVector(Val, UndefValue::get(Val->getType()),
                                      { 0, 1, 2 });
        }

        Val = B.CreateBitCast(Val, EffectiveEltTy);

        // Re-create single element composite.
        if (EltTy != EffectiveEltTy)
          Val = B.CreateInsertValue(UndefValue::get(EltTy), Val, 0);
      }

      NewRetVal = B.CreateInsertValue(NewRetVal, Val, RetIdx++);
    }

    if (RetVal)
      RI->setOperand(0, NewRetVal);
    else {
      B.CreateRet(NewRetVal);
      RI->eraseFromParent();
    }
  }

  SmallVector<Value *, 16> StubCallArgs;
  for (Argument &Arg : F.args()) {
    if (OutArgIndexes.count(Arg.getArgNo())) {
      // It's easier to preserve the type of the argument list. We rely on
      // DeadArgumentElimination to take care of these.
      StubCallArgs.push_back(UndefValue::get(Arg.getType()));
    } else {
      StubCallArgs.push_back(&Arg);
    }
  }

  BasicBlock *StubBB = BasicBlock::Create(Ctx, "", &F);
  IRBuilder<> B(StubBB);
  CallInst *StubCall = B.CreateCall(NewFunc, StubCallArgs);

  int RetIdx = RetTy->isVoidTy() ? 0 : 1;
  for (Argument &Arg : F.args()) {
    if (!OutArgIndexes.count(Arg.getArgNo()))
      continue;

    PointerType *ArgType = cast<PointerType>(Arg.getType());

    auto *EltTy = ArgType->getElementType();
    unsigned Align = Arg.getParamAlignment();
    if (Align == 0)
      Align = DL->getABITypeAlignment(EltTy);

    Value *Val = B.CreateExtractValue(StubCall, RetIdx++);
    Type *PtrTy = Val->getType()->getPointerTo(ArgType->getAddressSpace());

    // We can peek through bitcasts, so the type may not match.
    Value *PtrVal = B.CreateBitCast(&Arg, PtrTy);

    B.CreateAlignedStore(Val, PtrVal, Align);
  }

  if (!RetTy->isVoidTy()) {
    B.CreateRet(B.CreateExtractValue(StubCall, 0));
  } else {
    B.CreateRetVoid();
  }

  // The function is now a stub we want to inline.
  F.addFnAttr(Attribute::AlwaysInline);

  ++NumOutArgumentFunctionsReplaced;
  return true;
}
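// A hedged source-level sketch (hypothetical functions sincos_before /
// sincos_body / sincos_stub, not the pass's actual output) of what the
// rewrite above amounts to: a function returning results through pointer
// arguments becomes a struct-returning ".body" function plus a thin stub
// with the original signature that stores the struct fields back through
// the pointers. The nullptr placeholders stand in for the undef values
// the stub passes for eliminated arguments.
#include <cmath>

// Before: results returned through out pointers.
static void sincos_before(float X, float *S, float *C) {
  *S = std::sin(X);
  *C = std::cos(X);
}

// After: the body returns a struct...
struct SinCosRet { float S; float C; };
static SinCosRet sincos_body(float X, float *, float *) {
  return {std::sin(X), std::cos(X)};
}

// ...and the original signature survives as an always-inline-style stub.
static void sincos_stub(float X, float *S, float *C) {
  SinCosRet R = sincos_body(X, nullptr, nullptr); // undef-like placeholders
  *S = R.S;
  *C = R.C;
}

int main() {
  float S1, C1, S2, C2;
  sincos_before(0.5f, &S1, &C1);
  sincos_stub(0.5f, &S2, &C2);
  return (S1 == S2 && C1 == C2) ? 0 : 1;
}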
Example no. 5
0
static bool verifyCTRBranch(MachineBasicBlock *MBB,
                            MachineBasicBlock::iterator I) {
  MachineBasicBlock::iterator BI = I;
  SmallSet<MachineBasicBlock *, 16>   Visited;
  SmallVector<MachineBasicBlock *, 8> Preds;
  bool CheckPreds;

  if (I == MBB->begin()) {
    Visited.insert(MBB);
    goto queue_preds;
  } else
    --I;

check_block:
  Visited.insert(MBB);
  if (I == MBB->end())
    goto queue_preds;

  CheckPreds = true;
  for (MachineBasicBlock::iterator IE = MBB->begin();; --I) {
    unsigned Opc = I->getOpcode();
    if (Opc == PPC::MTCTRloop || Opc == PPC::MTCTR8loop) {
      CheckPreds = false;
      break;
    }

    if (I != BI && clobbersCTR(I)) {
      DEBUG(dbgs() << "BB#" << MBB->getNumber() << " (" <<
                      MBB->getFullName() << ") instruction " << *I <<
                      " clobbers CTR, invalidating " << "BB#" <<
                      BI->getParent()->getNumber() << " (" <<
                      BI->getParent()->getFullName() << ") instruction " <<
                      *BI << "\n");
      return false;
    }

    if (I == IE)
      break;
  }

  if (!CheckPreds && Preds.empty())
    return true;

  if (CheckPreds) {
queue_preds:
    if (MachineFunction::iterator(MBB) == MBB->getParent()->begin()) {
      DEBUG(dbgs() << "Unable to find a MTCTR instruction for BB#" <<
                      BI->getParent()->getNumber() << " (" <<
                      BI->getParent()->getFullName() << ") instruction " <<
                      *BI << "\n");
      return false;
    }

    for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
         PIE = MBB->pred_end(); PI != PIE; ++PI)
      Preds.push_back(*PI);
  }

  do {
    MBB = Preds.pop_back_val();
    if (!Visited.count(MBB)) {
      I = MBB->getLastNonDebugInstr();
      goto check_block;
    }
  } while (!Preds.empty());

  return true;
}
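// A minimal standalone sketch (plain STL; names are illustrative, not the
// PPC pass's API) of the control shape above, without the gotos: walk
// backwards from a block through its predecessors with a visited set and
// a worklist, and succeed only if every path back to the entry passes
// through a "defining" block first.
#include <cassert>
#include <map>
#include <set>
#include <string>
#include <vector>

static bool allPathsSeeDef(
    const std::string &Start,
    const std::map<std::string, std::vector<std::string>> &Preds,
    const std::set<std::string> &DefBlocks) {
  std::set<std::string> Visited;
  std::vector<std::string> Worklist = {Start};
  while (!Worklist.empty()) {
    std::string BB = Worklist.back();
    Worklist.pop_back();
    if (!Visited.insert(BB).second)
      continue;                     // already checked this block
    if (DefBlocks.count(BB))
      continue;                     // this path is satisfied
    auto It = Preds.find(BB);
    if (It == Preds.end() || It->second.empty())
      return false;                 // reached entry without seeing a def
    for (const std::string &P : It->second)
      Worklist.push_back(P);
  }
  return true;
}

int main() {
  std::map<std::string, std::vector<std::string>> Preds = {
      {"latch", {"header"}}, {"header", {"preheader", "latch"}},
      {"preheader", {}}};
  assert(allPathsSeeDef("latch", Preds, {"preheader"}));
  assert(!allPathsSeeDef("latch", Preds, {}));
  return 0;
}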
Example no. 6
0
bool PPCLoopPreIncPrep::runOnLoop(Loop *L) {
  bool MadeChange = false;

  if (!DL)
    return MadeChange;

  // Only prepare the innermost loop.
  if (!L->empty())
    return MadeChange;

  BasicBlock *Header = L->getHeader();

  const PPCSubtarget *ST =
    TM ? TM->getSubtargetImpl(*Header->getParent()) : nullptr;

  unsigned HeaderLoopPredCount = 0;
  for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header);
       PI != PE; ++PI) {
    ++HeaderLoopPredCount;
  }

  // Collect buckets of comparable addresses used by loads and stores.
  typedef std::multimap<const SCEV *, Instruction *, SCEVLess> Bucket;
  SmallVector<Bucket, 16> Buckets;
  for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
       I != IE; ++I) {
    for (BasicBlock::iterator J = (*I)->begin(), JE = (*I)->end();
        J != JE; ++J) {
      Value *PtrValue;
      Instruction *MemI;

      if (LoadInst *LMemI = dyn_cast<LoadInst>(J)) {
        MemI = LMemI;
        PtrValue = LMemI->getPointerOperand();
      } else if (StoreInst *SMemI = dyn_cast<StoreInst>(J)) {
        MemI = SMemI;
        PtrValue = SMemI->getPointerOperand();
      } else if (IntrinsicInst *IMemI = dyn_cast<IntrinsicInst>(J)) {
        if (IMemI->getIntrinsicID() == Intrinsic::prefetch) {
          MemI = IMemI;
          PtrValue = IMemI->getArgOperand(0);
        } else continue;
      } else continue;

      unsigned PtrAddrSpace = PtrValue->getType()->getPointerAddressSpace();
      if (PtrAddrSpace)
        continue;

      // There are no update forms for Altivec vector load/stores.
      if (ST && ST->hasAltivec() &&
          PtrValue->getType()->getPointerElementType()->isVectorTy())
        continue;

      if (L->isLoopInvariant(PtrValue))
        continue;

      const SCEV *LSCEV = SE->getSCEV(PtrValue);
      if (!isa<SCEVAddRecExpr>(LSCEV))
        continue;

      bool FoundBucket = false;
      for (unsigned i = 0, e = Buckets.size(); i != e; ++i)
        for (Bucket::iterator K = Buckets[i].begin(), KE = Buckets[i].end();
             K != KE; ++K) {
          const SCEV *Diff = SE->getMinusSCEV(K->first, LSCEV);
          if (isa<SCEVConstant>(Diff)) {
            Buckets[i].insert(std::make_pair(LSCEV, MemI));
            FoundBucket = true;
            break;
          }
        }

      if (!FoundBucket) {
        Buckets.push_back(Bucket(SCEVLess(SE)));
        Buckets[Buckets.size()-1].insert(std::make_pair(LSCEV, MemI));
      }
    }
  }

  if (Buckets.empty() || Buckets.size() > MaxVars)
    return MadeChange;

  BasicBlock *LoopPredecessor = L->getLoopPredecessor();
  // If there is no loop predecessor, or the loop predecessor's terminator
  // returns a value (which might contribute to determining the loop's
  // iteration space), insert a new preheader for the loop.
  if (!LoopPredecessor ||
      !LoopPredecessor->getTerminator()->getType()->isVoidTy())
    LoopPredecessor = InsertPreheaderForLoop(L, this);
  if (!LoopPredecessor)
    return MadeChange;

  SmallSet<BasicBlock *, 16> BBChanged;
  for (unsigned i = 0, e = Buckets.size(); i != e; ++i) {
    // The base address of each bucket is transformed into a phi and the others
    // are rewritten as offsets of that variable.

    const SCEVAddRecExpr *BasePtrSCEV =
      cast<SCEVAddRecExpr>(Buckets[i].begin()->first);
    if (!BasePtrSCEV->isAffine())
      continue;

    Instruction *MemI = Buckets[i].begin()->second;
    Value *BasePtr = GetPointerOperand(MemI);
    assert(BasePtr && "No pointer operand");

    Type *I8PtrTy = Type::getInt8PtrTy(MemI->getParent()->getContext(),
      BasePtr->getType()->getPointerAddressSpace());

    const SCEV *BasePtrStartSCEV = BasePtrSCEV->getStart();
    if (!SE->isLoopInvariant(BasePtrStartSCEV, L))
      continue;

    const SCEVConstant *BasePtrIncSCEV =
      dyn_cast<SCEVConstant>(BasePtrSCEV->getStepRecurrence(*SE));
    if (!BasePtrIncSCEV)
      continue;
    BasePtrStartSCEV = SE->getMinusSCEV(BasePtrStartSCEV, BasePtrIncSCEV);
    if (!isSafeToExpand(BasePtrStartSCEV, *SE))
      continue;

    PHINode *NewPHI = PHINode::Create(I8PtrTy, HeaderLoopPredCount,
      MemI->hasName() ? MemI->getName() + ".phi" : "",
      Header->getFirstNonPHI());

    SCEVExpander SCEVE(*SE, "pistart");
    Value *BasePtrStart = SCEVE.expandCodeFor(BasePtrStartSCEV, I8PtrTy,
      LoopPredecessor->getTerminator());

    // Note that LoopPredecessor might occur in the predecessor list multiple
    // times, and we need to add it the right number of times.
    for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header);
         PI != PE; ++PI) {
      if (*PI != LoopPredecessor)
        continue;

      NewPHI->addIncoming(BasePtrStart, LoopPredecessor);
    }

    Instruction *InsPoint = Header->getFirstInsertionPt();
    GetElementPtrInst *PtrInc =
      GetElementPtrInst::Create(NewPHI, BasePtrIncSCEV->getValue(),
        MemI->hasName() ? MemI->getName() + ".inc" : "", InsPoint);
    PtrInc->setIsInBounds(IsPtrInBounds(BasePtr));
    for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header);
         PI != PE; ++PI) {
      if (*PI == LoopPredecessor)
        continue;

      NewPHI->addIncoming(PtrInc, *PI);
    }

    Instruction *NewBasePtr;
    if (PtrInc->getType() != BasePtr->getType())
      NewBasePtr = new BitCastInst(PtrInc, BasePtr->getType(),
        PtrInc->hasName() ? PtrInc->getName() + ".cast" : "", InsPoint);
    else
      NewBasePtr = PtrInc;

    if (Instruction *IDel = dyn_cast<Instruction>(BasePtr))
      BBChanged.insert(IDel->getParent());
    BasePtr->replaceAllUsesWith(NewBasePtr);
    RecursivelyDeleteTriviallyDeadInstructions(BasePtr);

    Value *LastNewPtr = NewBasePtr;
    for (Bucket::iterator I = std::next(Buckets[i].begin()),
         IE = Buckets[i].end(); I != IE; ++I) {
      Value *Ptr = GetPointerOperand(I->second);
      assert(Ptr && "No pointer operand");
      if (Ptr == LastNewPtr)
        continue;

      Instruction *RealNewPtr;
      const SCEVConstant *Diff =
        cast<SCEVConstant>(SE->getMinusSCEV(I->first, BasePtrSCEV));
      if (Diff->isZero()) {
        RealNewPtr = NewBasePtr;
      } else {
        Instruction *PtrIP = dyn_cast<Instruction>(Ptr);
        if (PtrIP && isa<Instruction>(NewBasePtr) &&
            cast<Instruction>(NewBasePtr)->getParent() == PtrIP->getParent())
          PtrIP = 0;
        else if (isa<PHINode>(PtrIP))
          PtrIP = PtrIP->getParent()->getFirstInsertionPt();
        else if (!PtrIP)
          PtrIP = I->second;
  
        GetElementPtrInst *NewPtr =
          GetElementPtrInst::Create(PtrInc, Diff->getValue(),
            I->second->hasName() ? I->second->getName() + ".off" : "", PtrIP);
        if (!PtrIP)
          NewPtr->insertAfter(cast<Instruction>(PtrInc));
        NewPtr->setIsInBounds(IsPtrInBounds(Ptr));
        RealNewPtr = NewPtr;
      }

      if (Instruction *IDel = dyn_cast<Instruction>(Ptr))
        BBChanged.insert(IDel->getParent());

      Instruction *ReplNewPtr;
      if (Ptr->getType() != RealNewPtr->getType()) {
        ReplNewPtr = new BitCastInst(RealNewPtr, Ptr->getType(),
          Ptr->hasName() ? Ptr->getName() + ".cast" : "");
        ReplNewPtr->insertAfter(RealNewPtr);
      } else
        ReplNewPtr = RealNewPtr;

      Ptr->replaceAllUsesWith(ReplNewPtr);
      RecursivelyDeleteTriviallyDeadInstructions(Ptr);

      LastNewPtr = RealNewPtr;
    }

    MadeChange = true;
  }

  for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
       I != IE; ++I) {
    if (BBChanged.count(*I))
      DeleteDeadPHIs(*I);
  }

  return MadeChange;
}
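// A minimal standalone sketch (plain integers standing in for SCEVs;
// Addr/Base/Offset are illustrative names) of the bucketing step above:
// two addresses land in the same bucket when their difference is a
// compile-time constant -- modeled here as sharing the same recurrence
// base -- so one PHI can serve the whole bucket and the remaining
// accesses become fixed offsets from it.
#include <cassert>
#include <vector>

struct Addr {
  int Base;   // identifies the add-recurrence (the loop-varying part)
  int Offset; // constant byte offset from that base
};

int main() {
  std::vector<Addr> Mem = {{0, 0}, {0, 8}, {1, 0}, {0, 16}};
  std::vector<std::vector<Addr>> Buckets;
  for (const Addr &A : Mem) {
    bool Found = false;
    for (auto &B : Buckets)
      if (B.front().Base == A.Base) { // difference is a constant
        B.push_back(A);
        Found = true;
        break;
      }
    if (!Found)
      Buckets.push_back({A});
  }
  assert(Buckets.size() == 2);    // {base0: +0,+8,+16} and {base1: +0}
  assert(Buckets[0].size() == 3);
  return 0;
}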
Example no. 7
0
/// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop
/// invariants out to the preheader.
void MachineLICM::HoistRegionPostRA() {
  unsigned NumRegs = TRI->getNumRegs();
  unsigned *PhysRegDefs = new unsigned[NumRegs];
  std::fill(PhysRegDefs, PhysRegDefs + NumRegs, 0);

  SmallVector<CandidateInfo, 32> Candidates;
  SmallSet<int, 32> StoredFIs;

  // Walk the entire region, count number of defs for each register, and
  // collect potential LICM candidates.
  const std::vector<MachineBasicBlock*> Blocks = CurLoop->getBlocks();
  for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
    MachineBasicBlock *BB = Blocks[i];
    // Conservatively treat live-ins as external defs.
    // FIXME: That means a reload that's reused in successor block(s) will not
    // be LICM'ed.
    for (MachineBasicBlock::livein_iterator I = BB->livein_begin(),
           E = BB->livein_end(); I != E; ++I) {
      unsigned Reg = *I;
      ++PhysRegDefs[Reg];
      for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
        ++PhysRegDefs[*AS];
    }

    for (MachineBasicBlock::iterator
           MII = BB->begin(), E = BB->end(); MII != E; ++MII) {
      MachineInstr *MI = &*MII;
      ProcessMI(MI, PhysRegDefs, StoredFIs, Candidates);
    }
  }

  // Now evaluate whether the potential candidates qualify.
  // 1. Check if the candidate defined register is defined by another
  //    instruction in the loop.
  // 2. If the candidate is a load from stack slot (always true for now),
  //    check if the slot is stored anywhere in the loop.
  for (unsigned i = 0, e = Candidates.size(); i != e; ++i) {
    if (Candidates[i].FI != INT_MIN &&
        StoredFIs.count(Candidates[i].FI))
      continue;

    if (PhysRegDefs[Candidates[i].Def] == 1) {
      bool Safe = true;
      MachineInstr *MI = Candidates[i].MI;
      for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) {
        const MachineOperand &MO = MI->getOperand(j);
        if (!MO.isReg() || MO.isDef() || !MO.getReg())
          continue;
        if (PhysRegDefs[MO.getReg()]) {
          // If it's using a non-loop-invariant register, then it's obviously
          // not safe to hoist.
          Safe = false;
          break;
        }
      }
      if (Safe)
        HoistPostRA(MI, Candidates[i].Def);
    }
  }

  delete[] PhysRegDefs;
}
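// A minimal standalone sketch (toy register numbers, plain STL) of the
// screening logic above: count every def in the region first; an
// instruction is a safe hoist candidate only if its destination register
// has exactly one def and none of its source registers are defined
// anywhere in the region.
#include <cassert>
#include <vector>

struct Inst {
  int Def;
  std::vector<int> Uses;
};

int main() {
  const int NumRegs = 4;
  // %1 = op %0 ; %2 = op %1 ; %3 = op %0
  std::vector<Inst> Region = {{1, {0}}, {2, {1}}, {3, {0}}};

  std::vector<unsigned> DefCount(NumRegs, 0);
  for (const Inst &I : Region)
    ++DefCount[I.Def];

  auto Hoistable = [&](const Inst &I) {
    if (DefCount[I.Def] != 1)
      return false; // destination redefined elsewhere in the region
    for (int U : I.Uses)
      if (DefCount[U])
        return false; // reads a register that the region also defines
    return true;
  };

  assert(Hoistable(Region[0]));  // %0 is never defined in the region
  assert(!Hoistable(Region[1])); // %1 is defined inside the region
  return 0;
}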
Example no. 8
0
/// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the
/// abstract stack objects.
///
void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
  // Loop over all of the stack objects, assigning sequential addresses...
  MachineFrameInfo *MFI = Fn.getFrameInfo();
  const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering();
  bool StackGrowsDown =
    TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
  int64_t Offset = 0;
  unsigned MaxAlign = 0;
  StackProtector *SP = &getAnalysis<StackProtector>();

  // Make sure that the stack protector comes before the local variables on the
  // stack.
  SmallSet<int, 16> ProtectedObjs;
  if (MFI->getStackProtectorIndex() >= 0) {
    StackObjSet LargeArrayObjs;
    StackObjSet SmallArrayObjs;
    StackObjSet AddrOfObjs;

    AdjustStackOffset(MFI, MFI->getStackProtectorIndex(), Offset,
                      StackGrowsDown, MaxAlign);

    // Assign large stack objects first.
    for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
      if (MFI->isDeadObjectIndex(i))
        continue;
      if (MFI->getStackProtectorIndex() == (int)i)
        continue;

      switch (SP->getSSPLayout(MFI->getObjectAllocation(i))) {
      case StackProtector::SSPLK_None:
        continue;
      case StackProtector::SSPLK_SmallArray:
        SmallArrayObjs.insert(i);
        continue;
      case StackProtector::SSPLK_AddrOf:
        AddrOfObjs.insert(i);
        continue;
      case StackProtector::SSPLK_LargeArray:
        LargeArrayObjs.insert(i);
        continue;
      }
      llvm_unreachable("Unexpected SSPLayoutKind.");
    }

    AssignProtectedObjSet(LargeArrayObjs, ProtectedObjs, MFI, StackGrowsDown,
                          Offset, MaxAlign);
    AssignProtectedObjSet(SmallArrayObjs, ProtectedObjs, MFI, StackGrowsDown,
                          Offset, MaxAlign);
    AssignProtectedObjSet(AddrOfObjs, ProtectedObjs, MFI, StackGrowsDown,
                          Offset, MaxAlign);
  }

  // Then assign frame offsets to stack objects that are not used to spill
  // callee saved registers.
  for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
    if (MFI->isDeadObjectIndex(i))
      continue;
    if (MFI->getStackProtectorIndex() == (int)i)
      continue;
    if (ProtectedObjs.count(i))
      continue;

    AdjustStackOffset(MFI, i, Offset, StackGrowsDown, MaxAlign);
  }

  // Remember how big this blob of stack space is
  MFI->setLocalFrameSize(Offset);
  MFI->setLocalFrameMaxAlign(MaxAlign);
}
Example no. 9
0
/// finalizeBundle - Finalize a machine instruction bundle which includes
/// a sequence of instructions starting from FirstMI to LastMI (exclusive).
/// This routine adds a BUNDLE instruction to represent the bundle, it adds
/// IsInternalRead markers to MachineOperands which are defined inside the
/// bundle, and it copies externally visible defs and uses to the BUNDLE
/// instruction.
void llvm::finalizeBundle(MachineBasicBlock &MBB,
                          MachineBasicBlock::instr_iterator FirstMI,
                          MachineBasicBlock::instr_iterator LastMI) {
  assert(FirstMI != LastMI && "Empty bundle?");
  MIBundleBuilder Bundle(MBB, FirstMI, LastMI);

  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();

  MachineInstrBuilder MIB =
      BuildMI(MF, FirstMI->getDebugLoc(), TII->get(TargetOpcode::BUNDLE));
  Bundle.prepend(MIB);

  SmallVector<unsigned, 32> LocalDefs;
  SmallSet<unsigned, 32> LocalDefSet;
  SmallSet<unsigned, 8> DeadDefSet;
  SmallSet<unsigned, 16> KilledDefSet;
  SmallVector<unsigned, 8> ExternUses;
  SmallSet<unsigned, 8> ExternUseSet;
  SmallSet<unsigned, 8> KilledUseSet;
  SmallSet<unsigned, 8> UndefUseSet;
  SmallVector<MachineOperand*, 4> Defs;
  for (; FirstMI != LastMI; ++FirstMI) {
    for (unsigned i = 0, e = FirstMI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = FirstMI->getOperand(i);
      if (!MO.isReg())
        continue;
      if (MO.isDef()) {
        Defs.push_back(&MO);
        continue;
      }

      unsigned Reg = MO.getReg();
      if (!Reg)
        continue;
      assert(TargetRegisterInfo::isPhysicalRegister(Reg));
      if (LocalDefSet.count(Reg)) {
        MO.setIsInternalRead();
        if (MO.isKill())
          // Internal def is now killed.
          KilledDefSet.insert(Reg);
      } else {
        if (ExternUseSet.insert(Reg).second) {
          ExternUses.push_back(Reg);
          if (MO.isUndef())
            UndefUseSet.insert(Reg);
        }
        if (MO.isKill())
          // External def is now killed.
          KilledUseSet.insert(Reg);
      }
    }

    for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
      MachineOperand &MO = *Defs[i];
      unsigned Reg = MO.getReg();
      if (!Reg)
        continue;

      if (LocalDefSet.insert(Reg).second) {
        LocalDefs.push_back(Reg);
        if (MO.isDead()) {
          DeadDefSet.insert(Reg);
        }
      } else {
        // Re-defined inside the bundle, it's no longer killed.
        KilledDefSet.erase(Reg);
        if (!MO.isDead())
          // Previously defined but dead.
          DeadDefSet.erase(Reg);
      }

      if (!MO.isDead()) {
        for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
          unsigned SubReg = *SubRegs;
          if (LocalDefSet.insert(SubReg).second)
            LocalDefs.push_back(SubReg);
        }
      }
    }

    Defs.clear();
  }

  SmallSet<unsigned, 32> Added;
  for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) {
    unsigned Reg = LocalDefs[i];
    if (Added.insert(Reg).second) {
      // If it's not live beyond end of the bundle, mark it dead.
      bool isDead = DeadDefSet.count(Reg) || KilledDefSet.count(Reg);
      MIB.addReg(Reg, getDefRegState(true) | getDeadRegState(isDead) |
                 getImplRegState(true));
    }
  }

  for (unsigned i = 0, e = ExternUses.size(); i != e; ++i) {
    unsigned Reg = ExternUses[i];
    bool isKill = KilledUseSet.count(Reg);
    bool isUndef = UndefUseSet.count(Reg);
    MIB.addReg(Reg, getKillRegState(isKill) | getUndefRegState(isUndef) |
               getImplRegState(true));
  }
}
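// A minimal standalone sketch of the set-plus-vector idiom used above
// (LocalDefSet/LocalDefs, ExternUseSet/ExternUses): the set answers
// "seen already?" while the vector preserves first-seen order, so the
// operand list appended to the BUNDLE instruction is deterministic.
#include <cassert>
#include <set>
#include <vector>

int main() {
  std::set<unsigned> Seen;
  std::vector<unsigned> Ordered;
  for (unsigned Reg : {3u, 7u, 3u, 1u, 7u})
    if (Seen.insert(Reg).second) // .second is true on first insertion
      Ordered.push_back(Reg);
  assert((Ordered == std::vector<unsigned>{3, 7, 1}));
  return 0;
}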
Example no. 10
0
bool CastVerifier::IsSecurityRelatedCast(Value *value, Type *CastedTy,
                                          Type *OrigTy,
                                          SmallSet<Value *, 16> &Visited) {

  // Propagate the values originating from the bitcasted value.  If any
  // propagated value is 1) used by a return/call/invoke instruction, 2) not
  // a local variable, or 3) touches memory beyond the boundary of the
  // original type (before the bit-casting), this is a security-related
  // static_cast. If not, it is a non-security static_cast and we remove
  // the instrumented instructions.

  if (Visited.count(value)) // Already visited.
    return false;

  Type *valueTy = value->getType();

  CVER_DEBUG("Visit : " << *value << " (" << *valueTy << ")\n");
  Visited.insert(value);

  // Check how the value is taken.
  if (isa<GlobalValue>(value)) {
    CVER_DEBUG("\t True (global)\n");
    return true;
  } else if (LoadInst *LI = dyn_cast<LoadInst>(value)) {
    // If the value is taken from the outside, we lose control of it and mark
    // it as security sensitive.
    if (LI->getType() != CastedTy) {
      CVER_DEBUG("\t True (heap?)\n");
      return true;
    }
  }

  // Check how the value is used.
  for (User *user : value->users()) {
    CVER_DEBUG("\t (user) " << *user << "\n");

    if (StoreInst *SI = dyn_cast<StoreInst>(user)) {
      // store <CastedTy> <value>, <CastedTy>* <ptr>
      if (SI->getValueOperand()->getType() == CastedTy ||
          SI->getValueOperand()->getType() == valueTy) {
        if (IsSecurityRelatedCast(SI->getPointerOperand(),
                                  CastedTy, OrigTy, Visited))
          return true;
      } else {
        return true;
      }
    } // end of StoreInst

    else if (LoadInst *LI = dyn_cast<LoadInst>(user)) {
      // <value> = load <ty>* <ptr>
      if (LI->getType() == CastedTy) {
        if (IsSecurityRelatedCast(user, CastedTy, OrigTy, Visited))
          return true;
      } else {
        return true;
      }
    }
    else if (BitCastInst *BCI = dyn_cast<BitCastInst>(user)) {
      // <result> = bitcast <ty> <value> to <ty2>

      // If it is cast to a larger type than the original type, this must be
      // security sensitive. If it is smaller, then all the following memory
      // accesses will be safe.
      // FIXME : use original type, not the casted type.
      if (BCI->getType()->getIntegerBitWidth() > OrigTy->getIntegerBitWidth()) {
        return true;
      }
    }

    else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(user)) {
      if (GEP->getPointerOperandType() == CastedTy)
        return true;
    }

    else if (isa<ReturnInst>(user) ||
             isa<CallInst>(user) ||
             isa<InvokeInst>(user)) {
      return true;
    }
  } // end of User loop.
  return false;
}
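// A minimal standalone sketch (a toy use-graph, not LLVM's Value/User
// classes) of the traversal shape above: propagate from a value to its
// users, cutting cycles with a visited set, and report true as soon as
// any reachable use matches a predicate.
#include <cassert>
#include <map>
#include <set>
#include <string>
#include <vector>

static bool reachesSensitiveUse(
    const std::string &V,
    const std::map<std::string, std::vector<std::string>> &Users,
    const std::set<std::string> &Sensitive, std::set<std::string> &Visited) {
  if (!Visited.insert(V).second)
    return false;                  // already visited: cycle cut
  if (Sensitive.count(V))
    return true;
  auto It = Users.find(V);
  if (It != Users.end())
    for (const std::string &U : It->second)
      if (reachesSensitiveUse(U, Users, Sensitive, Visited))
        return true;
  return false;
}

int main() {
  std::map<std::string, std::vector<std::string>> Users = {
      {"cast", {"store", "load"}}, {"load", {"cast"}}, {"store", {"call"}}};
  std::set<std::string> Visited;
  assert(reachesSensitiveUse("cast", Users, {"call"}, Visited));
  return 0;
}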
Example no. 11
0
/// processImplicitDefs - Process IMPLICIT_DEF instructions and make sure
/// there is one implicit_def for each use. Add isUndef marker to
/// implicit_def defs and their uses.
bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {

  DEBUG(dbgs() << "********** PROCESS IMPLICIT DEFS **********\n"
               << "********** Function: "
               << ((Value*)fn.getFunction())->getName() << '\n');

  bool Changed = false;

  TII = fn.getTarget().getInstrInfo();
  TRI = fn.getTarget().getRegisterInfo();
  MRI = &fn.getRegInfo();
  LV = &getAnalysis<LiveVariables>();

  SmallSet<unsigned, 8> ImpDefRegs;
  SmallVector<MachineInstr*, 8> ImpDefMIs;
  SmallVector<MachineInstr*, 4> RUses;
  SmallPtrSet<MachineBasicBlock*,16> Visited;
  SmallPtrSet<MachineInstr*, 8> ModInsts;

  MachineBasicBlock *Entry = fn.begin();
  for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*,16> >
         DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited);
       DFI != E; ++DFI) {
    MachineBasicBlock *MBB = *DFI;
    for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
         I != E; ) {
      MachineInstr *MI = &*I;
      ++I;
      if (MI->isImplicitDef()) {
        ImpDefMIs.push_back(MI);
        // Is this a sub-register read-modify-write?
        if (MI->getOperand(0).readsReg())
          continue;
        unsigned Reg = MI->getOperand(0).getReg();
        ImpDefRegs.insert(Reg);
        if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
          for (const unsigned *SS = TRI->getSubRegisters(Reg); *SS; ++SS)
            ImpDefRegs.insert(*SS);
        }
        continue;
      }

      // Eliminate %reg1032:sub<def> = COPY undef.
      if (MI->isCopy() && MI->getOperand(0).readsReg()) {
        MachineOperand &MO = MI->getOperand(1);
        if (MO.isUndef() || ImpDefRegs.count(MO.getReg())) {
          if (MO.isKill()) {
            LiveVariables::VarInfo& vi = LV->getVarInfo(MO.getReg());
            vi.removeKill(MI);
          }
          unsigned Reg = MI->getOperand(0).getReg();
          MI->eraseFromParent();
          Changed = true;

          // A REG_SEQUENCE may have been expanded into partial definitions.
          // If this was the last one, mark Reg as implicitly defined.
          if (TargetRegisterInfo::isVirtualRegister(Reg) && MRI->def_empty(Reg))
            ImpDefRegs.insert(Reg);
          continue;
        }
      }

      bool ChangedToImpDef = false;
      for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
        MachineOperand& MO = MI->getOperand(i);
        if (!MO.isReg() || !MO.readsReg())
          continue;
        unsigned Reg = MO.getReg();
        if (!Reg)
          continue;
        if (!ImpDefRegs.count(Reg))
          continue;
        // Use is a copy, just turn it into an implicit_def.
        if (CanTurnIntoImplicitDef(MI, Reg, i, ImpDefRegs)) {
          bool isKill = MO.isKill();
          MI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));
          for (int j = MI->getNumOperands() - 1, ee = 0; j > ee; --j)
            MI->RemoveOperand(j);
          if (isKill) {
            ImpDefRegs.erase(Reg);
            LiveVariables::VarInfo& vi = LV->getVarInfo(Reg);
            vi.removeKill(MI);
          }
          ChangedToImpDef = true;
          Changed = true;
          break;
        }

        Changed = true;
        MO.setIsUndef();
        // This is a partial register redef of an implicit def.
        // Make sure the whole register is defined by the instruction.
        if (MO.isDef()) {
          MI->addRegisterDefined(Reg);
          continue;
        }
        if (MO.isKill() || MI->isRegTiedToDefOperand(i)) {
          // Make sure other reads of Reg are also marked <undef>.
          for (unsigned j = i+1; j != e; ++j) {
            MachineOperand &MOJ = MI->getOperand(j);
            if (MOJ.isReg() && MOJ.getReg() == Reg && MOJ.readsReg())
              MOJ.setIsUndef();
          }
          ImpDefRegs.erase(Reg);
        }
      }

      if (ChangedToImpDef) {
        // Backtrack to process this new implicit_def.
        --I;
      } else {
        for (unsigned i = 0; i != MI->getNumOperands(); ++i) {
          MachineOperand& MO = MI->getOperand(i);
          if (!MO.isReg() || !MO.isDef())
            continue;
          ImpDefRegs.erase(MO.getReg());
        }
      }
    }

    // Any outstanding liveout implicit_def's?
    for (unsigned i = 0, e = ImpDefMIs.size(); i != e; ++i) {
      MachineInstr *MI = ImpDefMIs[i];
      unsigned Reg = MI->getOperand(0).getReg();
      if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
          !ImpDefRegs.count(Reg)) {
        // Delete all "local" implicit_def's. That include those which define
        // physical registers since they cannot be liveout.
        MI->eraseFromParent();
        Changed = true;
        continue;
      }

      // If there are multiple defs of the same register and at least one
      // is not an implicit_def, do not insert implicit_def's before the
      // uses.
      bool Skip = false;
      SmallVector<MachineInstr*, 4> DeadImpDefs;
      for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(Reg),
             DE = MRI->def_end(); DI != DE; ++DI) {
        MachineInstr *DeadImpDef = &*DI;
        if (!DeadImpDef->isImplicitDef()) {
          Skip = true;
          break;
        }
        DeadImpDefs.push_back(DeadImpDef);
      }
      if (Skip)
        continue;

      // The only implicit_def's we want to keep are those that are live
      // out of their defining block.
      for (unsigned j = 0, ee = DeadImpDefs.size(); j != ee; ++j)
        DeadImpDefs[j]->eraseFromParent();
      Changed = true;

      // Process each use instruction once.
      for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
             UE = MRI->use_end(); UI != UE; ++UI) {
        if (UI.getOperand().isUndef())
          continue;
        MachineInstr *RMI = &*UI;
        if (ModInsts.insert(RMI))
          RUses.push_back(RMI);
      }

      for (unsigned i = 0, e = RUses.size(); i != e; ++i) {
        MachineInstr *RMI = RUses[i];

        // Turn a copy use into an implicit_def.
        if (isUndefCopy(RMI, Reg, ImpDefRegs)) {
          RMI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));

          bool isKill = false;
          SmallVector<unsigned, 4> Ops;
          for (unsigned j = 0, ee = RMI->getNumOperands(); j != ee; ++j) {
            MachineOperand &RRMO = RMI->getOperand(j);
            if (RRMO.isReg() && RRMO.getReg() == Reg) {
              Ops.push_back(j);
              if (RRMO.isKill())
                isKill = true;
            }
          }
          // Leave the other operands alone.
          for (unsigned j = 0, ee = Ops.size(); j != ee; ++j) {
            unsigned OpIdx = Ops[j];
            RMI->RemoveOperand(OpIdx-j);
          }

          // Update LiveVariables varinfo if the instruction is a kill.
          if (isKill) {
            LiveVariables::VarInfo& vi = LV->getVarInfo(Reg);
            vi.removeKill(RMI);
          }
          continue;
        }

        // Replace Reg with a new vreg that's marked implicit.
        const TargetRegisterClass* RC = MRI->getRegClass(Reg);
        unsigned NewVReg = MRI->createVirtualRegister(RC);
        bool isKill = true;
        for (unsigned j = 0, ee = RMI->getNumOperands(); j != ee; ++j) {
          MachineOperand &RRMO = RMI->getOperand(j);
          if (RRMO.isReg() && RRMO.getReg() == Reg) {
            RRMO.setReg(NewVReg);
            RRMO.setIsUndef();
            if (isKill) {
              // Only the first operand of NewVReg is marked kill.
              RRMO.setIsKill();
              isKill = false;
            }
          }
        }
      }
      RUses.clear();
      ModInsts.clear();
    }
    ImpDefRegs.clear();
    ImpDefMIs.clear();
  }

  return Changed;
}
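
The cleanup above visits each use instruction exactly once by pairing a visited set (ModInsts) with a worklist (RUses), since a register's use list can name the same instruction several times (once per operand). A minimal standalone C++ sketch of that dedup pattern, with plain ints standing in for MachineInstr pointers and std::set in place of the LLVM set types:

#include <iostream>
#include <set>
#include <vector>

int main() {
  // A use list may mention the same instruction more than once.
  std::vector<int> UseList = {7, 3, 7, 9, 3};

  std::set<int> ModInsts;   // instructions already queued
  std::vector<int> RUses;   // worklist: each instruction exactly once
  for (int MI : UseList)
    if (ModInsts.insert(MI).second)   // true only on first insertion
      RUses.push_back(MI);

  for (int MI : RUses)
    std::cout << "process instruction " << MI << '\n';  // 7, 3, 9
}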
Example no. 12
// Handle special instruction operands like early clobbers and tied ops when
// there are additional physreg defines.
void RAFast::handleThroughOperands(MachineInstr *MI,
                                   SmallVectorImpl<unsigned> &VirtDead) {
  DEBUG(dbgs() << "Scanning for through registers:");
  SmallSet<unsigned, 8> ThroughRegs;
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg()) continue;
    unsigned Reg = MO.getReg();
    if (!TargetRegisterInfo::isVirtualRegister(Reg))
      continue;
    if (MO.isEarlyClobber() || MI->isRegTiedToDefOperand(i) ||
        (MO.getSubReg() && MI->readsVirtualRegister(Reg))) {
      if (ThroughRegs.insert(Reg))
        DEBUG(dbgs() << ' ' << PrintReg(Reg));
    }
  }

  // If any physreg defines collide with preallocated through registers,
  // we must spill and reallocate.
  DEBUG(dbgs() << "\nChecking for physdef collisions.\n");
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg() || !MO.isDef()) continue;
    unsigned Reg = MO.getReg();
    if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
    UsedInInstr.set(Reg);
    if (ThroughRegs.count(PhysRegState[Reg]))
      definePhysReg(MI, Reg, regFree);
    for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) {
      UsedInInstr.set(*AS);
      if (ThroughRegs.count(PhysRegState[*AS]))
        definePhysReg(MI, *AS, regFree);
    }
  }

  SmallVector<unsigned, 8> PartialDefs;
  DEBUG(dbgs() << "Allocating tied uses and early clobbers.\n");
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg()) continue;
    unsigned Reg = MO.getReg();
    if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue;
    if (MO.isUse()) {
      unsigned DefIdx = 0;
      if (!MI->isRegTiedToDefOperand(i, &DefIdx)) continue;
      DEBUG(dbgs() << "Operand " << i << "(" << MO << ") is tied to operand "
            << DefIdx << ".\n");
      LiveRegMap::iterator LRI = reloadVirtReg(MI, i, Reg, 0);
      unsigned PhysReg = LRI->second.PhysReg;
      setPhysReg(MI, i, PhysReg);
      // Note: we don't update the def operand yet. That would cause the normal
      // def-scan to attempt spilling.
    } else if (MO.getSubReg() && MI->readsVirtualRegister(Reg)) {
      DEBUG(dbgs() << "Partial redefine: " << MO << "\n");
      // Reload the register, but don't assign to the operand just yet.
      // That would confuse the later phys-def processing pass.
      LiveRegMap::iterator LRI = reloadVirtReg(MI, i, Reg, 0);
      PartialDefs.push_back(LRI->second.PhysReg);
    } else if (MO.isEarlyClobber()) {
      // Note: defineVirtReg may invalidate MO.
      LiveRegMap::iterator LRI = defineVirtReg(MI, i, Reg, 0);
      unsigned PhysReg = LRI->second.PhysReg;
      if (setPhysReg(MI, i, PhysReg))
        VirtDead.push_back(Reg);
    }
  }

  // Restore UsedInInstr to a state usable for allocating normal virtual uses.
  UsedInInstr.reset();
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg() || (MO.isDef() && !MO.isEarlyClobber())) continue;
    unsigned Reg = MO.getReg();
    if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
    DEBUG(dbgs() << "\tSetting reg " << Reg << " as used in instr\n");
    UsedInInstr.set(Reg);
    for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) {
      DEBUG(dbgs() << "\tSetting alias reg " << *AS << " as used in instr\n");
      UsedInInstr.set(*AS);
    }
  }

  // Also mark PartialDefs as used to avoid reallocation.
  for (unsigned i = 0, e = PartialDefs.size(); i != e; ++i)
    UsedInInstr.set(PartialDefs[i]);
}
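
UsedInInstr above is a bitvector over physical registers: whenever the allocator touches a physreg, it marks the register and every alias so neither is handed out again while processing the same instruction. A standalone sketch of that marking, using std::bitset and an invented two-entry alias table (not a real target description):

#include <bitset>
#include <iostream>
#include <vector>

int main() {
  const unsigned NumRegs = 16;
  std::bitset<NumRegs> UsedInInstr;

  // Hypothetical overlap: register 1 aliases registers 8 and 9
  // (think of a wide register overlapping two narrow ones).
  std::vector<std::vector<unsigned>> AliasSet(NumRegs);
  AliasSet[1] = {8, 9};

  auto markUsed = [&](unsigned Reg) {
    UsedInInstr.set(Reg);
    for (unsigned A : AliasSet[Reg])  // aliases become unavailable too
      UsedInInstr.set(A);
  };

  markUsed(1);
  std::cout << "reg 8 still free? " << !UsedInInstr.test(8) << '\n';  // 0
}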
Example no. 13
bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
  bool Modified = false;

  SmallSet<unsigned, 4> Defs;
  SmallSet<unsigned, 4> Uses;
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    MachineInstr *MI = &*MBBI;
    DebugLoc dl = MI->getDebugLoc();
    unsigned PredReg = 0;
    ARMCC::CondCodes CC = getPredicate(MI, PredReg);
    if (CC == ARMCC::AL) {
      ++MBBI;
      continue;
    }

    Defs.clear();
    Uses.clear();
    TrackDefUses(MI, Defs, Uses);

    // Insert an IT instruction.
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(ARM::t2IT))
      .addImm(CC);
    MachineBasicBlock::iterator InsertPos = MIB;
    ++MBBI;

    // Finalize IT mask.
    ARMCC::CondCodes OCC = ARMCC::getOppositeCondition(CC);
    unsigned Mask = 0, Pos = 3;
    // Branches, including tricky ones like LDM_RET, need to end an IT
    // block so check the instruction we just put in the block.
    for (; MBBI != E && Pos &&
           (!MI->getDesc().isBranch() && !MI->getDesc().isReturn()); ++MBBI) {
      if (MBBI->isDebugValue())
        continue;

      MachineInstr *NMI = &*MBBI;
      MI = NMI;

      unsigned NPredReg = 0;
      ARMCC::CondCodes NCC = getPredicate(NMI, NPredReg);
      if (NCC == CC || NCC == OCC)
        Mask |= (NCC & 1) << Pos;
      else {
        unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
        if (NCC == ARMCC::AL &&
            TII->isMoveInstr(*NMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) {
          assert(SrcSubIdx == 0 && DstSubIdx == 0 &&
                 "Sub-register indices still around?");
          // LLVM models selects as two-address instructions. That means a copy
          // is inserted before a t2MOVccr, etc. If the copy is scheduled in
          // between selects we would end up creating multiple IT blocks.
          if (!Uses.count(DstReg) && !Defs.count(SrcReg)) {
            --MBBI;
            MBB.remove(NMI);
            MBB.insert(InsertPos, NMI);
            ++NumMovedInsts;
            continue;
          }
        }
        break;
      }
      TrackDefUses(NMI, Defs, Uses);
      --Pos;
    }

    Mask |= (1 << Pos);
    // Tag along (firstcond[0] << 4) with the mask.
    Mask |= (CC & 1) << 4;
    MIB.addImm(Mask);
    Modified = true;
    ++NumITs;
  }

  return Modified;
}
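
The mask finalization above packs the IT block's shape into the instruction's immediate: Pos starts at 3, each follow-on predicated instruction contributes the low bit of its condition code, a trailing 1 bit terminates the mask, and the low bit of the block's first condition is tagged along at bit 4. A standalone sketch of that exact computation (itMask is an invented helper; condition codes are plain unsigned values):

#include <cstdio>
#include <vector>

unsigned itMask(unsigned FirstCC, const std::vector<unsigned> &LaterCCs) {
  unsigned Mask = 0, Pos = 3;
  for (unsigned CC : LaterCCs) {  // at most three follow-on instructions
    Mask |= (CC & 1) << Pos;
    --Pos;
  }
  Mask |= 1u << Pos;              // terminating 1 bit
  Mask |= (FirstCC & 1) << 4;     // firstcond[0]
  return Mask;
}

int main() {
  // First condition even (e.g. EQ), followed by one same-condition and
  // one opposite-condition instruction.
  std::printf("mask = 0x%x\n", itMask(0, {0, 1}));  // mask = 0x6
}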
Example no. 14
/// Sink3AddrInstruction - A two-address instruction has been converted to a
/// three-address instruction to avoid clobbering a register. Try to sink it
/// past the instruction that would kill the above-mentioned register to reduce
/// register pressure.
bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB,
                                           MachineInstr *MI, unsigned SavedReg,
                                           MachineBasicBlock::iterator OldPos) {
  // Check if it's safe to move this instruction.
  bool SeenStore = true; // Be conservative.
  if (!MI->isSafeToMove(TII, SeenStore, AA))
    return false;

  unsigned DefReg = 0;
  SmallSet<unsigned, 4> UseRegs;

  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg())
      continue;
    unsigned MOReg = MO.getReg();
    if (!MOReg)
      continue;
    if (MO.isUse() && MOReg != SavedReg)
      UseRegs.insert(MO.getReg());
    if (!MO.isDef())
      continue;
    if (MO.isImplicit())
      // Don't try to move it if it implicitly defines a register.
      return false;
    if (DefReg)
      // For now, don't move any instructions that define multiple registers.
      return false;
    DefReg = MO.getReg();
  }

  // Find the instruction that kills SavedReg.
  MachineInstr *KillMI = NULL;
  for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(SavedReg),
         UE = MRI->use_end(); UI != UE; ++UI) {
    MachineOperand &UseMO = UI.getOperand();
    if (!UseMO.isKill())
      continue;
    KillMI = UseMO.getParent();
    break;
  }

  if (!KillMI || KillMI->getParent() != MBB || KillMI == MI)
    return false;

  // If any of the definitions are used by another instruction between the
  // position and the kill use, then it's not safe to sink it.
  // 
  // FIXME: This can be sped up if there is an easy way to query whether an
  // instruction is before or after another instruction. Then we can use
  // MachineRegisterInfo def / use instead.
  MachineOperand *KillMO = NULL;
  MachineBasicBlock::iterator KillPos = KillMI;
  ++KillPos;

  unsigned NumVisited = 0;
  for (MachineBasicBlock::iterator I = next(OldPos); I != KillPos; ++I) {
    MachineInstr *OtherMI = I;
    if (NumVisited > 30)  // FIXME: Arbitrary limit to reduce compile time cost.
      return false;
    ++NumVisited;
    for (unsigned i = 0, e = OtherMI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = OtherMI->getOperand(i);
      if (!MO.isReg())
        continue;
      unsigned MOReg = MO.getReg();
      if (!MOReg)
        continue;
      if (DefReg == MOReg)
        return false;

      if (MO.isKill()) {
        if (OtherMI == KillMI && MOReg == SavedReg)
          // Save the operand that kills the register. We want to unset the kill
          // marker if we can sink MI past it.
          KillMO = &MO;
        else if (UseRegs.count(MOReg))
          // One of the uses is killed before the destination.
          return false;
      }
    }
  }

  // Update kill and LV information.
  KillMO->setIsKill(false);
  KillMO = MI->findRegisterUseOperand(SavedReg, false, TRI);
  KillMO->setIsKill(true);
  
  if (LV)
    LV->replaceKillInstruction(SavedReg, KillMI, MI);

  // Move instruction to its destination.
  MBB->remove(MI);
  MBB->insert(KillPos, MI);

  ++Num3AddrSunk;
  return true;
}
Example no. 15
/// optimizeCheck - replace the given check CallInst with the check's fast
/// version if all the source memory objects can be found and it is obvious
/// that none of them have been freed at the point where the check is made.
/// Returns the new call if possible and NULL otherwise.
///
/// This currently works only with memory objects that can't be freed:
/// * global variables
/// * allocas that trivially have function scope
/// * byval arguments
///
bool ExactCheckOpt::optimizeCheck(CallInst *CI, CheckInfoType *Info) {
  // Examined values
  SmallSet<Value*, 16> Visited;
  // Potential memory objects
  SmallSet<Value*, 4> Objects;

  std::queue<Value*> Q;
  // Start from the pointer operand
  Value *StartPtr = CI->getArgOperand(Info->PtrArgNo)->stripPointerCasts();
  Q.push(StartPtr);

  // Use BFS to find all potential memory objects
  while (!Q.empty()) {
    Value *o = Q.front()->stripPointerCasts();
    Q.pop();
    if (Visited.count(o))
      continue;
    Visited.insert(o);

    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(o)) {
      if (CE->getOpcode() == Instruction::GetElementPtr) {
        Q.push(CE->getOperand(0));
      } else {
        // Exit early if any of the objects are unsupported.
        if (!isSimpleMemoryObject(o))
          return false;
        Objects.insert(o);
      }
    } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(o)) {
      Q.push(GEP->getPointerOperand());
      // It is fine to ignore the case of indexing into null with a pointer
      // because that case is invalid for LLVM-aware objects such as allocas,
      // globals, and objects pointed to by noalias pointers.
    } else if (PHINode *PHI = dyn_cast<PHINode>(o)) {
      for (unsigned i = 0, num = PHI->getNumIncomingValues(); i != num; ++i)
        Q.push(PHI->getIncomingValue(i));
    } else if (SelectInst *SI = dyn_cast<SelectInst>(o)) {
      Q.push(SI->getTrueValue());
      Q.push(SI->getFalseValue());
    } else {
      // Exit early if any of the objects are unsupported.
      if (!isSimpleMemoryObject(o))
        return false;
      Objects.insert(o);
    }
  }

  // Mapping from the initial value to the corresponding size and void pointer:
  // * memory object -> its size and pointer
  // * phi/select -> corresponding phi/select for the sizes and pointers
  // * anything else -> the corresponding size and pointer on the path
  std::map <Value*, PtrSizePair> M;

  Module &Mod = *CI->getParent()->getParent()->getParent();
  Type *SizeTy = getSizeType(Info, Mod);

  // Add non-instruction non-constant allocation object pointers to the front
  // of the function's entry block.
  BasicBlock &EntryBlock = CI->getParent()->getParent()->getEntryBlock();
  Instruction *FirstInsertionPoint = ++BasicBlock::iterator(EntryBlock.begin());

  for (SmallSet<Value*, 16>::const_iterator It = Objects.begin(),
       E = Objects.end();
       It != E;
       ++It) {
    // Obj is a memory object pointer: alloca, argument, load, callinst, etc.
    Value *Obj = *It;

    // Insert instruction-based allocation pointers just after the allocation.
    Instruction *InsertBefore = FirstInsertionPoint;
    if (Instruction *I = dyn_cast<Instruction>(Obj))
      InsertBefore = ++BasicBlock::iterator(I);
    IRBuilder<> Builder(InsertBefore);

    SizeOffsetEvalType SizeOffset = ObjSizeEval->compute(Obj);
    assert(ObjSizeEval->bothKnown(SizeOffset));
    assert(dyn_cast<ConstantInt>(SizeOffset.second)->isZero());
    Value *Size = Builder.CreateIntCast(SizeOffset.first, SizeTy,
                                        /*isSigned=*/false);

    Value *Ptr = Builder.CreatePointerCast(Obj, VoidPtrTy);
    M[Obj] = std::make_pair(Ptr, Size);
  }

  // Create the rest of the size values and object pointers.
  // The phi nodes will be finished later.
  for (SmallSet<Value*, 16>::const_iterator I = Visited.begin(),
       E = Visited.end();
       I != E;
       ++I) {
    getPtrAndSize(*I, SizeTy, M);
  }

  // Finalize the phi nodes.
  for (SmallSet<Value*, 16>::const_iterator I = Visited.begin(),
       E = Visited.end();
       I != E;
       ++I) {
    if (PHINode *PHI = dyn_cast<PHINode>(*I)) {
      assert(M.count(PHI));
      PHINode *PtrPHI = cast<PHINode>(M[PHI].first);
      PHINode *SizePHI = cast<PHINode>(M[PHI].second);

      for (unsigned i = 0, num = PHI->getNumIncomingValues(); i != num; ++i) {
        Value *IncomingValue = PHI->getIncomingValue(i)->stripPointerCasts();
        assert(M.count(IncomingValue));

        PtrPHI->addIncoming(M[IncomingValue].first, PHI->getIncomingBlock(i));
        SizePHI->addIncoming(M[IncomingValue].second, PHI->getIncomingBlock(i));
      }
    }
  }

  // Insert the fast version of the check just before the regular version.
  assert(M.count(StartPtr) && "The memory object and its size should be known");
  createFastCheck(Info, CI, M[StartPtr].first, M[StartPtr].second);
  return true;
}
Example no. 16
/// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the
/// abstract stack objects.
///
void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
  const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering();
  StackProtector *SP = &getAnalysis<StackProtector>();

  bool StackGrowsDown =
    TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;

  // Loop over all of the stack objects, assigning sequential addresses...
  MachineFrameInfo &MFI = Fn.getFrameInfo();

  // Start at the beginning of the local area.
  // The Offset is the distance from the stack top in the direction
  // of stack growth -- so it's always nonnegative.
  int LocalAreaOffset = TFI.getOffsetOfLocalArea();
  if (StackGrowsDown)
    LocalAreaOffset = -LocalAreaOffset;
  assert(LocalAreaOffset >= 0
         && "Local area offset should be in direction of stack growth");
  int64_t Offset = LocalAreaOffset;

  // Skew to be applied to alignment.
  unsigned Skew = TFI.getStackAlignmentSkew(Fn);

  // If there are fixed sized objects that are preallocated in the local area,
  // non-fixed objects can't be allocated right at the start of local area.
  // Adjust 'Offset' to point to the end of last fixed sized preallocated
  // object.
  for (int i = MFI.getObjectIndexBegin(); i != 0; ++i) {
    int64_t FixedOff;
    if (StackGrowsDown) {
      // The maximum distance from the stack pointer is at lower address of
      // the object -- which is given by offset. For down growing stack
      // the offset is negative, so we negate the offset to get the distance.
      FixedOff = -MFI.getObjectOffset(i);
    } else {
      // The maximum distance from the start pointer is at the upper
      // address of the object.
      FixedOff = MFI.getObjectOffset(i) + MFI.getObjectSize(i);
    }
    if (FixedOff > Offset) Offset = FixedOff;
  }

  // First assign frame offsets to stack objects that are used to spill
  // callee saved registers.
  if (StackGrowsDown) {
    for (unsigned i = MinCSFrameIndex; i <= MaxCSFrameIndex; ++i) {
      // If the stack grows down, we need to add the size to find the lowest
      // address of the object.
      Offset += MFI.getObjectSize(i);

      unsigned Align = MFI.getObjectAlignment(i);
      // Adjust to alignment boundary
      Offset = alignTo(Offset, Align, Skew);

      DEBUG(dbgs() << "alloc FI(" << i << ") at SP[" << -Offset << "]\n");
      MFI.setObjectOffset(i, -Offset);        // Set the computed offset
    }
  } else if (MaxCSFrameIndex >= MinCSFrameIndex) {
    // Be careful about underflow in comparisons against MinCSFrameIndex.
    for (unsigned i = MaxCSFrameIndex; i != MinCSFrameIndex - 1; --i) {
      if (MFI.isDeadObjectIndex(i))
        continue;

      unsigned Align = MFI.getObjectAlignment(i);
      // Adjust to alignment boundary
      Offset = alignTo(Offset, Align, Skew);

      DEBUG(dbgs() << "alloc FI(" << i << ") at SP[" << Offset << "]\n");
      MFI.setObjectOffset(i, Offset);
      Offset += MFI.getObjectSize(i);
    }
  }

  // FixedCSEnd is the stack offset to the end of the fixed and callee-save
  // stack area.
  int64_t FixedCSEnd = Offset;
  unsigned MaxAlign = MFI.getMaxAlignment();

  // Make sure the special register scavenging spill slot is closest to the
  // incoming stack pointer if a frame pointer is required and is closer
  // to the incoming rather than the final stack pointer.
  const TargetRegisterInfo *RegInfo = Fn.getSubtarget().getRegisterInfo();
  bool EarlyScavengingSlots = (TFI.hasFP(Fn) &&
                               TFI.isFPCloseToIncomingSP() &&
                               RegInfo->useFPForScavengingIndex(Fn) &&
                               !RegInfo->needsStackRealignment(Fn));
  if (RS && EarlyScavengingSlots) {
    SmallVector<int, 2> SFIs;
    RS->getScavengingFrameIndices(SFIs);
    for (SmallVectorImpl<int>::iterator I = SFIs.begin(),
           IE = SFIs.end(); I != IE; ++I)
      AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign, Skew);
  }

  // FIXME: Once this is working, then enable flag will change to a target
  // check for whether the frame is large enough to want to use virtual
  // frame index registers. Functions which don't want/need this optimization
  // will continue to use the existing code path.
  if (MFI.getUseLocalStackAllocationBlock()) {
    unsigned Align = MFI.getLocalFrameMaxAlign();

    // Adjust to alignment boundary.
    Offset = alignTo(Offset, Align, Skew);

    DEBUG(dbgs() << "Local frame base offset: " << Offset << "\n");

    // Resolve offsets for objects in the local block.
    for (unsigned i = 0, e = MFI.getLocalFrameObjectCount(); i != e; ++i) {
      std::pair<int, int64_t> Entry = MFI.getLocalFrameObjectMap(i);
      int64_t FIOffset = (StackGrowsDown ? -Offset : Offset) + Entry.second;
      DEBUG(dbgs() << "alloc FI(" << Entry.first << ") at SP[" <<
            FIOffset << "]\n");
      MFI.setObjectOffset(Entry.first, FIOffset);
    }
    // Allocate the local block
    Offset += MFI.getLocalFrameSize();

    MaxAlign = std::max(Align, MaxAlign);
  }

  // Retrieve the Exception Handler registration node.
  int EHRegNodeFrameIndex = INT_MAX;
  if (const WinEHFuncInfo *FuncInfo = Fn.getWinEHFuncInfo())
    EHRegNodeFrameIndex = FuncInfo->EHRegNodeFrameIndex;

  // Make sure that the stack protector comes before the local variables on the
  // stack.
  SmallSet<int, 16> ProtectedObjs;
  if (MFI.getStackProtectorIndex() >= 0) {
    StackObjSet LargeArrayObjs;
    StackObjSet SmallArrayObjs;
    StackObjSet AddrOfObjs;

    AdjustStackOffset(MFI, MFI.getStackProtectorIndex(), StackGrowsDown,
                      Offset, MaxAlign, Skew);

    // Assign large stack objects first.
    for (unsigned i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) {
      if (MFI.isObjectPreAllocated(i) &&
          MFI.getUseLocalStackAllocationBlock())
        continue;
      if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex)
        continue;
      if (RS && RS->isScavengingFrameIndex((int)i))
        continue;
      if (MFI.isDeadObjectIndex(i))
        continue;
      if (MFI.getStackProtectorIndex() == (int)i ||
          EHRegNodeFrameIndex == (int)i)
        continue;

      switch (SP->getSSPLayout(MFI.getObjectAllocation(i))) {
      case StackProtector::SSPLK_None:
        continue;
      case StackProtector::SSPLK_SmallArray:
        SmallArrayObjs.insert(i);
        continue;
      case StackProtector::SSPLK_AddrOf:
        AddrOfObjs.insert(i);
        continue;
      case StackProtector::SSPLK_LargeArray:
        LargeArrayObjs.insert(i);
        continue;
      }
      llvm_unreachable("Unexpected SSPLayoutKind.");
    }

    AssignProtectedObjSet(LargeArrayObjs, ProtectedObjs, MFI, StackGrowsDown,
                          Offset, MaxAlign, Skew);
    AssignProtectedObjSet(SmallArrayObjs, ProtectedObjs, MFI, StackGrowsDown,
                          Offset, MaxAlign, Skew);
    AssignProtectedObjSet(AddrOfObjs, ProtectedObjs, MFI, StackGrowsDown,
                          Offset, MaxAlign, Skew);
  }

  SmallVector<int, 8> ObjectsToAllocate;

  // Then prepare to assign frame offsets to stack objects that are not used to
  // spill callee saved registers.
  for (unsigned i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) {
    if (MFI.isObjectPreAllocated(i) && MFI.getUseLocalStackAllocationBlock())
      continue;
    if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex)
      continue;
    if (RS && RS->isScavengingFrameIndex((int)i))
      continue;
    if (MFI.isDeadObjectIndex(i))
      continue;
    if (MFI.getStackProtectorIndex() == (int)i ||
        EHRegNodeFrameIndex == (int)i)
      continue;
    if (ProtectedObjs.count(i))
      continue;

    // Add the objects that we need to allocate to our working set.
    ObjectsToAllocate.push_back(i);
  }

  // Allocate the EH registration node first if one is present.
  if (EHRegNodeFrameIndex != INT_MAX)
    AdjustStackOffset(MFI, EHRegNodeFrameIndex, StackGrowsDown, Offset,
                      MaxAlign, Skew);

  // Give the targets a chance to order the objects the way they like it.
  if (Fn.getTarget().getOptLevel() != CodeGenOpt::None &&
      Fn.getTarget().Options.StackSymbolOrdering)
    TFI.orderFrameObjects(Fn, ObjectsToAllocate);

  // Keep track of which bytes in the fixed and callee-save range are used so we
  // can reuse the holes when allocating later stack objects.  Only do this if
  // the stack protector isn't being used, the target requests it, and we're
  // optimizing.
  BitVector StackBytesFree;
  if (!ObjectsToAllocate.empty() &&
      Fn.getTarget().getOptLevel() != CodeGenOpt::None &&
      MFI.getStackProtectorIndex() < 0 && TFI.enableStackSlotScavenging(Fn))
    computeFreeStackSlots(MFI, StackGrowsDown, MinCSFrameIndex, MaxCSFrameIndex,
                          FixedCSEnd, StackBytesFree);

  // Now walk the objects and actually assign base offsets to them.
  for (auto &Object : ObjectsToAllocate)
    if (!scavengeStackSlot(MFI, Object, StackGrowsDown, MaxAlign,
                           StackBytesFree))
      AdjustStackOffset(MFI, Object, StackGrowsDown, Offset, MaxAlign, Skew);

  // Make sure the special register scavenging spill slot is closest to the
  // stack pointer.
  if (RS && !EarlyScavengingSlots) {
    SmallVector<int, 2> SFIs;
    RS->getScavengingFrameIndices(SFIs);
    for (SmallVectorImpl<int>::iterator I = SFIs.begin(),
           IE = SFIs.end(); I != IE; ++I)
      AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign, Skew);
  }

  if (!TFI.targetHandlesStackFrameRounding()) {
    // If we have reserved argument space for call sites in the function
    // immediately on entry to the current function, count it as part of the
    // overall stack size.
    if (MFI.adjustsStack() && TFI.hasReservedCallFrame(Fn))
      Offset += MFI.getMaxCallFrameSize();

    // Round up the size to a multiple of the alignment.  If the function has
    // any calls or alloca's, align to the target's StackAlignment value to
    // ensure that the callee's frame or the alloca data is suitably aligned;
    // otherwise, for leaf functions, align to the TransientStackAlignment
    // value.
    unsigned StackAlign;
    if (MFI.adjustsStack() || MFI.hasVarSizedObjects() ||
        (RegInfo->needsStackRealignment(Fn) && MFI.getObjectIndexEnd() != 0))
      StackAlign = TFI.getStackAlignment();
    else
      StackAlign = TFI.getTransientStackAlignment();

    // If the frame pointer is eliminated, all frame offsets will be relative to
    // SP not FP. Align to MaxAlign so this works.
    StackAlign = std::max(StackAlign, MaxAlign);
    Offset = alignTo(Offset, StackAlign, Skew);
  }

  // Update frame info to pretend that this is part of the stack...
  int64_t StackSize = Offset - LocalAreaOffset;
  MFI.setStackSize(StackSize);
  NumBytesStackSpace += StackSize;
}
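
Everything in this function leans on one rounding primitive: alignTo rounds an offset up to the next alignment boundary, optionally skewed so that (result - Skew) is a multiple of Align; without a skew it reduces to the classic (Offset + Align - 1) / Align * Align used in the older version of this function. A standalone sketch consistent with how it is used above (not necessarily the library's exact code):

#include <cassert>
#include <cstdint>
#include <iostream>

uint64_t alignToSketch(uint64_t Offset, uint64_t Align, uint64_t Skew = 0) {
  assert(Align != 0 && "alignment must be nonzero");
  Skew %= Align;  // a skew at or past the alignment wraps around
  return (Offset + Align - 1 - Skew) / Align * Align + Skew;
}

int main() {
  std::cout << alignToSketch(13, 8) << '\n';     // 16
  std::cout << alignToSketch(13, 8, 4) << '\n';  // 20: 20 - 4 is 8-aligned
}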
Example no. 17
MachineBasicBlock *
MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
  // Splitting the critical edge to a landing pad block is non-trivial. Don't do
  // it in this generic function.
  if (Succ->isLandingPad())
    return nullptr;

  MachineFunction *MF = getParent();
  DebugLoc dl;  // FIXME: this is nowhere

  // Performance might be harmed on HW that implements branching using exec mask
  // where both sides of the branches are always executed.
  if (MF->getTarget().requiresStructuredCFG())
    return nullptr;

  // We may need to update this block's terminator, but we can't do that if
  // AnalyzeBranch fails. If this block uses a jump table, we won't touch it.
  const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
  SmallVector<MachineOperand, 4> Cond;
  if (TII->AnalyzeBranch(*this, TBB, FBB, Cond))
    return nullptr;

  // Avoid bugpoint weirdness: A block may end with a conditional branch but
  // jumps to the same MBB in either case. We have duplicate CFG edges in that
  // case that we can't handle. Since this never happens in properly optimized
  // code, just skip those edges.
  if (TBB && TBB == FBB) {
    DEBUG(dbgs() << "Won't split critical edge after degenerate BB#"
                 << getNumber() << '\n');
    return nullptr;
  }

  MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock();
  MF->insert(std::next(MachineFunction::iterator(this)), NMBB);
  DEBUG(dbgs() << "Splitting critical edge:"
        " BB#" << getNumber()
        << " -- BB#" << NMBB->getNumber()
        << " -- BB#" << Succ->getNumber() << '\n');

  LiveIntervals *LIS = P->getAnalysisIfAvailable<LiveIntervals>();
  SlotIndexes *Indexes = P->getAnalysisIfAvailable<SlotIndexes>();
  if (LIS)
    LIS->insertMBBInMaps(NMBB);
  else if (Indexes)
    Indexes->insertMBBInMaps(NMBB);

  // On some targets like Mips, branches may kill virtual registers. Make sure
  // that LiveVariables is properly updated after updateTerminator replaces the
  // terminators.
  LiveVariables *LV = P->getAnalysisIfAvailable<LiveVariables>();

  // Collect a list of virtual registers killed by the terminators.
  SmallVector<unsigned, 4> KilledRegs;
  if (LV)
    for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
         I != E; ++I) {
      MachineInstr *MI = I;
      for (MachineInstr::mop_iterator OI = MI->operands_begin(),
           OE = MI->operands_end(); OI != OE; ++OI) {
        if (!OI->isReg() || OI->getReg() == 0 ||
            !OI->isUse() || !OI->isKill() || OI->isUndef())
          continue;
        unsigned Reg = OI->getReg();
        if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
            LV->getVarInfo(Reg).removeKill(MI)) {
          KilledRegs.push_back(Reg);
          DEBUG(dbgs() << "Removing terminator kill: " << *MI);
          OI->setIsKill(false);
        }
      }
    }

  SmallVector<unsigned, 4> UsedRegs;
  if (LIS) {
    for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
         I != E; ++I) {
      MachineInstr *MI = I;

      for (MachineInstr::mop_iterator OI = MI->operands_begin(),
           OE = MI->operands_end(); OI != OE; ++OI) {
        if (!OI->isReg() || OI->getReg() == 0)
          continue;

        unsigned Reg = OI->getReg();
        if (std::find(UsedRegs.begin(), UsedRegs.end(), Reg) == UsedRegs.end())
          UsedRegs.push_back(Reg);
      }
    }
  }

  ReplaceUsesOfBlockWith(Succ, NMBB);

  // If updateTerminator() removes instructions, we need to remove them from
  // SlotIndexes.
  SmallVector<MachineInstr*, 4> Terminators;
  if (Indexes) {
    for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
         I != E; ++I)
      Terminators.push_back(I);
  }

  updateTerminator();

  if (Indexes) {
    SmallVector<MachineInstr*, 4> NewTerminators;
    for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
         I != E; ++I)
      NewTerminators.push_back(I);

    for (SmallVectorImpl<MachineInstr*>::iterator I = Terminators.begin(),
        E = Terminators.end(); I != E; ++I) {
      if (std::find(NewTerminators.begin(), NewTerminators.end(), *I) ==
          NewTerminators.end())
       Indexes->removeMachineInstrFromMaps(*I);
    }
  }

  // Insert unconditional "jump Succ" instruction in NMBB if necessary.
  NMBB->addSuccessor(Succ);
  if (!NMBB->isLayoutSuccessor(Succ)) {
    Cond.clear();
    MF->getTarget().getInstrInfo()->InsertBranch(*NMBB, Succ, nullptr, Cond, dl);

    if (Indexes) {
      for (instr_iterator I = NMBB->instr_begin(), E = NMBB->instr_end();
           I != E; ++I) {
        // Some instructions may have been moved to NMBB by updateTerminator(),
        // so we first remove any instruction that already has an index.
        if (Indexes->hasIndex(I))
          Indexes->removeMachineInstrFromMaps(I);
        Indexes->insertMachineInstrInMaps(I);
      }
    }
  }

  // Fix PHI nodes in Succ so they refer to NMBB instead of this
  for (MachineBasicBlock::instr_iterator
         i = Succ->instr_begin(),e = Succ->instr_end();
       i != e && i->isPHI(); ++i)
    for (unsigned ni = 1, ne = i->getNumOperands(); ni != ne; ni += 2)
      if (i->getOperand(ni+1).getMBB() == this)
        i->getOperand(ni+1).setMBB(NMBB);

  // Inherit live-ins from the successor
  for (MachineBasicBlock::livein_iterator I = Succ->livein_begin(),
         E = Succ->livein_end(); I != E; ++I)
    NMBB->addLiveIn(*I);

  // Update LiveVariables.
  const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
  if (LV) {
    // Restore kills of virtual registers that were killed by the terminators.
    while (!KilledRegs.empty()) {
      unsigned Reg = KilledRegs.pop_back_val();
      for (instr_iterator I = instr_end(), E = instr_begin(); I != E;) {
        if (!(--I)->addRegisterKilled(Reg, TRI, /* addIfNotFound= */ false))
          continue;
        if (TargetRegisterInfo::isVirtualRegister(Reg))
          LV->getVarInfo(Reg).Kills.push_back(I);
        DEBUG(dbgs() << "Restored terminator kill: " << *I);
        break;
      }
    }
    // Update relevant live-through information.
    LV->addNewBlock(NMBB, this, Succ);
  }

  if (LIS) {
    // After splitting the edge and updating SlotIndexes, live intervals may be
    // in one of two situations, depending on whether this block was the last in
    // the function. If the original block was the last in the function, all live
    // intervals will end prior to the beginning of the new split block. If the
    // original block was not at the end of the function, all live intervals will
    // extend to the end of the new split block.

    bool isLastMBB =
      std::next(MachineFunction::iterator(NMBB)) == getParent()->end();

    SlotIndex StartIndex = Indexes->getMBBEndIdx(this);
    SlotIndex PrevIndex = StartIndex.getPrevSlot();
    SlotIndex EndIndex = Indexes->getMBBEndIdx(NMBB);

    // Find the registers used from NMBB in PHIs in Succ.
    SmallSet<unsigned, 8> PHISrcRegs;
    for (MachineBasicBlock::instr_iterator
         I = Succ->instr_begin(), E = Succ->instr_end();
         I != E && I->isPHI(); ++I) {
      for (unsigned ni = 1, ne = I->getNumOperands(); ni != ne; ni += 2) {
        if (I->getOperand(ni+1).getMBB() == NMBB) {
          MachineOperand &MO = I->getOperand(ni);
          unsigned Reg = MO.getReg();
          PHISrcRegs.insert(Reg);
          if (MO.isUndef())
            continue;

          LiveInterval &LI = LIS->getInterval(Reg);
          VNInfo *VNI = LI.getVNInfoAt(PrevIndex);
          assert(VNI && "PHI sources should be live out of their predecessors.");
          LI.addSegment(LiveInterval::Segment(StartIndex, EndIndex, VNI));
        }
      }
    }

    MachineRegisterInfo *MRI = &getParent()->getRegInfo();
    for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
      unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
      if (PHISrcRegs.count(Reg) || !LIS->hasInterval(Reg))
        continue;

      LiveInterval &LI = LIS->getInterval(Reg);
      if (!LI.liveAt(PrevIndex))
        continue;

      bool isLiveOut = LI.liveAt(LIS->getMBBStartIdx(Succ));
      if (isLiveOut && isLastMBB) {
        VNInfo *VNI = LI.getVNInfoAt(PrevIndex);
        assert(VNI && "LiveInterval should have VNInfo where it is live.");
        LI.addSegment(LiveInterval::Segment(StartIndex, EndIndex, VNI));
      } else if (!isLiveOut && !isLastMBB) {
        LI.removeSegment(StartIndex, EndIndex);
      }
    }

    // Update all intervals for registers whose uses may have been modified by
    // updateTerminator().
    LIS->repairIntervalsInRange(this, getFirstTerminator(), end(), UsedRegs);
  }

  if (MachineDominatorTree *MDT =
      P->getAnalysisIfAvailable<MachineDominatorTree>()) {
    // Update dominator information.
    MachineDomTreeNode *SuccDTNode = MDT->getNode(Succ);

    bool IsNewIDom = true;
    for (const_pred_iterator PI = Succ->pred_begin(), E = Succ->pred_end();
         PI != E; ++PI) {
      MachineBasicBlock *PredBB = *PI;
      if (PredBB == NMBB)
        continue;
      if (!MDT->dominates(SuccDTNode, MDT->getNode(PredBB))) {
        IsNewIDom = false;
        break;
      }
    }

    // We know "this" dominates the newly created basic block.
    MachineDomTreeNode *NewDTNode = MDT->addNewBlock(NMBB, this);

    // If all the other predecessors of "Succ" are dominated by "Succ" itself
    // then the new block is the new immediate dominator of "Succ". Otherwise,
    // the new block doesn't dominate anything.
    if (IsNewIDom)
      MDT->changeImmediateDominator(SuccDTNode, NewDTNode);
  }

  if (MachineLoopInfo *MLI = P->getAnalysisIfAvailable<MachineLoopInfo>())
    if (MachineLoop *TIL = MLI->getLoopFor(this)) {
      // If one or the other blocks were not in a loop, the new block is not
      // either, and thus LI doesn't need to be updated.
      if (MachineLoop *DestLoop = MLI->getLoopFor(Succ)) {
        if (TIL == DestLoop) {
          // Both in the same loop, the NMBB joins loop.
          DestLoop->addBasicBlockToLoop(NMBB, MLI->getBase());
        } else if (TIL->contains(DestLoop)) {
          // Edge from an outer loop to an inner loop.  Add to the outer loop.
          TIL->addBasicBlockToLoop(NMBB, MLI->getBase());
        } else if (DestLoop->contains(TIL)) {
          // Edge from an inner loop to an outer loop.  Add to the outer loop.
          DestLoop->addBasicBlockToLoop(NMBB, MLI->getBase());
        } else {
          // Edge from two loops with no containment relation.  Because these
          // are natural loops, we know that the destination block must be the
          // header of its loop (adding a branch into a loop elsewhere would
          // create an irreducible loop).
          assert(DestLoop->getHeader() == Succ &&
                 "Should not create irreducible loops!");
          if (MachineLoop *P = DestLoop->getParentLoop())
            P->addBasicBlockToLoop(NMBB, MLI->getBase());
        }
      }
    }

  return NMBB;
}
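
Structurally, splitting the critical edge amounts to one CFG rewrite: a fresh block NMBB is inserted on the this -> Succ edge, this is re-pointed at NMBB, and NMBB branches to Succ; everything else in the function is analysis repair. A standalone sketch of that rewrite on a toy adjacency-list CFG (blocks are plain ints):

#include <algorithm>
#include <iostream>
#include <map>
#include <vector>

int main() {
  std::map<int, std::vector<int>> Succs = {{0, {1, 2}}, {1, {2}}, {2, {}}};
  int A = 0, B = 2, N = 3;  // split the critical edge 0 -> 2

  auto &AS = Succs[A];
  std::replace(AS.begin(), AS.end(), B, N);  // A now targets N instead of B
  Succs[N] = {B};                            // N jumps unconditionally to B

  for (auto &KV : Succs) {
    std::cout << KV.first << " ->";
    for (int S : KV.second)
      std::cout << ' ' << S;
    std::cout << '\n';
  }
}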
Example no. 18
/// Walk the specified region of the CFG and hoist loop invariants out to the
/// preheader.
void MachineLICM::HoistRegionPostRA() {
  MachineBasicBlock *Preheader = getCurPreheader();
  if (!Preheader)
    return;

  unsigned NumRegs = TRI->getNumRegs();
  BitVector PhysRegDefs(NumRegs); // Regs defined once in the loop.
  BitVector PhysRegClobbers(NumRegs); // Regs defined more than once.

  SmallVector<CandidateInfo, 32> Candidates;
  SmallSet<int, 32> StoredFIs;

  // Walk the entire region, count number of defs for each register, and
  // collect potential LICM candidates.
  const std::vector<MachineBasicBlock *> &Blocks = CurLoop->getBlocks();
  for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
    MachineBasicBlock *BB = Blocks[i];

    // If the header of the loop containing this basic block is a landing pad,
    // then don't try to hoist instructions out of this loop.
    const MachineLoop *ML = MLI->getLoopFor(BB);
    if (ML && ML->getHeader()->isEHPad()) continue;

    // Conservatively treat live-in's as an external def.
    // FIXME: That means a reload that's reused in successor block(s) will not
    // be LICM'ed.
    for (const auto &LI : BB->liveins()) {
      for (MCRegAliasIterator AI(LI.PhysReg, TRI, true); AI.isValid(); ++AI)
        PhysRegDefs.set(*AI);
    }

    SpeculationState = SpeculateUnknown;
    for (MachineBasicBlock::iterator
           MII = BB->begin(), E = BB->end(); MII != E; ++MII) {
      MachineInstr *MI = &*MII;
      ProcessMI(MI, PhysRegDefs, PhysRegClobbers, StoredFIs, Candidates);
    }
  }

  // Gather the registers read / clobbered by the terminator.
  BitVector TermRegs(NumRegs);
  MachineBasicBlock::iterator TI = Preheader->getFirstTerminator();
  if (TI != Preheader->end()) {
    for (unsigned i = 0, e = TI->getNumOperands(); i != e; ++i) {
      const MachineOperand &MO = TI->getOperand(i);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (!Reg)
        continue;
      for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
        TermRegs.set(*AI);
    }
  }

  // Now evaluate whether the potential candidates qualify.
  // 1. Check if the candidate defined register is defined by another
  //    instruction in the loop.
  // 2. If the candidate is a load from stack slot (always true for now),
  //    check if the slot is stored anywhere in the loop.
  // 3. Make sure the candidate's def does not clobber registers read by
  //    the terminator, and that its def is not clobbered by the
  //    terminator.
  for (unsigned i = 0, e = Candidates.size(); i != e; ++i) {
    if (Candidates[i].FI != INT_MIN &&
        StoredFIs.count(Candidates[i].FI))
      continue;

    unsigned Def = Candidates[i].Def;
    if (!PhysRegClobbers.test(Def) && !TermRegs.test(Def)) {
      bool Safe = true;
      MachineInstr *MI = Candidates[i].MI;
      for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) {
        const MachineOperand &MO = MI->getOperand(j);
        if (!MO.isReg() || MO.isDef() || !MO.getReg())
          continue;
        unsigned Reg = MO.getReg();
        if (PhysRegDefs.test(Reg) ||
            PhysRegClobbers.test(Reg)) {
          // If it's using a non-loop-invariant register, then it's obviously
          // not safe to hoist.
          Safe = false;
          break;
        }
      }
      if (Safe)
        HoistPostRA(MI, Candidates[i].Def);
    }
  }
}
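
The candidate filter relies on a simple counting trick with two bitvectors: the first def of a register in the loop sets PhysRegDefs, and any later def promotes it into PhysRegClobbers, which disqualifies it for hoisting. A standalone sketch of that bookkeeping:

#include <bitset>
#include <iostream>
#include <vector>

int main() {
  const unsigned NumRegs = 16;
  std::bitset<NumRegs> PhysRegDefs, PhysRegClobbers;

  std::vector<unsigned> DefsSeen = {3, 5, 3};  // reg 3 is defined twice
  for (unsigned Reg : DefsSeen) {
    if (PhysRegDefs.test(Reg))
      PhysRegClobbers.set(Reg);  // defined more than once in the loop
    PhysRegDefs.set(Reg);
  }

  for (unsigned Reg : {3u, 5u})
    std::cout << "reg " << Reg
              << (PhysRegClobbers.test(Reg) ? ": clobbered, not a candidate\n"
                                            : ": single def\n");
}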
Example no. 19
MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ,
                                                        Pass &P) {
  if (!canSplitCriticalEdge(Succ))
    return nullptr;

  MachineFunction *MF = getParent();
  DebugLoc DL;  // FIXME: this is nowhere

  MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock();
  MF->insert(std::next(MachineFunction::iterator(this)), NMBB);
  DEBUG(dbgs() << "Splitting critical edge:"
        " BB#" << getNumber()
        << " -- BB#" << NMBB->getNumber()
        << " -- BB#" << Succ->getNumber() << '\n');

  LiveIntervals *LIS = P.getAnalysisIfAvailable<LiveIntervals>();
  SlotIndexes *Indexes = P.getAnalysisIfAvailable<SlotIndexes>();
  if (LIS)
    LIS->insertMBBInMaps(NMBB);
  else if (Indexes)
    Indexes->insertMBBInMaps(NMBB);

  // On some targets like Mips, branches may kill virtual registers. Make sure
  // that LiveVariables is properly updated after updateTerminator replaces the
  // terminators.
  LiveVariables *LV = P.getAnalysisIfAvailable<LiveVariables>();

  // Collect a list of virtual registers killed by the terminators.
  SmallVector<unsigned, 4> KilledRegs;
  if (LV)
    for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
         I != E; ++I) {
      MachineInstr *MI = &*I;
      for (MachineInstr::mop_iterator OI = MI->operands_begin(),
           OE = MI->operands_end(); OI != OE; ++OI) {
        if (!OI->isReg() || OI->getReg() == 0 ||
            !OI->isUse() || !OI->isKill() || OI->isUndef())
          continue;
        unsigned Reg = OI->getReg();
        if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
            LV->getVarInfo(Reg).removeKill(*MI)) {
          KilledRegs.push_back(Reg);
          DEBUG(dbgs() << "Removing terminator kill: " << *MI);
          OI->setIsKill(false);
        }
      }
    }

  SmallVector<unsigned, 4> UsedRegs;
  if (LIS) {
    for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
         I != E; ++I) {
      MachineInstr *MI = &*I;

      for (MachineInstr::mop_iterator OI = MI->operands_begin(),
           OE = MI->operands_end(); OI != OE; ++OI) {
        if (!OI->isReg() || OI->getReg() == 0)
          continue;

        unsigned Reg = OI->getReg();
        if (!is_contained(UsedRegs, Reg))
          UsedRegs.push_back(Reg);
      }
    }
  }

  ReplaceUsesOfBlockWith(Succ, NMBB);

  // If updateTerminator() removes instructions, we need to remove them from
  // SlotIndexes.
  SmallVector<MachineInstr*, 4> Terminators;
  if (Indexes) {
    for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
         I != E; ++I)
      Terminators.push_back(&*I);
  }

  updateTerminator();

  if (Indexes) {
    SmallVector<MachineInstr*, 4> NewTerminators;
    for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
         I != E; ++I)
      NewTerminators.push_back(&*I);

    for (SmallVectorImpl<MachineInstr*>::iterator I = Terminators.begin(),
        E = Terminators.end(); I != E; ++I) {
      if (!is_contained(NewTerminators, *I))
        Indexes->removeMachineInstrFromMaps(**I);
    }
  }

  // Insert unconditional "jump Succ" instruction in NMBB if necessary.
  NMBB->addSuccessor(Succ);
  if (!NMBB->isLayoutSuccessor(Succ)) {
    SmallVector<MachineOperand, 4> Cond;
    const TargetInstrInfo *TII = getParent()->getSubtarget().getInstrInfo();
    TII->insertBranch(*NMBB, Succ, nullptr, Cond, DL);

    if (Indexes) {
      for (MachineInstr &MI : NMBB->instrs()) {
        // Some instructions may have been moved to NMBB by updateTerminator(),
        // so we first remove any instruction that already has an index.
        if (Indexes->hasIndex(MI))
          Indexes->removeMachineInstrFromMaps(MI);
        Indexes->insertMachineInstrInMaps(MI);
      }
    }
  }

  // Fix PHI nodes in Succ so they refer to NMBB instead of this
  for (MachineBasicBlock::instr_iterator
         i = Succ->instr_begin(),e = Succ->instr_end();
       i != e && i->isPHI(); ++i)
    for (unsigned ni = 1, ne = i->getNumOperands(); ni != ne; ni += 2)
      if (i->getOperand(ni+1).getMBB() == this)
        i->getOperand(ni+1).setMBB(NMBB);

  // Inherit live-ins from the successor
  for (const auto &LI : Succ->liveins())
    NMBB->addLiveIn(LI);

  // Update LiveVariables.
  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
  if (LV) {
    // Restore kills of virtual registers that were killed by the terminators.
    while (!KilledRegs.empty()) {
      unsigned Reg = KilledRegs.pop_back_val();
      for (instr_iterator I = instr_end(), E = instr_begin(); I != E;) {
        if (!(--I)->addRegisterKilled(Reg, TRI, /* addIfNotFound= */ false))
          continue;
        if (TargetRegisterInfo::isVirtualRegister(Reg))
          LV->getVarInfo(Reg).Kills.push_back(&*I);
        DEBUG(dbgs() << "Restored terminator kill: " << *I);
        break;
      }
    }
    // Update relevant live-through information.
    LV->addNewBlock(NMBB, this, Succ);
  }

  if (LIS) {
    // After splitting the edge and updating SlotIndexes, live intervals may be
    // in one of two situations, depending on whether this block was the last in
    // the function. If the original block was the last in the function, all
    // live intervals will end prior to the beginning of the new split block. If
    // the original block was not at the end of the function, all live intervals
    // will extend to the end of the new split block.

    bool isLastMBB =
      std::next(MachineFunction::iterator(NMBB)) == getParent()->end();

    SlotIndex StartIndex = Indexes->getMBBEndIdx(this);
    SlotIndex PrevIndex = StartIndex.getPrevSlot();
    SlotIndex EndIndex = Indexes->getMBBEndIdx(NMBB);

    // Find the registers used from NMBB in PHIs in Succ.
    SmallSet<unsigned, 8> PHISrcRegs;
    for (MachineBasicBlock::instr_iterator
         I = Succ->instr_begin(), E = Succ->instr_end();
         I != E && I->isPHI(); ++I) {
      for (unsigned ni = 1, ne = I->getNumOperands(); ni != ne; ni += 2) {
        if (I->getOperand(ni+1).getMBB() == NMBB) {
          MachineOperand &MO = I->getOperand(ni);
          unsigned Reg = MO.getReg();
          PHISrcRegs.insert(Reg);
          if (MO.isUndef())
            continue;

          LiveInterval &LI = LIS->getInterval(Reg);
          VNInfo *VNI = LI.getVNInfoAt(PrevIndex);
          assert(VNI &&
                 "PHI sources should be live out of their predecessors.");
          LI.addSegment(LiveInterval::Segment(StartIndex, EndIndex, VNI));
        }
      }
    }

    MachineRegisterInfo *MRI = &getParent()->getRegInfo();
    for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
      unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
      if (PHISrcRegs.count(Reg) || !LIS->hasInterval(Reg))
        continue;

      LiveInterval &LI = LIS->getInterval(Reg);
      if (!LI.liveAt(PrevIndex))
        continue;

      bool isLiveOut = LI.liveAt(LIS->getMBBStartIdx(Succ));
      if (isLiveOut && isLastMBB) {
        VNInfo *VNI = LI.getVNInfoAt(PrevIndex);
        assert(VNI && "LiveInterval should have VNInfo where it is live.");
        LI.addSegment(LiveInterval::Segment(StartIndex, EndIndex, VNI));
      } else if (!isLiveOut && !isLastMBB) {
        LI.removeSegment(StartIndex, EndIndex);
      }
    }

    // Update all intervals for registers whose uses may have been modified by
    // updateTerminator().
    LIS->repairIntervalsInRange(this, getFirstTerminator(), end(), UsedRegs);
  }

  if (MachineDominatorTree *MDT =
          P.getAnalysisIfAvailable<MachineDominatorTree>())
    MDT->recordSplitCriticalEdge(this, Succ, NMBB);

  if (MachineLoopInfo *MLI = P.getAnalysisIfAvailable<MachineLoopInfo>())
    if (MachineLoop *TIL = MLI->getLoopFor(this)) {
      // If one or the other blocks were not in a loop, the new block is not
      // either, and thus LI doesn't need to be updated.
      if (MachineLoop *DestLoop = MLI->getLoopFor(Succ)) {
        if (TIL == DestLoop) {
          // Both in the same loop, the NMBB joins loop.
          DestLoop->addBasicBlockToLoop(NMBB, MLI->getBase());
        } else if (TIL->contains(DestLoop)) {
          // Edge from an outer loop to an inner loop.  Add to the outer loop.
          TIL->addBasicBlockToLoop(NMBB, MLI->getBase());
        } else if (DestLoop->contains(TIL)) {
          // Edge from an inner loop to an outer loop.  Add to the outer loop.
          DestLoop->addBasicBlockToLoop(NMBB, MLI->getBase());
        } else {
          // Edge from two loops with no containment relation.  Because these
          // are natural loops, we know that the destination block must be the
          // header of its loop (adding a branch into a loop elsewhere would
          // create an irreducible loop).
          assert(DestLoop->getHeader() == Succ &&
                 "Should not create irreducible loops!");
          if (MachineLoop *P = DestLoop->getParentLoop())
            P->addBasicBlockToLoop(NMBB, MLI->getBase());
        }
      }
    }

  return NMBB;
}
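
The PHI fix-up loops above read operands in (value, predecessor-block) pairs, which is why ni advances by 2 and the block lives at ni+1. A standalone sketch of the same rewiring on a toy phi represented as explicit pairs (values and blocks are ints):

#include <iostream>
#include <utility>
#include <vector>

int main() {
  std::vector<std::pair<int, int>> Phi = {{10, 0}, {11, 4}, {12, 0}};
  int OldPred = 0, NewBlock = 7;

  for (auto &Incoming : Phi)
    if (Incoming.second == OldPred)
      Incoming.second = NewBlock;  // value unchanged, block rewired

  for (auto &Incoming : Phi)
    std::cout << "value " << Incoming.first
              << " from block " << Incoming.second << '\n';
}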
Example no. 20
bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
                                  SmallSet<unsigned,8> &PhysRefs,
                                  SmallVectorImpl<unsigned> &PhysDefs,
                                  bool &NonLocal) const {
  // For now conservatively returns false if the common subexpression is
  // not in the same basic block as the given instruction. The only exception
  // is if the common subexpression is in the sole predecessor block.
  const MachineBasicBlock *MBB = MI->getParent();
  const MachineBasicBlock *CSMBB = CSMI->getParent();

  bool CrossMBB = false;
  if (CSMBB != MBB) {
    if (MBB->pred_size() != 1 || *MBB->pred_begin() != CSMBB)
      return false;

    for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i) {
      if (MRI->isAllocatable(PhysDefs[i]) || MRI->isReserved(PhysDefs[i]))
        // Avoid extending the live range of physical registers if they are
        // allocatable or reserved.
        return false;
    }
    CrossMBB = true;
  }
  MachineBasicBlock::const_iterator I = CSMI; I = std::next(I);
  MachineBasicBlock::const_iterator E = MI;
  MachineBasicBlock::const_iterator EE = CSMBB->end();
  unsigned LookAheadLeft = LookAheadLimit;
  while (LookAheadLeft) {
    // Skip over dbg_value's.
    while (I != E && I != EE && I->isDebugValue())
      ++I;

    if (I == EE) {
      assert(CrossMBB && "Reaching end-of-MBB without finding MI?");
      (void)CrossMBB;
      CrossMBB = false;
      NonLocal = true;
      I = MBB->begin();
      EE = MBB->end();
      continue;
    }

    if (I == E)
      return true;

    for (const MachineOperand &MO : I->operands()) {
      // RegMasks go on instructions like calls that clobber lots of physregs.
      // Don't attempt to CSE across such an instruction.
      if (MO.isRegMask())
        return false;
      if (!MO.isReg() || !MO.isDef())
        continue;
      unsigned MOReg = MO.getReg();
      if (TargetRegisterInfo::isVirtualRegister(MOReg))
        continue;
      if (PhysRefs.count(MOReg))
        return false;
    }

    --LookAheadLeft;
    ++I;
  }

  return false;
}
Example no. 21
void TMS320C64X::SchedulerBase::BuildSchedGraph(AliasAnalysis *AA) {
  // Most of this code is copied from ScheduleDAGInstrs::BuildSchedGraph. We
  // needed to make small changes; this version is supposed to build a
  // single graph for the whole block under consideration. We went for a
  // modified copy of that code since modifying it directly would break the
  // postpass scheduler.

  // We'll be allocating one SUnit for each instruction, plus one for
  // the region exit node.
  SUnits.reserve(BB->size());

  // We build scheduling units by walking a block's instruction list from bottom
  // to top.

  // Remember where a generic side-effecting instruction is as we proceed.
  SUnit *BarrierChain = 0, *AliasChain = 0;

  // Memory references to specific known memory locations are tracked
  // so that they can be given more precise dependencies. We track
  // separately the known memory locations that may alias and those
  // that are known not to alias.
  std::map<const Value *, SUnit *> AliasMemDefs, NonAliasMemDefs;
  std::map<const Value *, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses;

  // Keep track of dangling debug references to registers.
  // GB: changed 3 lines
  // std::map<unsigned, std::pair<MachineInstr*, unsigned> >
  //   DanglingDebugValue(TRI->getNumRegs(),
  //   std::make_pair(static_cast<MachineInstr*>(0), 0));
  // GB: added 3 lines
  std::map<unsigned, std::pair<MachineInstr*, unsigned> > DanglingDebugValue;
  std::map<unsigned, std::vector<SUnit *> > Defs;
  std::map<unsigned, std::vector<SUnit *> > Uses;

  // Check to see if the scheduler cares about latencies.
  bool UnitLatencies = ForceUnitLatencies();

  // Ask the target if address-backscheduling is desirable, and if so how much.
  const TargetSubtarget &ST = TM.getSubtarget<TargetSubtarget>();
  unsigned SpecialAddressLatency = ST.getSpecialAddressLatency();

  // Remove any stale debug info; sometimes BuildSchedGraph is called again
  // without emitting the info from the previous call.
  DbgValueVec.clear();

  // Model data dependencies between instructions being scheduled and the
  // ExitSU.
  // AJO: don't bother
  //AddSchedBarrierDeps();

  // Walk the list of instructions, from bottom moving up.
  // GB: added 1 line
  SUnit *PreviousBarrier = NULL;
  for (MachineBasicBlock::iterator MII = InsertPos, MIE = Begin;
       MII != MIE; --MII) {
    MachineInstr *MI = prior(MII);
    // DBG_VALUE does not have SUnit's built, so just remember these for later
    // reinsertion.
    if (MI->isDebugValue()) {
      if (MI->getNumOperands()==3 && MI->getOperand(0).isReg() &&
          MI->getOperand(0).getReg())
        DanglingDebugValue[MI->getOperand(0).getReg()] =
             std::make_pair(MI, DbgValueVec.size());
      DbgValueVec.push_back(MI);
      continue;
    }
    const TargetInstrDesc &TID = MI->getDesc();
    // GB: changed 2 lines
    // assert(!TID.isTerminator() && !MI->isLabel() &&
    //        "Cannot schedule terminators or labels!");
    // GB: added 17 lines
    // For barrier instructions, build a barrier SUnit. That's a normal
    // SUnit, except that we insert extra dependences to make sure that
    // previously created SUnits will not be scheduled past this barrier.
    if (TID.isTerminator() || MI->isLabel() ||
        TII->isSchedulingBoundary(MI, BB, MF)) {
      SUnit *BarrierSU = NewSUnit(MI);
      BarrierSU->Latency = (UnitLatencies ? 1 : 0);
      SUnit *I = (PreviousBarrier ? PreviousBarrier : &SUnits.front());
      while (I != BarrierSU) {
        if (I->Preds.empty()) {
          I->addPred(SDep(BarrierSU, SDep::Order, /* latency = */ 0));
        }
        I++;
      }
      PreviousBarrier = BarrierSU;
      continue;
    }
    // Create the SUnit for this MI.
    SUnit *SU = NewSUnit(MI);
    SU->isCall = TID.isCall();
    SU->isCommutable = TID.isCommutable();

    // The Regs used by SU
    SmallSet<unsigned, 8> CurrentUses;

    // Assign the Latency field of SU using target-provided information.
    if (UnitLatencies)
      SU->Latency = 1;
    else
      ComputeLatency(SU);

    // Add register-based dependencies (data, anti, and output).
    for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) {
      const MachineOperand &MO = MI->getOperand(j);
      if (!MO.isReg()) continue;
      unsigned Reg = MO.getReg();
      if (Reg == 0) continue;

      // GB: changed 1 lines
      // assert(TRI->isPhysicalRegister(Reg)&&"Virtual register encountered!");

      if (MO.isDef() && DanglingDebugValue[Reg].first!=0) {
        SU->DbgInstrList.push_back(DanglingDebugValue[Reg].first);
        DbgValueVec[DanglingDebugValue[Reg].second] = 0;
        DanglingDebugValue[Reg] = std::make_pair((MachineInstr*)0, 0);
      }

      std::vector<SUnit *> &UseList = Uses[Reg];
      std::vector<SUnit *> &DefList = Defs[Reg];
      // Optionally add output and anti dependencies. For anti
      // dependencies we use a latency of 0 because for a multi-issue
      // target we want to allow the defining instruction to issue
      // in the same cycle as the using instruction.
      // TODO: Using a latency of 1 here for output dependencies assumes
      //       there's no cost for reusing registers.
      SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output;
      unsigned AOLatency = (Kind == SDep::Anti) ? 0 : 1;
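      // Worked example for the Kind/AOLatency choice (the walk is bottom-up,
      // so DefList holds defs from instructions *later* in program order;
      // registers are hypothetical):
      //   %a = add ...    ; current MI, MO is a def of %a
      //   %a = mul ...    ; later redef, already in DefList
      // gives the mul an Output dep on the add (latency 1). If the current
      // operand were instead a use of %a, the edge would be an Anti dep with
      // latency 0, so a multi-issue target can put both in one cycle.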
      for (unsigned i = 0, e = DefList.size(); i != e; ++i) {
        SUnit *DefSU = DefList[i];
        if (DefSU == &ExitSU)
          continue;
        if (DefSU != SU &&
            (Kind != SDep::Output || !MO.isDead() ||
             !DefSU->getInstr()->registerDefIsDead(Reg)))
          DefSU->addPred(SDep(SU, Kind, AOLatency, /*Reg=*/Reg));
      }
      // GB: added 1 line
      if (TRI->isPhysicalRegister(Reg)) {
        // GB: changed 12 lines
        for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
          std::vector<SUnit *> &DefList = Defs[*Alias];
          for (unsigned i = 0, e = DefList.size(); i != e; ++i) {
            SUnit *DefSU = DefList[i];
            // if (DefSU == &ExitSU)
            //   continue;
            if (DefSU != SU &&
                (Kind != SDep::Output || !MO.isDead() ||
                 !DefSU->getInstr()->registerDefIsDead(*Alias)))
              DefSU->addPred(SDep(SU, Kind, AOLatency, /*Reg=*/ *Alias));
          }
        }
      // GB: added 1 line
      }
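      // The physical-register guard matters because only physical registers
      // have alias sets: e.g. on x86 a def of AX must also be ordered
      // against defs of EAX/AL/AH, which occupy the same hardware bits.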

      if (MO.isDef()) {
        // Add any data dependencies.
        unsigned DataLatency = SU->Latency;
        for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
          SUnit *UseSU = UseList[i];
          if (UseSU == SU)
            continue;
          // GB: added 2 lines
          if (UseSU == &ExitSU)
            continue;
          unsigned LDataLatency = DataLatency;
          // Optionally add in a special extra latency for nodes that
          // feed addresses.
          // TODO: Do this for register aliases too.
          // TODO: Perhaps we should get rid of
          // SpecialAddressLatency and just move this into
          // adjustSchedDependency for the targets that care about it.
          if (SpecialAddressLatency != 0 && !UnitLatencies &&
              UseSU != &ExitSU) {
            MachineInstr *UseMI = UseSU->getInstr();
            const TargetInstrDesc &UseTID = UseMI->getDesc();
            int RegUseIndex = UseMI->findRegisterUseOperandIdx(Reg);
            assert(RegUseIndex >= 0 && "UseMI doesn't use register!");
            if (RegUseIndex >= 0 &&
                (UseTID.mayLoad() || UseTID.mayStore()) &&
                (unsigned)RegUseIndex < UseTID.getNumOperands() &&
                UseTID.OpInfo[RegUseIndex].isLookupPtrRegClass())
              LDataLatency += SpecialAddressLatency;
          }
          // Adjust the dependence latency using operand def/use
          // information (if any), and then allow the target to
          // perform its own adjustments.
          // Build the dep mutable so the latency hooks below can adjust it.
          SDep dep(SU, SDep::Data, LDataLatency, Reg);
          if (!UnitLatencies) {
            ComputeOperandLatency(SU, UseSU, dep);
            ST.adjustSchedDependency(SU, UseSU, dep);
          }
          UseSU->addPred(dep);
        }
        // GB: added 1 line
        if (TRI->isPhysicalRegister(Reg)) {
          // GB: changed 14 lines
          for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
            std::vector<SUnit *> &UseList = Uses[*Alias];
            for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
              SUnit *UseSU = UseList[i];
              if (UseSU == SU || UseSU == &ExitSU)
                continue;
              SDep dep(SU, SDep::Data, DataLatency, *Alias);
              if (!UnitLatencies) {
                ComputeOperandLatency(SU, UseSU, dep);
                ST.adjustSchedDependency(SU, UseSU, dep);
              }
              UseSU->addPred(dep);
            }
          }
        // GB: added 1 line
        }

        // If a def is going to wrap back around to the top of the loop,
        // backschedule it.
        if (!UnitLatencies && DefList.empty()) {
          LoopDependencies::LoopDeps::iterator I = LoopRegs.Deps.find(Reg);
          if (I != LoopRegs.Deps.end()) {
            const MachineOperand *UseMO = I->second.first;
            unsigned Count = I->second.second;
            const MachineInstr *UseMI = UseMO->getParent();
            unsigned UseMOIdx = UseMO - &UseMI->getOperand(0);
            const TargetInstrDesc &UseTID = UseMI->getDesc();
            // TODO: If we knew the total depth of the region here, we could
            // handle the case where the whole loop is inside the region but
            // is large enough that the isScheduleHigh trick isn't needed.
            if (UseMOIdx < UseTID.getNumOperands()) {
              // Currently, we only support scheduling regions consisting of
              // single basic blocks. Check to see if the instruction is in
              // the same region by checking to see if it has the same parent.
              if (UseMI->getParent() != MI->getParent()) {
                unsigned Latency = SU->Latency;
                if (UseTID.OpInfo[UseMOIdx].isLookupPtrRegClass())
                  Latency += SpecialAddressLatency;
                // This is a wild guess as to the portion of the latency which
                // will be overlapped by work done outside the current
                // scheduling region.
                Latency -= std::min(Latency, Count);
                // Add the artificial edge.
                // GB: changed 4 lines
                // ExitSU.addPred(SDep(SU, SDep::Order, Latency,
                //                     /*Reg=*/0, /*isNormalMemory=*/false,
                //                     /*isMustAlias=*/false,
                //                     /*isArtificial=*/true));
              } else if (SpecialAddressLatency > 0 &&
                         UseTID.OpInfo[UseMOIdx].isLookupPtrRegClass()) {
                // The entire loop body is within the current scheduling region
                // and the latency of this operation is assumed to be greater
                // than the latency of the loop.
                // TODO: Recursively mark data-edge predecessors as
                //       isScheduleHigh too.
                SU->isScheduleHigh = true;
              }
            }
            LoopRegs.Deps.erase(I);
          }
        }
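        // Shape of the loop-carried case handled above (hypothetical):
        //   loop:
        //     %p = ...        ; this def feeds the next iteration
        //     ... = load [%p] ; the use recorded in LoopRegs.Deps
        //     br loop
        // If the use lives outside the region, an artificial edge toward
        // ExitSU would backschedule the def (disabled here); if the whole
        // loop is inside the region and the use feeds an address, the def
        // is instead marked isScheduleHigh.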

        UseList.clear();
        // AJO: the uses have been cleared, but uses by the current SU need
        // to be restored (e.g. calls may use some of the registers that are
        // clobbered by the callee (imp-def)).
        if (CurrentUses.count(Reg))
          UseList.push_back(SU);

        if (!MO.isDead())
          DefList.clear();
        DefList.push_back(SU);
      } else {
        UseList.push_back(SU);
        CurrentUses.insert(Reg);
      }
    }

    // Add chain dependencies.
    // Chain dependencies used to enforce memory order should have
    // latency of 0 (except for true dependency of Store followed by
    // aliased Load... we estimate that with a single cycle of latency
    // assuming the hardware will bypass)
    // Note that isStoreToStackSlot and isLoadFromStackSlot are not usable
    // after stack slots are lowered to actual addresses.
    // TODO: Use an AliasAnalysis and do real alias-analysis queries, and
    // produce more precise dependence information.
#define STORE_LOAD_LATENCY 1
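    // Rough map of the three cases below (hypothetical instructions):
    //   call foo            -> barrier: ordered against all memory refs
    //   store <fi#1>        -> precise deps via AliasMemDefs/AliasMemUses
    //   load  <fi#1>        -> store->load dep costs STORE_LOAD_LATENCY
    //   load  (unknown ptr) -> PendingLoads: ordered against any
    //                          potentially aliasing store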
    unsigned TrueMemOrderLatency = 0;
    if (TID.isCall() || MI->hasUnmodeledSideEffects() ||
        (MI->hasVolatileMemoryRef() &&
         (!TID.mayLoad() || !MI->isInvariantLoad(AA)))) {
      // Be conservative with these and add dependencies on all memory
      // references, even those that are known to not alias.
      for (std::map<const Value *, SUnit *>::iterator I =
             NonAliasMemDefs.begin(), E = NonAliasMemDefs.end(); I != E; ++I) {
        I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      }
      for (std::map<const Value *, std::vector<SUnit *> >::iterator I =
             NonAliasMemUses.begin(), E = NonAliasMemUses.end(); I != E; ++I) {
        for (unsigned i = 0, e = I->second.size(); i != e; ++i)
          I->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
      }
      NonAliasMemDefs.clear();
      NonAliasMemUses.clear();
      // Add SU to the barrier chain.
      if (BarrierChain)
        BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      BarrierChain = SU;

      // fall-through
    new_alias_chain:
      // Chain all possibly aliasing memory references though SU.
      if (AliasChain)
        AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      AliasChain = SU;
      for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
        PendingLoads[k]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
      for (std::map<const Value *, SUnit *>::iterator I = AliasMemDefs.begin(),
           E = AliasMemDefs.end(); I != E; ++I) {
        I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      }
      for (std::map<const Value *, std::vector<SUnit *> >::iterator I =
           AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) {
        for (unsigned i = 0, e = I->second.size(); i != e; ++i)
          I->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
      }
      PendingLoads.clear();
      AliasMemDefs.clear();
      AliasMemUses.clear();
    } else if (TID.mayStore()) {
      bool MayAlias = true;
      TrueMemOrderLatency = STORE_LOAD_LATENCY;
      if (const Value *V = getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
        // A store to a specific PseudoSourceValue. Add precise dependencies.
        // Record the def in MemDefs, first adding a dep if there is
        // an existing def.
        std::map<const Value *, SUnit *>::iterator I =
          ((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
        std::map<const Value *, SUnit *>::iterator IE =
          ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
        if (I != IE) {
          // XXX
          // XXX AJO: aliasing stores must not be issued in the same cycle (hack
          // XXX for TMS320C64X).
          // XXX
          I->second->addPred(SDep(SU, SDep::Order,
                                  /*Latency=*/TrueMemOrderLatency,
                                  /*Reg=*/0, /*isNormalMemory=*/true));
          I->second = SU;
        } else {
          if (MayAlias)
            AliasMemDefs[V] = SU;
          else
            NonAliasMemDefs[V] = SU;
        }
        // Handle the uses in MemUses, if there are any.
        std::map<const Value *, std::vector<SUnit *> >::iterator J =
          ((MayAlias) ? AliasMemUses.find(V) : NonAliasMemUses.find(V));
        std::map<const Value *, std::vector<SUnit *> >::iterator JE =
          ((MayAlias) ? AliasMemUses.end() : NonAliasMemUses.end());
        if (J != JE) {
          for (unsigned i = 0, e = J->second.size(); i != e; ++i)
            J->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency,
                                       /*Reg=*/0, /*isNormalMemory=*/true));
          J->second.clear();
        }
        if (MayAlias) {
          // Add dependencies from all the PendingLoads, i.e. loads
          // with no underlying object.
          for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
            PendingLoads[k]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
          // Add dependence on alias chain, if needed.
          if (AliasChain)
            AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
        }
        // Add dependence on barrier chain, if needed.
        if (BarrierChain)
          BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      } else {
        // Treat all other stores conservatively.
        goto new_alias_chain;
      }

      // GB: changed 7 lines
      // if (!ExitSU.isPred(SU))
      //   // Push stores up a bit to avoid them getting in between cmp
      //   // and branches.
      //   ExitSU.addPred(SDep(SU, SDep::Order, 0,
      //                       /*Reg=*/0, /*isNormalMemory=*/false,
      //                       /*isMustAlias=*/false,
      //                       /*isArtificial=*/true));
    } else if (TID.mayLoad()) {
      bool MayAlias = true;
      TrueMemOrderLatency = 0;
      if (MI->isInvariantLoad(AA)) {
        // Invariant load, no chain dependencies needed!
      } else {
        if (const Value *V =
            getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
          // A load from a specific PseudoSourceValue. Add precise dependencies.
          std::map<const Value *, SUnit *>::iterator I =
            ((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
          std::map<const Value *, SUnit *>::iterator IE =
            ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
          if (I != IE)
            I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0, /*Reg=*/0,
                                    /*isNormalMemory=*/true));
          if (MayAlias)
            AliasMemUses[V].push_back(SU);
          else
            NonAliasMemUses[V].push_back(SU);
        } else {
          // A load with no underlying object. Depend on all
          // potentially aliasing stores.
          for (std::map<const Value *, SUnit *>::iterator I =
                 AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I)
            I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));

          PendingLoads.push_back(SU);
          MayAlias = true;
        }

        // Add dependencies on alias and barrier chains, if needed.
        if (MayAlias && AliasChain)
          AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
        if (BarrierChain)
          BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      }
    }
    // GB: added 8 lines
    // The "&& SU->Succs.empty()" below is fishy: it stops us from adding
    // barrier edges that should really be present, because having
    // successors says nothing about whether the barrier is already
    // reachable from them. It works in practice, though, so we leave
    // it untouched.
    if (PreviousBarrier != NULL && SU->Succs.empty()) {
      PreviousBarrier->addPred(SDep(SU, SDep::Order, SU->Latency));
    }
  }

  for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i) {
    Defs[i].clear();
    Uses[i].clear();
  }
  PendingLoads.clear();
}
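For context, this is how a list scheduler typically consumes the graph built above: nodes whose predecessors are all scheduled become available, and barriers and memory chains show up simply as extra Order edges. The sketch below is a self-contained, hypothetical reduction (its SUnit/SDep structs only mimic the shape of LLVM's), not code from this example:

#include <vector>

// Minimal stand-ins for LLVM's SUnit/SDep, for illustration only.
struct SUnit;
struct SDep { SUnit *Pred; unsigned Latency; };
struct SUnit {
  int Id = 0;
  std::vector<SDep> Preds;   // filled in by addPred() calls like the ones above
  bool Scheduled = false;
};

// Toy list scheduler: emit any node whose predecessors are all emitted.
// Real schedulers keep a priority queue; a linear rescan keeps this short.
// Assumes the dependence graph is acyclic, as BuildSchedGraph guarantees.
std::vector<SUnit *> schedule(std::vector<SUnit> &Units) {
  std::vector<SUnit *> Order;
  while (Order.size() != Units.size()) {
    for (SUnit &SU : Units) {
      if (SU.Scheduled)
        continue;
      bool Ready = true;
      for (const SDep &D : SU.Preds)
        if (!D.Pred->Scheduled) { Ready = false; break; }
      if (Ready) {
        SU.Scheduled = true;
        Order.push_back(&SU);
      }
    }
  }
  return Order;
}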
Example 22
bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
  if (skipOptnoneFunction(*MF.getFunction()))
    return false;

  DEBUG(dbgs() << "********** PEEPHOLE OPTIMIZER **********\n");
  DEBUG(dbgs() << "********** Function: " << MF.getName() << '\n');

  if (DisablePeephole)
    return false;

  TM  = &MF.getTarget();
  TII = TM->getInstrInfo();
  MRI = &MF.getRegInfo();
  DT  = Aggressive ? &getAnalysis<MachineDominatorTree>() : nullptr;

  bool Changed = false;

  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
    MachineBasicBlock *MBB = &*I;

    bool SeenMoveImm = false;
    SmallPtrSet<MachineInstr*, 8> LocalMIs;
    SmallSet<unsigned, 4> ImmDefRegs;
    DenseMap<unsigned, MachineInstr*> ImmDefMIs;
    SmallSet<unsigned, 16> FoldAsLoadDefCandidates;

    for (MachineBasicBlock::iterator
           MII = I->begin(), MIE = I->end(); MII != MIE; ) {
      MachineInstr *MI = &*MII;
      // We may be erasing MI below, increment MII now.
      ++MII;
      LocalMIs.insert(MI);

      // Skip debug values. They should not affect this peephole optimization.
      if (MI->isDebugValue())
        continue;

      // If there exists an instruction which belongs to the following
      // categories, we will discard the load candidates.
      if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() ||
          MI->isKill() || MI->isInlineAsm() ||
          MI->hasUnmodeledSideEffects()) {
        FoldAsLoadDefCandidates.clear();
        continue;
      }
      if (MI->mayStore() || MI->isCall())
        FoldAsLoadDefCandidates.clear();
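      // Rationale (sketch): a call or store may modify memory, so a load
      // recorded earlier can no longer be safely folded into a later
      // instruction across this point; dropping the whole candidate set is
      // the conservative choice.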

      if (((MI->isBitcast() || MI->isCopy()) && optimizeCopyOrBitcast(MI)) ||
          (MI->isCompare() && optimizeCmpInstr(MI, MBB)) ||
          (MI->isSelect() && optimizeSelect(MI))) {
        // MI is deleted.
        LocalMIs.erase(MI);
        Changed = true;
        continue;
      }
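      // E.g. optimizeCmpInstr can delete a comparison whose flags are
      // already produced by an earlier flag-setting instruction
      // (hypothetical opcodes):
      //   %r1 = SUBSrr %r2, %r3   ; subtract, sets condition flags
      //   CMPrr %r2, %r3          ; redundant compare -> erased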

      if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) {
        SeenMoveImm = true;
      } else {
        Changed |= optimizeExtInstr(MI, MBB, LocalMIs);
        // optimizeExtInstr might have created new instructions after MI
        // and before the already incremented MII. Adjust MII so that the
        // next iteration sees the new instructions.
        MII = MI;
        ++MII;
        if (SeenMoveImm)
          Changed |= foldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs);
      }

      // Check whether MI is a load candidate for folding into a later
      // instruction. If MI is not a candidate, check whether we can fold an
      // earlier load into MI.
      if (!isLoadFoldable(MI, FoldAsLoadDefCandidates) &&
          !FoldAsLoadDefCandidates.empty()) {
        const MCInstrDesc &MIDesc = MI->getDesc();
        for (unsigned i = MIDesc.getNumDefs(); i != MIDesc.getNumOperands();
             ++i) {
          const MachineOperand &MOp = MI->getOperand(i);
          if (!MOp.isReg())
            continue;
          unsigned FoldAsLoadDefReg = MOp.getReg();
          if (FoldAsLoadDefCandidates.count(FoldAsLoadDefReg)) {
            // We need to fold load after optimizeCmpInstr, since
            // optimizeCmpInstr can enable folding by converting SUB to CMP.
            // Save FoldAsLoadDefReg because optimizeLoadInstr() resets it and
            // we need it for markUsesInDebugValueAsUndef().
            unsigned FoldedReg = FoldAsLoadDefReg;
            MachineInstr *DefMI = nullptr;
            MachineInstr *FoldMI = TII->optimizeLoadInstr(MI, MRI,
                                                          FoldAsLoadDefReg,
                                                          DefMI);
            if (FoldMI) {
              // Update LocalMIs since we replaced MI with FoldMI and deleted
              // DefMI.
              DEBUG(dbgs() << "Replacing: " << *MI);
              DEBUG(dbgs() << "     With: " << *FoldMI);
              LocalMIs.erase(MI);
              LocalMIs.erase(DefMI);
              LocalMIs.insert(FoldMI);
              MI->eraseFromParent();
              DefMI->eraseFromParent();
              MRI->markUsesInDebugValueAsUndef(FoldedReg);
              FoldAsLoadDefCandidates.erase(FoldedReg);
              ++NumLoadFold;
              // MI is replaced with FoldMI.
              Changed = true;
              break;
            }
          }
        }
      }
    }
  }

  return Changed;
}
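To make the load-folding loop above concrete, here is the shape of the rewrite it performs, sketched with x86-style opcodes and made-up virtual registers (the exact folded opcode depends on the target's optimizeLoadInstr):

// Before:
//   %vreg1 = MOV32rm <fi#0>          ; DefMI: plain load, single use
//   %vreg2 = ADD32rr %vreg3, %vreg1  ; MI: uses the loaded value
// After TII->optimizeLoadInstr(MI, MRI, FoldAsLoadDefReg, DefMI):
//   %vreg2 = ADD32rm %vreg3, <fi#0>  ; FoldMI: load folded into the add
// MI and DefMI are erased, LocalMIs is updated, and any DBG_VALUEs that
// referenced %vreg1 are marked undef via markUsesInDebugValueAsUndef().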