// Insert Defs and Uses of MI into the sets RegDefs and RegUses.
void Filler::insertDefsUses(MachineBasicBlock::iterator MI,
                            SmallSet<unsigned, 32>& RegDefs,
                            SmallSet<unsigned, 32>& RegUses) {
  // If MI is a call or return, just examine the explicit non-variadic operands.
  const MCInstrDesc &MCID = MI->getDesc();
  unsigned e = MI->isCall() || MI->isReturn() ? MCID.getNumOperands() :
                                                MI->getNumOperands();

  // Add RA to RegDefs to prevent users of RA from going into delay slot.
  if (MI->isCall())
    RegDefs.insert(Mips::RA);

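  // Record each register operand of MI as a def or a use.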
  for (unsigned i = 0; i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    unsigned Reg;

    if (!MO.isReg() || !(Reg = MO.getReg()))
      continue;

    if (MO.isDef())
      RegDefs.insert(Reg);
    else if (MO.isUse())
      RegUses.insert(Reg);
  }
}
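
// Returns true if the candidate cannot be moved into the delay slot because
// of a load/store ordering conflict or a register dependence on the
// instructions it would be moved across.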
bool Filler::delayHasHazard(MachineBasicBlock::iterator candidate,
                            bool &sawLoad, bool &sawStore,
                            SmallSet<unsigned, 32> &RegDefs,
                            SmallSet<unsigned, 32> &RegUses) {
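  // Never move implicit defs or KILL pseudo-instructions into the delay slot.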
  if (candidate->isImplicitDef() || candidate->isKill())
    return true;

  // A load cannot be moved into the delay slot past a store, and a store
  // cannot be moved past a load or another store.
  if (candidate->mayLoad()) {
    if (sawStore)
      return true;
    sawLoad = true;
  }

  if (candidate->mayStore()) {
    if (sawStore)
      return true;
    sawStore = true;
    if (sawLoad)
      return true;
  }

  assert((!candidate->isCall() && !candidate->isReturn()) &&
         "Cannot put calls or returns in delay slot.");

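  // Check each register operand of the candidate against the defs and uses
  // already collected.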
  for (unsigned i = 0, e = candidate->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = candidate->getOperand(i);
    unsigned Reg;

    if (!MO.isReg() || !(Reg = MO.getReg()))
      continue; // skip

    if (MO.isDef()) {
      // Check whether Reg is defined or used before the delay slot.
      if (IsRegInSet(RegDefs, Reg) || IsRegInSet(RegUses, Reg))
        return true;
    }
    if (MO.isUse()) {
      // Check whether Reg is defined before the delay slot.
      if (IsRegInSet(RegDefs, Reg))
        return true;
    }
  }
  return false;
}
/// shouldTailDuplicate - Determine if it is profitable to duplicate this block.
bool
TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF,
                                       bool IsSimple,
                                       MachineBasicBlock &TailBB) {
  // Only duplicate blocks that end with unconditional branches.
  if (TailBB.canFallThrough())
    return false;

  // Don't try to tail-duplicate single-block loops.
  if (TailBB.isSuccessor(&TailBB))
    return false;

  // Set the limit on the cost to duplicate. When optimizing for size,
  // duplicate only one instruction, because one branch instruction can be
  // eliminated to compensate for the duplication.
  unsigned MaxDuplicateCount;
  if (TailDuplicateSize.getNumOccurrences() == 0 &&
      MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize))
    MaxDuplicateCount = 1;
  else
    MaxDuplicateCount = TailDuplicateSize;

  // If the target has hardware branch prediction that can handle indirect
  // branches, duplicating them can often make them predictable when there
  // are common paths through the code.  The limit needs to be high enough
  // to allow undoing the effects of tail merging and other optimizations
  // that rearrange the predecessors of the indirect branch.
  bool HasIndirectbr = false;
  if (!TailBB.empty())
    HasIndirectbr = TailBB.back().isIndirectBranch();

  if (HasIndirectbr && PreRegAlloc)
    MaxDuplicateCount = 20;

  // Check the instructions in the block to determine whether tail-duplication
  // is invalid or unlikely to be profitable.
  unsigned InstrCount = 0;
  for (MachineBasicBlock::iterator I = TailBB.begin(); I != TailBB.end();
       ++I) {
    // Non-duplicable things shouldn't be tail-duplicated.
    if (I->isNotDuplicable())
      return false;

    // Do not duplicate 'return' instructions if this is a pre-regalloc run.
    // A return may expand into a lot more instructions (e.g. reload of
    // callee-saved registers) after PEI.
    if (PreRegAlloc && I->isReturn())
      return false;

    // Avoid duplicating calls before register allocation. A call presents a
    // barrier to register allocation, so duplicating calls may end up
    // increasing spills.
    if (PreRegAlloc && I->isCall())
      return false;

    if (!I->isPHI() && !I->isDebugValue())
      InstrCount += 1;

    if (InstrCount > MaxDuplicateCount)
      return false;
  }

  if (HasIndirectbr && PreRegAlloc)
    return true;

  if (IsSimple)
    return true;

  if (!PreRegAlloc)
    return true;

  return canCompletelyDuplicateBB(TailBB);
}
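
/// killDelaySlots - Rewrite delayed branches, calls and returns whose delay
/// slots contain only NOPs into their non-delayed forms and remove the NOPs.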
bool PatmosDelaySlotKiller::killDelaySlots(MachineBasicBlock &MBB) {
  bool Changed = false;

  DEBUG( dbgs() << "Killing slots in BB#" << MBB.getNumber()
                << " (" << MBB.getFullName() << ")\n" );

  // consider the basic block from top to bottom
  for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {
    // Control-flow instructions ("proper" delay slots)
    if (I->hasDelaySlot()) {
      assert( ( I->isCall() || I->isReturn() || I->isBranch() )
              && "Unexpected instruction with delay slot.");

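      // If the instruction is a bundle, advance to the first instruction
      // inside the bundle.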
      MachineBasicBlock::instr_iterator MI = *I;
      if (I->isBundle()) { ++MI; }

      unsigned Opcode = MI->getOpcode();

      if (Opcode == Patmos::BR ||
          Opcode == Patmos::BRu ||
          Opcode == Patmos::BRR ||
          Opcode == Patmos::BRRu ||
          Opcode == Patmos::BRT ||
          Opcode == Patmos::BRTu ||
          Opcode == Patmos::BRCF ||
          Opcode == Patmos::BRCFu ||
          Opcode == Patmos::BRCFR ||
          Opcode == Patmos::BRCFRu ||
          Opcode == Patmos::BRCFT ||
          Opcode == Patmos::BRCFTu ||
          Opcode == Patmos::CALL ||
          Opcode == Patmos::CALLR ||
          Opcode == Patmos::RET ||
          Opcode == Patmos::XRET) {

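        // Check whether every instruction occupying the delay slots is a NOP.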
        bool onlyNops = true;
        unsigned maxCount = TM.getSubtargetImpl()->getDelaySlotCycles(&*I);
        unsigned count = 0;
        for (MachineBasicBlock::iterator K = llvm::next(I), E = MBB.end();
             K != E && count < maxCount; ++K, ++count) {
          TII->skipPseudos(MBB, K);
          if (K->getOpcode() != Patmos::NOP) {
            onlyNops = false;
          }
        }
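        // If so, switch the instruction to its non-delayed form and delete
        // the NOPs from the delay slots.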
        if (onlyNops) {
          unsigned NewOpcode = 0;
          switch(Opcode) {
          case Patmos::BR:     NewOpcode = Patmos::BRND; break;
          case Patmos::BRu:    NewOpcode = Patmos::BRNDu; break;
          case Patmos::BRR:    NewOpcode = Patmos::BRRND; break;
          case Patmos::BRRu:   NewOpcode = Patmos::BRRNDu; break;
          case Patmos::BRT:    NewOpcode = Patmos::BRTND; break;
          case Patmos::BRTu:   NewOpcode = Patmos::BRTNDu; break;
          case Patmos::BRCF:   NewOpcode = Patmos::BRCFND; break;
          case Patmos::BRCFu:  NewOpcode = Patmos::BRCFNDu; break;
          case Patmos::BRCFR:  NewOpcode = Patmos::BRCFRND; break;
          case Patmos::BRCFRu: NewOpcode = Patmos::BRCFRNDu; break;
          case Patmos::BRCFT:  NewOpcode = Patmos::BRCFTND; break;
          case Patmos::BRCFTu: NewOpcode = Patmos::BRCFTNDu; break;
          case Patmos::CALL:   NewOpcode = Patmos::CALLND; break;
          case Patmos::CALLR:  NewOpcode = Patmos::CALLRND; break;
          case Patmos::RET:    NewOpcode = Patmos::RETND; break;
          case Patmos::XRET:   NewOpcode = Patmos::XRETND; break;
          }
          const MCInstrDesc &nonDelayed = TII->get(NewOpcode);
          MI->setDesc(nonDelayed);

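          // Walk over the instructions that filled the delay slots and erase
          // them.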
          unsigned killCount = 0;
          MachineBasicBlock::iterator K = llvm::next(I);
          for (MachineBasicBlock::iterator E = MBB.end();
               K != E && killCount < count; ++K, ++killCount) {
            TII->skipPseudos(MBB, K);
            KilledSlots++;
          }
          MBB.erase(llvm::next(I), K);
          Changed = true;
        }
      }
    }
  }
  return Changed;
}
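
// Emit the function epilogue: pop any argument area consumed by a tail call,
// step over the callee-saved register restores, and restore the stack
// pointer (with a frame pointer, SP is recovered from FP).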
void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
                                        MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  assert(MBBI->isReturn() && "Can only insert epilog into returning blocks");
  MachineFrameInfo *MFI = MF.getFrameInfo();
  const AArch64InstrInfo *TII =
      static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
  const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  DebugLoc DL = MBBI->getDebugLoc();
  unsigned RetOpcode = MBBI->getOpcode();

  int NumBytes = MFI->getStackSize();
  const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();

  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
    return;

  // Initial and residual are named for consistency with the prologue. Note that
  // in the epilogue, the residual adjustment is executed first.
  uint64_t ArgumentPopSize = 0;
  if (RetOpcode == AArch64::TCRETURNdi || RetOpcode == AArch64::TCRETURNri) {
    MachineOperand &StackAdjust = MBBI->getOperand(1);

    // For a tail-call in a callee-pops-arguments environment, some or all of
    // the stack may actually be in use for the call's arguments, this is
    // calculated during LowerCall and consumed here...
    ArgumentPopSize = StackAdjust.getImm();
  } else {
    // ... otherwise the amount to pop is *all* of the argument space,
    // conveniently stored in the MachineFunctionInfo by
    // LowerFormalArguments. This will, of course, be zero for the C calling
    // convention.
    ArgumentPopSize = AFI->getArgumentStackToRestore();
  }

  // The stack frame should be like below,
  //
  //      ----------------------                     ---
  //      |                    |                      |
  //      | BytesInStackArgArea|              CalleeArgStackSize
  //      | (NumReusableBytes) |                (of tail call)
  //      |                    |                     ---
  //      |                    |                      |
  //      ---------------------|        ---           |
  //      |                    |         |            |
  //      |   CalleeSavedReg   |         |            |
  //      | (NumRestores * 16) |         |            |
  //      |                    |         |            |
  //      ---------------------|         |         NumBytes
  //      |                    |     StackSize  (StackAdjustUp)
  //      |   LocalStackSize   |         |            |
  //      | (covering callee   |         |            |
  //      |       args)        |         |            |
  //      |                    |         |            |
  //      ----------------------        ---          ---
  //
  // So NumBytes = StackSize + BytesInStackArgArea - CalleeArgStackSize
  //             = StackSize + ArgumentPopSize
  //
  // AArch64TargetLowering::LowerCall figures out ArgumentPopSize and keeps
  // it as the 2nd argument of AArch64ISD::TC_RETURN.
  NumBytes += ArgumentPopSize;

  unsigned NumRestores = 0;
  // Move past the restores of the callee-saved registers.
  MachineBasicBlock::iterator LastPopI = MBBI;
  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
  if (LastPopI != MBB.begin()) {
    do {
      ++NumRestores;
      --LastPopI;
    } while (LastPopI != MBB.begin() && isCSRestore(LastPopI, CSRegs));
    if (!isCSRestore(LastPopI, CSRegs)) {
      ++LastPopI;
      --NumRestores;
    }
  }
  NumBytes -= NumRestores * 16;
  assert(NumBytes >= 0 && "Negative stack allocation size!?");

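  // Without a frame pointer, the stack is deallocated with a single SP
  // adjustment.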
  if (!hasFP(MF)) {
    // If this was a redzone leaf function, we don't need to restore the
    // stack pointer.
    if (!canUseRedZone(MF))
      emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, NumBytes,
                      TII);
    return;
  }

  // Restore the original stack pointer.
  // FIXME: Rather than doing the math here, we should instead just use
  // non-post-indexed loads for the restores if we aren't actually going to
  // be able to save any instructions.
  if (NumBytes || MFI->hasVarSizedObjects())
    emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
                    -(NumRestores - 1) * 16, TII, MachineInstr::NoFlags);
}