bool MipsExpandPseudo::expandAtomicCmpSwapSubword(
    MachineBasicBlock &BB, MachineBasicBlock::iterator I,
    MachineBasicBlock::iterator &NMBBI) {

  MachineFunction *MF = BB.getParent();

  const bool ArePtrs64bit = STI->getABI().ArePtrs64bit();
  DebugLoc DL = I->getDebugLoc();
  unsigned LL, SC;

  unsigned ZERO = Mips::ZERO;
  unsigned BNE = Mips::BNE;
  unsigned BEQ = Mips::BEQ;
  unsigned SEOp =
      I->getOpcode() == Mips::ATOMIC_CMP_SWAP_I8_POSTRA ? Mips::SEB : Mips::SEH;

  if (STI->inMicroMipsMode()) {
    LL = STI->hasMips32r6() ? Mips::LL_MMR6 : Mips::LL_MM;
    SC = STI->hasMips32r6() ? Mips::SC_MMR6 : Mips::SC_MM;
    BNE = STI->hasMips32r6() ? Mips::BNEC_MMR6 : Mips::BNE_MM;
    BEQ = STI->hasMips32r6() ? Mips::BEQC_MMR6 : Mips::BEQ_MM;
  } else {
    LL = STI->hasMips32r6() ? (ArePtrs64bit ? Mips::LL64_R6 : Mips::LL_R6)
                            : (ArePtrs64bit ? Mips::LL64 : Mips::LL);
    SC = STI->hasMips32r6() ? (ArePtrs64bit ? Mips::SC64_R6 : Mips::SC_R6)
                            : (ArePtrs64bit ? Mips::SC64 : Mips::SC);
  }

  unsigned Dest = I->getOperand(0).getReg();
  unsigned Ptr = I->getOperand(1).getReg();
  unsigned Mask = I->getOperand(2).getReg();
  unsigned ShiftCmpVal = I->getOperand(3).getReg();
  unsigned Mask2 = I->getOperand(4).getReg();
  unsigned ShiftNewVal = I->getOperand(5).getReg();
  unsigned ShiftAmnt = I->getOperand(6).getReg();
  unsigned Scratch = I->getOperand(7).getReg();
  unsigned Scratch2 = I->getOperand(8).getReg();

  // insert new blocks after the current block
  const BasicBlock *LLVM_BB = BB.getBasicBlock();
  MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MachineFunction::iterator It = ++BB.getIterator();
  MF->insert(It, loop1MBB);
  MF->insert(It, loop2MBB);
  MF->insert(It, sinkMBB);
  MF->insert(It, exitMBB);

  // Transfer the remainder of BB and its successor edges to exitMBB.
  exitMBB->splice(exitMBB->begin(), &BB,
                  std::next(MachineBasicBlock::iterator(I)), BB.end());
  exitMBB->transferSuccessorsAndUpdatePHIs(&BB);

  //  thisMBB:
  //    ...
  //    fallthrough --> loop1MBB
  BB.addSuccessor(loop1MBB, BranchProbability::getOne());
  loop1MBB->addSuccessor(sinkMBB);
  loop1MBB->addSuccessor(loop2MBB);
  loop1MBB->normalizeSuccProbs();
  loop2MBB->addSuccessor(loop1MBB);
  loop2MBB->addSuccessor(sinkMBB);
  loop2MBB->normalizeSuccProbs();
  sinkMBB->addSuccessor(exitMBB, BranchProbability::getOne());

  // loop1MBB:
  //   ll dest, 0(ptr)
  //   and Mask', dest, Mask
  //   bne Mask', ShiftCmpVal, sinkMBB
  BuildMI(loop1MBB, DL, TII->get(LL), Scratch).addReg(Ptr).addImm(0);
  BuildMI(loop1MBB, DL, TII->get(Mips::AND), Scratch2)
      .addReg(Scratch)
      .addReg(Mask);
  BuildMI(loop1MBB, DL, TII->get(BNE))
    .addReg(Scratch2).addReg(ShiftCmpVal).addMBB(sinkMBB);

  // loop2MBB:
  //   and dest, dest, mask2
  //   or dest, dest, ShiftNewVal
  //   sc dest, dest, 0(ptr)
  //   beq dest, $0, loop1MBB
  BuildMI(loop2MBB, DL, TII->get(Mips::AND), Scratch)
      .addReg(Scratch, RegState::Kill)
      .addReg(Mask2);
  BuildMI(loop2MBB, DL, TII->get(Mips::OR), Scratch)
      .addReg(Scratch, RegState::Kill)
      .addReg(ShiftNewVal);
  BuildMI(loop2MBB, DL, TII->get(SC), Scratch)
      .addReg(Scratch, RegState::Kill)
      .addReg(Ptr)
      .addImm(0);
  BuildMI(loop2MBB, DL, TII->get(BEQ))
      .addReg(Scratch, RegState::Kill)
      .addReg(ZERO)
      .addMBB(loop1MBB);

  //  sinkMBB:
  //    srl     srlres, Mask', shiftamt
  //    sign_extend dest, srlres
  BuildMI(sinkMBB, DL, TII->get(Mips::SRLV), Dest)
      .addReg(Scratch2)
      .addReg(ShiftAmnt);
  if (STI->hasMips32r2()) {
    BuildMI(sinkMBB, DL, TII->get(SEOp), Dest).addReg(Dest);
  } else {
    const unsigned ShiftImm =
        I->getOpcode() == Mips::ATOMIC_CMP_SWAP_I16_POSTRA ? 16 : 24;
    BuildMI(sinkMBB, DL, TII->get(Mips::SLL), Dest)
        .addReg(Dest, RegState::Kill)
        .addImm(ShiftImm);
    BuildMI(sinkMBB, DL, TII->get(Mips::SRA), Dest)
        .addReg(Dest, RegState::Kill)
        .addImm(ShiftImm);
  }

  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *loop1MBB);
  computeAndAddLiveIns(LiveRegs, *loop2MBB);
  computeAndAddLiveIns(LiveRegs, *sinkMBB);
  computeAndAddLiveIns(LiveRegs, *exitMBB);

  NMBBI = BB.end();
  I->eraseFromParent();
  return true;
}
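// For context, a source-level sketch (illustrative, not part of the backend)
// of what reaches the expansion above: a subword compare-and-swap like the
// one below is selected to ATOMIC_CMP_SWAP_I8_POSTRA and rewritten into the
// ll/and/bne ... and/or/sc/beq loop sketched in the block comments.
#include <atomic>

static bool cmpxchgByte(std::atomic<char> &V, char Expected, char Desired) {
  // On MIPS this becomes an LL/SC loop over the containing aligned word, with
  // the byte of interest isolated via the Mask/Mask2/ShiftAmnt operands.
  return V.compare_exchange_strong(Expected, Desired);
}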
// hasFP - Return true if the specified function should have a dedicated frame
// pointer register.  This is true if the function has variable sized allocas or
// if frame pointer elimination is disabled.
bool MipsRegisterInfo::
hasFP(const MachineFunction &MF) const {
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  return NoFramePointerElim || MFI->hasVarSizedObjects();
}
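// An illustrative source-level trigger (not part of the backend) for the
// hasVarSizedObjects() case above: a dynamic alloca makes the frame size
// unknown at compile time, so the function needs a dedicated frame pointer.
#include <alloca.h>

static char touchesDynamicFrame(int N) {
  char *Buf = static_cast<char *>(alloca(N)); // variable-sized stack object
  Buf[0] = 1;
  return Buf[0];
}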
/// MoveDiscontiguousLoopBlocks - Move any loop blocks that are not in the
/// portion of the loop contiguous with the header. This usually makes the loop
/// contiguous, provided that AnalyzeBranch can handle all the relevant
/// branching. See the @cfg_islands case in test/CodeGen/X86/loop_blocks.ll
/// for an example of this.
bool CodePlacementOpt::MoveDiscontiguousLoopBlocks(MachineFunction &MF,
                                                   MachineLoop *L) {
  bool Changed = false;
  MachineBasicBlock *TopMBB = L->getTopBlock();
  MachineBasicBlock *BotMBB = L->getBottomBlock();

  // Determine a position to move orphaned loop blocks to. If TopMBB is not
  // entered via fallthrough and BotMBB is exited via fallthrough, prepend them
  // to the top of the loop to avoid losing that fallthrough. Otherwise append
  // them to the bottom, even if it previously had a fallthrough, on the theory
  // that it's worth an extra branch to keep the loop contiguous.
  MachineFunction::iterator InsertPt =
    llvm::next(MachineFunction::iterator(BotMBB));
  bool InsertAtTop = false;
  if (TopMBB != MF.begin() &&
      !HasFallthrough(prior(MachineFunction::iterator(TopMBB))) &&
      HasFallthrough(BotMBB)) {
    InsertPt = TopMBB;
    InsertAtTop = true;
  }

  // Keep a record of which blocks are in the portion of the loop contiguous
  // with the loop header.
  SmallPtrSet<MachineBasicBlock *, 8> ContiguousBlocks;
  for (MachineFunction::iterator I = TopMBB,
       E = llvm::next(MachineFunction::iterator(BotMBB)); I != E; ++I)
    ContiguousBlocks.insert(I);

  // Find non-contiguous blocks and fix them.
  if (InsertPt != MF.begin() && HasAnalyzableTerminator(prior(InsertPt)))
    for (MachineLoop::block_iterator BI = L->block_begin(), BE = L->block_end();
         BI != BE; ++BI) {
      MachineBasicBlock *BB = *BI;

      // Verify that we can analyze all the loop entry edges before beginning
      // any changes which will require us to be able to analyze them.
      if (!HasAnalyzableTerminator(BB))
        continue;
      if (!HasAnalyzableTerminator(prior(MachineFunction::iterator(BB))))
        continue;

      // If the layout predecessor is part of the loop, this block will be
      // processed along with it. This keeps them in their relative order.
      if (BB != MF.begin() &&
          L->contains(prior(MachineFunction::iterator(BB))))
        continue;

      // Check to see if this block is already contiguous with the main
      // portion of the loop.
      if (!ContiguousBlocks.insert(BB))
        continue;

      // Move the block.
      DEBUG(dbgs() << "CGP: Moving blocks starting at BB#" << BB->getNumber()
                   << " to be contiguous with loop.\n");
      Changed = true;

      // Process this block and all loop blocks contiguous with it, to keep
      // them in their relative order.
      MachineFunction::iterator Begin = BB;
      MachineFunction::iterator End = llvm::next(MachineFunction::iterator(BB));
      for (; End != MF.end(); ++End) {
        if (!L->contains(End)) break;
        if (!HasAnalyzableTerminator(End)) break;
        ContiguousBlocks.insert(End);
        ++NumIntraMoved;
      }

      // If we're inserting at the bottom of the loop, and the code we're
      // moving originally had fall-through successors, bring the successors
      // up with the loop blocks to preserve the fall-through edges.
      if (!InsertAtTop)
        for (; End != MF.end(); ++End) {
          if (L->contains(End)) break;
          if (!HasAnalyzableTerminator(End)) break;
          if (!HasFallthrough(prior(End))) break;
        }

      // Move the blocks. This may invalidate TopMBB and/or BotMBB, but
      // we don't need them anymore at this point.
      Splice(MF, InsertPt, Begin, End);
    }

  return Changed;
}
bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const SIInstrInfo *TII =
      static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  std::vector<unsigned> I1Defs;

  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
                                                  BI != BE; ++BI) {

    MachineBasicBlock &MBB = *BI;
    MachineBasicBlock::iterator I, Next;
    for (I = MBB.begin(); I != MBB.end(); I = Next) {
      Next = std::next(I);
      MachineInstr &MI = *I;

      // Try to use S_MOVK_I32, which will save 4 bytes for small immediates.
      if (MI.getOpcode() == AMDGPU::S_MOV_B32) {
        const MachineOperand &Src = MI.getOperand(1);

        if (Src.isImm()) {
          if (isInt<16>(Src.getImm()) && !TII->isInlineConstant(Src, 4))
            MI.setDesc(TII->get(AMDGPU::S_MOVK_I32));
        }

        continue;
      }

      if (!TII->hasVALU32BitEncoding(MI.getOpcode()))
        continue;

      if (!canShrink(MI, TII, TRI, MRI)) {
        // Try commuting the instruction and see if that enables us to shrink
        // it.
        if (!MI.isCommutable() || !TII->commuteInstruction(&MI) ||
            !canShrink(MI, TII, TRI, MRI))
          continue;
      }

      // getVOPe32 could be -1 here if we started with an instruction that had
      // a 32-bit encoding and then commuted it to an instruction that did not.
      if (!TII->hasVALU32BitEncoding(MI.getOpcode()))
        continue;

      int Op32 = AMDGPU::getVOPe32(MI.getOpcode());

      if (TII->isVOPC(Op32)) {
        unsigned DstReg = MI.getOperand(0).getReg();
        if (TargetRegisterInfo::isVirtualRegister(DstReg)) {
          // VOPC instructions can only write to the VCC register. We can't
          // force them to use VCC here, because this is only one register and
          // cannot deal with sequences which would require multiple copies of
          // VCC, e.g. S_AND_B64 (vcc = V_CMP_...), (vcc = V_CMP_...)
          //
          // So, instead of forcing the instruction to write to VCC, we provide
          // a hint to the register allocator to use VCC and then we will run
          // this pass again after RA and shrink it if it outputs to VCC.
          MRI.setRegAllocationHint(MI.getOperand(0).getReg(), 0, AMDGPU::VCC);
          continue;
        }
        if (DstReg != AMDGPU::VCC)
          continue;
      }

      if (Op32 == AMDGPU::V_CNDMASK_B32_e32) {
        // We shrink V_CNDMASK_B32_e64 using regalloc hints like we do for VOPC
        // instructions.
        const MachineOperand *Src2 =
            TII->getNamedOperand(MI, AMDGPU::OpName::src2);
        if (!Src2->isReg())
          continue;
        unsigned SReg = Src2->getReg();
        if (TargetRegisterInfo::isVirtualRegister(SReg)) {
          MRI.setRegAllocationHint(SReg, 0, AMDGPU::VCC);
          continue;
        }
        if (SReg != AMDGPU::VCC)
          continue;
      }

      // We can shrink this instruction
      DEBUG(dbgs() << "Shrinking " << MI);

      MachineInstrBuilder Inst32 =
          BuildMI(MBB, I, MI.getDebugLoc(), TII->get(Op32));

      // Add the dst operand if the 32-bit encoding also has an explicit $dst.
      // For VOPC instructions, this is replaced by an implicit def of vcc.
      int Op32DstIdx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::dst);
      if (Op32DstIdx != -1) {
        // dst
        Inst32.addOperand(MI.getOperand(0));
      } else {
        assert(MI.getOperand(0).getReg() == AMDGPU::VCC &&
               "Unexpected case");
      }


      Inst32.addOperand(*TII->getNamedOperand(MI, AMDGPU::OpName::src0));

      const MachineOperand *Src1 =
          TII->getNamedOperand(MI, AMDGPU::OpName::src1);
      if (Src1)
        Inst32.addOperand(*Src1);

      const MachineOperand *Src2 =
        TII->getNamedOperand(MI, AMDGPU::OpName::src2);
      if (Src2) {
        int Op32Src2Idx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::src2);
        if (Op32Src2Idx != -1) {
          Inst32.addOperand(*Src2);
        } else {
          // In the case of V_CNDMASK_B32_e32, the explicit operand src2 is
          // replaced with an implicit read of vcc.
          assert(Src2->getReg() == AMDGPU::VCC &&
                 "Unexpected missing register operand");
          Inst32.addOperand(copyRegOperandAsImplicit(*Src2));
        }
      }

      ++NumInstructionsShrunk;
      MI.eraseFromParent();

      foldImmediates(*Inst32, TII, MRI);
      DEBUG(dbgs() << "e32 MI = " << *Inst32 << '\n');


    }
  }
  return false;
}
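// A small stand-alone illustration of the S_MOVK_I32 criterion used above
// (the sample values are hypothetical): shrinking pays off only when the
// immediate fits the signed 16-bit field and is not already an inline
// constant, since inline constants make the wider encoding free anyway.
#include "llvm/Support/MathExtras.h"

static bool worthShrinkingToMovk(int64_t Imm, bool IsInlineConstant) {
  // e.g. Imm == 0x1234 qualifies; Imm == 7 is an inline constant and does not.
  return llvm::isInt<16>(Imm) && !IsInlineConstant;
}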
// hasFP - Return true if the specified function should have a dedicated frame
// pointer register.  This is true if the function has variable sized allocas or
// if frame pointer elimination is disabled.
bool MBlazeFrameInfo::hasFP(const MachineFunction &MF) const {
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects();
}
void PatmosFrameLowering::processFunctionBeforeCalleeSavedScan(
                                  MachineFunction& MF, RegScavenger* RS) const {

  const TargetInstrInfo *TII = TM.getInstrInfo();
  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  MachineFrameInfo &MFI = *MF.getFrameInfo();
  PatmosMachineFunctionInfo &PMFI = *MF.getInfo<PatmosMachineFunctionInfo>();

  // Insert instructions at the beginning of the entry block;
  // callee-saved-reg spills are inserted at front afterwards
  MachineBasicBlock &EntryMBB = MF.front();

  DebugLoc DL;

  // Do not emit anything for naked functions
  if (MF.getFunction()->hasFnAttribute(Attribute::Naked)) {
    return;
  }

  if (hasFP(MF)) {
    // if framepointer enabled, set it to point to the stack pointer.
    // Set frame pointer: FP = SP
    AddDefaultPred(BuildMI(EntryMBB, EntryMBB.begin(), DL,
          TII->get(Patmos::MOV), Patmos::RFP)).addReg(Patmos::RSP);
    // Mark RFP as used
    MRI.setPhysRegUsed(Patmos::RFP);
  }

  // load the current function base if it needs to be passed to call sites
  if (MFI.hasCalls()) {
    // If we have calls, we need to spill the call link registers
    MRI.setPhysRegUsed(Patmos::SRB);
    MRI.setPhysRegUsed(Patmos::SRO);
  } else {
    MRI.setPhysRegUnused(Patmos::SRB);
    MRI.setPhysRegUnused(Patmos::SRO);
  }

  // mark all predicate registers as used, for single path support
  // S0 is saved/restored as whole anyway
  if (PatmosSinglePathInfo::isEnabled(MF)) {
    MRI.setPhysRegUsed(Patmos::S0);
    MRI.setPhysRegUsed(Patmos::R26);
  }

  // If we need to spill S0, try to find an unused scratch register that we can
  // use instead. This only works if we do not have calls that may clobber
  // the register though.
  // It also makes no sense if we single-path convert the function,
  // because the SP converter introduces spill slots anyway.
  if (MRI.isPhysRegUsed(Patmos::S0) && !MF.getFrameInfo()->hasCalls()
      && !PatmosSinglePathInfo::isEnabled(MF)) {
    unsigned SpillReg = 0;
    BitVector Reserved = MRI.getReservedRegs();
    BitVector CalleeSaved(TRI->getNumRegs());
    const uint16_t *saved = TRI->getCalleeSavedRegs(&MF);
    while (*saved) {
      CalleeSaved.set(*saved++);
    }
    for (TargetRegisterClass::iterator i = Patmos::RRegsRegClass.begin(),
         e = Patmos::RRegsRegClass.end(); i != e; ++i) {
      if (MRI.isPhysRegUsed(*i) || *i == Patmos::R9) continue;
      if (Reserved[*i] || CalleeSaved[*i]) continue;
      SpillReg = *i;
      break;
    }
    if (SpillReg) {
      // Remember the register for the prologue-emitter and mark as used
      PMFI.setS0SpillReg(SpillReg);
      MRI.setPhysRegUsed(SpillReg);
    }
  }

  if (TRI->requiresRegisterScavenging(MF)) {
    const TargetRegisterClass *RC = &Patmos::RRegsRegClass;
    int fi = MFI.CreateStackObject(RC->getSize(), RC->getAlignment(), false);
    RS->addScavengingFrameIndex(fi);
    PMFI.setRegScavengingFI(fi);
  }
}
bool XCoreRegisterInfo::hasFP(const MachineFunction &MF) const {
  return NoFramePointerElim || MF.getFrameInfo()->hasVarSizedObjects();
}
bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const SIInstrInfo *TII =
      static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  std::vector<unsigned> I1Defs;

  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
                                                  BI != BE; ++BI) {

    MachineBasicBlock &MBB = *BI;
    MachineBasicBlock::iterator I, Next;
    for (I = MBB.begin(); I != MBB.end(); I = Next) {
      Next = std::next(I);
      MachineInstr &MI = *I;

      // Try to use S_MOVK_I32, which will save 4 bytes for small immediates.
      if (MI.getOpcode() == AMDGPU::S_MOV_B32) {
        const MachineOperand &Src = MI.getOperand(1);

        if (Src.isImm()) {
          if (isInt<16>(Src.getImm()) && !TII->isInlineConstant(Src, 4))
            MI.setDesc(TII->get(AMDGPU::S_MOVK_I32));
        }

        continue;
      }

      if (!TII->hasVALU32BitEncoding(MI.getOpcode()))
        continue;

      if (!canShrink(MI, TII, TRI, MRI)) {
        // Try commuting the instruction and see if that enables us to shrink
        // it.
        if (!MI.isCommutable() || !TII->commuteInstruction(&MI) ||
            !canShrink(MI, TII, TRI, MRI))
          continue;
      }

      // getVOPe32 could be -1 here if we started with an instruction that had
      // a 32-bit encoding and then commuted it to an instruction that did not.
      if (!TII->hasVALU32BitEncoding(MI.getOpcode()))
        continue;

      int Op32 = AMDGPU::getVOPe32(MI.getOpcode());

      if (TII->isVOPC(Op32)) {
        unsigned DstReg = MI.getOperand(0).getReg();
        if (TargetRegisterInfo::isVirtualRegister(DstReg)) {
          // VOPC instructions can only write to the VCC register.  We can't
          // force them to use VCC here, because the register allocator has
          // trouble with sequences like this, which cause the allocator to run
          // out of registers if vreg0 and vreg1 belong to the VCCReg register
          // class:
          // vreg0 = VOPC;
          // vreg1 = VOPC;
          // S_AND_B64 vreg0, vreg1
          //
          // So, instead of forcing the instruction to write to VCC, we provide
          // a hint to the register allocator to use VCC and then we will run
          // this pass again after RA and shrink it if it outputs to VCC.
          MRI.setRegAllocationHint(MI.getOperand(0).getReg(), 0, AMDGPU::VCC);
          continue;
        }
        if (DstReg != AMDGPU::VCC)
          continue;
      }

      if (Op32 == AMDGPU::V_CNDMASK_B32_e32) {
        // We shrink V_CNDMASK_B32_e64 using regalloc hints like we do for VOPC
        // instructions.
        const MachineOperand *Src2 =
            TII->getNamedOperand(MI, AMDGPU::OpName::src2);
        if (!Src2->isReg())
          continue;
        unsigned SReg = Src2->getReg();
        if (TargetRegisterInfo::isVirtualRegister(SReg)) {
          MRI.setRegAllocationHint(SReg, 0, AMDGPU::VCC);
          continue;
        }
        if (SReg != AMDGPU::VCC)
          continue;
      }

      // We can shrink this instruction
      DEBUG(dbgs() << "Shrinking "; MI.dump(); dbgs() << '\n';);

      MachineInstrBuilder Inst32 =
          BuildMI(MBB, I, MI.getDebugLoc(), TII->get(Op32));

      // dst
      Inst32.addOperand(MI.getOperand(0));

      Inst32.addOperand(*TII->getNamedOperand(MI, AMDGPU::OpName::src0));

      const MachineOperand *Src1 =
          TII->getNamedOperand(MI, AMDGPU::OpName::src1);
      if (Src1)
        Inst32.addOperand(*Src1);

      const MachineOperand *Src2 =
          TII->getNamedOperand(MI, AMDGPU::OpName::src2);
      if (Src2)
        Inst32.addOperand(*Src2);

      ++NumInstructionsShrunk;
      MI.eraseFromParent();

      foldImmediates(*Inst32, TII, MRI);
      DEBUG(dbgs() << "e32 MI = " << *Inst32 << '\n');


    }
  }
  return false;
}
bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) {
  MachineRegisterInfo *MRI = &MF.getRegInfo();
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
  const TargetMachine &TM = MF.getTarget();

  LLVM_DEBUG(dbgs() << " -------------------- " << getPassName()
                    << " -------------------- \n");
  LLVM_DEBUG(dbgs() << "Function Name : " << MF.getName() << "\n");

  std::vector<uint32_t> RegMask;

  // Compute the size of the bit vector to represent all the registers.
  // The bit vector is broken into 32-bit chunks, thus takes the ceil of
  // the number of registers divided by 32 for the size.
  unsigned RegMaskSize = MachineOperand::getRegMaskSize(TRI->getNumRegs());
  RegMask.resize(RegMaskSize, ~((uint32_t)0));

  const Function &F = MF.getFunction();

  PhysicalRegisterUsageInfo &PRUI = getAnalysis<PhysicalRegisterUsageInfo>();
  PRUI.setTargetMachine(TM);

  LLVM_DEBUG(dbgs() << "Clobbered Registers: ");

  BitVector SavedRegs;
  computeCalleeSavedRegs(SavedRegs, MF);

  const BitVector &UsedPhysRegsMask = MRI->getUsedPhysRegsMask();
  auto SetRegAsDefined = [&RegMask] (unsigned Reg) {
    RegMask[Reg / 32] &= ~(1u << Reg % 32);
  };
  // Scan all the physical registers. When a register is defined in the current
  // function set it and all the aliasing registers as defined in the regmask.
  for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg) {
    // Don't count registers that are saved and restored.
    if (SavedRegs.test(PReg))
      continue;
    // If a register is defined by an instruction, mark it as defined together
    // with all its unsaved aliases.
    if (!MRI->def_empty(PReg)) {
      for (MCRegAliasIterator AI(PReg, TRI, true); AI.isValid(); ++AI)
        if (!SavedRegs.test(*AI))
          SetRegAsDefined(*AI);
      continue;
    }
    // If a register is in the UsedPhysRegsMask set then mark it as defined.
    // All clobbered aliases will also be in the set, so we can skip setting
    // as defined all the aliases here.
    if (UsedPhysRegsMask.test(PReg))
      SetRegAsDefined(PReg);
  }

  if (TargetFrameLowering::isSafeForNoCSROpt(F)) {
    ++NumCSROpt;
    LLVM_DEBUG(dbgs() << MF.getName()
                      << " function optimized for not having CSR.\n");
  }

  for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg)
    if (MachineOperand::clobbersPhysReg(&(RegMask[0]), PReg))
      LLVM_DEBUG(dbgs() << printReg(PReg, TRI) << " ");

  LLVM_DEBUG(dbgs() << " \n----------------------------------------\n");

  PRUI.storeUpdateRegUsageInfo(F, RegMask);

  return false;
}
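// A worked stand-alone illustration (register numbers are hypothetical) of
// the regmask encoding manipulated above: bit (Reg % 32) of word (Reg / 32)
// is set while a register is preserved and cleared once it is clobbered,
// which is exactly what the SetRegAsDefined lambda does.
#include <cstdint>
#include <vector>

static void markClobbered(std::vector<uint32_t> &RegMask, unsigned Reg) {
  RegMask[Reg / 32] &= ~(1u << (Reg % 32));
}

static bool isPreserved(const std::vector<uint32_t> &RegMask, unsigned Reg) {
  return (RegMask[Reg / 32] >> (Reg % 32)) & 1u;
}
// With 40 registers the mask needs (40 + 31) / 32 == 2 words; clobbering
// register 33 clears bit 1 of word 1.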
bool StrongPHIElimination::runOnMachineFunction(MachineFunction &MF) {
  MRI = &MF.getRegInfo();
  TII = MF.getTarget().getInstrInfo();
  DT = &getAnalysis<MachineDominatorTree>();
  LI = &getAnalysis<LiveIntervals>();

  for (MachineFunction::iterator I = MF.begin(), E = MF.end();
       I != E; ++I) {
    for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
         BBI != BBE && BBI->isPHI(); ++BBI) {
      unsigned DestReg = BBI->getOperand(0).getReg();
      addReg(DestReg);
      PHISrcDefs[I].push_back(BBI);

      for (unsigned i = 1; i < BBI->getNumOperands(); i += 2) {
        MachineOperand &SrcMO = BBI->getOperand(i);
        unsigned SrcReg = SrcMO.getReg();
        addReg(SrcReg);
        unionRegs(DestReg, SrcReg);

        MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
        if (DefMI)
          PHISrcDefs[DefMI->getParent()].push_back(DefMI);
      }
    }
  }

  // Perform a depth-first traversal of the dominator tree, splitting
  // interferences amongst PHI-congruence classes.
  DenseMap<unsigned, unsigned> CurrentDominatingParent;
  DenseMap<unsigned, unsigned> ImmediateDominatingParent;
  for (df_iterator<MachineDomTreeNode*> DI = df_begin(DT->getRootNode()),
       DE = df_end(DT->getRootNode()); DI != DE; ++DI) {
    SplitInterferencesForBasicBlock(*DI->getBlock(),
                                    CurrentDominatingParent,
                                    ImmediateDominatingParent);
  }

  // Insert copies for all PHI source and destination registers.
  for (MachineFunction::iterator I = MF.begin(), E = MF.end();
       I != E; ++I) {
    for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
         BBI != BBE && BBI->isPHI(); ++BBI) {
      InsertCopiesForPHI(BBI, I);
    }
  }

  // FIXME: Preserve the equivalence classes during copy insertion and use
  // the preserved equivalence classes instead of recomputing them.
  RegNodeMap.clear();
  for (MachineFunction::iterator I = MF.begin(), E = MF.end();
       I != E; ++I) {
    for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
         BBI != BBE && BBI->isPHI(); ++BBI) {
      unsigned DestReg = BBI->getOperand(0).getReg();
      addReg(DestReg);

      for (unsigned i = 1; i < BBI->getNumOperands(); i += 2) {
        unsigned SrcReg = BBI->getOperand(i).getReg();
        addReg(SrcReg);
        unionRegs(DestReg, SrcReg);
      }
    }
  }

  DenseMap<unsigned, unsigned> RegRenamingMap;
  bool Changed = false;
  for (MachineFunction::iterator I = MF.begin(), E = MF.end();
       I != E; ++I) {
    MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
    while (BBI != BBE && BBI->isPHI()) {
      MachineInstr *PHI = BBI;

      assert(PHI->getNumOperands() > 0);

      unsigned SrcReg = PHI->getOperand(1).getReg();
      unsigned SrcColor = getRegColor(SrcReg);
      unsigned NewReg = RegRenamingMap[SrcColor];
      if (!NewReg) {
        NewReg = SrcReg;
        RegRenamingMap[SrcColor] = SrcReg;
      }
      MergeLIsAndRename(SrcReg, NewReg);

      unsigned DestReg = PHI->getOperand(0).getReg();
      if (!InsertedDestCopies.count(DestReg))
        MergeLIsAndRename(DestReg, NewReg);

      for (unsigned i = 3; i < PHI->getNumOperands(); i += 2) {
        unsigned SrcReg = PHI->getOperand(i).getReg();
        MergeLIsAndRename(SrcReg, NewReg);
      }

      ++BBI;
      LI->RemoveMachineInstrFromMaps(PHI);
      PHI->eraseFromParent();
      Changed = true;
    }
  }

  // Due to the insertion of copies to split live ranges, the live intervals are
  // guaranteed to not overlap, except in one case: an original PHI source and a
  // PHI destination copy. In this case, they have the same value and thus don't
  // truly intersect, so we merge them into the value live at that point.
  // FIXME: Is there some better way we can handle this?
  for (DestCopyMap::iterator I = InsertedDestCopies.begin(),
       E = InsertedDestCopies.end(); I != E; ++I) {
    unsigned DestReg = I->first;
    unsigned DestColor = getRegColor(DestReg);
    unsigned NewReg = RegRenamingMap[DestColor];

    LiveInterval &DestLI = LI->getInterval(DestReg);
    LiveInterval &NewLI = LI->getInterval(NewReg);

    assert(DestLI.ranges.size() == 1
           && "PHI destination copy's live interval should be a single live "
               "range from the beginning of the BB to the copy instruction.");
    LiveRange *DestLR = DestLI.begin();
    VNInfo *NewVNI = NewLI.getVNInfoAt(DestLR->start);
    if (!NewVNI) {
      NewVNI = NewLI.createValueCopy(DestLR->valno, LI->getVNInfoAllocator());
      MachineInstr *CopyInstr = I->second;
      CopyInstr->getOperand(1).setIsKill(true);
    }

    LiveRange NewLR(DestLR->start, DestLR->end, NewVNI);
    NewLI.addRange(NewLR);

    LI->removeInterval(DestReg);
    MRI->replaceRegWith(DestReg, NewReg);
  }

  // Adjust the live intervals of all PHI source registers to handle the case
  // where the PHIs in successor blocks were the only later uses of the source
  // register.
  for (SrcCopySet::iterator I = InsertedSrcCopySet.begin(),
       E = InsertedSrcCopySet.end(); I != E; ++I) {
    MachineBasicBlock *MBB = I->first;
    unsigned SrcReg = I->second;
    if (unsigned RenamedRegister = RegRenamingMap[getRegColor(SrcReg)])
      SrcReg = RenamedRegister;

    LiveInterval &SrcLI = LI->getInterval(SrcReg);

    bool isLiveOut = false;
    for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
         SE = MBB->succ_end(); SI != SE; ++SI) {
      if (SrcLI.liveAt(LI->getMBBStartIdx(*SI))) {
        isLiveOut = true;
        break;
      }
    }

    if (isLiveOut)
      continue;

    MachineOperand *LastUse = findLastUse(MBB, SrcReg);
    assert(LastUse);
    SlotIndex LastUseIndex = LI->getInstructionIndex(LastUse->getParent());
    SrcLI.removeRange(LastUseIndex.getDefIndex(), LI->getMBBEndIdx(MBB));
    LastUse->setIsKill(true);
  }

  LI->renumber();

  Allocator.Reset();
  RegNodeMap.clear();
  PHISrcDefs.clear();
  InsertedSrcCopySet.clear();
  InsertedSrcCopyMap.clear();
  InsertedDestCopies.clear();

  return Changed;
}
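// A minimal union-find sketch (an assumption about what addReg / unionRegs /
// getRegColor implement; the real structures live elsewhere in this pass):
// each register starts in its own congruence class, and every PHI merges its
// source registers into its destination's class.
#include <unordered_map>

struct CongruenceClasses {
  std::unordered_map<unsigned, unsigned> Parent;

  void addReg(unsigned R) { Parent.emplace(R, R); }

  unsigned find(unsigned R) {
    unsigned P = Parent.at(R);
    if (P == R)
      return R;
    unsigned Root = find(P);
    Parent[R] = Root; // path compression
    return Root;
  }

  void unionRegs(unsigned A, unsigned B) { Parent[find(A)] = find(B); }
};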
void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
                               SelectionDAG *DAG) {
  const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering();

  Fn = &fn;
  MF = &mf;
  RegInfo = &MF->getRegInfo();

  // Check whether the function can return without sret-demotion.
  SmallVector<ISD::OutputArg, 4> Outs;
  GetReturnInfo(Fn->getReturnType(), Fn->getAttributes(), Outs, *TLI);
  CanLowerReturn = TLI->CanLowerReturn(Fn->getCallingConv(), *MF,
                                       Fn->isVarArg(),
                                       Outs, Fn->getContext());

  // Initialize the mapping of values to registers.  This is only set up for
  // instruction values that are used outside of the block that defines
  // them.
  Function::const_iterator BB = Fn->begin(), EB = Fn->end();
  for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I)
    if (const AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
      // Don't fold inalloca allocas or other dynamic allocas into the initial
      // stack frame allocation, even if they are in the entry block.
      if (!AI->isStaticAlloca())
        continue;

      if (const ConstantInt *CUI = dyn_cast<ConstantInt>(AI->getArraySize())) {
        Type *Ty = AI->getAllocatedType();
        uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize(Ty);
        unsigned Align =
          std::max((unsigned)TLI->getDataLayout()->getPrefTypeAlignment(Ty),
                   AI->getAlignment());

        TySize *= CUI->getZExtValue();   // Get total allocated size.
        if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects.

        StaticAllocaMap[AI] =
          MF->getFrameInfo()->CreateStackObject(TySize, Align, false, AI);
      }
    }

  for (; BB != EB; ++BB)
    for (BasicBlock::const_iterator I = BB->begin(), E = BB->end();
         I != E; ++I) {
      // Look for dynamic allocas.
      if (const AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
        if (!AI->isStaticAlloca()) {
          unsigned Align = std::max(
              (unsigned)TLI->getDataLayout()->getPrefTypeAlignment(
                AI->getAllocatedType()),
              AI->getAlignment());
          unsigned StackAlign =
              TM.getSubtargetImpl()->getFrameLowering()->getStackAlignment();
          if (Align <= StackAlign)
            Align = 0;
          // Inform the Frame Information that we have variable-sized objects.
          MF->getFrameInfo()->CreateVariableSizedObject(Align ? Align : 1, AI);
        }
      }

      // Look for inline asm that clobbers the SP register.
      if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
        ImmutableCallSite CS(I);
        if (isa<InlineAsm>(CS.getCalledValue())) {
          unsigned SP = TLI->getStackPointerRegisterToSaveRestore();
          std::vector<TargetLowering::AsmOperandInfo> Ops =
            TLI->ParseConstraints(CS);
          for (size_t I = 0, E = Ops.size(); I != E; ++I) {
            TargetLowering::AsmOperandInfo &Op = Ops[I];
            if (Op.Type == InlineAsm::isClobber) {
              // Clobbers don't have SDValue operands, hence SDValue().
              TLI->ComputeConstraintToUse(Op, SDValue(), DAG);
              std::pair<unsigned, const TargetRegisterClass*> PhysReg =
                TLI->getRegForInlineAsmConstraint(Op.ConstraintCode,
                                                  Op.ConstraintVT);
              if (PhysReg.first == SP)
                MF->getFrameInfo()->setHasInlineAsmWithSPAdjust(true);
            }
          }
        }
      }

      // Mark values used outside their block as exported, by allocating
      // a virtual register for them.
      if (isUsedOutsideOfDefiningBlock(I))
        if (!isa<AllocaInst>(I) ||
            !StaticAllocaMap.count(cast<AllocaInst>(I)))
          InitializeRegForValue(I);

      // Collect llvm.dbg.declare information. This is done now instead of
      // during the initial isel pass through the IR so that it is done
      // in a predictable order.
      if (const DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(I)) {
        MachineModuleInfo &MMI = MF->getMMI();
        DIVariable DIVar(DI->getVariable());
        assert((!DIVar || DIVar.isVariable()) &&
          "Variable in DbgDeclareInst should be either null or a DIVariable.");
        if (MMI.hasDebugInfo() &&
            DIVar &&
            !DI->getDebugLoc().isUnknown()) {
          // Don't handle byval struct arguments or VLAs, for example.
          // Non-byval arguments are handled here (they refer to the stack
          // temporary alloca at this point).
          const Value *Address = DI->getAddress();
          if (Address) {
            if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
              Address = BCI->getOperand(0);
            if (const AllocaInst *AI = dyn_cast<AllocaInst>(Address)) {
              DenseMap<const AllocaInst *, int>::iterator SI =
                StaticAllocaMap.find(AI);
              if (SI != StaticAllocaMap.end()) { // Check for VLAs.
                int FI = SI->second;
                MMI.setVariableDbgInfo(DI->getVariable(),
                                       FI, DI->getDebugLoc());
              }
            }
          }
        }
      }
    }

  // Create an initial MachineBasicBlock for each LLVM BasicBlock in F.  This
  // also creates the initial PHI MachineInstrs, though none of the input
  // operands are populated.
  for (BB = Fn->begin(); BB != EB; ++BB) {
    MachineBasicBlock *MBB = mf.CreateMachineBasicBlock(BB);
    MBBMap[BB] = MBB;
    MF->push_back(MBB);

    // Transfer the address-taken flag. This is necessary because there could
    // be multiple MachineBasicBlocks corresponding to one BasicBlock, and only
    // the first one should be marked.
    if (BB->hasAddressTaken())
      MBB->setHasAddressTaken();

    // Create Machine PHI nodes for LLVM PHI nodes, lowering them as
    // appropriate.
    for (BasicBlock::const_iterator I = BB->begin();
         const PHINode *PN = dyn_cast<PHINode>(I); ++I) {
      if (PN->use_empty()) continue;

      // Skip empty types
      if (PN->getType()->isEmptyTy())
        continue;

      DebugLoc DL = PN->getDebugLoc();
      unsigned PHIReg = ValueMap[PN];
      assert(PHIReg && "PHI node does not have an assigned virtual register!");

      SmallVector<EVT, 4> ValueVTs;
      ComputeValueVTs(*TLI, PN->getType(), ValueVTs);
      for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) {
        EVT VT = ValueVTs[vti];
        unsigned NumRegisters = TLI->getNumRegisters(Fn->getContext(), VT);
        const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
        for (unsigned i = 0; i != NumRegisters; ++i)
          BuildMI(MBB, DL, TII->get(TargetOpcode::PHI), PHIReg + i);
        PHIReg += NumRegisters;
      }
    }
  }

  // Mark landing pad blocks.
  for (BB = Fn->begin(); BB != EB; ++BB)
    if (const InvokeInst *Invoke = dyn_cast<InvokeInst>(BB->getTerminator()))
      MBBMap[Invoke->getSuccessor(1)]->setIsLandingPad();
}
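// A worked stand-alone example (the numbers are hypothetical) of the
// static-alloca sizing above: for `alloca i32, i32 4` the element alloc size
// is 4 and the array count is 4, so the frame object is 16 bytes; a
// zero-sized alloca still gets one byte so every object has a distinct slot.
#include <cstdint>

static uint64_t frameObjectSize(uint64_t TyAllocSize, uint64_t ArrayCount) {
  uint64_t Size = TyAllocSize * ArrayCount; // total allocated size
  return Size == 0 ? 1 : Size;              // don't create zero-sized objects
}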
MachineBasicBlock *
MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
  // Splitting the critical edge to a landing pad block is non-trivial. Don't do
  // it in this generic function.
  if (Succ->isEHPad())
    return nullptr;

  MachineFunction *MF = getParent();
  DebugLoc DL;  // FIXME: this is nowhere

  // Performance might be harmed on HW that implements branching using exec mask
  // where both sides of the branches are always executed.
  if (MF->getTarget().requiresStructuredCFG())
    return nullptr;

  // We may need to update this's terminator, but we can't do that if
  // AnalyzeBranch fails. If this uses a jump table, we won't touch it.
  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
  SmallVector<MachineOperand, 4> Cond;
  if (TII->AnalyzeBranch(*this, TBB, FBB, Cond))
    return nullptr;

  // Avoid bugpoint weirdness: A block may end with a conditional branch but
  // jump to the same MBB in either case. We have duplicate CFG edges in that
  // case that we can't handle. Since this never happens in properly optimized
  // code, just skip those edges.
  if (TBB && TBB == FBB) {
    DEBUG(dbgs() << "Won't split critical edge after degenerate BB#"
                 << getNumber() << '\n');
    return nullptr;
  }

  MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock();
  MF->insert(std::next(MachineFunction::iterator(this)), NMBB);
  DEBUG(dbgs() << "Splitting critical edge:"
        " BB#" << getNumber()
        << " -- BB#" << NMBB->getNumber()
        << " -- BB#" << Succ->getNumber() << '\n');

  LiveIntervals *LIS = P->getAnalysisIfAvailable<LiveIntervals>();
  SlotIndexes *Indexes = P->getAnalysisIfAvailable<SlotIndexes>();
  if (LIS)
    LIS->insertMBBInMaps(NMBB);
  else if (Indexes)
    Indexes->insertMBBInMaps(NMBB);

  // On some targets like Mips, branches may kill virtual registers. Make sure
  // that LiveVariables is properly updated after updateTerminator replaces the
  // terminators.
  LiveVariables *LV = P->getAnalysisIfAvailable<LiveVariables>();

  // Collect a list of virtual registers killed by the terminators.
  SmallVector<unsigned, 4> KilledRegs;
  if (LV)
    for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
         I != E; ++I) {
      MachineInstr *MI = &*I;
      for (MachineInstr::mop_iterator OI = MI->operands_begin(),
           OE = MI->operands_end(); OI != OE; ++OI) {
        if (!OI->isReg() || OI->getReg() == 0 ||
            !OI->isUse() || !OI->isKill() || OI->isUndef())
          continue;
        unsigned Reg = OI->getReg();
        if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
            LV->getVarInfo(Reg).removeKill(MI)) {
          KilledRegs.push_back(Reg);
          DEBUG(dbgs() << "Removing terminator kill: " << *MI);
          OI->setIsKill(false);
        }
      }
    }

  SmallVector<unsigned, 4> UsedRegs;
  if (LIS) {
    for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
         I != E; ++I) {
      MachineInstr *MI = &*I;

      for (MachineInstr::mop_iterator OI = MI->operands_begin(),
           OE = MI->operands_end(); OI != OE; ++OI) {
        if (!OI->isReg() || OI->getReg() == 0)
          continue;

        unsigned Reg = OI->getReg();
        if (std::find(UsedRegs.begin(), UsedRegs.end(), Reg) == UsedRegs.end())
          UsedRegs.push_back(Reg);
      }
    }
  }

  ReplaceUsesOfBlockWith(Succ, NMBB);

  // If updateTerminator() removes instructions, we need to remove them from
  // SlotIndexes.
  SmallVector<MachineInstr*, 4> Terminators;
  if (Indexes) {
    for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
         I != E; ++I)
      Terminators.push_back(&*I);
  }

  updateTerminator();

  if (Indexes) {
    SmallVector<MachineInstr*, 4> NewTerminators;
    for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
         I != E; ++I)
      NewTerminators.push_back(&*I);

    for (SmallVectorImpl<MachineInstr*>::iterator I = Terminators.begin(),
        E = Terminators.end(); I != E; ++I) {
      if (std::find(NewTerminators.begin(), NewTerminators.end(), *I) ==
          NewTerminators.end())
       Indexes->removeMachineInstrFromMaps(*I);
    }
  }

  // Insert unconditional "jump Succ" instruction in NMBB if necessary.
  NMBB->addSuccessor(Succ);
  if (!NMBB->isLayoutSuccessor(Succ)) {
    Cond.clear();
    TII->InsertBranch(*NMBB, Succ, nullptr, Cond, DL);

    if (Indexes) {
      for (instr_iterator I = NMBB->instr_begin(), E = NMBB->instr_end();
           I != E; ++I) {
        // Some instructions may have been moved to NMBB by updateTerminator(),
        // so we first remove any instruction that already has an index.
        if (Indexes->hasIndex(&*I))
          Indexes->removeMachineInstrFromMaps(&*I);
        Indexes->insertMachineInstrInMaps(&*I);
      }
    }
  }

  // Fix PHI nodes in Succ so they refer to NMBB instead of this
  for (MachineBasicBlock::instr_iterator
         i = Succ->instr_begin(),e = Succ->instr_end();
       i != e && i->isPHI(); ++i)
    for (unsigned ni = 1, ne = i->getNumOperands(); ni != ne; ni += 2)
      if (i->getOperand(ni+1).getMBB() == this)
        i->getOperand(ni+1).setMBB(NMBB);

  // Inherit live-ins from the successor
  for (const auto &LI : Succ->liveins())
    NMBB->addLiveIn(LI);

  // Update LiveVariables.
  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
  if (LV) {
    // Restore kills of virtual registers that were killed by the terminators.
    while (!KilledRegs.empty()) {
      unsigned Reg = KilledRegs.pop_back_val();
      for (instr_iterator I = instr_end(), E = instr_begin(); I != E;) {
        if (!(--I)->addRegisterKilled(Reg, TRI, /* addIfNotFound= */ false))
          continue;
        if (TargetRegisterInfo::isVirtualRegister(Reg))
          LV->getVarInfo(Reg).Kills.push_back(&*I);
        DEBUG(dbgs() << "Restored terminator kill: " << *I);
        break;
      }
    }
    // Update relevant live-through information.
    LV->addNewBlock(NMBB, this, Succ);
  }

  if (LIS) {
    // After splitting the edge and updating SlotIndexes, live intervals may be
    // in one of two situations, depending on whether this block was the last in
    // the function. If the original block was the last in the function, all
    // live intervals will end prior to the beginning of the new split block. If
    // the original block was not at the end of the function, all live intervals
    // will extend to the end of the new split block.

    bool isLastMBB =
      std::next(MachineFunction::iterator(NMBB)) == getParent()->end();

    SlotIndex StartIndex = Indexes->getMBBEndIdx(this);
    SlotIndex PrevIndex = StartIndex.getPrevSlot();
    SlotIndex EndIndex = Indexes->getMBBEndIdx(NMBB);

    // Find the registers used from NMBB in PHIs in Succ.
    SmallSet<unsigned, 8> PHISrcRegs;
    for (MachineBasicBlock::instr_iterator
         I = Succ->instr_begin(), E = Succ->instr_end();
         I != E && I->isPHI(); ++I) {
      for (unsigned ni = 1, ne = I->getNumOperands(); ni != ne; ni += 2) {
        if (I->getOperand(ni+1).getMBB() == NMBB) {
          MachineOperand &MO = I->getOperand(ni);
          unsigned Reg = MO.getReg();
          PHISrcRegs.insert(Reg);
          if (MO.isUndef())
            continue;

          LiveInterval &LI = LIS->getInterval(Reg);
          VNInfo *VNI = LI.getVNInfoAt(PrevIndex);
          assert(VNI &&
                 "PHI sources should be live out of their predecessors.");
          LI.addSegment(LiveInterval::Segment(StartIndex, EndIndex, VNI));
        }
      }
    }

    MachineRegisterInfo *MRI = &getParent()->getRegInfo();
    for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
      unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
      if (PHISrcRegs.count(Reg) || !LIS->hasInterval(Reg))
        continue;

      LiveInterval &LI = LIS->getInterval(Reg);
      if (!LI.liveAt(PrevIndex))
        continue;

      bool isLiveOut = LI.liveAt(LIS->getMBBStartIdx(Succ));
      if (isLiveOut && isLastMBB) {
        VNInfo *VNI = LI.getVNInfoAt(PrevIndex);
        assert(VNI && "LiveInterval should have VNInfo where it is live.");
        LI.addSegment(LiveInterval::Segment(StartIndex, EndIndex, VNI));
      } else if (!isLiveOut && !isLastMBB) {
        LI.removeSegment(StartIndex, EndIndex);
      }
    }

    // Update all intervals for registers whose uses may have been modified by
    // updateTerminator().
    LIS->repairIntervalsInRange(this, getFirstTerminator(), end(), UsedRegs);
  }

  if (MachineDominatorTree *MDT =
      P->getAnalysisIfAvailable<MachineDominatorTree>())
    MDT->recordSplitCriticalEdge(this, Succ, NMBB);

  if (MachineLoopInfo *MLI = P->getAnalysisIfAvailable<MachineLoopInfo>())
    if (MachineLoop *TIL = MLI->getLoopFor(this)) {
      // If one or the other blocks were not in a loop, the new block is not
      // either, and thus LI doesn't need to be updated.
      if (MachineLoop *DestLoop = MLI->getLoopFor(Succ)) {
        if (TIL == DestLoop) {
          // Both in the same loop, the NMBB joins loop.
          DestLoop->addBasicBlockToLoop(NMBB, MLI->getBase());
        } else if (TIL->contains(DestLoop)) {
          // Edge from an outer loop to an inner loop.  Add to the outer loop.
          TIL->addBasicBlockToLoop(NMBB, MLI->getBase());
        } else if (DestLoop->contains(TIL)) {
          // Edge from an inner loop to an outer loop.  Add to the outer loop.
          DestLoop->addBasicBlockToLoop(NMBB, MLI->getBase());
        } else {
          // Edge from two loops with no containment relation.  Because these
          // are natural loops, we know that the destination block must be the
          // header of its loop (adding a branch into a loop elsewhere would
          // create an irreducible loop).
          assert(DestLoop->getHeader() == Succ &&
                 "Should not create irreducible loops!");
          if (MachineLoop *P = DestLoop->getParentLoop())
            P->addBasicBlockToLoop(NMBB, MLI->getBase());
        }
      }
    }

  return NMBB;
}
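// An illustrative caller (hypothetical pass code, not from this file): the
// edge Pred -> Succ is critical when Pred has several successors and Succ
// has several predecessors; only then does splitting insert a new block.
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/Pass.h"

static llvm::MachineBasicBlock *splitIfCritical(llvm::MachineBasicBlock *Pred,
                                                llvm::MachineBasicBlock *Succ,
                                                llvm::Pass *P) {
  if (Pred->succ_size() > 1 && Succ->pred_size() > 1)
    return Pred->SplitCriticalEdge(Succ, P); // nullptr if the split failed
  return nullptr;
}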
bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
  Instructions.clear();
  Blocks.clear();
  LiveMaskQueries.clear();
  LowerToCopyInstrs.clear();
  CallingConv = MF.getFunction().getCallingConv();

  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();

  TII = ST.getInstrInfo();
  TRI = &TII->getRegisterInfo();
  MRI = &MF.getRegInfo();
  LIS = &getAnalysis<LiveIntervals>();

  char GlobalFlags = analyzeFunction(MF);
  unsigned LiveMaskReg = 0;
  if (!(GlobalFlags & StateWQM)) {
    lowerLiveMaskQueries(AMDGPU::EXEC);
    if (!(GlobalFlags & StateWWM))
      return !LiveMaskQueries.empty();
  } else {
    // Store a copy of the original live mask when required
    MachineBasicBlock &Entry = MF.front();
    MachineBasicBlock::iterator EntryMI = Entry.getFirstNonPHI();

    if (GlobalFlags & StateExact || !LiveMaskQueries.empty()) {
      LiveMaskReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
      MachineInstr *MI = BuildMI(Entry, EntryMI, DebugLoc(),
                                 TII->get(AMDGPU::COPY), LiveMaskReg)
                             .addReg(AMDGPU::EXEC);
      LIS->InsertMachineInstrInMaps(*MI);
    }

    lowerLiveMaskQueries(LiveMaskReg);

    if (GlobalFlags == StateWQM) {
      // For a shader that needs only WQM, we can just set it once.
      BuildMI(Entry, EntryMI, DebugLoc(), TII->get(AMDGPU::S_WQM_B64),
              AMDGPU::EXEC)
          .addReg(AMDGPU::EXEC);

      lowerCopyInstrs();
      // EntryMI may become invalid here
      return true;
    }
  }

  LLVM_DEBUG(printInfo());

  lowerCopyInstrs();

  // Handle the general case
  for (auto BII : Blocks)
    processBlock(*BII.first, LiveMaskReg, BII.first == &*MF.begin());

  // Physical registers like SCC aren't tracked by default anyway, so just
  // removing the ranges we computed is the simplest option for maintaining
  // the analysis results.
  LIS->removeRegUnit(*MCRegUnitIterator(AMDGPU::SCC, TRI));

  return true;
}
// Scan instructions to determine which ones require an Exact execmask and
// which ones seed WQM requirements.
char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
                                       std::vector<WorkItem> &Worklist) {
  char GlobalFlags = 0;
  bool WQMOutputs = MF.getFunction().hasFnAttribute("amdgpu-ps-wqm-outputs");
  SmallVector<MachineInstr *, 4> SetInactiveInstrs;

  // We need to visit the basic blocks in reverse post-order so that we visit
  // defs before uses, in particular so that we don't accidentally mark an
  // instruction as needing e.g. WQM before visiting it and realizing it needs
  // WQM disabled.
  ReversePostOrderTraversal<MachineFunction *> RPOT(&MF);
  for (auto BI = RPOT.begin(), BE = RPOT.end(); BI != BE; ++BI) {
    MachineBasicBlock &MBB = **BI;
    BlockInfo &BBI = Blocks[&MBB];

    for (auto II = MBB.begin(), IE = MBB.end(); II != IE; ++II) {
      MachineInstr &MI = *II;
      InstrInfo &III = Instructions[&MI];
      unsigned Opcode = MI.getOpcode();
      char Flags = 0;

      if (TII->isWQM(Opcode)) {
        // Sampling instructions don't need to produce results for all pixels
        // in a quad, they just require all inputs of a quad to have been
        // computed for derivatives.
        markInstructionUses(MI, StateWQM, Worklist);
        GlobalFlags |= StateWQM;
        continue;
      } else if (Opcode == AMDGPU::WQM) {
        // The WQM intrinsic requires its output to have all the helper lanes
        // correct, so we need it to be in WQM.
        Flags = StateWQM;
        LowerToCopyInstrs.push_back(&MI);
      } else if (Opcode == AMDGPU::WWM) {
        // The WWM intrinsic doesn't make the same guarantee, and it also needs
        // to be executed in WQM or Exact so that its copy doesn't clobber
        // inactive lanes.
        markInstructionUses(MI, StateWWM, Worklist);
        GlobalFlags |= StateWWM;
        LowerToCopyInstrs.push_back(&MI);
        continue;
      } else if (Opcode == AMDGPU::V_SET_INACTIVE_B32 ||
                 Opcode == AMDGPU::V_SET_INACTIVE_B64) {
        III.Disabled = StateWWM;
        MachineOperand &Inactive = MI.getOperand(2);
        if (Inactive.isReg()) {
          if (Inactive.isUndef()) {
            LowerToCopyInstrs.push_back(&MI);
          } else {
            unsigned Reg = Inactive.getReg();
            if (TargetRegisterInfo::isVirtualRegister(Reg)) {
              for (MachineInstr &DefMI : MRI->def_instructions(Reg))
                markInstruction(DefMI, StateWWM, Worklist);
            }
          }
        }
        SetInactiveInstrs.push_back(&MI);
        continue;
      } else if (TII->isDisableWQM(MI)) {
        BBI.Needs |= StateExact;
        if (!(BBI.InNeeds & StateExact)) {
          BBI.InNeeds |= StateExact;
          Worklist.push_back(&MBB);
        }
        GlobalFlags |= StateExact;
        III.Disabled = StateWQM | StateWWM;
        continue;
      } else {
        if (Opcode == AMDGPU::SI_PS_LIVE) {
          LiveMaskQueries.push_back(&MI);
        } else if (WQMOutputs) {
          // The function is in machine SSA form, which means that physical
          // VGPRs correspond to shader inputs and outputs. Inputs are
          // only used, outputs are only defined.
          for (const MachineOperand &MO : MI.defs()) {
            if (!MO.isReg())
              continue;

            unsigned Reg = MO.getReg();

            if (!TRI->isVirtualRegister(Reg) &&
                TRI->hasVGPRs(TRI->getPhysRegClass(Reg))) {
              Flags = StateWQM;
              break;
            }
          }
        }

        if (!Flags)
          continue;
      }

      markInstruction(MI, Flags, Worklist);
      GlobalFlags |= Flags;
    }
  }

  // Make sure that any SET_INACTIVE instructions are computed in WQM if WQM is
  // ever used anywhere in the function. This implements the corresponding
  // semantics of @llvm.amdgcn.set.inactive.
  if (GlobalFlags & StateWQM) {
    for (MachineInstr *MI : SetInactiveInstrs)
      markInstruction(*MI, StateWQM, Worklist);
  }

  return GlobalFlags;
}
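// A tiny stand-alone illustration of the reverse post-order property relied
// on above: for the acyclic CFG A -> {B, C} -> D, one RPO is A, B, C, D, so
// every block is visited after all of its forward predecessors, and defs are
// seen before uses.
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/CodeGen/MachineFunction.h"

static void visitInRPO(llvm::MachineFunction &MF) {
  llvm::ReversePostOrderTraversal<llvm::MachineFunction *> RPOT(&MF);
  for (llvm::MachineBasicBlock *MBB : RPOT)
    (void)MBB; // process MBB here, after its forward predecessors
}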
MipsMCInstLower::MipsMCInstLower(Mangler *mang, const MachineFunction &mf,
                                 MipsAsmPrinter &asmprinter)
  : Ctx(mf.getContext()), Mang(mang), AsmPrinter(asmprinter) {}
BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
  BitVector Reserved(getNumRegs());
  const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();

  // Set the stack-pointer register and its aliases as reserved.
  for (MCSubRegIterator I(X86::RSP, this, /*IncludeSelf=*/true); I.isValid();
       ++I)
    Reserved.set(*I);

  // Set the instruction pointer register and its aliases as reserved.
  for (MCSubRegIterator I(X86::RIP, this, /*IncludeSelf=*/true); I.isValid();
       ++I)
    Reserved.set(*I);

  // Set the frame-pointer register and its aliases as reserved if needed.
  if (TFI->hasFP(MF)) {
    for (MCSubRegIterator I(X86::RBP, this, /*IncludeSelf=*/true); I.isValid();
         ++I)
      Reserved.set(*I);
  }

  // Set the base-pointer register and its aliases as reserved if needed.
  if (hasBasePointer(MF)) {
    CallingConv::ID CC = MF.getFunction()->getCallingConv();
    const uint32_t *RegMask = getCallPreservedMask(MF, CC);
    if (MachineOperand::clobbersPhysReg(RegMask, getBaseRegister()))
      report_fatal_error(
        "Stack realignment in presence of dynamic allocas is not supported with"
        "this calling convention.");

    unsigned BasePtr = getX86SubSuperRegister(getBaseRegister(), MVT::i64,
                                              false);
    for (MCSubRegIterator I(BasePtr, this, /*IncludeSelf=*/true);
         I.isValid(); ++I)
      Reserved.set(*I);
  }

  // Mark the segment registers as reserved.
  Reserved.set(X86::CS);
  Reserved.set(X86::SS);
  Reserved.set(X86::DS);
  Reserved.set(X86::ES);
  Reserved.set(X86::FS);
  Reserved.set(X86::GS);

  // Mark the floating point stack registers as reserved.
  for (unsigned n = 0; n != 8; ++n)
    Reserved.set(X86::ST0 + n);

  // Reserve the registers that only exist in 64-bit mode.
  if (!Is64Bit) {
    // These 8-bit registers are part of the x86-64 extension even though their
    // super-registers are old 32-bits.
    Reserved.set(X86::SIL);
    Reserved.set(X86::DIL);
    Reserved.set(X86::BPL);
    Reserved.set(X86::SPL);

    for (unsigned n = 0; n != 8; ++n) {
      // R8, R9, ...
      for (MCRegAliasIterator AI(X86::R8 + n, this, true); AI.isValid(); ++AI)
        Reserved.set(*AI);

      // XMM8, XMM9, ...
      for (MCRegAliasIterator AI(X86::XMM8 + n, this, true); AI.isValid(); ++AI)
        Reserved.set(*AI);
    }
  }
  if (!Is64Bit || !MF.getSubtarget<X86Subtarget>().hasAVX512()) {
    for (unsigned n = 16; n != 32; ++n) {
      for (MCRegAliasIterator AI(X86::XMM0 + n, this, true); AI.isValid(); ++AI)
        Reserved.set(*AI);
    }
  }

  // @LOCALMOD-START
  const X86Subtarget& Subtarget = MF.getSubtarget<X86Subtarget>();
  const bool RestrictR15 = FlagRestrictR15;
  assert(FlagUseZeroBasedSandbox || RestrictR15);
  if (Subtarget.isTargetNaCl64()) {
    if (RestrictR15) {
      Reserved.set(X86::R15);
      Reserved.set(X86::R15D);
      Reserved.set(X86::R15W);
      Reserved.set(X86::R15B);
    }
    Reserved.set(X86::RBP);
    Reserved.set(X86::EBP);
    Reserved.set(X86::BP);
    Reserved.set(X86::BPL);
    const bool RestrictR11 = FlagHideSandboxBase && !FlagUseZeroBasedSandbox;
    if (RestrictR11) {
      // Restrict r11 so that it can be used for indirect jump
      // sequences that don't leak the sandbox base address onto the
      // stack.
      Reserved.set(X86::R11);
      Reserved.set(X86::R11D);
      Reserved.set(X86::R11W);
      Reserved.set(X86::R11B);
    }
  }
  // @LOCALMOD-END

  return Reserved;
}
unsigned PatmosFrameLowering::assignFrameObjects(MachineFunction &MF,
                                                 bool UseStackCache) const
{
  MachineFrameInfo &MFI = *MF.getFrameInfo();
  PatmosMachineFunctionInfo &PMFI = *MF.getInfo<PatmosMachineFunctionInfo>();
  unsigned maxFrameSize = MFI.getMaxCallFrameSize();

  // defaults to false (all objects are assigned to shadow stack)
  BitVector SCFIs(MFI.getObjectIndexEnd());

  if (UseStackCache) {
    assignFIsToStackCache(MF, SCFIs);
  }

  // assign new offsets to FIs

  // next stack slot in stack cache
  unsigned int SCOffset = 0;
  // next stack slot in shadow stack
  // Also reserve space for the call frame if we do not use a frame pointer.
  // This must be in sync with PatmosRegisterInfo::eliminateCallFramePseudoInstr
  unsigned int SSOffset = (hasFP(MF) ? 0 : maxFrameSize);

  DEBUG(dbgs() << "PatmosSC: " << MF.getFunction()->getName() << "\n");
  DEBUG(MFI.print(MF, dbgs()));
  for(unsigned FI = 0, FIe = MFI.getObjectIndexEnd(); FI != FIe; FI++) {
    if (MFI.isDeadObjectIndex(FI))
      continue;

    unsigned FIalignment = MFI.getObjectAlignment(FI);
    int64_t FIsize = MFI.getObjectSize(FI);

    if (FIsize > INT_MAX) {
      report_fatal_error("Frame objects with size > INT_MAX not supported.");
    }

    // be sure to catch some special stack objects not expected for Patmos
    assert(!MFI.isFixedObjectIndex(FI) && !MFI.isObjectPreAllocated(FI));

    // assigned to stack cache or shadow stack?
    if (SCFIs[FI]) {
      // alignment
      unsigned int next_SCOffset = align(SCOffset, FIalignment);

      // check if the FI still fits into the SC
      if (align(next_SCOffset + FIsize, getEffectiveStackCacheBlockSize()) <=
          getEffectiveStackCacheSize()) {
        DEBUG(dbgs() << "PatmosSC: FI: " << FI << " on SC: " << next_SCOffset
                    << "(" << MFI.getObjectOffset(FI) << ")\n");

        // reassign stack offset
        MFI.setObjectOffset(FI, next_SCOffset);

        // reserve space on the stack cache
        SCOffset = next_SCOffset + FIsize;

        // the FI is assigned to the SC, process next FI
        continue;
      }
      else {
        // the FI did not fit in the SC -- fall through and put it on the
        // shadow stack
        SCFIs[FI] = false;
        FIsNotFitSC++;
      }
    }

    // assign the FI to the shadow stack
    {
      assert(!SCFIs[FI]);

      // alignment
      SSOffset = align(SSOffset, FIalignment);

      DEBUG(dbgs() << "PatmosSC: FI: " << FI << " on SS: " << SSOffset
                   << "(" << MFI.getObjectOffset(FI) << ")\n");

      // reassign stack offset
      MFI.setObjectOffset(FI, SSOffset);

      // reserve space on the shadow stack
      SSOffset += FIsize;
    }
  }

  // align stack frame on stack cache
  unsigned stackCacheSize = align(SCOffset, getEffectiveStackCacheBlockSize());

  assert(stackCacheSize <= getEffectiveStackCacheSize());

  // align shadow stack. call arguments are already included in SSOffset
  unsigned stackSize = align(SSOffset, getStackAlignment());

  // update offset of fixed objects
  for(int FI = MFI.getObjectIndexBegin(), FIe = 0; FI != FIe; FI++) {
    // reassign stack offset
    MFI.setObjectOffset(FI, MFI.getObjectOffset(FI) + stackSize);
  }

  DEBUG(MFI.print(MF, dbgs()));

  // store assignment information
  PMFI.setStackCacheReservedBytes(stackCacheSize);
  PMFI.setStackCacheFIs(SCFIs);

  PMFI.setStackReservedBytes(stackSize);
  MFI.setStackSize(stackSize);

  return stackSize;
}
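assignFrameObjects leans on an align helper for every offset it hands out. A sketch under the assumption that it is the usual round-up-to-multiple; the Patmos sources define their own equivalent:
// Assumed semantics of align(): round Offset up to the next multiple of
// Alignment, e.g. align(13, 8) == 16 and align(16, 8) == 16.
static unsigned align(unsigned Offset, unsigned Alignment) {
  return (Offset + Alignment - 1) / Alignment * Alignment;
}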
unsigned X86RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
  const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
  return TFI->hasFP(MF) ? FramePtr : StackPtr;
}
bool PatmosFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
  return !MF.getFrameInfo()->hasVarSizedObjects();
}
ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf)
    : ScheduleDAG(mf), BB(nullptr), DAG(nullptr),
      InstrItins(mf.getSubtarget().getInstrItineraryData()) {}
void XCoreRegisterInfo::emitPrologue(MachineFunction &MF) const {
  MachineBasicBlock &MBB = MF.front();   // Prolog goes in entry BB
  MachineBasicBlock::iterator MBBI = MBB.begin();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineModuleInfo *MMI = MFI->getMachineModuleInfo();
  XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
  DebugLoc dl = (MBBI != MBB.end() ?
                 MBBI->getDebugLoc() : DebugLoc::getUnknownLoc());

  bool FP = hasFP(MF);

  // Work out frame sizes.
  int FrameSize = MFI->getStackSize();

  assert(FrameSize % 4 == 0 && "Misaligned frame size");

  FrameSize /= 4;

  bool isU6 = isImmU6(FrameSize);

  if (!isU6 && !isImmU16(FrameSize)) {
    // FIXME could emit multiple instructions.
    cerr << "emitPrologue Frame size too big: " << FrameSize << "\n";
    abort();
  }
  bool emitFrameMoves = needsFrameMoves(MF);

  // Do we need to allocate space on the stack?
  if (FrameSize) {
    bool saveLR = XFI->getUsesLR();
    bool LRSavedOnEntry = false;
    int Opcode;
    if (saveLR && (MFI->getObjectOffset(XFI->getLRSpillSlot()) == 0)) {
      Opcode = (isU6) ? XCore::ENTSP_u6 : XCore::ENTSP_lu6;
      MBB.addLiveIn(XCore::LR);
      saveLR = false;
      LRSavedOnEntry = true;
    } else {
      Opcode = (isU6) ? XCore::EXTSP_u6 : XCore::EXTSP_lu6;
    }
    BuildMI(MBB, MBBI, dl, TII.get(Opcode)).addImm(FrameSize);
    
    if (emitFrameMoves) {
      std::vector<MachineMove> &Moves = MMI->getFrameMoves();
      
      // Show update of SP.
      unsigned FrameLabelId = MMI->NextLabelID();
      BuildMI(MBB, MBBI, dl, TII.get(XCore::DBG_LABEL)).addImm(FrameLabelId);
      
      MachineLocation SPDst(MachineLocation::VirtualFP);
      MachineLocation SPSrc(MachineLocation::VirtualFP, -FrameSize * 4);
      Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc));
      
      if (LRSavedOnEntry) {
        MachineLocation CSDst(MachineLocation::VirtualFP, 0);
        MachineLocation CSSrc(XCore::LR);
        Moves.push_back(MachineMove(FrameLabelId, CSDst, CSSrc));
      }
    }
    if (saveLR) {
      int LRSpillOffset = MFI->getObjectOffset(XFI->getLRSpillSlot());
      storeToStack(MBB, MBBI, XCore::LR, LRSpillOffset + FrameSize*4, dl);
      MBB.addLiveIn(XCore::LR);
      
      if (emitFrameMoves) {
        unsigned SaveLRLabelId = MMI->NextLabelID();
        BuildMI(MBB, MBBI, dl, TII.get(XCore::DBG_LABEL)).addImm(SaveLRLabelId);
        MachineLocation CSDst(MachineLocation::VirtualFP, LRSpillOffset);
        MachineLocation CSSrc(XCore::LR);
        MMI->getFrameMoves().push_back(MachineMove(SaveLRLabelId,
                                                   CSDst, CSSrc));
      }
    }
  }
  
  if (FP) {
    // Save R10 to the stack.
    int FPSpillOffset = MFI->getObjectOffset(XFI->getFPSpillSlot());
    storeToStack(MBB, MBBI, XCore::R10, FPSpillOffset + FrameSize*4, dl);
    // R10 is live-in. It is killed at the spill.
    MBB.addLiveIn(XCore::R10);
    if (emitFrameMoves) {
      unsigned SaveR10LabelId = MMI->NextLabelID();
      BuildMI(MBB, MBBI, dl, TII.get(XCore::DBG_LABEL)).addImm(SaveR10LabelId);
      MachineLocation CSDst(MachineLocation::VirtualFP, FPSpillOffset);
      MachineLocation CSSrc(XCore::R10);
      MMI->getFrameMoves().push_back(MachineMove(SaveR10LabelId,
                                                 CSDst, CSSrc));
    }
    // Set the FP from the SP.
    unsigned FramePtr = XCore::R10;
    BuildMI(MBB, MBBI, dl, TII.get(XCore::LDAWSP_ru6), FramePtr)
      .addImm(0);
    if (emitFrameMoves) {
      // Show FP is now valid.
      unsigned FrameLabelId = MMI->NextLabelID();
      BuildMI(MBB, MBBI, dl, TII.get(XCore::DBG_LABEL)).addImm(FrameLabelId);
      MachineLocation SPDst(FramePtr);
      MachineLocation SPSrc(MachineLocation::VirtualFP);
      MMI->getFrameMoves().push_back(MachineMove(FrameLabelId, SPDst, SPSrc));
    }
  }
  
  if (emitFrameMoves) {
    // Frame moves for callee saved.
    std::vector<MachineMove> &Moves = MMI->getFrameMoves();
    std::vector<std::pair<unsigned, CalleeSavedInfo> >&SpillLabels =
        XFI->getSpillLabels();
    for (unsigned I = 0, E = SpillLabels.size(); I != E; ++I) {
      unsigned SpillLabel = SpillLabels[I].first;
      CalleeSavedInfo &CSI = SpillLabels[I].second;
      int Offset = MFI->getObjectOffset(CSI.getFrameIdx());
      unsigned Reg = CSI.getReg();
      MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
      MachineLocation CSSrc(Reg);
      Moves.push_back(MachineMove(SpillLabel, CSDst, CSSrc));
    }
  }
}
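The prologue above picks between the u6 and lu6 encodings with isImmU6 and isImmU16. A sketch of these predicates, assuming they are plain unsigned range checks on the scaled frame size (the XCore backend provides its own definitions):
// Assumed range checks for the 6-bit and 16-bit unsigned immediate forms.
static inline bool isImmU6(unsigned val)  { return val < (1 << 6); }
static inline bool isImmU16(unsigned val) { return val < (1 << 16); }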
bool MSP430RegisterInfo::hasReservedCallFrame(const MachineFunction &MF) const {
  return !MF.getFrameInfo()->hasVarSizedObjects();
}
bool IRTranslator::runOnMachineFunction(MachineFunction &MF) {
  const Function &F = *MF.getFunction();
  if (F.empty())
    return false;
  CLI = MF.getSubtarget().getCallLowering();
  MIRBuilder.setMF(MF);
  EntryBuilder.setMF(MF);
  MRI = &MF.getRegInfo();
  DL = &F.getParent()->getDataLayout();
  TPC = &getAnalysis<TargetPassConfig>();

  assert(PendingPHIs.empty() && "stale PHIs");

  // Setup the arguments.
  MachineBasicBlock &MBB = getOrCreateBB(F.front());
  MIRBuilder.setMBB(MBB);
  SmallVector<unsigned, 8> VRegArgs;
  for (const Argument &Arg: F.args())
    VRegArgs.push_back(getOrCreateVReg(Arg));
  bool Succeeded = CLI->lowerFormalArguments(MIRBuilder, F, VRegArgs);
  if (!Succeeded) {
    if (!TPC->isGlobalISelAbortEnabled()) {
      MIRBuilder.getMF().getProperties().set(
          MachineFunctionProperties::Property::FailedISel);
      return false;
    }
    report_fatal_error("Unable to lower arguments");
  }

  // Now that we've got the ABI handling code, it's safe to set a location for
  // any Constants we find in the IR.
  if (MBB.empty())
    EntryBuilder.setMBB(MBB);
  else
    EntryBuilder.setInstr(MBB.back(), /* Before */ false);

  for (const BasicBlock &BB: F) {
    MachineBasicBlock &MBB = getOrCreateBB(BB);
    // Set the insertion point of all the following translations to
    // the end of this basic block.
    MIRBuilder.setMBB(MBB);

    for (const Instruction &Inst: BB) {
      bool Succeeded = translate(Inst);
      if (!Succeeded) {
        if (TPC->isGlobalISelAbortEnabled())
          reportTranslationError(Inst, "unable to translate instruction");
        MF.getProperties().set(MachineFunctionProperties::Property::FailedISel);
        break;
      }
    }
  }

  finalizeFunction();

  // Now that the MachineFrameInfo has been configured, no further changes to
  // the reserved registers are possible.
  MRI->freezeReservedRegs(MF);

  return false;
}
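getOrCreateBB above is the memoizing lookup from IR basic blocks to machine basic blocks. A sketch of its likely shape, assuming a DenseMap member (here called BBToMBB) owned by the translator:
// Sketch: one MachineBasicBlock per IR block, created lazily and cached.
MachineBasicBlock &IRTranslator::getOrCreateBB(const BasicBlock &BB) {
  MachineBasicBlock *&MBB = BBToMBB[&BB]; // assumed DenseMap member
  if (!MBB) {
    MBB = MF->CreateMachineBasicBlock(&BB);
    MF->push_back(MBB);
  }
  return *MBB;
}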
void MCS51FrameLowering::emitEpilogue(MachineFunction &MF,
                                       MachineBasicBlock &MBB) const {
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  MCS51MachineFunctionInfo *MCS51FI = MF.getInfo<MCS51MachineFunctionInfo>();
  const MCS51InstrInfo &TII =
    *static_cast<const MCS51InstrInfo*>(MF.getTarget().getInstrInfo());

  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  unsigned RetOpcode = MBBI->getOpcode();
  DebugLoc DL = MBBI->getDebugLoc();

  switch (RetOpcode) {
  case MCS51::RET:
  case MCS51::RETI: break;  // These are ok
  default:
    llvm_unreachable("Can only insert epilog into returning blocks");
  }

  // Get the number of bytes to allocate from the FrameInfo
  uint64_t StackSize = MFI->getStackSize();
  unsigned CSSize = MCS51FI->getCalleeSavedFrameSize();
  uint64_t NumBytes = 0;

  if (hasFP(MF)) {
    // Calculate required stack adjustment
    uint64_t FrameSize = StackSize - 2;
    NumBytes = FrameSize - CSSize;

    // pop FPW.
    BuildMI(MBB, MBBI, DL, TII.get(MCS51::POP16r), MCS51::FPW);
  } else
    NumBytes = StackSize - CSSize;

  // Skip the callee-saved pop instructions.
  while (MBBI != MBB.begin()) {
    MachineBasicBlock::iterator PI = prior(MBBI);
    unsigned Opc = PI->getOpcode();
    if (Opc != MCS51::POP16r && !PI->isTerminator())
      break;
    --MBBI;
  }

  DL = MBBI->getDebugLoc();

  // If there is an ADD16ri or SUB16ri of SPW immediately before this
  // instruction, merge the two instructions.
  //if (NumBytes || MFI->hasVarSizedObjects())
  //  mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes);

  if (MFI->hasVarSizedObjects()) {
    BuildMI(MBB, MBBI, DL,
            TII.get(MCS51::MOV16rr), MCS51::SPW).addReg(MCS51::FPW);
    if (CSSize) {
      MachineInstr *MI =
        BuildMI(MBB, MBBI, DL,
                TII.get(MCS51::SUB16ri), MCS51::SPW)
        .addReg(MCS51::SPW).addImm(CSSize);
      // The SRW implicit def is dead.
      MI->getOperand(3).setIsDead();
    }
  } else {
    // adjust stack pointer back: SPW += numbytes
    if (NumBytes) {
      MachineInstr *MI =
        BuildMI(MBB, MBBI, DL, TII.get(MCS51::ADD16ri), MCS51::SPW)
        .addReg(MCS51::SPW).addImm(NumBytes);
      // The SRW implicit def is dead.
      MI->getOperand(3).setIsDead();
    }
  }
}
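A worked example of the FP-case arithmetic above, assuming the usual 2-byte slot for the saved frame pointer:
// With StackSize = 12, hasFP(MF) true and CSSize = 4: the saved FPW slot
// accounts for 2 bytes, so FrameSize = 12 - 2 = 10 and NumBytes = 10 - 4 = 6.
// The epilogue thus frees the 6 bytes of locals with 'add SPW, 6', then
// lets the callee-saved pops and 'pop FPW' reclaim the remaining 6 bytes.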
void MipsRegisterInfo::adjustMipsStackFrame(MachineFunction &MF) const
{
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
  const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
  unsigned StackAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
  unsigned RegSize = Subtarget.isGP32bit() ? 4 : 8;
  bool HasGP = MipsFI->needGPSaveRestore();

  // Min and Max CSI FrameIndex.
  int MinCSFI = -1, MaxCSFI = -1; 

  // See the description at MipsMachineFunction.h
  int TopCPUSavedRegOff = -1, TopFPUSavedRegOff = -1;

  // Replace the dummy '0' SPOffset with the negative offsets, as explained
  // in LowerFormalArguments. Leaving them at '0' for a while is necessary
  // to keep calculateFrameObjectOffsets from applying its own layout to
  // the stack frame.
  MipsFI->adjustLoadArgsFI(MFI);
  MipsFI->adjustStoreVarArgsFI(MFI); 

  // It happens that the default stack frame allocation order does not
  // directly map to the convention used for Mips, so we must fix it. We
  // move the callee-saved register slots after the local variables area,
  // as described in the stack frame above.
  unsigned CalleeSavedAreaSize = 0;
  if (!CSI.empty()) {
    MinCSFI = CSI[0].getFrameIdx();
    MaxCSFI = CSI[CSI.size()-1].getFrameIdx();
  }
  for (unsigned i = 0, e = CSI.size(); i != e; ++i)
    CalleeSavedAreaSize += MFI->getObjectAlignment(CSI[i].getFrameIdx());

  unsigned StackOffset = HasGP ? (MipsFI->getGPStackOffset() + RegSize)
                               : (Subtarget.isABI_O32() ? 16 : 0);

  // Adjust local variables. They should come on the stack right
  // after the arguments.
  int LastOffsetFI = -1;
  for (int i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
    if (i >= MinCSFI && i <= MaxCSFI)
      continue;
    if (MFI->isDeadObjectIndex(i))
      continue;
    unsigned Offset = 
      StackOffset + MFI->getObjectOffset(i) - CalleeSavedAreaSize;
    if (LastOffsetFI == -1)
      LastOffsetFI = i;
    if (Offset > MFI->getObjectOffset(LastOffsetFI))
      LastOffsetFI = i;
    MFI->setObjectOffset(i, Offset);
  }

  // Adjust CPU Callee Saved Registers Area. Registers RA and FP must
  // be saved in this CPU Area. This whole area must be aligned to the 
  // default Stack Alignment requirements.
  if (LastOffsetFI >= 0)
    StackOffset = MFI->getObjectOffset(LastOffsetFI)+ 
                  MFI->getObjectSize(LastOffsetFI);
  StackOffset = ((StackOffset+StackAlign-1)/StackAlign*StackAlign);

  for (unsigned i = 0, e = CSI.size(); i != e ; ++i) {
    if (CSI[i].getRegClass() != Mips::CPURegsRegisterClass)
      break;
    MFI->setObjectOffset(CSI[i].getFrameIdx(), StackOffset);
    TopCPUSavedRegOff = StackOffset;
    StackOffset += MFI->getObjectAlignment(CSI[i].getFrameIdx());
  }

  // Stack locations for FP and RA. If only one of them is used, 
  // the space must be allocated for both, otherwise no space at all.
  if (hasFP(MF) || MFI->hasCalls()) {
    // FP stack location
    MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize, true), 
                         StackOffset);
    MipsFI->setFPStackOffset(StackOffset);
    TopCPUSavedRegOff = StackOffset;
    StackOffset += RegSize;

    // RA stack location
    MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize, true),
                         StackOffset);
    MipsFI->setRAStackOffset(StackOffset);
    StackOffset += RegSize;

    if (MFI->hasCalls())
      TopCPUSavedRegOff += RegSize;
  }

  StackOffset = ((StackOffset+StackAlign-1)/StackAlign*StackAlign);
  
  // Adjust FPU Callee Saved Registers Area. This Area must be 
  // aligned to the default Stack Alignment requirements.
  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
    if (CSI[i].getRegClass() == Mips::CPURegsRegisterClass)
      continue;
    MFI->setObjectOffset(CSI[i].getFrameIdx(), StackOffset);
    TopFPUSavedRegOff = StackOffset;
    StackOffset += MFI->getObjectAlignment(CSI[i].getFrameIdx());
  }
  StackOffset = ((StackOffset+StackAlign-1)/StackAlign*StackAlign);

  // Update frame info
  MFI->setStackSize(StackOffset);

  // Recalculate the final tops offset. The final values must be '0'
  // if there isn't a callee saved register for CPU or FPU, otherwise
  // a negative offset is needed.
  if (TopCPUSavedRegOff >= 0)
    MipsFI->setCPUTopSavedRegOff(TopCPUSavedRegOff-StackOffset);

  if (TopFPUSavedRegOff >= 0)
    MipsFI->setFPUTopSavedRegOff(TopFPUSavedRegOff-StackOffset);
}
void MCS51FrameLowering::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator I) const {
  const MCS51InstrInfo &TII =
    *static_cast<const MCS51InstrInfo*>(MF.getTarget().getInstrInfo());
  unsigned StackAlign = getStackAlignment();

  if (!hasReservedCallFrame(MF)) {
    // If the stack pointer can be changed after prologue, turn the
    // adjcallstackup instruction into a 'sub SPW, <amt>' and the
    // adjcallstackdown instruction into 'add SPW, <amt>'
    // TODO: consider using push / pop instead of sub + store / add
    MachineInstr *Old = I;
    uint64_t Amount = Old->getOperand(0).getImm();
    if (Amount != 0) {
      // We need to keep the stack aligned properly.  To do this, we round the
      // amount of space needed for the outgoing arguments up to the next
      // alignment boundary.
      Amount = (Amount+StackAlign-1)/StackAlign*StackAlign;

      MachineInstr *New = 0;
      if (Old->getOpcode() == TII.getCallFrameSetupOpcode()) {
        New = BuildMI(MF, Old->getDebugLoc(),
                      TII.get(MCS51::SUB16ri), MCS51::SPW)
          .addReg(MCS51::SPW).addImm(Amount);
      } else {
        assert(Old->getOpcode() == TII.getCallFrameDestroyOpcode());
        // factor out the amount the callee already popped.
        uint64_t CalleeAmt = Old->getOperand(1).getImm();
        Amount -= CalleeAmt;
        if (Amount)
          New = BuildMI(MF, Old->getDebugLoc(),
                        TII.get(MCS51::ADD16ri), MCS51::SPW)
            .addReg(MCS51::SPW).addImm(Amount);
      }

      if (New) {
        // The SRW implicit def is dead.
        New->getOperand(3).setIsDead();

        // Replace the pseudo instruction with a new instruction...
        MBB.insert(I, New);
      }
    }
  } else if (I->getOpcode() == TII.getCallFrameDestroyOpcode()) {
    // If we are performing frame pointer elimination and if the callee pops
    // something off the stack pointer, add it back.
    if (uint64_t CalleeAmt = I->getOperand(1).getImm()) {
      MachineInstr *Old = I;
      MachineInstr *New =
        BuildMI(MF, Old->getDebugLoc(), TII.get(MCS51::SUB16ri),
                MCS51::SPW).addReg(MCS51::SPW).addImm(CalleeAmt);
      // The SRW implicit def is dead.
      New->getOperand(3).setIsDead();

      MBB.insert(I, New);
    }
  }

  MBB.erase(I);
}
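A worked example of the adjustment arithmetic, assuming getStackAlignment() returns 2:
// A call site passing 5 bytes of arguments: Amount = 5 rounds up to
// (5 + 2 - 1) / 2 * 2 = 6, so the call-frame setup pseudo becomes
// 'sub SPW, 6'. If the matching destroy pseudo carries CalleeAmt = 2
// (the callee already popped 2 bytes), only 6 - 2 = 4 bytes remain to
// release: 'add SPW, 4'.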
/// EliminateUnconditionalJumpsToTop - Move blocks which unconditionally jump
/// to the loop top to the top of the loop so that they have a fall through.
/// This can introduce a branch on entry to the loop, but it can eliminate a
/// branch within the loop. See the @simple case in
/// test/CodeGen/X86/loop-blocks.ll for an example of this.
bool CodePlacementOpt::EliminateUnconditionalJumpsToTop(MachineFunction &MF,
                                                        MachineLoop *L) {
  bool Changed = false;
  MachineBasicBlock *TopMBB = L->getTopBlock();

  bool BotHasFallthrough = HasFallthrough(L->getBottomBlock());

  if (TopMBB == MF.begin() ||
      HasAnalyzableTerminator(prior(MachineFunction::iterator(TopMBB)))) {
  new_top:
    for (MachineBasicBlock::pred_iterator PI = TopMBB->pred_begin(),
         PE = TopMBB->pred_end(); PI != PE; ++PI) {
      MachineBasicBlock *Pred = *PI;
      if (Pred == TopMBB) continue;
      if (HasFallthrough(Pred)) continue;
      if (!L->contains(Pred)) continue;

      // Verify that we can analyze all the loop entry edges before beginning
      // any changes which will require us to be able to analyze them.
      if (Pred == MF.begin())
        continue;
      if (!HasAnalyzableTerminator(Pred))
        continue;
      if (!HasAnalyzableTerminator(prior(MachineFunction::iterator(Pred))))
        continue;

      // Move the block.
      DEBUG(dbgs() << "CGP: Moving blocks starting at BB#" << Pred->getNumber()
                   << " to top of loop.\n");
      Changed = true;

      // Move it and all the blocks that can reach it via fallthrough edges
      // exclusively, to keep existing fallthrough edges intact.
      MachineFunction::iterator Begin = Pred;
      MachineFunction::iterator End = llvm::next(Begin);
      while (Begin != MF.begin()) {
        MachineFunction::iterator Prior = prior(Begin);
        if (Prior == MF.begin())
          break;
        // Stop when a non-fallthrough edge is found.
        if (!HasFallthrough(Prior))
          break;
        // Stop if a block which could fall-through out of the loop is found.
        if (Prior->isSuccessor(End))
          break;
        // If we've reached the top, stop scanning.
        if (Prior == MachineFunction::iterator(TopMBB)) {
          // We know top currently has a fall through (because we just checked
          // it) which would be lost if we do the transformation, so it isn't
          // worthwhile to do the transformation unless it would expose a new
          // fallthrough edge.
          if (!Prior->isSuccessor(End))
            goto next_pred;
          // Otherwise we can stop scanning and proceed to move the blocks.
          break;
        }
        // If we hit a switch or something complicated, don't move anything
        // for this predecessor.
        if (!HasAnalyzableTerminator(prior(MachineFunction::iterator(Prior))))
          break;
        // Ok, the block prior to Begin will be moved along with the rest.
        // Extend the range to include it.
        Begin = Prior;
        ++NumIntraMoved;
      }

      // Move the blocks.
      Splice(MF, TopMBB, Begin, End);

      // Update TopMBB.
      TopMBB = L->getTopBlock();

      // We have a new loop top. Iterate on it. We shouldn't have to do this
      // too many times if BranchFolding has done a reasonable job.
      goto new_top;
    next_pred:;
    }
  }

  // If the loop previously didn't exit with a fall-through and it now does,
  // we eliminated a branch.
  if (Changed &&
      !BotHasFallthrough &&
      HasFallthrough(L->getBottomBlock())) {
    ++NumIntraElim;
  }

  return Changed;
}
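The effect on block layout, sketched for a single moved predecessor:
//   before:  entry -> TopMBB ... Pred --jmp--> TopMBB
//   after:   entry -> Pred, TopMBB ...   (Pred now falls through to TopMBB)
//
// The unconditional jump inside the loop disappears; at worst the entry
// edge into the moved loop top needs a branch it did not need before.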
void MCS51FrameLowering::emitPrologue(MachineFunction &MF) const {
  MachineBasicBlock &MBB = MF.front();   // Prolog goes in entry BB
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MCS51MachineFunctionInfo *MCS51FI = MF.getInfo<MCS51MachineFunctionInfo>();
  const MCS51InstrInfo &TII =
    *static_cast<const MCS51InstrInfo*>(MF.getTarget().getInstrInfo());

  MachineBasicBlock::iterator MBBI = MBB.begin();
  DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();

  // Get the number of bytes to allocate from the FrameInfo.
  uint64_t StackSize = MFI->getStackSize();

  uint64_t NumBytes = 0;
  if (hasFP(MF)) {
    // Calculate required stack adjustment
    uint64_t FrameSize = StackSize - 2;
    NumBytes = FrameSize - MCS51FI->getCalleeSavedFrameSize();

    // Get the offset of the stack slot for the EBP register... which is
    // guaranteed to be the last slot by processFunctionBeforeFrameFinalized.
    // Update the frame offset adjustment.
    MFI->setOffsetAdjustment(-NumBytes);

    // Save FPW into the appropriate stack slot...
    BuildMI(MBB, MBBI, DL, TII.get(MCS51::PUSH16r))
      .addReg(MCS51::FPW, RegState::Kill);

    // Update FPW with the new base value...
    BuildMI(MBB, MBBI, DL, TII.get(MCS51::MOV16rr), MCS51::FPW)
      .addReg(MCS51::SPW);

    // Mark the FramePtr as live-in in every block except the entry.
    for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
         I != E; ++I)
      I->addLiveIn(MCS51::FPW);

  } else
    NumBytes = StackSize - MCS51FI->getCalleeSavedFrameSize();

  // Skip the callee-saved push instructions.
  while (MBBI != MBB.end() && (MBBI->getOpcode() == MCS51::PUSH16r))
    ++MBBI;

  if (MBBI != MBB.end())
    DL = MBBI->getDebugLoc();

  if (NumBytes) { // adjust stack pointer: SPW -= numbytes
    // If there is an SUB16ri of SPW immediately before this instruction, merge
    // the two.
    //NumBytes -= mergeSPUpdates(MBB, MBBI, true);
    // If there is an ADD16ri or SUB16ri of SPW immediately after this
    // instruction, merge the two instructions.
    // mergeSPUpdatesDown(MBB, MBBI, &NumBytes);

    if (NumBytes) {
      MachineInstr *MI =
        BuildMI(MBB, MBBI, DL, TII.get(MCS51::SUB16ri), MCS51::SPW)
        .addReg(MCS51::SPW).addImm(NumBytes);
      // The SRW implicit def is dead.
      MI->getOperand(3).setIsDead();
    }
  }
}
bool AArch64ConditionOptimizer::runOnMachineFunction(MachineFunction &MF) {
  DEBUG(dbgs() << "********** AArch64 Conditional Compares **********\n"
               << "********** Function: " << MF.getName() << '\n');
  TII = MF.getSubtarget().getInstrInfo();
  DomTree = &getAnalysis<MachineDominatorTree>();

  bool Changed = false;

  // Visit blocks in dominator tree pre-order. The pre-order enables multiple
  // cmp-conversions from the same head block.
  // Note that updateDomTree() modifies the children of the DomTree node
  // currently being visited. The df_iterator supports that; it doesn't look at
  // child_begin() / child_end() until after a node has been visited.
  for (MachineDomTreeNode *I : depth_first(DomTree)) {
    MachineBasicBlock *HBB = I->getBlock();

    SmallVector<MachineOperand, 4> HeadCond;
    MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
    if (TII->AnalyzeBranch(*HBB, TBB, FBB, HeadCond)) {
      continue;
    }

    // Equivalence check is to skip loops.
    if (!TBB || TBB == HBB) {
      continue;
    }

    SmallVector<MachineOperand, 4> TrueCond;
    MachineBasicBlock *TBB_TBB = nullptr, *TBB_FBB = nullptr;
    if (TII->AnalyzeBranch(*TBB, TBB_TBB, TBB_FBB, TrueCond)) {
      continue;
    }

    MachineInstr *HeadCmpMI = findSuitableCompare(HBB);
    if (!HeadCmpMI) {
      continue;
    }

    MachineInstr *TrueCmpMI = findSuitableCompare(TBB);
    if (!TrueCmpMI) {
      continue;
    }

    AArch64CC::CondCode HeadCmp;
    if (HeadCond.empty() || !parseCond(HeadCond, HeadCmp)) {
      continue;
    }

    AArch64CC::CondCode TrueCmp;
    if (TrueCond.empty() || !parseCond(TrueCond, TrueCmp)) {
      continue;
    }

    const int HeadImm = (int)HeadCmpMI->getOperand(2).getImm();
    const int TrueImm = (int)TrueCmpMI->getOperand(2).getImm();

    DEBUG(dbgs() << "Head branch:\n");
    DEBUG(dbgs() << "\tcondition: "
          << AArch64CC::getCondCodeName(HeadCmp) << '\n');
    DEBUG(dbgs() << "\timmediate: " << HeadImm << '\n');

    DEBUG(dbgs() << "True branch:\n");
    DEBUG(dbgs() << "\tcondition: "
          << AArch64CC::getCondCodeName(TrueCmp) << '\n');
    DEBUG(dbgs() << "\timmediate: " << TrueImm << '\n');

    if (((HeadCmp == AArch64CC::GT && TrueCmp == AArch64CC::LT) ||
         (HeadCmp == AArch64CC::LT && TrueCmp == AArch64CC::GT)) &&
        std::abs(TrueImm - HeadImm) == 2) {
      // This branch transforms machine instructions that correspond to
      //
      // 1) (a > {TrueImm} && ...) || (a < {HeadImm} && ...)
      // 2) (a < {TrueImm} && ...) || (a > {HeadImm} && ...)
      //
      // into
      //
      // 1) (a >= {NewImm} && ...) || (a <= {NewImm} && ...)
      // 2) (a <= {NewImm} && ...) || (a >= {NewImm} && ...)

      CmpInfo HeadCmpInfo = adjustCmp(HeadCmpMI, HeadCmp);
      CmpInfo TrueCmpInfo = adjustCmp(TrueCmpMI, TrueCmp);
      if (std::get<0>(HeadCmpInfo) == std::get<0>(TrueCmpInfo) &&
          std::get<1>(HeadCmpInfo) == std::get<1>(TrueCmpInfo)) {
        modifyCmp(HeadCmpMI, HeadCmpInfo);
        modifyCmp(TrueCmpMI, TrueCmpInfo);
        Changed = true;
      }
    } else if (((HeadCmp == AArch64CC::GT && TrueCmp == AArch64CC::GT) ||
                (HeadCmp == AArch64CC::LT && TrueCmp == AArch64CC::LT)) &&
                std::abs(TrueImm - HeadImm) == 1) {
      // This branch transforms machine instructions that correspond to
      //
      // 1) (a > {TrueImm} && ...) || (a > {HeadImm} && ...)
      // 2) (a < {TrueImm} && ...) || (a < {HeadImm} && ...)
      //
      // into
      //
      // 1) (a <= {NewImm} && ...) || (a >  {NewImm} && ...)
      // 2) (a <  {NewImm} && ...) || (a >= {NewImm} && ...)

      // GT -> GE transformation increases immediate value, so picking the
      // smaller one; LT -> LE decreases immediate value so invert the choice.
      bool adjustHeadCond = (HeadImm < TrueImm);
      if (HeadCmp == AArch64CC::LT) {
          adjustHeadCond = !adjustHeadCond;
      }

      if (adjustHeadCond) {
        Changed |= adjustTo(HeadCmpMI, HeadCmp, TrueCmpMI, TrueImm);
      } else {
        Changed |= adjustTo(TrueCmpMI, TrueCmp, HeadCmpMI, HeadImm);
      }
    }
    // Other transformation cases almost never occur due to generation of < or >
    // comparisons instead of <= and >=.
  }

  return Changed;
}
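A worked example of the first (GT/LT) case above, derived from the comments in adjustCmp's callers (the exact immediate adjustments are inferred from "GT -> GE increases, LT -> LE decreases"):
// Example: (a > 5 && ...) || (a < 7 && ...)
//   adjustCmp: GT 5 -> GE 6,  LT 7 -> LE 6      (|7 - 5| == 2)
//   Both adjusted compares now use the immediate 6, so after modifyCmp the
//   two 'cmp ..., #6' instructions are identical and can be shared later.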
bool MipsExpandPseudo::expandAtomicBinOp(MachineBasicBlock &BB,
                                         MachineBasicBlock::iterator I,
                                         MachineBasicBlock::iterator &NMBBI,
                                         unsigned Size) {
  MachineFunction *MF = BB.getParent();

  const bool ArePtrs64bit = STI->getABI().ArePtrs64bit();
  DebugLoc DL = I->getDebugLoc();

  unsigned LL, SC, ZERO, BEQ;

  if (Size == 4) {
    if (STI->inMicroMipsMode()) {
      LL = STI->hasMips32r6() ? Mips::LL_MMR6 : Mips::LL_MM;
      SC = STI->hasMips32r6() ? Mips::SC_MMR6 : Mips::SC_MM;
      BEQ = STI->hasMips32r6() ? Mips::BEQC_MMR6 : Mips::BEQ_MM;
    } else {
      LL = STI->hasMips32r6()
               ? (ArePtrs64bit ? Mips::LL64_R6 : Mips::LL_R6)
               : (ArePtrs64bit ? Mips::LL64 : Mips::LL);
      SC = STI->hasMips32r6()
               ? (ArePtrs64bit ? Mips::SC64_R6 : Mips::SC_R6)
               : (ArePtrs64bit ? Mips::SC64 : Mips::SC);
      BEQ = Mips::BEQ;
    }

    ZERO = Mips::ZERO;
  } else {
    LL = STI->hasMips64r6() ? Mips::LLD_R6 : Mips::LLD;
    SC = STI->hasMips64r6() ? Mips::SCD_R6 : Mips::SCD;
    ZERO = Mips::ZERO_64;
    BEQ = Mips::BEQ64;
  }

  unsigned OldVal = I->getOperand(0).getReg();
  unsigned Ptr = I->getOperand(1).getReg();
  unsigned Incr = I->getOperand(2).getReg();
  unsigned Scratch = I->getOperand(3).getReg();

  unsigned Opcode = 0;
  unsigned OR = 0;
  unsigned AND = 0;
  unsigned NOR = 0;
  bool IsNand = false;
  switch (I->getOpcode()) {
  case Mips::ATOMIC_LOAD_ADD_I32_POSTRA:
    Opcode = Mips::ADDu;
    break;
  case Mips::ATOMIC_LOAD_SUB_I32_POSTRA:
    Opcode = Mips::SUBu;
    break;
  case Mips::ATOMIC_LOAD_AND_I32_POSTRA:
    Opcode = Mips::AND;
    break;
  case Mips::ATOMIC_LOAD_OR_I32_POSTRA:
    Opcode = Mips::OR;
    break;
  case Mips::ATOMIC_LOAD_XOR_I32_POSTRA:
    Opcode = Mips::XOR;
    break;
  case Mips::ATOMIC_LOAD_NAND_I32_POSTRA:
    IsNand = true;
    AND = Mips::AND;
    NOR = Mips::NOR;
    break;
  case Mips::ATOMIC_SWAP_I32_POSTRA:
    OR = Mips::OR;
    break;
  case Mips::ATOMIC_LOAD_ADD_I64_POSTRA:
    Opcode = Mips::DADDu;
    break;
  case Mips::ATOMIC_LOAD_SUB_I64_POSTRA:
    Opcode = Mips::DSUBu;
    break;
  case Mips::ATOMIC_LOAD_AND_I64_POSTRA:
    Opcode = Mips::AND64;
    break;
  case Mips::ATOMIC_LOAD_OR_I64_POSTRA:
    Opcode = Mips::OR64;
    break;
  case Mips::ATOMIC_LOAD_XOR_I64_POSTRA:
    Opcode = Mips::XOR64;
    break;
  case Mips::ATOMIC_LOAD_NAND_I64_POSTRA:
    IsNand = true;
    AND = Mips::AND64;
    NOR = Mips::NOR64;
    break;
  case Mips::ATOMIC_SWAP_I64_POSTRA:
    OR = Mips::OR64;
    break;
  default:
    llvm_unreachable("Unknown pseudo atomic!");
  }

  const BasicBlock *LLVM_BB = BB.getBasicBlock();
  MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MachineFunction::iterator It = ++BB.getIterator();
  MF->insert(It, loopMBB);
  MF->insert(It, exitMBB);

  exitMBB->splice(exitMBB->begin(), &BB, std::next(I), BB.end());
  exitMBB->transferSuccessorsAndUpdatePHIs(&BB);

  BB.addSuccessor(loopMBB, BranchProbability::getOne());
  loopMBB->addSuccessor(exitMBB);
  loopMBB->addSuccessor(loopMBB);
  loopMBB->normalizeSuccProbs();

  BuildMI(loopMBB, DL, TII->get(LL), OldVal).addReg(Ptr).addImm(0);
  assert((OldVal != Ptr) && "Clobbered the wrong ptr reg!");
  assert((OldVal != Incr) && "Clobbered the wrong reg!");
  if (Opcode) {
    BuildMI(loopMBB, DL, TII->get(Opcode), Scratch).addReg(OldVal).addReg(Incr);
  } else if (IsNand) {
    assert(AND && NOR &&
           "Unknown nand instruction for atomic pseudo expansion");
    BuildMI(loopMBB, DL, TII->get(AND), Scratch).addReg(OldVal).addReg(Incr);
    BuildMI(loopMBB, DL, TII->get(NOR), Scratch).addReg(ZERO).addReg(Scratch);
  } else {
    assert(OR && "Unknown instruction for atomic pseudo expansion!");
    BuildMI(loopMBB, DL, TII->get(OR), Scratch).addReg(Incr).addReg(ZERO);
  }

  BuildMI(loopMBB, DL, TII->get(SC), Scratch).addReg(Scratch).addReg(Ptr).addImm(0);
  BuildMI(loopMBB, DL, TII->get(BEQ)).addReg(Scratch).addReg(ZERO).addMBB(loopMBB);

  NMBBI = BB.end();
  I->eraseFromParent();

  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *loopMBB);
  computeAndAddLiveIns(LiveRegs, *exitMBB);

  return true;
}
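For reference, the instruction sequence the BuildMI calls above produce for, e.g., ATOMIC_LOAD_ADD_I32_POSTRA on pre-R6 MIPS32:
//   loopMBB:
//     ll    OldVal,  0(Ptr)
//     addu  Scratch, OldVal, Incr
//     sc    Scratch, 0(Ptr)
//     beq   Scratch, $zero, loopMBB   # sc wrote 0: the update was not
//                                     # atomic, retry the ll/sc sequence
//   exitMBB: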