Example #1
void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
                                        MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.begin();
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  const Function *Fn = MF.getFunction();
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineModuleInfo &MMI = MF.getMMI();
  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  bool needsFrameMoves = MMI.hasDebugInfo() || Fn->needsUnwindTableEntry();
  bool HasFP = hasFP(MF);

  // Debug location must be unknown since the first debug location is used
  // to determine the end of the prologue.
  DebugLoc DL;

  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
    return;

  int NumBytes = (int)MFI->getStackSize();
  if (!AFI->hasStackFrame()) {
    assert(!HasFP && "unexpected function without stack frame but with FP");

    // All of the stack allocation is for locals.
    AFI->setLocalStackSize(NumBytes);

    // Label used to tie together the PROLOG_LABEL and the MachineMoves.
    MCSymbol *FrameLabel = MMI.getContext().createTempSymbol();

    // REDZONE: If the stack size is less than 128 bytes, we don't need
    // to actually allocate.
    if (NumBytes && !canUseRedZone(MF)) {
      emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
                      MachineInstr::FrameSetup);

      // Encode the stack size of the leaf function.
      unsigned CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createDefCfaOffset(FrameLabel, -NumBytes));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    } else if (NumBytes) {
      ++NumRedZoneFunctions;
    }

    return;
  }

  // Only set up FP if we actually need to.
  int FPOffset = 0;
  if (HasFP)
    // Frame pointer is fp = sp - 16.
    FPOffset = AFI->getCalleeSavedStackSize() - 16;

  // Move past the saves of the callee-saved registers.
  MachineBasicBlock::iterator End = MBB.end();
  while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup))
    ++MBBI;
  NumBytes -= AFI->getCalleeSavedStackSize();
  assert(NumBytes >= 0 && "Negative stack allocation size!?");
  if (HasFP) {
    // Issue    sub fp, sp, FPOffset or
    //          mov fp,sp          when FPOffset is zero.
    // Note: All stores of callee-saved registers are marked as "FrameSetup".
    // This code marks the instruction(s) that set the FP also.
    emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP, FPOffset, TII,
                    MachineInstr::FrameSetup);
  }

  // All of the remaining stack allocations are for locals.
  AFI->setLocalStackSize(NumBytes);

  // Allocate space for the rest of the frame.

  const unsigned Alignment = MFI->getMaxAlignment();
  const bool NeedsRealignment = RegInfo->needsStackRealignment(MF);
  unsigned scratchSPReg = AArch64::SP;
  if (NumBytes && NeedsRealignment) {
    scratchSPReg = findScratchNonCalleeSaveRegister(&MBB);
    assert(scratchSPReg != AArch64::NoRegister);
  }

  // If we're a leaf function, try using the red zone.
  if (NumBytes && !canUseRedZone(MF))
    // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
    // the correct value here, as NumBytes also includes padding bytes,
    // which shouldn't be counted here.
    emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP, -NumBytes, TII,
                    MachineInstr::FrameSetup);

  if (NumBytes && NeedsRealignment) {
    const unsigned NrBitsToZero = countTrailingZeros(Alignment);
    assert(NrBitsToZero > 1);
    assert(scratchSPReg != AArch64::SP);

    // SUB X9, SP, NumBytes
    //   -- X9 is a temporary register, so it shouldn't contain any live data
    //   -- here and is free to use. This is already produced by emitFrameOffset
    //   -- above.
    // AND SP, X9, 0b11111...0000
    // The logical immediates have a non-trivial encoding. The following
    // formula computes the encoded immediate with all ones but
    // NrBitsToZero zero bits as least significant bits.
    uint32_t andMaskEncoded =
        (1                   <<12) // = N
      | ((64-NrBitsToZero)   << 6) // immr
      | ((64-NrBitsToZero-1) << 0) // imms
      ;
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
      .addReg(scratchSPReg, RegState::Kill)
      .addImm(andMaskEncoded);
  }

  // If we need a base pointer, set it up here. It's whatever the value of the
  // stack pointer is at this point. Any variable size objects will be allocated
  // after this, so we can still use the base pointer to reference locals.
  //
  // FIXME: Clarify FrameSetup flags here.
  // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
  // needed.
  if (RegInfo->hasBasePointer(MF)) {
    TII->copyPhysReg(MBB, MBBI, DL, RegInfo->getBaseRegister(), AArch64::SP,
                     false);
  }

  if (needsFrameMoves) {
    const DataLayout &TD = MF.getDataLayout();
    const int StackGrowth = -TD.getPointerSize(0);
    unsigned FramePtr = RegInfo->getFrameRegister(MF);
    // An example of the prologue:
    //
    //     .globl __foo
    //     .align 2
    //  __foo:
    // Ltmp0:
    //     .cfi_startproc
    //     .cfi_personality 155, ___gxx_personality_v0
    // Leh_func_begin:
    //     .cfi_lsda 16, Lexception33
    //
    //     stp  xa,bx, [sp, -#offset]!
    //     ...
    //     stp  x28, x27, [sp, #offset-32]
    //     stp  fp, lr, [sp, #offset-16]
    //     add  fp, sp, #offset - 16
    //     sub  sp, sp, #1360
    //
    // The Stack:
    //       +-------------------------------------------+
    // 10000 | ........ | ........ | ........ | ........ |
    // 10004 | ........ | ........ | ........ | ........ |
    //       +-------------------------------------------+
    // 10008 | ........ | ........ | ........ | ........ |
    // 1000c | ........ | ........ | ........ | ........ |
    //       +===========================================+
    // 10010 |                X28 Register               |
    // 10014 |                X28 Register               |
    //       +-------------------------------------------+
    // 10018 |                X27 Register               |
    // 1001c |                X27 Register               |
    //       +===========================================+
    // 10020 |                Frame Pointer              |
    // 10024 |                Frame Pointer              |
    //       +-------------------------------------------+
    // 10028 |                Link Register              |
    // 1002c |                Link Register              |
    //       +===========================================+
    // 10030 | ........ | ........ | ........ | ........ |
    // 10034 | ........ | ........ | ........ | ........ |
    //       +-------------------------------------------+
    // 10038 | ........ | ........ | ........ | ........ |
    // 1003c | ........ | ........ | ........ | ........ |
    //       +-------------------------------------------+
    //
    //     [sp] = 10030        ::    >>initial value<<
    //     sp = 10020          ::  stp fp, lr, [sp, #-16]!
    //     fp = sp == 10020    ::  mov fp, sp
    //     [sp] == 10020       ::  stp x28, x27, [sp, #-16]!
    //     sp == 10010         ::    >>final value<<
    //
    // The frame pointer (w29) points to address 10020. If we use an offset of
    // '16' from 'w29', we get the CFI offsets of -8 for w30, -16 for w29, -24
    // for w27, and -32 for w28:
    //
    //  Ltmp1:
    //     .cfi_def_cfa w29, 16
    //  Ltmp2:
    //     .cfi_offset w30, -8
    //  Ltmp3:
    //     .cfi_offset w29, -16
    //  Ltmp4:
    //     .cfi_offset w27, -24
    //  Ltmp5:
    //     .cfi_offset w28, -32

    if (HasFP) {
      // Define the current CFA rule to use the provided FP.
      unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true);
      unsigned CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createDefCfa(nullptr, Reg, 2 * StackGrowth));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    } else {
      // Encode the stack size of the leaf function.
      unsigned CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createDefCfaOffset(nullptr, -MFI->getStackSize()));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    }

    // Now emit the moves for whatever callee saved regs we have (including FP,
    // LR if those are saved).
    emitCalleeSavedFrameMoves(MBB, MBBI);
  }
}
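
The realignment path above packs the AND mask into AArch64's logical-immediate
format. A standalone sketch (the helper name and the worked value are mine,
not from the pass) reproducing the N/immr/imms formula:

#include <cassert>
#include <cstdint>

// Encode a 64-bit "all ones except the NrBitsToZero low bits" mask in the
// AArch64 logical-immediate format, exactly as the prologue code above does.
static uint32_t encodeRealignMask(unsigned Alignment) {
  unsigned NrBitsToZero = __builtin_ctz(Alignment); // countTrailingZeros
  assert(NrBitsToZero > 1 && "alignment must be at least 4 bytes");
  return (1u << 12)                       // N = 1: 64-bit element size
       | ((64 - NrBitsToZero) << 6)       // immr: rotate-right amount
       | ((64 - NrBitsToZero - 1) << 0);  // imms: run of 64-NrBitsToZero ones
}

// For Alignment = 16 (NrBitsToZero = 4): N = 1, immr = 60, imms = 59, so the
// encoded immediate is 0x1000 | (60 << 6) | 59 = 0x1F3B, which decodes to the
// mask 0xFFFFFFFFFFFFFFF0 -- i.e. "AND SP, X9, #~15".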
Example #2
/// LowerAtomicPHINode - Lower the PHI node at the top of the specified block,
/// under the assumption that it needs to be lowered in a way that supports
/// atomic execution of PHIs.  This lowering method is always correct.
///
void PHIElimination::LowerAtomicPHINode(
                                      MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator AfterPHIsIt) {
  ++NumAtomic;
  // Unlink the PHI node from the basic block, but don't delete the PHI yet.
  MachineInstr *MPhi = MBB.remove(MBB.begin());

  unsigned NumSrcs = (MPhi->getNumOperands() - 1) / 2;
  unsigned DestReg = MPhi->getOperand(0).getReg();
  assert(MPhi->getOperand(0).getSubReg() == 0 && "Can't handle sub-reg PHIs");
  bool isDead = MPhi->getOperand(0).isDead();

  // Create a new register for the incoming PHI arguments.
  MachineFunction &MF = *MBB.getParent();
  unsigned IncomingReg = 0;
  bool reusedIncoming = false;  // Is IncomingReg reused from an earlier PHI?

  // Insert a register to register copy at the top of the current block (but
  // after any remaining phi nodes) which copies the new incoming register
  // into the phi node destination.
  const TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
  if (isSourceDefinedByImplicitDef(MPhi, MRI))
    // If all sources of a PHI node are implicit_def, just emit an
    // implicit_def instead of a copy.
    BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(),
            TII->get(TargetOpcode::IMPLICIT_DEF), DestReg);
  else {
    // Can we reuse an earlier PHI node? This only happens for critical edges,
    // typically those created by tail duplication.
    unsigned &entry = LoweredPHIs[MPhi];
    if (entry) {
      // An identical PHI node was already lowered. Reuse the incoming register.
      IncomingReg = entry;
      reusedIncoming = true;
      ++NumReused;
      DEBUG(dbgs() << "Reusing " << PrintReg(IncomingReg) << " for " << *MPhi);
    } else {
      const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(DestReg);
      entry = IncomingReg = MF.getRegInfo().createVirtualRegister(RC);
    }
    BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(),
            TII->get(TargetOpcode::COPY), DestReg)
      .addReg(IncomingReg);
  }

  // Update live variable information if there is any.
  LiveVariables *LV = getAnalysisIfAvailable<LiveVariables>();
  if (LV) {
    MachineInstr *PHICopy = prior(AfterPHIsIt);

    if (IncomingReg) {
      LiveVariables::VarInfo &VI = LV->getVarInfo(IncomingReg);

      // Increment use count of the newly created virtual register.
      VI.NumUses++;
      LV->setPHIJoin(IncomingReg);

      // When we are reusing the incoming register, it may already have been
      // killed in this block. The old kill will also have been inserted at
      // AfterPHIsIt, so it appears before the current PHICopy.
      if (reusedIncoming)
        if (MachineInstr *OldKill = VI.findKill(&MBB)) {
          DEBUG(dbgs() << "Remove old kill from " << *OldKill);
          LV->removeVirtualRegisterKilled(IncomingReg, OldKill);
          DEBUG(MBB.dump());
        }

      // Add information to LiveVariables to know that the incoming value is
      // killed.  Note that because the value is defined in several places (once
      // each for each incoming block), the "def" block and instruction fields
      // for the VarInfo is not filled in.
      LV->addVirtualRegisterKilled(IncomingReg, PHICopy);
    }

    // Since we are going to be deleting the PHI node, if it is the last use of
    // any registers, or if the value itself is dead, we need to move this
    // information over to the new copy we just inserted.
    LV->removeVirtualRegistersKilled(MPhi);

    // If the result is dead, update LV.
    if (isDead) {
      LV->addVirtualRegisterDead(DestReg, PHICopy);
      LV->removeVirtualRegisterDead(DestReg, MPhi);
    }
  }

  // Adjust the VRegPHIUseCount map to account for the removal of this PHI node.
  for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2)
    --VRegPHIUseCount[BBVRegPair(MPhi->getOperand(i+1).getMBB()->getNumber(),
                                 MPhi->getOperand(i).getReg())];

  // Now loop over all of the incoming arguments, changing them to copy into the
  // IncomingReg register in the corresponding predecessor basic block.
  SmallPtrSet<MachineBasicBlock*, 8> MBBsInsertedInto;
  for (int i = NumSrcs - 1; i >= 0; --i) {
    unsigned SrcReg = MPhi->getOperand(i*2+1).getReg();
    unsigned SrcSubReg = MPhi->getOperand(i*2+1).getSubReg();

    assert(TargetRegisterInfo::isVirtualRegister(SrcReg) &&
           "Machine PHI Operands must all be virtual registers!");

    // Get the MachineBasicBlock equivalent of the BasicBlock that is the
    // source block for this PHI entry.
    MachineBasicBlock &opBlock = *MPhi->getOperand(i*2+2).getMBB();

    // If source is defined by an implicit def, there is no need to insert a
    // copy.
    MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
    if (DefMI->isImplicitDef()) {
      ImpDefs.insert(DefMI);
      continue;
    }

    // Check to make sure we haven't already emitted the copy for this block.
    // This can happen because PHI nodes may have multiple entries for the same
    // basic block.
    if (!MBBsInsertedInto.insert(&opBlock))
      continue;  // If the copy has already been emitted, we're done.

    // Find a safe location to insert the copy, this may be the first terminator
    // in the block (or end()).
    MachineBasicBlock::iterator InsertPos =
      findPHICopyInsertPoint(&opBlock, &MBB, SrcReg);

    // Insert the copy.
    if (!reusedIncoming && IncomingReg)
      BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(),
              TII->get(TargetOpcode::COPY), IncomingReg).addReg(SrcReg, 0, SrcSubReg);

    // Now update live variable information if we have it.  Otherwise we're done.
    if (!LV) continue;

    // We want to be able to insert a kill of the register if this PHI (aka, the
    // copy we just inserted) is the last use of the source value.  Live
    // variable analysis conservatively handles this by saying that the value is
    // live until the end of the block the PHI entry lives in.  If the value
    // really is dead at the PHI copy, there will be no successor blocks which
    // have the value live-in.

    // Also check to see if this register is in use by another PHI node which
    // has not yet been eliminated.  If so, it will be killed at an appropriate
    // point later.

    // Is it used by any PHI instructions in this block?
    bool ValueIsUsed = VRegPHIUseCount[BBVRegPair(opBlock.getNumber(), SrcReg)];

    // Okay, if we now know that the value is not live out of the block, we can
    // add a kill marker in this block saying that it kills the incoming value!
    if (!ValueIsUsed && !LV->isLiveOut(SrcReg, opBlock)) {
      // In our final twist, we have to decide which instruction kills the
      // register.  In most cases this is the copy, however, the first
      // terminator instruction at the end of the block may also use the value.
      // In this case, we should mark *it* as being the killing block, not the
      // copy.
      MachineBasicBlock::iterator KillInst;
      MachineBasicBlock::iterator Term = opBlock.getFirstTerminator();
      if (Term != opBlock.end() && Term->readsRegister(SrcReg)) {
        KillInst = Term;

        // Check that no other terminators use values.
#ifndef NDEBUG
        for (MachineBasicBlock::iterator TI = llvm::next(Term);
             TI != opBlock.end(); ++TI) {
          if (TI->isDebugValue())
            continue;
          assert(!TI->readsRegister(SrcReg) &&
                 "Terminator instructions cannot use virtual registers unless"
                 "they are the first terminator in a block!");
        }
#endif
      } else if (reusedIncoming || !IncomingReg) {
        // We may have to rewind a bit if we didn't insert a copy this time.
        KillInst = Term;
        while (KillInst != opBlock.begin()) {
          --KillInst;
          if (KillInst->isDebugValue())
            continue;
          if (KillInst->readsRegister(SrcReg))
            break;
        }
      } else {
        // We just inserted this copy.
        KillInst = prior(InsertPos);
      }
      assert(KillInst->readsRegister(SrcReg) && "Cannot find kill instruction");

      // Finally, mark it killed.
      LV->addVirtualRegisterKilled(SrcReg, KillInst);

      // This vreg no longer lives all of the way through opBlock.
      unsigned opBlockNum = opBlock.getNumber();
      LV->getVarInfo(SrcReg).AliveBlocks.reset(opBlockNum);
    }
  }

  // Really delete the PHI instruction now, if it is not in the LoweredPHIs map.
  if (reusedIncoming || !IncomingReg)
    MF.DeleteMachineInstr(MPhi);
}
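
isSourceDefinedByImplicitDef, called near the top of the function, is not part
of this excerpt. A plausible sketch (an assumption, mirroring the operand
walking done by the loops above):

// PHI operands are (def, reg1, bb1, reg2, bb2, ...), so incoming registers
// sit at the odd indices -- the same i*2+1 indexing used above.
static bool isSourceDefinedByImplicitDef(const MachineInstr *MPhi,
                                         const MachineRegisterInfo *MRI) {
  for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2) {
    unsigned SrcReg = MPhi->getOperand(i).getReg();
    const MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
    if (!DefMI || !DefMI->isImplicitDef())
      return false;
  }
  return true;
}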
Example #3
/// insertCSRSpillsAndRestores - Insert spill and restore code for
/// callee saved registers used in the function, handling shrink wrapping.
///
void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
  // Get callee saved register information.
  MachineFrameInfo *MFI = Fn.getFrameInfo();
  const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();

  MFI->setCalleeSavedInfoValid(true);

  // Early exit if no callee saved registers are modified!
  if (CSI.empty())
    return;

  const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo();
  const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering();
  const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
  MachineBasicBlock::iterator I;

  if (!ShrinkWrapThisFunction) {
    // Spill using target interface.
    I = EntryBlock->begin();
    if (!TFI->spillCalleeSavedRegisters(*EntryBlock, I, CSI, TRI)) {
      for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
        // Add the callee-saved register as live-in.
        // It's killed at the spill.
        EntryBlock->addLiveIn(CSI[i].getReg());

        // Insert the spill to the stack frame.
        unsigned Reg = CSI[i].getReg();
        const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
        TII.storeRegToStackSlot(*EntryBlock, I, Reg, true,
                                CSI[i].getFrameIdx(), RC, TRI);
      }
    }

    // Restore using target interface.
    for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri) {
      MachineBasicBlock* MBB = ReturnBlocks[ri];
      I = MBB->end(); --I;

      // Skip over all terminator instructions, which are part of the return
      // sequence.
      MachineBasicBlock::iterator I2 = I;
      while (I2 != MBB->begin() && (--I2)->isTerminator())
        I = I2;

      bool AtStart = I == MBB->begin();
      MachineBasicBlock::iterator BeforeI = I;
      if (!AtStart)
        --BeforeI;

      // Restore all registers immediately before the return and any
      // terminators that precede it.
      if (!TFI->restoreCalleeSavedRegisters(*MBB, I, CSI, TRI)) {
        for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
          unsigned Reg = CSI[i].getReg();
          const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
          TII.loadRegFromStackSlot(*MBB, I, Reg,
                                   CSI[i].getFrameIdx(),
                                   RC, TRI);
          assert(I != MBB->begin() &&
                 "loadRegFromStackSlot didn't insert any code!");
          // Insert in reverse order.  loadRegFromStackSlot can insert
          // multiple instructions.
          if (AtStart)
            I = MBB->begin();
          else {
            I = BeforeI;
            ++I;
          }
        }
      }
    }
    return;
  }

  // Insert spills.
  std::vector<CalleeSavedInfo> blockCSI;
  for (CSRegBlockMap::iterator BI = CSRSave.begin(),
         BE = CSRSave.end(); BI != BE; ++BI) {
    MachineBasicBlock* MBB = BI->first;
    CSRegSet save = BI->second;

    if (save.empty())
      continue;

    blockCSI.clear();
    for (CSRegSet::iterator RI = save.begin(),
           RE = save.end(); RI != RE; ++RI) {
      blockCSI.push_back(CSI[*RI]);
    }
    assert(blockCSI.size() > 0 &&
           "Could not collect callee saved register info");

    I = MBB->begin();

    // When shrink wrapping, use stack slot stores/loads.
    for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) {
      // Add the callee-saved register as live-in.
      // It's killed at the spill.
      MBB->addLiveIn(blockCSI[i].getReg());

      // Insert the spill to the stack frame.
      unsigned Reg = blockCSI[i].getReg();
      const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
      TII.storeRegToStackSlot(*MBB, I, Reg,
                              true,
                              blockCSI[i].getFrameIdx(),
                              RC, TRI);
    }
  }

  for (CSRegBlockMap::iterator BI = CSRRestore.begin(),
         BE = CSRRestore.end(); BI != BE; ++BI) {
    MachineBasicBlock* MBB = BI->first;
    CSRegSet restore = BI->second;

    if (restore.empty())
      continue;

    blockCSI.clear();
    for (CSRegSet::iterator RI = restore.begin(),
           RE = restore.end(); RI != RE; ++RI) {
      blockCSI.push_back(CSI[*RI]);
    }
    assert(blockCSI.size() > 0 &&
           "Could not find callee saved register info");

    // If MBB is empty and needs restores, insert at the _beginning_.
    if (MBB->empty()) {
      I = MBB->begin();
    } else {
      I = MBB->end();
      --I;

      // Skip over all terminator instructions, which are part of the
      // return sequence.
      if (! I->isTerminator()) {
        ++I;
      } else {
        MachineBasicBlock::iterator I2 = I;
        while (I2 != MBB->begin() && (--I2)->isTerminator())
          I = I2;
      }
    }

    bool AtStart = I == MBB->begin();
    MachineBasicBlock::iterator BeforeI = I;
    if (!AtStart)
      --BeforeI;

    // Restore all registers immediately before the return and any
    // terminators that precede it.
    for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) {
      unsigned Reg = blockCSI[i].getReg();
      const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
      TII.loadRegFromStackSlot(*MBB, I, Reg,
                               blockCSI[i].getFrameIdx(),
                               RC, TRI);
      assert(I != MBB->begin() &&
             "loadRegFromStackSlot didn't insert any code!");
      // Insert in reverse order.  loadRegFromStackSlot can insert
      // multiple instructions.
      if (AtStart)
        I = MBB->begin();
      else {
        I = BeforeI;
        ++I;
      }
    }
  }
}
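
The iterator dance that locates the restore point (backing up over the
terminators of the return sequence) appears twice above. Factored out as a
sketch -- a hypothetical refactoring, not code from the pass:

// Position an iterator at the first terminator of the return sequence, so
// that restores land immediately before the return and any terminators that
// precede it.
static MachineBasicBlock::iterator
findRestorePoint(MachineBasicBlock *MBB) {
  if (MBB->empty())
    return MBB->begin();
  MachineBasicBlock::iterator I = MBB->end();
  --I;
  if (!I->isTerminator())
    return ++I;  // no return sequence; insert at the end of the block
  // Skip backwards over the terminator instructions of the return sequence.
  MachineBasicBlock::iterator I2 = I;
  while (I2 != MBB->begin() && (--I2)->isTerminator())
    I = I2;
  return I;
}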
Example #4
// Branch analysis.
// Note: If the condition register is set to CTR or CTR8 then this is a
// BDNZ (imm == 1) or BDZ (imm == 0) branch.
bool PPCInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
                                 MachineBasicBlock *&TBB,
                                 MachineBasicBlock *&FBB,
                                 SmallVectorImpl<MachineOperand> &Cond,
                                 bool AllowModify) const {
  bool isPPC64 = TM.getSubtargetImpl()->isPPC64();

  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.end();
  if (I == MBB.begin())
    return false;
  --I;
  while (I->isDebugValue()) {
    if (I == MBB.begin())
      return false;
    --I;
  }
  if (!isUnpredicatedTerminator(I))
    return false;

  // Get the last instruction in the block.
  MachineInstr *LastInst = I;

  // If there is only one terminator instruction, process it.
  if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
    if (LastInst->getOpcode() == PPC::B) {
      if (!LastInst->getOperand(0).isMBB())
        return true;
      TBB = LastInst->getOperand(0).getMBB();
      return false;
    } else if (LastInst->getOpcode() == PPC::BCC) {
      if (!LastInst->getOperand(2).isMBB())
        return true;
      // Block ends with fall-through condbranch.
      TBB = LastInst->getOperand(2).getMBB();
      Cond.push_back(LastInst->getOperand(0));
      Cond.push_back(LastInst->getOperand(1));
      return false;
    } else if (LastInst->getOpcode() == PPC::BDNZ8 ||
               LastInst->getOpcode() == PPC::BDNZ) {
      if (!LastInst->getOperand(0).isMBB())
        return true;
      if (DisableCTRLoopAnal)
        return true;
      TBB = LastInst->getOperand(0).getMBB();
      Cond.push_back(MachineOperand::CreateImm(1));
      Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
                                               true));
      return false;
    } else if (LastInst->getOpcode() == PPC::BDZ8 ||
               LastInst->getOpcode() == PPC::BDZ) {
      if (!LastInst->getOperand(0).isMBB())
        return true;
      if (DisableCTRLoopAnal)
        return true;
      TBB = LastInst->getOperand(0).getMBB();
      Cond.push_back(MachineOperand::CreateImm(0));
      Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
                                               true));
      return false;
    }

    // Otherwise, don't know what this is.
    return true;
  }

  // Get the instruction before it if it's a terminator.
  MachineInstr *SecondLastInst = I;

  // If there are three terminators, we don't know what sort of block this is.
  if (SecondLastInst && I != MBB.begin() &&
      isUnpredicatedTerminator(--I))
    return true;

  // If the block ends with PPC::B and PPC::BCC, handle it.
  if (SecondLastInst->getOpcode() == PPC::BCC &&
      LastInst->getOpcode() == PPC::B) {
    if (!SecondLastInst->getOperand(2).isMBB() ||
        !LastInst->getOperand(0).isMBB())
      return true;
    TBB = SecondLastInst->getOperand(2).getMBB();
    Cond.push_back(SecondLastInst->getOperand(0));
    Cond.push_back(SecondLastInst->getOperand(1));
    FBB = LastInst->getOperand(0).getMBB();
    return false;
  } else if ((SecondLastInst->getOpcode() == PPC::BDNZ8 ||
              SecondLastInst->getOpcode() == PPC::BDNZ) &&
      LastInst->getOpcode() == PPC::B) {
    if (!SecondLastInst->getOperand(0).isMBB() ||
        !LastInst->getOperand(0).isMBB())
      return true;
    if (DisableCTRLoopAnal)
      return true;
    TBB = SecondLastInst->getOperand(0).getMBB();
    Cond.push_back(MachineOperand::CreateImm(1));
    Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
                                             true));
    FBB = LastInst->getOperand(0).getMBB();
    return false;
  } else if ((SecondLastInst->getOpcode() == PPC::BDZ8 ||
              SecondLastInst->getOpcode() == PPC::BDZ) &&
      LastInst->getOpcode() == PPC::B) {
    if (!SecondLastInst->getOperand(0).isMBB() ||
        !LastInst->getOperand(0).isMBB())
      return true;
    if (DisableCTRLoopAnal)
      return true;
    TBB = SecondLastInst->getOperand(0).getMBB();
    Cond.push_back(MachineOperand::CreateImm(0));
    Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
                                             true));
    FBB = LastInst->getOperand(0).getMBB();
    return false;
  }

  // If the block ends with two PPC::Bs, handle it.  The second one is not
  // executed, so remove it.
  if (SecondLastInst->getOpcode() == PPC::B &&
      LastInst->getOpcode() == PPC::B) {
    if (!SecondLastInst->getOperand(0).isMBB())
      return true;
    TBB = SecondLastInst->getOperand(0).getMBB();
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return false;
  }

  // Otherwise, can't handle this.
  return true;
}
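
For context, the TBB/FBB/Cond contract this hook fills in is target
independent. A hypothetical caller would interpret a false return like this:

MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
SmallVector<MachineOperand, 4> Cond;
if (!TII->AnalyzeBranch(MBB, TBB, FBB, Cond, /*AllowModify=*/false)) {
  if (!TBB) {
    // Block falls through to its layout successor.
  } else if (Cond.empty()) {
    // Unconditional branch to TBB.
  } else if (!FBB) {
    // Conditional branch to TBB, otherwise fall through.
  } else {
    // Conditional branch to TBB, otherwise branch to FBB.
  }
}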
Example #5
bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
  bool Modified = false;

  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    MachineInstr &MI = *MBBI;
    MachineBasicBlock::iterator NMBBI = llvm::next(MBBI);

    unsigned Opcode = MI.getOpcode();
    switch (Opcode) {
    default: break;
    case ARM::tLDRpci_pic: 
    case ARM::t2LDRpci_pic: {
      unsigned NewLdOpc = (Opcode == ARM::tLDRpci_pic)
        ? ARM::tLDRpci : ARM::t2LDRpci;
      unsigned DstReg = MI.getOperand(0).getReg();
      if (!MI.getOperand(0).isDead()) {
        MachineInstr *NewMI =
          AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
                                 TII->get(NewLdOpc), DstReg)
                         .addOperand(MI.getOperand(1)));
        NewMI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tPICADD))
          .addReg(DstReg, getDefRegState(true))
          .addReg(DstReg)
          .addOperand(MI.getOperand(2));
      }
      MI.eraseFromParent();
      Modified = true;
      break;
    }
    case ARM::t2MOVi32imm: {
      unsigned DstReg = MI.getOperand(0).getReg();
      if (!MI.getOperand(0).isDead()) {
        const MachineOperand &MO = MI.getOperand(1);
        MachineInstrBuilder LO16, HI16;

        LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::t2MOVi16),
                       DstReg);
        HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::t2MOVTi16))
          .addReg(DstReg, getDefRegState(true)).addReg(DstReg);

        if (MO.isImm()) {
          unsigned Imm = MO.getImm();
          unsigned Lo16 = Imm & 0xffff;
          unsigned Hi16 = (Imm >> 16) & 0xffff;
          LO16 = LO16.addImm(Lo16);
          HI16 = HI16.addImm(Hi16);
        } else {
          GlobalValue *GV = MO.getGlobal();
          unsigned TF = MO.getTargetFlags();
          LO16 = LO16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_LO16);
          HI16 = HI16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI16);
          // FIXME: What about memoperands?
        }
        AddDefaultPred(LO16);
        AddDefaultPred(HI16);
      }
      MI.eraseFromParent();
      Modified = true;
      break;
    }
    // FIXME: expand t2MOVi32imm
    }
    MBBI = NMBBI;
  }

  return Modified;
}
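
The t2MOVi32imm expansion above splits a 32-bit immediate across a MOVW/MOVT
pair. A minimal arithmetic sketch (the value is an example):

uint32_t Imm  = 0xDEADBEEF;
uint32_t Lo16 = Imm & 0xffff;          // 0xBEEF -> t2MOVi16 (MOVW)
uint32_t Hi16 = (Imm >> 16) & 0xffff;  // 0xDEAD -> t2MOVTi16 (MOVT)
// MOVW writes the low half and zeroes the upper half; MOVT then overwrites
// only the upper half, leaving DstReg == 0xDEADBEEF.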
Example #6
bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
  bool Modified = false;

  SmallSet<unsigned, 4> Defs;
  SmallSet<unsigned, 4> Uses;
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    MachineInstr *MI = &*MBBI;
    DebugLoc dl = MI->getDebugLoc();
    unsigned PredReg = 0;
    ARMCC::CondCodes CC = llvm::getITInstrPredicate(MI, PredReg);
    if (CC == ARMCC::AL) {
      ++MBBI;
      continue;
    }

    Defs.clear();
    Uses.clear();
    TrackDefUses(MI, Defs, Uses, TRI);

    // Insert an IT instruction.
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(ARM::t2IT))
      .addImm(CC);

    // Add implicit use of ITSTATE to IT block instructions.
    MI->addOperand(MachineOperand::CreateReg(ARM::ITSTATE, false/*isDef*/,
                                             true/*isImp*/, false/*isKill*/));

    MachineInstr *LastITMI = MI;
    MachineBasicBlock::iterator InsertPos = MIB;
    ++MBBI;

    // Form IT block.
    ARMCC::CondCodes OCC = ARMCC::getOppositeCondition(CC);
    unsigned Mask = 0, Pos = 3;
    // Branches, including tricky ones like LDM_RET, need to end an IT
    // block so check the instruction we just put in the block.
    for (; MBBI != E && Pos &&
           (!MI->getDesc().isBranch() && !MI->getDesc().isReturn()) ; ++MBBI) {
      if (MBBI->isDebugValue())
        continue;

      MachineInstr *NMI = &*MBBI;
      MI = NMI;

      unsigned NPredReg = 0;
      ARMCC::CondCodes NCC = llvm::getITInstrPredicate(NMI, NPredReg);
      if (NCC == CC || NCC == OCC) {
        Mask |= (NCC & 1) << Pos;
        // Add implicit use of ITSTATE.
        NMI->addOperand(MachineOperand::CreateReg(ARM::ITSTATE, false/*isDef*/,
                                               true/*isImp*/, false/*isKill*/));
        LastITMI = NMI;
      } else {
        if (NCC == ARMCC::AL &&
            MoveCopyOutOfITBlock(NMI, CC, OCC, Defs, Uses)) {
          --MBBI;
          MBB.remove(NMI);
          MBB.insert(InsertPos, NMI);
          ++NumMovedInsts;
          continue;
        }
        break;
      }
      TrackDefUses(NMI, Defs, Uses, TRI);
      --Pos;
    }

    // Finalize IT mask.
    Mask |= (1 << Pos);
    // Tag along (firstcond[0] << 4) with the mask.
    Mask |= (CC & 1) << 4;
    MIB.addImm(Mask);

    // Last instruction in IT block kills ITSTATE.
    LastITMI->findRegisterUseOperand(ARM::ITSTATE)->setIsKill();

    Modified = true;
    ++NumITs;
  }

  return Modified;
}
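
The mask finalization at the end of the loop is compact; an illustrative
helper (the name is mine) that mirrors it, with Mask already holding one
(NCC & 1) bit per extra conditional instruction and Pos where the scan
stopped:

static unsigned finalizeITMask(unsigned Mask, unsigned Pos, unsigned CC) {
  Mask |= 1u << Pos;      // terminating '1': positions below it are unused
  Mask |= (CC & 1) << 4;  // firstcond[0] rides along with the mask operand
  return Mask;
}

// Example: "IT EQ" covering a single instruction leaves Pos == 3, so the
// operand is (1 << 3) | ((ARMCC::EQ & 1) << 4) == 0b01000.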
Example #7
void MCS51FrameLowering::emitEpilogue(MachineFunction &MF,
                                       MachineBasicBlock &MBB) const {
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  MCS51MachineFunctionInfo *MCS51FI = MF.getInfo<MCS51MachineFunctionInfo>();
  const MCS51InstrInfo &TII =
    *static_cast<const MCS51InstrInfo*>(MF.getTarget().getInstrInfo());

  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  unsigned RetOpcode = MBBI->getOpcode();
  DebugLoc DL = MBBI->getDebugLoc();

  switch (RetOpcode) {
  case MCS51::RET:
  case MCS51::RETI: break;  // These are ok
  default:
    llvm_unreachable("Can only insert epilog into returning blocks");
  }

  // Get the number of bytes allocated by the prologue, from the FrameInfo.
  uint64_t StackSize = MFI->getStackSize();
  unsigned CSSize = MCS51FI->getCalleeSavedFrameSize();
  uint64_t NumBytes = 0;

  if (hasFP(MF)) {
    // Calculate required stack adjustment
    uint64_t FrameSize = StackSize - 2;
    NumBytes = FrameSize - CSSize;

    // pop FPW.
    BuildMI(MBB, MBBI, DL, TII.get(MCS51::POP16r), MCS51::FPW);
  } else
    NumBytes = StackSize - CSSize;

  // Skip the callee-saved pop instructions.
  while (MBBI != MBB.begin()) {
    MachineBasicBlock::iterator PI = prior(MBBI);
    unsigned Opc = PI->getOpcode();
    if (Opc != MCS51::POP16r && !PI->isTerminator())
      break;
    --MBBI;
  }

  DL = MBBI->getDebugLoc();

  // If there is an ADD16ri or SUB16ri of SPW immediately before this
  // instruction, merge the two instructions.
  //if (NumBytes || MFI->hasVarSizedObjects())
  //  mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes);

  if (MFI->hasVarSizedObjects()) {
    BuildMI(MBB, MBBI, DL,
            TII.get(MCS51::MOV16rr), MCS51::SPW).addReg(MCS51::FPW);
    if (CSSize) {
      MachineInstr *MI =
        BuildMI(MBB, MBBI, DL,
                TII.get(MCS51::SUB16ri), MCS51::SPW)
        .addReg(MCS51::SPW).addImm(CSSize);
      // The SRW implicit def is dead.
      MI->getOperand(3).setIsDead();
    }
  } else {
    // adjust stack pointer back: SPW += numbytes
    if (NumBytes) {
      MachineInstr *MI =
        BuildMI(MBB, MBBI, DL, TII.get(MCS51::ADD16ri), MCS51::SPW)
        .addReg(MCS51::SPW).addImm(NumBytes);
      // The SRW implicit def is dead.
      MI->getOperand(3).setIsDead();
    }
  }
}
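
A sketch of the adjustment math above (an assumption, not target code): how
many bytes the epilogue must add back to SPW before the return.

#include <cstdint>

static uint64_t epilogueSPAdjust(uint64_t StackSize, unsigned CSSize,
                                 bool HasFP) {
  // With a frame pointer, 2 bytes of the frame hold the saved FPW; they are
  // undone by the separate POP16r of FPW, not by the SPW adjustment.
  uint64_t FrameSize = HasFP ? StackSize - 2 : StackSize;
  return FrameSize - CSSize;  // the callee-saved pops restore CSSize bytes
}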
Example #8
bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
  const PPCInstrInfo *TII =
                static_cast<const PPCInstrInfo*>(Fn.getTarget().getInstrInfo());
  // Give the blocks of the function a dense, in-order, numbering.
  Fn.RenumberBlocks();
  BlockSizes.resize(Fn.getNumBlockIDs());

  // Measure each MBB and compute a size for the entire function.
  unsigned FuncSize = 0;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock *MBB = MFI;

    unsigned BlockSize = 0;
    for (MachineBasicBlock::iterator MBBI = MBB->begin(), EE = MBB->end();
         MBBI != EE; ++MBBI)
      BlockSize += TII->GetInstSizeInBytes(MBBI);
    
    BlockSizes[MBB->getNumber()] = BlockSize;
    FuncSize += BlockSize;
  }
  
  // If the entire function is smaller than the displacement of a branch field,
  // we know we don't need to shrink any branches in this function.  This is a
  // common case.
  if (FuncSize < (1 << 15)) {
    BlockSizes.clear();
    return false;
  }
  
  // For each conditional branch, if the offset to its destination is larger
  // than the offset field allows, transform it into a long branch sequence
  // like this:
  //   short branch:
  //     bCC MBB
  //   long branch:
  //     b!CC $PC+8
  //     b MBB
  //
  bool MadeChange = true;
  bool EverMadeChange = false;
  while (MadeChange) {
    // Iteratively expand branches until we reach a fixed point.
    MadeChange = false;
  
    for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
         ++MFI) {
      MachineBasicBlock &MBB = *MFI;
      unsigned MBBStartOffset = 0;
      for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
           I != E; ++I) {
        MachineBasicBlock *Dest = nullptr;
        if (I->getOpcode() == PPC::BCC && !I->getOperand(2).isImm())
          Dest = I->getOperand(2).getMBB();
        else if ((I->getOpcode() == PPC::BC || I->getOpcode() == PPC::BCn) &&
                 !I->getOperand(1).isImm())
          Dest = I->getOperand(1).getMBB();
        else if ((I->getOpcode() == PPC::BDNZ8 || I->getOpcode() == PPC::BDNZ ||
                  I->getOpcode() == PPC::BDZ8  || I->getOpcode() == PPC::BDZ) &&
                 !I->getOperand(0).isImm())
          Dest = I->getOperand(0).getMBB();

        if (!Dest) {
          MBBStartOffset += TII->GetInstSizeInBytes(I);
          continue;
        }
        
        // Determine the offset from the current branch to the destination
        // block.
        int BranchSize;
        if (Dest->getNumber() <= MBB.getNumber()) {
          // If this is a backwards branch, the delta is the offset from the
          // start of this block to this branch, plus the sizes of all blocks
          // from this block to the dest.
          BranchSize = MBBStartOffset;
          
          for (unsigned i = Dest->getNumber(), e = MBB.getNumber(); i != e; ++i)
            BranchSize += BlockSizes[i];
        } else {
          // Otherwise, add the size of the blocks between this block and the
          // dest to the number of bytes left in this block.
          BranchSize = -MBBStartOffset;

          for (unsigned i = MBB.getNumber(), e = Dest->getNumber(); i != e; ++i)
            BranchSize += BlockSizes[i];
        }

        // If this branch is in range, ignore it.
        if (isInt<16>(BranchSize)) {
          MBBStartOffset += 4;
          continue;
        }

        // Otherwise, we have to expand it to a long branch.
        MachineInstr *OldBranch = I;
        DebugLoc dl = OldBranch->getDebugLoc();
 
        if (I->getOpcode() == PPC::BCC) {
          // The BCC operands are:
          // 0. PPC branch predicate
          // 1. CR register
          // 2. Target MBB
          PPC::Predicate Pred = (PPC::Predicate)I->getOperand(0).getImm();
          unsigned CRReg = I->getOperand(1).getReg();
       
          // Jump over the uncond branch inst (i.e. $PC+8) on opposite condition.
          BuildMI(MBB, I, dl, TII->get(PPC::BCC))
            .addImm(PPC::InvertPredicate(Pred)).addReg(CRReg).addImm(2);
        } else if (I->getOpcode() == PPC::BC) {
          unsigned CRBit = I->getOperand(0).getReg();
          BuildMI(MBB, I, dl, TII->get(PPC::BCn)).addReg(CRBit).addImm(2);
        } else if (I->getOpcode() == PPC::BCn) {
          unsigned CRBit = I->getOperand(0).getReg();
          BuildMI(MBB, I, dl, TII->get(PPC::BC)).addReg(CRBit).addImm(2);
        } else if (I->getOpcode() == PPC::BDNZ) {
          BuildMI(MBB, I, dl, TII->get(PPC::BDZ)).addImm(2);
        } else if (I->getOpcode() == PPC::BDNZ8) {
          BuildMI(MBB, I, dl, TII->get(PPC::BDZ8)).addImm(2);
        } else if (I->getOpcode() == PPC::BDZ) {
          BuildMI(MBB, I, dl, TII->get(PPC::BDNZ)).addImm(2);
        } else if (I->getOpcode() == PPC::BDZ8) {
          BuildMI(MBB, I, dl, TII->get(PPC::BDNZ8)).addImm(2);
        } else {
           llvm_unreachable("Unhandled branch type!");
        }
        
        // Uncond branch to the real destination.
        I = BuildMI(MBB, I, dl, TII->get(PPC::B)).addMBB(Dest);

        // Remove the old branch from the function.
        OldBranch->eraseFromParent();
        
        // The new branch sequence is 8 bytes where the old branch was 4, so
        // increase the size of the block by 4 and remember to iterate.
        BlockSizes[MBB.getNumber()] += 4;
        MBBStartOffset += 8;
        ++NumExpanded;
        MadeChange = true;
      }
    }
    EverMadeChange |= MadeChange;
  }
  
  BlockSizes.clear();
  return true;
}
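
The in-range check hinges on the branch-distance computation. A standalone
sketch of it (a hypothetical helper, distilled from the loop above):

#include <vector>

// Signed byte offset from a branch that sits MBBStartOffset bytes into block
// MBBNum to the start of block DestNum, given per-block byte sizes. The
// destination is reachable with a conditional branch iff isInt<16>(result).
static int branchDistance(const std::vector<unsigned> &BlockSizes,
                          unsigned MBBNum, unsigned DestNum,
                          unsigned MBBStartOffset) {
  int BranchSize;
  if (DestNum <= MBBNum) {
    // Backwards branch: bytes from the start of Dest up to the branch.
    BranchSize = MBBStartOffset;
    for (unsigned i = DestNum; i != MBBNum; ++i)
      BranchSize += BlockSizes[i];
  } else {
    // Forwards branch: bytes from the branch to the start of Dest.
    BranchSize = -(int)MBBStartOffset;
    for (unsigned i = MBBNum; i != DestNum; ++i)
      BranchSize += BlockSizes[i];
  }
  return BranchSize;
}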
Example #9
/// insertCSRSpillsAndRestores - Insert spill and restore code for
/// callee saved registers used in the function.
///
void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
  // Get callee saved register information.
  MachineFrameInfo *MFI = Fn.getFrameInfo();
  const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();

  MFI->setCalleeSavedInfoValid(true);

  // Early exit if no callee saved registers are modified!
  if (CSI.empty())
    return;

  const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo();
  const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering();
  const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo();
  MachineBasicBlock::iterator I;

  // Spill using target interface.
  I = EntryBlock->begin();
  if (!TFI->spillCalleeSavedRegisters(*EntryBlock, I, CSI, TRI)) {
    for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
      // Add the callee-saved register as live-in.
      // It's killed at the spill.
      EntryBlock->addLiveIn(CSI[i].getReg());

      // Insert the spill to the stack frame.
      unsigned Reg = CSI[i].getReg();
      const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
      TII.storeRegToStackSlot(*EntryBlock, I, Reg, true, CSI[i].getFrameIdx(),
                              RC, TRI);
    }
  }

  // Restore using target interface.
  for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri) {
    MachineBasicBlock *MBB = ReturnBlocks[ri];
    I = MBB->end();
    --I;

    // Skip over all terminator instructions, which are part of the return
    // sequence.
    MachineBasicBlock::iterator I2 = I;
    while (I2 != MBB->begin() && (--I2)->isTerminator())
      I = I2;

    bool AtStart = I == MBB->begin();
    MachineBasicBlock::iterator BeforeI = I;
    if (!AtStart)
      --BeforeI;

    // Restore all registers immediately before the return and any
    // terminators that precede it.
    if (!TFI->restoreCalleeSavedRegisters(*MBB, I, CSI, TRI)) {
      for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
        unsigned Reg = CSI[i].getReg();
        const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
        TII.loadRegFromStackSlot(*MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI);
        assert(I != MBB->begin() &&
               "loadRegFromStackSlot didn't insert any code!");
        // Insert in reverse order.  loadRegFromStackSlot can insert
        // multiple instructions.
        if (AtStart)
          I = MBB->begin();
        else {
          I = BeforeI;
          ++I;
        }
      }
    }
  }
}
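
The bulk spillCalleeSavedRegisters call tried first is a target hook. A
hypothetical target happy with the generic per-register loop above simply
declines it (MyTargetFrameLowering is illustrative, not a real class):

bool MyTargetFrameLowering::spillCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    const std::vector<CalleeSavedInfo> &CSI,
    const TargetRegisterInfo *TRI) const {
  return false;  // fall back to storeRegToStackSlot, one register at a time
}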
Example #10
/// Walk the specified loop in the CFG (defined by all blocks dominated by the
/// specified header block, and that are in the current loop) in depth first
/// order w.r.t the DominatorTree. This allows us to visit definitions before
/// uses, allowing us to hoist a loop body in one pass without iteration.
///
void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) {
  MachineBasicBlock *Preheader = getCurPreheader();
  if (!Preheader)
    return;

  SmallVector<MachineDomTreeNode*, 32> Scopes;
  SmallVector<MachineDomTreeNode*, 8> WorkList;
  DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> ParentMap;
  DenseMap<MachineDomTreeNode*, unsigned> OpenChildren;

  // Perform a DFS walk to determine the order of visit.
  WorkList.push_back(HeaderN);
  while (!WorkList.empty()) {
    MachineDomTreeNode *Node = WorkList.pop_back_val();
    assert(Node && "Null dominator tree node?");
    MachineBasicBlock *BB = Node->getBlock();

    // If the header of the loop containing this basic block is a landing pad,
    // then don't try to hoist instructions out of this loop.
    const MachineLoop *ML = MLI->getLoopFor(BB);
    if (ML && ML->getHeader()->isEHPad())
      continue;

    // If this subregion is not in the top level loop at all, exit.
    if (!CurLoop->contains(BB))
      continue;

    Scopes.push_back(Node);
    const std::vector<MachineDomTreeNode*> &Children = Node->getChildren();
    unsigned NumChildren = Children.size();

    // Don't hoist things out of a large switch statement.  This often causes
    // code to be hoisted that wasn't going to be executed, and increases
    // register pressure in a situation where it's likely to matter.
    if (BB->succ_size() >= 25)
      NumChildren = 0;

    OpenChildren[Node] = NumChildren;
    // Add children in reverse order as then the next popped worklist node is
    // the first child of this node.  This means we ultimately traverse the
    // DOM tree in exactly the same order as if we'd recursed.
    for (int i = (int)NumChildren-1; i >= 0; --i) {
      MachineDomTreeNode *Child = Children[i];
      ParentMap[Child] = Node;
      WorkList.push_back(Child);
    }
  }

  if (Scopes.size() == 0)
    return;

  // Compute registers which are livein into the loop headers.
  RegSeen.clear();
  BackTrace.clear();
  InitRegPressure(Preheader);

  // Now perform LICM.
  for (MachineDomTreeNode *Node : Scopes) {
    MachineBasicBlock *MBB = Node->getBlock();

    EnterScope(MBB);

    // Process the block
    SpeculationState = SpeculateUnknown;
    for (MachineBasicBlock::iterator
         MII = MBB->begin(), E = MBB->end(); MII != E; ) {
      MachineBasicBlock::iterator NextMII = MII; ++NextMII;
      MachineInstr *MI = &*MII;
      if (!Hoist(MI, Preheader))
        UpdateRegPressure(MI);
      MII = NextMII;
    }

    // If it's a leaf node, it's done. Traverse upwards to pop ancestors.
    ExitScopeIfDone(Node, OpenChildren, ParentMap);
  }
}
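
The reverse-order push is the whole trick of the DFS walk above. A minimal
self-contained sketch (Node exposing a children() accessor is an assumption
here):

#include <vector>

// Pushing children in reverse makes a LIFO worklist yield exactly the same
// preorder as recursive DFS, without recursion.
template <typename Node>
void dfsPreorder(Node *Root, std::vector<Node *> &Order) {
  std::vector<Node *> WorkList;
  WorkList.push_back(Root);
  while (!WorkList.empty()) {
    Node *N = WorkList.back();
    WorkList.pop_back();
    Order.push_back(N);
    const std::vector<Node *> &Children = N->children();
    for (int i = (int)Children.size() - 1; i >= 0; --i)
      WorkList.push_back(Children[i]);
  }
}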
Example #11
bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
  SmallSetVector<MachineInstr*, 8> MaybeDeadCopies; // Candidates for deletion
  DenseMap<unsigned, MachineInstr*> AvailCopyMap;   // Def -> available copies map
  DenseMap<unsigned, MachineInstr*> CopyMap;        // Def -> copies map
  DenseMap<unsigned, unsigned> SrcMap;              // Src -> Def map

  bool Changed = false;
  for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ) {
    MachineInstr *MI = &*I;
    ++I;

    if (MI->isCopy()) {
      unsigned Def = MI->getOperand(0).getReg();
      unsigned Src = MI->getOperand(1).getReg();

      if (TargetRegisterInfo::isVirtualRegister(Def) ||
          TargetRegisterInfo::isVirtualRegister(Src))
        report_fatal_error("MachineCopyPropagation should be run after"
                           " register allocation!");

      DenseMap<unsigned, MachineInstr*>::iterator CI = AvailCopyMap.find(Src);
      if (CI != AvailCopyMap.end()) {
        MachineInstr *CopyMI = CI->second;
        unsigned SrcSrc = CopyMI->getOperand(1).getReg();
        if (!ReservedRegs.test(Def) &&
            (!ReservedRegs.test(Src) || NoInterveningSideEffect(CopyMI, MI)) &&
            (SrcSrc == Def || TRI->isSubRegister(SrcSrc, Def))) {
          // The two copies cancel out and the source of the first copy
          // hasn't been overridden, eliminate the second one. e.g.
          //  %ECX<def> = COPY %EAX<kill>
          //  ... nothing clobbered EAX.
          //  %EAX<def> = COPY %ECX
          // =>
          //  %ECX<def> = COPY %EAX
          //
          // Also avoid eliminating a copy from reserved registers unless the
          // definition is proven not clobbered. e.g.
          // %RSP<def> = COPY %RAX
          // CALL
          // %RAX<def> = COPY %RSP
          CopyMI->getOperand(1).setIsKill(false);
          MI->eraseFromParent();
          Changed = true;
          ++NumDeletes;
          continue;
        }
      }

      // If Src is defined by a previous copy, it cannot be eliminated.
      CI = CopyMap.find(Src);
      if (CI != CopyMap.end())
        MaybeDeadCopies.remove(CI->second);
      for (const unsigned *AS = TRI->getAliasSet(Src); *AS; ++AS) {
        CI = CopyMap.find(*AS);
        if (CI != CopyMap.end())
          MaybeDeadCopies.remove(CI->second);
      }

      // Copy is now a candidate for deletion.
      MaybeDeadCopies.insert(MI);

      // If 'Src' is previously source of another copy, then this earlier copy's
      // source is no longer available. e.g.
      // %xmm9<def> = copy %xmm2
      // ...
      // %xmm2<def> = copy %xmm0
      // ...
      // %xmm2<def> = copy %xmm9
      SourceNoLongerAvailable(Def, SrcMap, AvailCopyMap);

      // Remember Def is defined by the copy.
      CopyMap[Def] = MI;
      AvailCopyMap[Def] = MI;
      for (const unsigned *SR = TRI->getSubRegisters(Def); *SR; ++SR) {
        CopyMap[*SR] = MI;
        AvailCopyMap[*SR] = MI;
      }

      // Remember source that's copied to Def. Once it's clobbered, then
      // it's no longer available for copy propagation.
      SrcMap[Src] = Def;

      continue;
    }

    // Not a copy.
    SmallVector<unsigned, 2> Defs;
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (!Reg)
        continue;

      if (TargetRegisterInfo::isVirtualRegister(Reg))
        report_fatal_error("MachineCopyPropagation should be run after"
                           " register allocation!");

      if (MO.isDef()) {
        Defs.push_back(Reg);
        continue;
      }

      // If 'Reg' is defined by a copy, the copy is no longer a candidate
      // for elimination.
      DenseMap<unsigned, MachineInstr*>::iterator CI = CopyMap.find(Reg);
      if (CI != CopyMap.end())
        MaybeDeadCopies.remove(CI->second);
      for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) {
        CI = CopyMap.find(*AS);
        if (CI != CopyMap.end())
          MaybeDeadCopies.remove(CI->second);
      }
    }

    for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
      unsigned Reg = Defs[i];

      // No longer defined by a copy.
      CopyMap.erase(Reg);
      AvailCopyMap.erase(Reg);
      for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) {
        CopyMap.erase(*AS);
        AvailCopyMap.erase(*AS);
      }

      // If 'Reg' is previously source of a copy, it is no longer available for
      // copy propagation.
      SourceNoLongerAvailable(Reg, SrcMap, AvailCopyMap);
    }
  }

  // If MBB doesn't have successors, delete the copies whose defs are not used.
  // If MBB does have successors, then conservatively assume the defs are
  // live-out since we don't want to trust live-in lists.
  if (MBB.succ_empty()) {
    for (SmallSetVector<MachineInstr*, 8>::iterator
           DI = MaybeDeadCopies.begin(), DE = MaybeDeadCopies.end();
         DI != DE; ++DI) {
      if (!ReservedRegs.test((*DI)->getOperand(0).getReg())) {
        (*DI)->eraseFromParent();
        Changed = true;
        ++NumDeletes;
      }
    }
  }

  return Changed;
}
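
SourceNoLongerAvailable is called twice above but its body is not part of
this excerpt. A plausible sketch (an assumption about both signature and
body): once Reg is clobbered, a copy that read Reg as its source can no
longer be forwarded.

void MachineCopyPropagation::SourceNoLongerAvailable(
    unsigned Reg, DenseMap<unsigned, unsigned> &SrcMap,
    DenseMap<unsigned, MachineInstr *> &AvailCopyMap) {
  DenseMap<unsigned, unsigned>::iterator SI = SrcMap.find(Reg);
  if (SI == SrcMap.end())
    return;
  unsigned Def = SI->second;  // Def was produced by "Def = COPY Reg"
  AvailCopyMap.erase(Def);    // that copy can no longer be forwarded
  for (const unsigned *SR = TRI->getSubRegisters(Def); *SR; ++SR)
    AvailCopyMap.erase(*SR);  // its sub-registers are stale as well
}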
Example #12
bool SystemZInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
                                     MachineBasicBlock *&TBB,
                                     MachineBasicBlock *&FBB,
                                     SmallVectorImpl<MachineOperand> &Cond,
                                     bool AllowModify) const {
  // Most of the code and comments here are boilerplate.

  // Start from the bottom of the block and work up, examining the
  // terminator instructions.
  MachineBasicBlock::iterator I = MBB.end();
  while (I != MBB.begin()) {
    --I;
    if (I->isDebugValue())
      continue;

    // Working from the bottom, when we see a non-terminator instruction, we're
    // done.
    if (!isUnpredicatedTerminator(I))
      break;

    // A terminator that isn't a branch can't easily be handled by this
    // analysis.
    unsigned ThisCond;
    const MachineOperand *ThisTarget;
    if (!isBranch(I, ThisCond, ThisTarget))
      return true;

    // Can't handle indirect branches.
    if (!ThisTarget->isMBB())
      return true;

    if (ThisCond == SystemZ::CCMASK_ANY) {
      // Handle unconditional branches.
      if (!AllowModify) {
        TBB = ThisTarget->getMBB();
        continue;
      }

      // If the block has any instructions after a JMP, delete them.
      while (llvm::next(I) != MBB.end())
        llvm::next(I)->eraseFromParent();

      Cond.clear();
      FBB = 0;

      // Delete the JMP if it's equivalent to a fall-through.
      if (MBB.isLayoutSuccessor(ThisTarget->getMBB())) {
        TBB = 0;
        I->eraseFromParent();
        I = MBB.end();
        continue;
      }

      // TBB is used to indicate the unconditional destination.
      TBB = ThisTarget->getMBB();
      continue;
    }

    // Working from the bottom, handle the first conditional branch.
    if (Cond.empty()) {
      // FIXME: add X86-style branch swap
      FBB = TBB;
      TBB = ThisTarget->getMBB();
      Cond.push_back(MachineOperand::CreateImm(ThisCond));
      continue;
    }

    // Handle subsequent conditional branches.
    assert(Cond.size() == 1);
    assert(TBB);

    // Only handle the case where all conditional branches branch to the same
    // destination.
    if (TBB != ThisTarget->getMBB())
      return true;

    // If the conditions are the same, we can leave them alone.
    unsigned OldCond = Cond[0].getImm();
    if (OldCond == ThisCond)
      continue;

    // FIXME: Try combining conditions like X86 does.  Should be easy on Z!
  }

  return false;
}
Example #13
bool mprocInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
                                    MachineBasicBlock *&TBB,
                                    MachineBasicBlock *&FBB,
                                    SmallVectorImpl<MachineOperand> &Cond,
                                    bool AllowModify) const {
  // Start from the bottom of the block and work up, examining the
  // terminator instructions.
  MachineBasicBlock::iterator I = MBB.end();
  while (I != MBB.begin()) {
    --I;
    if (I->isDebugValue())
      continue;

    // Working from the bottom, when we see a non-terminator
    // instruction, we're done.
    if (!isUnpredicatedTerminator(I))
      break;

    // A terminator that isn't a branch can't easily be handled
    // by this analysis.
    if (!I->isBranch())
      return true;

    // Cannot handle indirect branches.
    /*if (I->getOpcode() == mproc::Br ||
        I->getOpcode() == mproc::Bm)
      return true;

    // Handle unconditional branches.
    if (I->getOpcode() == mproc::JMP) {
      if (!AllowModify) {
        TBB = I->getOperand(0).getMBB();
        continue;
      }
      // If the block has any instructions after a JMP, delete them.
      while (llvm::next(I) != MBB.end())
        llvm::next(I)->eraseFromParent();
      Cond.clear();
      FBB = 0;

      // Delete the JMP if it's equivalent to a fall-through.
      if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
        TBB = 0;
        I->eraseFromParent();
        I = MBB.end();
        continue;
      }

      // TBB is used to indicate the unconditional destination.
      TBB = I->getOperand(0).getMBB();
      continue;
    }
*/

    // Handle conditional branches.
   // assert(I->getOpcode() == mproc::JCC && "Invalid conditional branch");
    mprocCC::CondCodes BranchCode =
      static_cast<mprocCC::CondCodes>(I->getOperand(1).getImm());
    if (BranchCode == mprocCC::COND_INVALID)
      return true;  // Can't handle weird stuff.

    // Working from the bottom, handle the first conditional branch.
    if (Cond.empty()) {
      FBB = TBB;
      TBB = I->getOperand(0).getMBB();
      Cond.push_back(MachineOperand::CreateImm(BranchCode));
      continue;
    }

    // Handle subsequent conditional branches. Only handle the case where all
    // conditional branches branch to the same destination.
    assert(Cond.size() == 1);
    assert(TBB);

    // Only handle the case where all conditional branches branch to
    // the same destination.
    if (TBB != I->getOperand(0).getMBB())
      return true;

    mprocCC::CondCodes OldBranchCode = (mprocCC::CondCodes)Cond[0].getImm();
    // If the conditions are the same, we can leave them alone.
    if (OldBranchCode == BranchCode)
      continue;

    return true;
  }

  return false;
}
Example No. 14
void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
                                        MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  DebugLoc DL;
  bool IsTailCallReturn = false;
  if (MBB.end() != MBBI) {
    DL = MBBI->getDebugLoc();
    unsigned RetOpcode = MBBI->getOpcode();
    IsTailCallReturn = RetOpcode == AArch64::TCRETURNdi ||
      RetOpcode == AArch64::TCRETURNri;
  }
  int NumBytes = MFI->getStackSize();
  const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();

  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
    return;

  // Initial and residual are named for consistency with the prologue. Note that
  // in the epilogue, the residual adjustment is executed first.
  uint64_t ArgumentPopSize = 0;
  if (IsTailCallReturn) {
    MachineOperand &StackAdjust = MBBI->getOperand(1);

    // For a tail-call in a callee-pops-arguments environment, some or all of
    // the stack may actually be in use for the call's arguments; this is
    // calculated during LowerCall and consumed here...
    ArgumentPopSize = StackAdjust.getImm();
  } else {
    // ... otherwise the amount to pop is *all* of the argument space,
    // conveniently stored in the MachineFunctionInfo by
    // LowerFormalArguments. This will, of course, be zero for the C calling
    // convention.
    ArgumentPopSize = AFI->getArgumentStackToRestore();
  }

  // The stack frame should be like below,
  //
  //      ----------------------                     ---
  //      |                    |                      |
  //      | BytesInStackArgArea|              CalleeArgStackSize
  //      | (NumReusableBytes) |                (of tail call)
  //      |                    |                     ---
  //      |                    |                      |
  //      ---------------------|        ---           |
  //      |                    |         |            |
  //      |   CalleeSavedReg   |         |            |
  //      | (CalleeSavedStack  |         |            |
  //      |       Size)        |         |            |
  //      ---------------------|         |         NumBytes
  //      |                    |     StackSize  (StackAdjustUp)
  //      |   LocalStackSize   |         |            |
  //      | (covering callee   |         |            |
  //      |       args)        |         |            |
  //      |                    |         |            |
  //      ----------------------        ---          ---
  //
  // So NumBytes = StackSize + BytesInStackArgArea - CalleeArgStackSize
  //             = StackSize + ArgumentPopSize
  //
  // AArch64TargetLowering::LowerCall figures out ArgumentPopSize and keeps
  // it as the 2nd argument of AArch64ISD::TC_RETURN.

  // Move past the restores of the callee-saved registers.
  MachineBasicBlock::iterator LastPopI = MBB.getFirstTerminator();
  MachineBasicBlock::iterator Begin = MBB.begin();
  while (LastPopI != Begin) {
    --LastPopI;
    if (!LastPopI->getFlag(MachineInstr::FrameDestroy)) {
      ++LastPopI;
      break;
    }
  }
  NumBytes -= AFI->getCalleeSavedStackSize();
  assert(NumBytes >= 0 && "Negative stack allocation size!?");

  if (!hasFP(MF)) {
    bool RedZone = canUseRedZone(MF);
    // If this was a redzone leaf function, we don't need to restore the
    // stack pointer (but we may need to pop stack args for fastcc).
    if (RedZone && ArgumentPopSize == 0)
      return;

    bool NoCalleeSaveRestore = AFI->getCalleeSavedStackSize() == 0;
    int StackRestoreBytes = RedZone ? 0 : NumBytes;
    if (NoCalleeSaveRestore)
      StackRestoreBytes += ArgumentPopSize;
    emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
                    StackRestoreBytes, TII, MachineInstr::FrameDestroy);
    // If we were able to combine the local stack pop with the argument pop,
    // then we're done.
    if (NoCalleeSaveRestore || ArgumentPopSize == 0)
      return;
    NumBytes = 0;
  }

  // Restore the original stack pointer.
  // FIXME: Rather than doing the math here, we should instead just use
  // non-post-indexed loads for the restores if we aren't actually going to
  // be able to save any instructions.
  if (NumBytes || MFI->hasVarSizedObjects())
    emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
                    -AFI->getCalleeSavedStackSize() + 16, TII,
                    MachineInstr::FrameDestroy);

  // This must be placed after the callee-save restore code because that code
  // assumes the SP is at the same location as it was after the callee-save save
  // code in the prologue.
  if (ArgumentPopSize)
    emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
                    ArgumentPopSize, TII, MachineInstr::FrameDestroy);
}
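
The no-frame-pointer branch above is, at bottom, integer arithmetic over the frame diagram: skip the restore entirely for a red-zone leaf with nothing to pop, and fold the argument pop into the local-area pop when no callee saves were restored. A standalone sketch of just that arithmetic, assuming plain byte counts (spAdjust is an invented helper, not part of the backend):

#include <cassert>
#include <cstdio>

static long spAdjust(long stackSize, long calleeSavedSize, long argumentPopSize,
                     bool redZone) {
  long numBytes = stackSize - calleeSavedSize;   // Local area only.
  assert(numBytes >= 0 && "negative local area");
  if (redZone && argumentPopSize == 0)
    return 0;                                    // Nothing to restore.
  long restore = redZone ? 0 : numBytes;
  if (calleeSavedSize == 0)
    restore += argumentPopSize;                  // Fold the two pops into one.
  return restore;
}

int main() {
  std::printf("%ld\n", spAdjust(96, 32, 0, false));   // 64: locals only
  std::printf("%ld\n", spAdjust(96, 0, 16, false));   // 112: locals + stack args
  std::printf("%ld\n", spAdjust(96, 32, 0, true));    // 0: red-zone leaf
}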
Example No. 15
bool MipsExpandPseudo::expandAtomicBinOp(MachineBasicBlock &BB,
                                         MachineBasicBlock::iterator I,
                                         MachineBasicBlock::iterator &NMBBI,
                                         unsigned Size) {
  MachineFunction *MF = BB.getParent();

  const bool ArePtrs64bit = STI->getABI().ArePtrs64bit();
  DebugLoc DL = I->getDebugLoc();

  unsigned LL, SC, ZERO, BEQ;

  if (Size == 4) {
    if (STI->inMicroMipsMode()) {
      LL = STI->hasMips32r6() ? Mips::LL_MMR6 : Mips::LL_MM;
      SC = STI->hasMips32r6() ? Mips::SC_MMR6 : Mips::SC_MM;
      BEQ = STI->hasMips32r6() ? Mips::BEQC_MMR6 : Mips::BEQ_MM;
    } else {
      LL = STI->hasMips32r6()
               ? (ArePtrs64bit ? Mips::LL64_R6 : Mips::LL_R6)
               : (ArePtrs64bit ? Mips::LL64 : Mips::LL);
      SC = STI->hasMips32r6()
               ? (ArePtrs64bit ? Mips::SC64_R6 : Mips::SC_R6)
               : (ArePtrs64bit ? Mips::SC64 : Mips::SC);
      BEQ = Mips::BEQ;
    }

    ZERO = Mips::ZERO;
  } else {
    LL = STI->hasMips64r6() ? Mips::LLD_R6 : Mips::LLD;
    SC = STI->hasMips64r6() ? Mips::SCD_R6 : Mips::SCD;
    ZERO = Mips::ZERO_64;
    BEQ = Mips::BEQ64;
  }

  unsigned OldVal = I->getOperand(0).getReg();
  unsigned Ptr = I->getOperand(1).getReg();
  unsigned Incr = I->getOperand(2).getReg();
  unsigned Scratch = I->getOperand(3).getReg();

  unsigned Opcode = 0;
  unsigned OR = 0;
  unsigned AND = 0;
  unsigned NOR = 0;
  bool IsNand = false;
  switch (I->getOpcode()) {
  case Mips::ATOMIC_LOAD_ADD_I32_POSTRA:
    Opcode = Mips::ADDu;
    break;
  case Mips::ATOMIC_LOAD_SUB_I32_POSTRA:
    Opcode = Mips::SUBu;
    break;
  case Mips::ATOMIC_LOAD_AND_I32_POSTRA:
    Opcode = Mips::AND;
    break;
  case Mips::ATOMIC_LOAD_OR_I32_POSTRA:
    Opcode = Mips::OR;
    break;
  case Mips::ATOMIC_LOAD_XOR_I32_POSTRA:
    Opcode = Mips::XOR;
    break;
  case Mips::ATOMIC_LOAD_NAND_I32_POSTRA:
    IsNand = true;
    AND = Mips::AND;
    NOR = Mips::NOR;
    break;
  case Mips::ATOMIC_SWAP_I32_POSTRA:
    OR = Mips::OR;
    break;
  case Mips::ATOMIC_LOAD_ADD_I64_POSTRA:
    Opcode = Mips::DADDu;
    break;
  case Mips::ATOMIC_LOAD_SUB_I64_POSTRA:
    Opcode = Mips::DSUBu;
    break;
  case Mips::ATOMIC_LOAD_AND_I64_POSTRA:
    Opcode = Mips::AND64;
    break;
  case Mips::ATOMIC_LOAD_OR_I64_POSTRA:
    Opcode = Mips::OR64;
    break;
  case Mips::ATOMIC_LOAD_XOR_I64_POSTRA:
    Opcode = Mips::XOR64;
    break;
  case Mips::ATOMIC_LOAD_NAND_I64_POSTRA:
    IsNand = true;
    AND = Mips::AND64;
    NOR = Mips::NOR64;
    break;
  case Mips::ATOMIC_SWAP_I64_POSTRA:
    OR = Mips::OR64;
    break;
  default:
    llvm_unreachable("Unknown pseudo atomic!");
  }

  const BasicBlock *LLVM_BB = BB.getBasicBlock();
  MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MachineFunction::iterator It = ++BB.getIterator();
  MF->insert(It, loopMBB);
  MF->insert(It, exitMBB);

  exitMBB->splice(exitMBB->begin(), &BB, std::next(I), BB.end());
  exitMBB->transferSuccessorsAndUpdatePHIs(&BB);

  BB.addSuccessor(loopMBB, BranchProbability::getOne());
  loopMBB->addSuccessor(exitMBB);
  loopMBB->addSuccessor(loopMBB);
  loopMBB->normalizeSuccProbs();

  BuildMI(loopMBB, DL, TII->get(LL), OldVal).addReg(Ptr).addImm(0);
  assert((OldVal != Ptr) && "Clobbered the wrong ptr reg!");
  assert((OldVal != Incr) && "Clobbered the wrong reg!");
  if (Opcode) {
    BuildMI(loopMBB, DL, TII->get(Opcode), Scratch).addReg(OldVal).addReg(Incr);
  } else if (IsNand) {
    assert(AND && NOR &&
           "Unknown nand instruction for atomic pseudo expansion");
    BuildMI(loopMBB, DL, TII->get(AND), Scratch).addReg(OldVal).addReg(Incr);
    BuildMI(loopMBB, DL, TII->get(NOR), Scratch).addReg(ZERO).addReg(Scratch);
  } else {
    assert(OR && "Unknown instruction for atomic pseudo expansion!");
    BuildMI(loopMBB, DL, TII->get(OR), Scratch).addReg(Incr).addReg(ZERO);
  }

  BuildMI(loopMBB, DL, TII->get(SC), Scratch).addReg(Scratch).addReg(Ptr).addImm(0);
  BuildMI(loopMBB, DL, TII->get(BEQ)).addReg(Scratch).addReg(ZERO).addMBB(loopMBB);

  NMBBI = BB.end();
  I->eraseFromParent();

  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *loopMBB);
  computeAndAddLiveIns(LiveRegs, *exitMBB);

  return true;
}
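
The loop block built above is the standard load-linked/store-conditional retry idiom, which maps onto a compare-exchange loop in portable C++. A sketch of the NAND flavour (ATOMIC_LOAD_NAND_*), modeling the semantics only, with comments tying each step back to the emitted ll/sc/beq sequence:

#include <atomic>
#include <cstdio>

static unsigned atomicNand(std::atomic<unsigned> &mem, unsigned incr) {
  unsigned oldVal = mem.load();                      // "ll oldval, 0(ptr)"
  while (!mem.compare_exchange_weak(oldVal,          // "sc scratch, 0(ptr)"
                                    ~(oldVal & incr)))
    ;                                                // "beq scratch, $zero, loop"
  return oldVal;
}

int main() {
  std::atomic<unsigned> v{0xF0F0F0F0u};
  unsigned old = atomicNand(v, 0xFFFF0000u);
  std::printf("old=%08x new=%08x\n", old, v.load()); // old=f0f0f0f0 new=0f0fffff
}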
Example No. 16
/// SinkInstruction - Determine whether it is safe to sink the specified machine
/// instruction out of its current block into a successor.
bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore,
                                     AllSuccsCache &AllSuccessors) {
  // Don't sink instructions that the target prefers not to sink.
  if (!TII->shouldSink(*MI))
    return false;

  // Check if it's safe to move the instruction.
  if (!MI->isSafeToMove(AA, SawStore))
    return false;

  // Convergent operations may not be made control-dependent on additional
  // values.
  if (MI->isConvergent())
    return false;

  // Don't break implicit null checks.  This is a performance heuristic, and not
  // required for correctness.
  if (SinkingPreventsImplicitNullCheck(MI, TII, TRI))
    return false;

  // FIXME: This should include support for sinking instructions within the
  // block they are currently in to shorten the live ranges.  We often get
  // instructions sunk into the top of a large block, but it would be better to
  // also sink them down before their first use in the block.  This xform has to
  // be careful not to *increase* register pressure though, e.g. sinking
  // "x = y + z" down if it kills y and z would increase the live ranges of y
  // and z and only shrink the live range of x.

  bool BreakPHIEdge = false;
  MachineBasicBlock *ParentBlock = MI->getParent();
  MachineBasicBlock *SuccToSinkTo =
      FindSuccToSinkTo(MI, ParentBlock, BreakPHIEdge, AllSuccessors);

  // If there are no outputs, it must have side-effects.
  if (!SuccToSinkTo)
    return false;

  // If the instruction to move defines a dead physical register which is live
  // when leaving the basic block, don't move it because it could turn into a
  // "zombie" define of that preg. E.g., EFLAGS. (<rdar://problem/8030636>)
  for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) {
    const MachineOperand &MO = MI->getOperand(I);
    if (!MO.isReg()) continue;
    unsigned Reg = MO.getReg();
    if (Reg == 0 || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
    if (SuccToSinkTo->isLiveIn(Reg))
      return false;
  }

  DEBUG(dbgs() << "Sink instr " << *MI << "\tinto block " << *SuccToSinkTo);

  // If the block has multiple predecessors, this is a critical edge.
  // Decide if we can sink along it or need to break the edge.
  if (SuccToSinkTo->pred_size() > 1) {
    // We cannot sink a load across a critical edge - there may be stores in
    // other code paths.
    bool TryBreak = false;
    bool store = true;
    if (!MI->isSafeToMove(AA, store)) {
      DEBUG(dbgs() << " *** NOTE: Won't sink load along critical edge.\n");
      TryBreak = true;
    }

    // We don't want to sink across a critical edge if we don't dominate the
    // successor. We could be introducing calculations to new code paths.
    if (!TryBreak && !DT->dominates(ParentBlock, SuccToSinkTo)) {
      DEBUG(dbgs() << " *** NOTE: Critical edge found\n");
      TryBreak = true;
    }

    // Don't sink instructions into a loop.
    if (!TryBreak && LI->isLoopHeader(SuccToSinkTo)) {
      DEBUG(dbgs() << " *** NOTE: Loop header found\n");
      TryBreak = true;
    }

    // Otherwise we are OK with sinking along a critical edge.
    if (!TryBreak)
      DEBUG(dbgs() << "Sinking along critical edge.\n");
    else {
      // Mark this edge as to be split.
      // If the edge can actually be split, the next iteration of the main loop
      // will sink MI in the newly created block.
      bool Status =
        PostponeSplitCriticalEdge(MI, ParentBlock, SuccToSinkTo, BreakPHIEdge);
      if (!Status)
        DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to "
              "break critical edge\n");
      // The instruction will not be sunk this time.
      return false;
    }
  }

  if (BreakPHIEdge) {
    // BreakPHIEdge is true if all the uses are in the successor MBB being
    // sunk into and they are all PHI nodes. In this case, machine-sink must
    // break the critical edge first.
    bool Status = PostponeSplitCriticalEdge(MI, ParentBlock,
                                            SuccToSinkTo, BreakPHIEdge);
    if (!Status)
      DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to "
            "break critical edge\n");
    // The instruction will not be sunk this time.
    return false;
  }

  // Determine where to insert into. Skip phi nodes.
  MachineBasicBlock::iterator InsertPos = SuccToSinkTo->begin();
  while (InsertPos != SuccToSinkTo->end() && InsertPos->isPHI())
    ++InsertPos;

  // collect matching debug values.
  SmallVector<MachineInstr *, 2> DbgValuesToSink;
  collectDebugValues(MI, DbgValuesToSink);

  // Move the instruction.
  SuccToSinkTo->splice(InsertPos, ParentBlock, MI,
                       ++MachineBasicBlock::iterator(MI));

  // Move debug values.
  for (SmallVectorImpl<MachineInstr *>::iterator DBI = DbgValuesToSink.begin(),
         DBE = DbgValuesToSink.end(); DBI != DBE; ++DBI) {
    MachineInstr *DbgMI = *DBI;
    SuccToSinkTo->splice(InsertPos, ParentBlock,  DbgMI,
                         ++MachineBasicBlock::iterator(DbgMI));
  }

  // Conservatively, clear any kill flags, since it's possible that they are no
  // longer correct.
  // Note that we have to clear the kill flags for any register this instruction
  // uses as we may sink over another instruction which currently kills the
  // used registers.
  for (MachineOperand &MO : MI->operands()) {
    if (MO.isReg() && MO.isUse())
      RegsToClearKillFlags.set(MO.getReg()); // Remember to clear kill flags.
  }

  return true;
}
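
The critical-edge handling above reduces to a short decision procedure: sink directly when the successor has a single predecessor, sink across the critical edge only when the move is store-safe, dominated, and not into a loop header, and otherwise request an edge split (punting this round if the split later proves illegal or unprofitable). A distilled model, where sinkDecision and its boolean inputs are invented stand-ins for the alias-analysis, dominator-tree, and loop-info queries:

#include <cstdio>

enum class Sink { Direct, SplitEdge };

// If the split later proves illegal or unprofitable, the caller punts and
// the instruction simply is not sunk this time, as in the code above.
static Sink sinkDecision(unsigned predCount, bool safeAcrossStores,
                         bool dominatesSucc, bool succIsLoopHeader) {
  if (predCount <= 1)
    return Sink::Direct;                  // Not a critical edge.
  if (safeAcrossStores && dominatesSucc && !succIsLoopHeader)
    return Sink::Direct;                  // OK along the critical edge.
  return Sink::SplitEdge;                 // Postpone: split, then retry.
}

int main() {
  std::printf("%d\n", (int)sinkDecision(1, false, false, false)); // 0 (Direct)
  std::printf("%d\n", (int)sinkDecision(2, true, true, false));   // 0 (Direct)
  std::printf("%d\n", (int)sinkDecision(2, true, true, true));    // 1 (SplitEdge)
}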
Example No. 17
bool MipsExpandPseudo::expandAtomicCmpSwapSubword(
    MachineBasicBlock &BB, MachineBasicBlock::iterator I,
    MachineBasicBlock::iterator &NMBBI) {

  MachineFunction *MF = BB.getParent();

  const bool ArePtrs64bit = STI->getABI().ArePtrs64bit();
  DebugLoc DL = I->getDebugLoc();
  unsigned LL, SC;

  unsigned ZERO = Mips::ZERO;
  unsigned BNE = Mips::BNE;
  unsigned BEQ = Mips::BEQ;
  unsigned SEOp =
      I->getOpcode() == Mips::ATOMIC_CMP_SWAP_I8_POSTRA ? Mips::SEB : Mips::SEH;

  if (STI->inMicroMipsMode()) {
      LL = STI->hasMips32r6() ? Mips::LL_MMR6 : Mips::LL_MM;
      SC = STI->hasMips32r6() ? Mips::SC_MMR6 : Mips::SC_MM;
      BNE = STI->hasMips32r6() ? Mips::BNEC_MMR6 : Mips::BNE_MM;
      BEQ = STI->hasMips32r6() ? Mips::BEQC_MMR6 : Mips::BEQ_MM;
  } else {
    LL = STI->hasMips32r6() ? (ArePtrs64bit ? Mips::LL64_R6 : Mips::LL_R6)
                            : (ArePtrs64bit ? Mips::LL64 : Mips::LL);
    SC = STI->hasMips32r6() ? (ArePtrs64bit ? Mips::SC64_R6 : Mips::SC_R6)
                            : (ArePtrs64bit ? Mips::SC64 : Mips::SC);
  }

  unsigned Dest = I->getOperand(0).getReg();
  unsigned Ptr = I->getOperand(1).getReg();
  unsigned Mask = I->getOperand(2).getReg();
  unsigned ShiftCmpVal = I->getOperand(3).getReg();
  unsigned Mask2 = I->getOperand(4).getReg();
  unsigned ShiftNewVal = I->getOperand(5).getReg();
  unsigned ShiftAmnt = I->getOperand(6).getReg();
  unsigned Scratch = I->getOperand(7).getReg();
  unsigned Scratch2 = I->getOperand(8).getReg();

  // Insert new blocks after the current block.
  const BasicBlock *LLVM_BB = BB.getBasicBlock();
  MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MachineFunction::iterator It = ++BB.getIterator();
  MF->insert(It, loop1MBB);
  MF->insert(It, loop2MBB);
  MF->insert(It, sinkMBB);
  MF->insert(It, exitMBB);

  // Transfer the remainder of BB and its successor edges to exitMBB.
  exitMBB->splice(exitMBB->begin(), &BB,
                  std::next(MachineBasicBlock::iterator(I)), BB.end());
  exitMBB->transferSuccessorsAndUpdatePHIs(&BB);

  //  thisMBB:
  //    ...
  //    fallthrough --> loop1MBB
  BB.addSuccessor(loop1MBB, BranchProbability::getOne());
  loop1MBB->addSuccessor(sinkMBB);
  loop1MBB->addSuccessor(loop2MBB);
  loop1MBB->normalizeSuccProbs();
  loop2MBB->addSuccessor(loop1MBB);
  loop2MBB->addSuccessor(sinkMBB);
  loop2MBB->normalizeSuccProbs();
  sinkMBB->addSuccessor(exitMBB, BranchProbability::getOne());

  // loop1MBB:
  //   ll dest, 0(ptr)
  //   and Mask', dest, Mask
  //   bne Mask', ShiftCmpVal, exitMBB
  BuildMI(loop1MBB, DL, TII->get(LL), Scratch).addReg(Ptr).addImm(0);
  BuildMI(loop1MBB, DL, TII->get(Mips::AND), Scratch2)
      .addReg(Scratch)
      .addReg(Mask);
  BuildMI(loop1MBB, DL, TII->get(BNE))
    .addReg(Scratch2).addReg(ShiftCmpVal).addMBB(sinkMBB);

  // loop2MBB:
  //   and dest, dest, mask2
  //   or dest, dest, ShiftNewVal
  //   sc dest, dest, 0(ptr)
  //   beq dest, $0, loop1MBB
  BuildMI(loop2MBB, DL, TII->get(Mips::AND), Scratch)
      .addReg(Scratch, RegState::Kill)
      .addReg(Mask2);
  BuildMI(loop2MBB, DL, TII->get(Mips::OR), Scratch)
      .addReg(Scratch, RegState::Kill)
      .addReg(ShiftNewVal);
  BuildMI(loop2MBB, DL, TII->get(SC), Scratch)
      .addReg(Scratch, RegState::Kill)
      .addReg(Ptr)
      .addImm(0);
  BuildMI(loop2MBB, DL, TII->get(BEQ))
      .addReg(Scratch, RegState::Kill)
      .addReg(ZERO)
      .addMBB(loop1MBB);

  //  sinkMBB:
  //    srl     srlres, Mask', shiftamt
  //    sign_extend dest,srlres
  BuildMI(sinkMBB, DL, TII->get(Mips::SRLV), Dest)
      .addReg(Scratch2)
      .addReg(ShiftAmnt);
  if (STI->hasMips32r2()) {
    BuildMI(sinkMBB, DL, TII->get(SEOp), Dest).addReg(Dest);
  } else {
    const unsigned ShiftImm =
        I->getOpcode() == Mips::ATOMIC_CMP_SWAP_I16_POSTRA ? 16 : 24;
    BuildMI(sinkMBB, DL, TII->get(Mips::SLL), Dest)
        .addReg(Dest, RegState::Kill)
        .addImm(ShiftImm);
    BuildMI(sinkMBB, DL, TII->get(Mips::SRA), Dest)
        .addReg(Dest, RegState::Kill)
        .addImm(ShiftImm);
  }

  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *loop1MBB);
  computeAndAddLiveIns(LiveRegs, *loop2MBB);
  computeAndAddLiveIns(LiveRegs, *sinkMBB);
  computeAndAddLiveIns(LiveRegs, *exitMBB);

  NMBBI = BB.end();
  I->eraseFromParent();
  return true;
}
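
The expansion works because a subword CAS can be emulated on the containing aligned word: isolate the field with a mask, compare against the shifted expected value, and splice in the shifted new value before the store-conditional. A portable sketch of that mask/shift scheme, mirroring the Mask/Mask2/ShiftCmpVal/ShiftNewVal operands above (cas8 is invented; little-endian byte numbering is assumed and the final sign-extension step is omitted):

#include <atomic>
#include <cstdint>
#include <cstdio>

static bool cas8(std::atomic<uint32_t> &word, unsigned byteIdx,
                 uint8_t expected, uint8_t desired) {
  const unsigned shift = byteIdx * 8;
  const uint32_t mask  = 0xFFu << shift;       // Selects the byte (Mask).
  const uint32_t mask2 = ~mask;                // Keeps the other bytes (Mask2).
  const uint32_t cmp   = uint32_t(expected) << shift;   // ShiftCmpVal
  const uint32_t nw    = uint32_t(desired) << shift;    // ShiftNewVal
  uint32_t old = word.load();
  for (;;) {                                   // loop1MBB / loop2MBB
    if ((old & mask) != cmp)
      return false;                            // "bne Mask', ShiftCmpVal, sink"
    uint32_t updated = (old & mask2) | nw;     // the and/or of loop2MBB
    if (word.compare_exchange_weak(old, updated))
      return true;                             // "sc" succeeded
  }
}

int main() {
  std::atomic<uint32_t> w{0x11223344u};
  std::printf("%d %08x\n", cas8(w, 1, 0x33, 0xAB), w.load()); // 1 1122ab44
}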
Example No. 18
void
AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
                                   MachineBasicBlock &MBB) const {
  AArch64MachineFunctionInfo *FuncInfo =
    MF.getInfo<AArch64MachineFunctionInfo>();

  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  DebugLoc DL = MBBI->getDebugLoc();
  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
  MachineFrameInfo &MFI = *MF.getFrameInfo();
  unsigned RetOpcode = MBBI->getOpcode();

  // Initial and residual are named for consistency with the prologue. Note that
  // in the epilogue, the residual adjustment is executed first.
  uint64_t NumInitialBytes = FuncInfo->getInitialStackAdjust();
  uint64_t NumResidualBytes = MFI.getStackSize() - NumInitialBytes;
  uint64_t ArgumentPopSize = 0;
  if (RetOpcode == AArch64::TC_RETURNdi ||
      RetOpcode == AArch64::TC_RETURNxi) {
    MachineOperand &JumpTarget = MBBI->getOperand(0);
    MachineOperand &StackAdjust = MBBI->getOperand(1);

    MachineInstrBuilder MIB;
    if (RetOpcode == AArch64::TC_RETURNdi) {
      MIB = BuildMI(MBB, MBBI, DL, TII.get(AArch64::TAIL_Bimm));
      if (JumpTarget.isGlobal()) {
        MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
                             JumpTarget.getTargetFlags());
      } else {
        assert(JumpTarget.isSymbol() && "unexpected tail call destination");
        MIB.addExternalSymbol(JumpTarget.getSymbolName(),
                              JumpTarget.getTargetFlags());
      }
    } else {
      assert(RetOpcode == AArch64::TC_RETURNxi && JumpTarget.isReg()
             && "Unexpected tail call");

      MIB = BuildMI(MBB, MBBI, DL, TII.get(AArch64::TAIL_BRx));
      MIB.addReg(JumpTarget.getReg(), RegState::Kill);
    }

    // Add the extra operands onto the new tail call instruction even though
    // they're not used directly (so that liveness is tracked properly etc).
    for (unsigned i = 2, e = MBBI->getNumOperands(); i != e; ++i)
      MIB->addOperand(MBBI->getOperand(i));

    // Delete the pseudo instruction TC_RETURN.
    MachineInstr *NewMI = prior(MBBI);
    MBB.erase(MBBI);
    MBBI = NewMI;

    // For a tail-call in a callee-pops-arguments environment, some or all of
    // the stack may actually be in use for the call's arguments; this is
    // calculated during LowerCall and consumed here...
    ArgumentPopSize = StackAdjust.getImm();
  } else {
    // ... otherwise the amount to pop is *all* of the argument space,
    // conveniently stored in the MachineFunctionInfo by
    // LowerFormalArguments. This will, of course, be zero for the C calling
    // convention.
    ArgumentPopSize = FuncInfo->getArgumentStackToRestore();
  }

  assert(NumInitialBytes % 16 == 0 && NumResidualBytes % 16 == 0
         && "refusing to adjust stack by misaligned amt");

  // We may need to address callee-saved registers differently, so find out the
  // bound on the frame indices.
  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
  int MinCSFI = 0;
  int MaxCSFI = -1;

  if (CSI.size()) {
    MinCSFI = CSI[0].getFrameIdx();
    MaxCSFI = CSI[CSI.size() - 1].getFrameIdx();
  }

  // The "residual" stack update comes first from this direction and guarantees
  // that SP is NumInitialBytes below its value on function entry, either by a
  // direct update or restoring it from the frame pointer.
  if (NumInitialBytes + ArgumentPopSize != 0) {
    emitSPUpdate(MBB, MBBI, DL, TII, AArch64::X16,
                 NumInitialBytes + ArgumentPopSize);
    --MBBI;
  }

  // MBBI now points to the instruction just past the last callee-saved
  // restoration (either RET/B if NumInitialBytes == 0, or the "ADD sp, sp"
  // otherwise).

  // Now we need to find out where to put the bulk of the stack adjustment
  MachineBasicBlock::iterator FirstEpilogue = MBBI;
  while (MBBI != MBB.begin()) {
    --MBBI;

    unsigned FrameOp;
    for (FrameOp = 0; FrameOp < MBBI->getNumOperands(); ++FrameOp) {
      if (MBBI->getOperand(FrameOp).isFI())
        break;
    }

    // If this instruction doesn't have a frame index we've reached the end of
    // the callee-save restoration.
    if (FrameOp == MBBI->getNumOperands())
      break;

    // Likewise if it *is* a local reference, but not to a callee-saved object.
    int FrameIdx = MBBI->getOperand(FrameOp).getIndex();
    if (FrameIdx < MinCSFI || FrameIdx > MaxCSFI)
      break;

    FirstEpilogue = MBBI;
  }

  if (MF.getFrameInfo()->hasVarSizedObjects()) {
    int64_t StaticFrameBase;
    StaticFrameBase = -(NumInitialBytes + FuncInfo->getFramePointerOffset());
    emitRegUpdate(MBB, FirstEpilogue, DL, TII,
                  AArch64::XSP, AArch64::X29, AArch64::NoRegister,
                  StaticFrameBase);
  } else {
    emitSPUpdate(MBB, FirstEpilogue, DL, TII, AArch64::X16, NumResidualBytes);
  }
}
Example No. 19
/// hoistSpill - Given a sibling copy that defines a value to be spilled, insert
/// a spill at a better location.
bool InlineSpiller::hoistSpill(LiveInterval &SpillLI, MachineInstr *CopyMI) {
  SlotIndex Idx = LIS.getInstructionIndex(CopyMI);
  VNInfo *VNI = SpillLI.getVNInfoAt(Idx.getRegSlot());
  assert(VNI && VNI->def == Idx.getRegSlot() && "Not defined by copy");
  SibValueMap::iterator I = SibValues.find(VNI);
  if (I == SibValues.end())
    return false;

  const SibValueInfo &SVI = I->second;

  // Let the normal folding code deal with the boring case.
  if (!SVI.AllDefsAreReloads && SVI.SpillVNI == VNI)
    return false;

  // SpillReg may have been deleted by remat and DCE.
  if (!LIS.hasInterval(SVI.SpillReg)) {
    DEBUG(dbgs() << "Stale interval: " << PrintReg(SVI.SpillReg) << '\n');
    SibValues.erase(I);
    return false;
  }

  LiveInterval &SibLI = LIS.getInterval(SVI.SpillReg);
  if (!SibLI.containsValue(SVI.SpillVNI)) {
    DEBUG(dbgs() << "Stale value: " << PrintReg(SVI.SpillReg) << '\n');
    SibValues.erase(I);
    return false;
  }

  // Conservatively extend the stack slot range to the range of the original
  // value. We may be able to do better with stack slot coloring by being more
  // careful here.
  assert(StackInt && "No stack slot assigned yet.");
  LiveInterval &OrigLI = LIS.getInterval(Original);
  VNInfo *OrigVNI = OrigLI.getVNInfoAt(Idx);
  StackInt->MergeValueInAsValue(OrigLI, OrigVNI, StackInt->getValNumInfo(0));
  DEBUG(dbgs() << "\tmerged orig valno " << OrigVNI->id << ": "
               << *StackInt << '\n');

  // Already spilled everywhere.
  if (SVI.AllDefsAreReloads) {
    DEBUG(dbgs() << "\tno spill needed: " << SVI);
    ++NumOmitReloadSpill;
    return true;
  }
  // We are going to spill SVI.SpillVNI immediately after its def, so clear out
  // any later spills of the same value.
  eliminateRedundantSpills(SibLI, SVI.SpillVNI);

  MachineBasicBlock *MBB = LIS.getMBBFromIndex(SVI.SpillVNI->def);
  MachineBasicBlock::iterator MII;
  if (SVI.SpillVNI->isPHIDef())
    MII = MBB->SkipPHIsAndLabels(MBB->begin());
  else {
    MachineInstr *DefMI = LIS.getInstructionFromIndex(SVI.SpillVNI->def);
    assert(DefMI && "Defining instruction disappeared");
    MII = DefMI;
    ++MII;
  }
  // Insert spill without kill flag immediately after def.
  TII.storeRegToStackSlot(*MBB, MII, SVI.SpillReg, false, StackSlot,
                          MRI.getRegClass(SVI.SpillReg), &TRI);
  --MII; // Point to store instruction.
  LIS.InsertMachineInstrInMaps(MII);
  DEBUG(dbgs() << "\thoisted: " << SVI.SpillVNI->def << '\t' << *MII);

  ++NumSpills;
  ++NumHoists;
  return true;
}
Example No. 20
bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
    bool Modified = false;

    MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
    while (MBBI != E) {
        MachineInstr &MI = *MBBI;
        MachineBasicBlock::iterator NMBBI = llvm::next(MBBI);

        bool ModifiedOp = true;
        unsigned Opcode = MI.getOpcode();
        switch (Opcode) {
        default:
            ModifiedOp = false;
            break;

        case ARM::tLDRpci_pic:
        case ARM::t2LDRpci_pic: {
            unsigned NewLdOpc = (Opcode == ARM::tLDRpci_pic)
                                ? ARM::tLDRpci : ARM::t2LDRpci;
            unsigned DstReg = MI.getOperand(0).getReg();
            bool DstIsDead = MI.getOperand(0).isDead();
            MachineInstrBuilder MIB1 =
                AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
                                       TII->get(NewLdOpc), DstReg)
                               .addOperand(MI.getOperand(1)));
            (*MIB1).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
            MachineInstrBuilder MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
                                               TII->get(ARM::tPICADD))
                                       .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstIsDead))
                                       .addReg(DstReg)
                                       .addOperand(MI.getOperand(2));
            TransferImpOps(MI, MIB1, MIB2);
            MI.eraseFromParent();
            break;
        }

        case ARM::MOVi32imm:
        case ARM::t2MOVi32imm: {
            unsigned PredReg = 0;
            ARMCC::CondCodes Pred = llvm::getInstrPredicate(&MI, PredReg);
            unsigned DstReg = MI.getOperand(0).getReg();
            bool DstIsDead = MI.getOperand(0).isDead();
            const MachineOperand &MO = MI.getOperand(1);
            MachineInstrBuilder LO16, HI16;

            LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
                           TII->get(Opcode == ARM::MOVi32imm ?
                                    ARM::MOVi16 : ARM::t2MOVi16),
                           DstReg);
            HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
                           TII->get(Opcode == ARM::MOVi32imm ?
                                    ARM::MOVTi16 : ARM::t2MOVTi16))
                   .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstIsDead))
                   .addReg(DstReg);

            if (MO.isImm()) {
                unsigned Imm = MO.getImm();
                unsigned Lo16 = Imm & 0xffff;
                unsigned Hi16 = (Imm >> 16) & 0xffff;
                LO16 = LO16.addImm(Lo16);
                HI16 = HI16.addImm(Hi16);
            } else {
                const GlobalValue *GV = MO.getGlobal();
                unsigned TF = MO.getTargetFlags();
                LO16 = LO16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_LO16);
                HI16 = HI16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI16);
            }
            (*LO16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
            (*HI16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
            LO16.addImm(Pred).addReg(PredReg);
            HI16.addImm(Pred).addReg(PredReg);
            TransferImpOps(MI, LO16, HI16);
            MI.eraseFromParent();
            break;
        }

        case ARM::VMOVQQ: {
            unsigned DstReg = MI.getOperand(0).getReg();
            bool DstIsDead = MI.getOperand(0).isDead();
            unsigned EvenDst = TRI->getSubReg(DstReg, ARM::qsub_0);
            unsigned OddDst  = TRI->getSubReg(DstReg, ARM::qsub_1);
            unsigned SrcReg = MI.getOperand(1).getReg();
            bool SrcIsKill = MI.getOperand(1).isKill();
            unsigned EvenSrc = TRI->getSubReg(SrcReg, ARM::qsub_0);
            unsigned OddSrc  = TRI->getSubReg(SrcReg, ARM::qsub_1);
            MachineInstrBuilder Even =
                AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
                                       TII->get(ARM::VMOVQ))
                               .addReg(EvenDst,
                                       getDefRegState(true) | getDeadRegState(DstIsDead))
                               .addReg(EvenSrc, getKillRegState(SrcIsKill)));
            MachineInstrBuilder Odd =
                AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
                                       TII->get(ARM::VMOVQ))
                               .addReg(OddDst,
                                       getDefRegState(true) | getDeadRegState(DstIsDead))
                               .addReg(OddSrc, getKillRegState(SrcIsKill)));
            TransferImpOps(MI, Even, Odd);
            MI.eraseFromParent();
            break;
        }

        case ARM::VLDMQ: {
            MachineInstrBuilder MIB =
                BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::VLDMD));
            unsigned OpIdx = 0;
            // Grab the Q register destination.
            bool DstIsDead = MI.getOperand(OpIdx).isDead();
            unsigned DstReg = MI.getOperand(OpIdx++).getReg();
            // Copy the addrmode4 operands.
            MIB.addOperand(MI.getOperand(OpIdx++));
            MIB.addOperand(MI.getOperand(OpIdx++));
            // Copy the predicate operands.
            MIB.addOperand(MI.getOperand(OpIdx++));
            MIB.addOperand(MI.getOperand(OpIdx++));
            // Add the destination operands (D subregs).
            unsigned D0 = TRI->getSubReg(DstReg, ARM::dsub_0);
            unsigned D1 = TRI->getSubReg(DstReg, ARM::dsub_1);
            MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead))
            .addReg(D1, RegState::Define | getDeadRegState(DstIsDead));
            // Add an implicit def for the super-register.
            MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
            TransferImpOps(MI, MIB, MIB);
            MI.eraseFromParent();
            break;
        }

        case ARM::VSTMQ: {
            MachineInstrBuilder MIB =
                BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::VSTMD));
            unsigned OpIdx = 0;
            // Grab the Q register source.
            bool SrcIsKill = MI.getOperand(OpIdx).isKill();
            unsigned SrcReg = MI.getOperand(OpIdx++).getReg();
            // Copy the addrmode4 operands.
            MIB.addOperand(MI.getOperand(OpIdx++));
            MIB.addOperand(MI.getOperand(OpIdx++));
            // Copy the predicate operands.
            MIB.addOperand(MI.getOperand(OpIdx++));
            MIB.addOperand(MI.getOperand(OpIdx++));
            // Add the source operands (D subregs).
            unsigned D0 = TRI->getSubReg(SrcReg, ARM::dsub_0);
            unsigned D1 = TRI->getSubReg(SrcReg, ARM::dsub_1);
            MIB.addReg(D0).addReg(D1);
            if (SrcIsKill)
                // Add an implicit kill for the Q register.
                (*MIB).addRegisterKilled(SrcReg, TRI, true);
            TransferImpOps(MI, MIB, MIB);
            MI.eraseFromParent();
            break;
        }
        case ARM::VDUPfqf:
        case ARM::VDUPfdf: {
            unsigned NewOpc = Opcode == ARM::VDUPfqf ? ARM::VDUPLNfq : ARM::VDUPLNfd;
            MachineInstrBuilder MIB =
                BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc));
            unsigned OpIdx = 0;
            unsigned SrcReg = MI.getOperand(1).getReg();
            unsigned Lane = getARMRegisterNumbering(SrcReg) & 1;
            unsigned DReg = TRI->getMatchingSuperReg(SrcReg,
                            Lane & 1 ? ARM::ssub_1 : ARM::ssub_0, &ARM::DPR_VFP2RegClass);
            // The lane is [0,1] for the containing DReg superregister.
            // Copy the dst/src register operands.
            MIB.addOperand(MI.getOperand(OpIdx++));
            MIB.addReg(DReg);
            ++OpIdx;
            // Add the lane select operand.
            MIB.addImm(Lane);
            // Add the predicate operands.
            MIB.addOperand(MI.getOperand(OpIdx++));
            MIB.addOperand(MI.getOperand(OpIdx++));

            TransferImpOps(MI, MIB, MIB);
            MI.eraseFromParent();
            break;
        }

        case ARM::VLD1q8Pseudo:
        case ARM::VLD1q16Pseudo:
        case ARM::VLD1q32Pseudo:
        case ARM::VLD1q64Pseudo:
        case ARM::VLD1q8Pseudo_UPD:
        case ARM::VLD1q16Pseudo_UPD:
        case ARM::VLD1q32Pseudo_UPD:
        case ARM::VLD1q64Pseudo_UPD:
        case ARM::VLD2d8Pseudo:
        case ARM::VLD2d16Pseudo:
        case ARM::VLD2d32Pseudo:
        case ARM::VLD2q8Pseudo:
        case ARM::VLD2q16Pseudo:
        case ARM::VLD2q32Pseudo:
        case ARM::VLD2d8Pseudo_UPD:
        case ARM::VLD2d16Pseudo_UPD:
        case ARM::VLD2d32Pseudo_UPD:
        case ARM::VLD2q8Pseudo_UPD:
        case ARM::VLD2q16Pseudo_UPD:
        case ARM::VLD2q32Pseudo_UPD:
        case ARM::VLD3d8Pseudo:
        case ARM::VLD3d16Pseudo:
        case ARM::VLD3d32Pseudo:
        case ARM::VLD1d64TPseudo:
        case ARM::VLD3d8Pseudo_UPD:
        case ARM::VLD3d16Pseudo_UPD:
        case ARM::VLD3d32Pseudo_UPD:
        case ARM::VLD1d64TPseudo_UPD:
        case ARM::VLD3q8Pseudo_UPD:
        case ARM::VLD3q16Pseudo_UPD:
        case ARM::VLD3q32Pseudo_UPD:
        case ARM::VLD3q8oddPseudo_UPD:
        case ARM::VLD3q16oddPseudo_UPD:
        case ARM::VLD3q32oddPseudo_UPD:
        case ARM::VLD4d8Pseudo:
        case ARM::VLD4d16Pseudo:
        case ARM::VLD4d32Pseudo:
        case ARM::VLD1d64QPseudo:
        case ARM::VLD4d8Pseudo_UPD:
        case ARM::VLD4d16Pseudo_UPD:
        case ARM::VLD4d32Pseudo_UPD:
        case ARM::VLD1d64QPseudo_UPD:
        case ARM::VLD4q8Pseudo_UPD:
        case ARM::VLD4q16Pseudo_UPD:
        case ARM::VLD4q32Pseudo_UPD:
        case ARM::VLD4q8oddPseudo_UPD:
        case ARM::VLD4q16oddPseudo_UPD:
        case ARM::VLD4q32oddPseudo_UPD:
            ExpandVLD(MBBI);
            break;

        case ARM::VST1q8Pseudo:
        case ARM::VST1q16Pseudo:
        case ARM::VST1q32Pseudo:
        case ARM::VST1q64Pseudo:
        case ARM::VST1q8Pseudo_UPD:
        case ARM::VST1q16Pseudo_UPD:
        case ARM::VST1q32Pseudo_UPD:
        case ARM::VST1q64Pseudo_UPD:
        case ARM::VST2d8Pseudo:
        case ARM::VST2d16Pseudo:
        case ARM::VST2d32Pseudo:
        case ARM::VST2q8Pseudo:
        case ARM::VST2q16Pseudo:
        case ARM::VST2q32Pseudo:
        case ARM::VST2d8Pseudo_UPD:
        case ARM::VST2d16Pseudo_UPD:
        case ARM::VST2d32Pseudo_UPD:
        case ARM::VST2q8Pseudo_UPD:
        case ARM::VST2q16Pseudo_UPD:
        case ARM::VST2q32Pseudo_UPD:
        case ARM::VST3d8Pseudo:
        case ARM::VST3d16Pseudo:
        case ARM::VST3d32Pseudo:
        case ARM::VST1d64TPseudo:
        case ARM::VST3d8Pseudo_UPD:
        case ARM::VST3d16Pseudo_UPD:
        case ARM::VST3d32Pseudo_UPD:
        case ARM::VST1d64TPseudo_UPD:
        case ARM::VST3q8Pseudo_UPD:
        case ARM::VST3q16Pseudo_UPD:
        case ARM::VST3q32Pseudo_UPD:
        case ARM::VST3q8oddPseudo_UPD:
        case ARM::VST3q16oddPseudo_UPD:
        case ARM::VST3q32oddPseudo_UPD:
        case ARM::VST4d8Pseudo:
        case ARM::VST4d16Pseudo:
        case ARM::VST4d32Pseudo:
        case ARM::VST1d64QPseudo:
        case ARM::VST4d8Pseudo_UPD:
        case ARM::VST4d16Pseudo_UPD:
        case ARM::VST4d32Pseudo_UPD:
        case ARM::VST1d64QPseudo_UPD:
        case ARM::VST4q8Pseudo_UPD:
        case ARM::VST4q16Pseudo_UPD:
        case ARM::VST4q32Pseudo_UPD:
        case ARM::VST4q8oddPseudo_UPD:
        case ARM::VST4q16oddPseudo_UPD:
        case ARM::VST4q32oddPseudo_UPD:
            ExpandVST(MBBI);
            break;

        case ARM::VLD2LNd8Pseudo:
        case ARM::VLD2LNd16Pseudo:
        case ARM::VLD2LNd32Pseudo:
        case ARM::VLD2LNq16Pseudo:
        case ARM::VLD2LNq32Pseudo:
        case ARM::VLD2LNd8Pseudo_UPD:
        case ARM::VLD2LNd16Pseudo_UPD:
        case ARM::VLD2LNd32Pseudo_UPD:
        case ARM::VLD2LNq16Pseudo_UPD:
        case ARM::VLD2LNq32Pseudo_UPD:
        case ARM::VLD3LNd8Pseudo:
        case ARM::VLD3LNd16Pseudo:
        case ARM::VLD3LNd32Pseudo:
        case ARM::VLD3LNq16Pseudo:
        case ARM::VLD3LNq32Pseudo:
        case ARM::VLD3LNd8Pseudo_UPD:
        case ARM::VLD3LNd16Pseudo_UPD:
        case ARM::VLD3LNd32Pseudo_UPD:
        case ARM::VLD3LNq16Pseudo_UPD:
        case ARM::VLD3LNq32Pseudo_UPD:
        case ARM::VLD4LNd8Pseudo:
        case ARM::VLD4LNd16Pseudo:
        case ARM::VLD4LNd32Pseudo:
        case ARM::VLD4LNq16Pseudo:
        case ARM::VLD4LNq32Pseudo:
        case ARM::VLD4LNd8Pseudo_UPD:
        case ARM::VLD4LNd16Pseudo_UPD:
        case ARM::VLD4LNd32Pseudo_UPD:
        case ARM::VLD4LNq16Pseudo_UPD:
        case ARM::VLD4LNq32Pseudo_UPD:
        case ARM::VST2LNd8Pseudo:
        case ARM::VST2LNd16Pseudo:
        case ARM::VST2LNd32Pseudo:
        case ARM::VST2LNq16Pseudo:
        case ARM::VST2LNq32Pseudo:
        case ARM::VST2LNd8Pseudo_UPD:
        case ARM::VST2LNd16Pseudo_UPD:
        case ARM::VST2LNd32Pseudo_UPD:
        case ARM::VST2LNq16Pseudo_UPD:
        case ARM::VST2LNq32Pseudo_UPD:
        case ARM::VST3LNd8Pseudo:
        case ARM::VST3LNd16Pseudo:
        case ARM::VST3LNd32Pseudo:
        case ARM::VST3LNq16Pseudo:
        case ARM::VST3LNq32Pseudo:
        case ARM::VST3LNd8Pseudo_UPD:
        case ARM::VST3LNd16Pseudo_UPD:
        case ARM::VST3LNd32Pseudo_UPD:
        case ARM::VST3LNq16Pseudo_UPD:
        case ARM::VST3LNq32Pseudo_UPD:
        case ARM::VST4LNd8Pseudo:
        case ARM::VST4LNd16Pseudo:
        case ARM::VST4LNd32Pseudo:
        case ARM::VST4LNq16Pseudo:
        case ARM::VST4LNq32Pseudo:
        case ARM::VST4LNd8Pseudo_UPD:
        case ARM::VST4LNd16Pseudo_UPD:
        case ARM::VST4LNd32Pseudo_UPD:
        case ARM::VST4LNq16Pseudo_UPD:
        case ARM::VST4LNq32Pseudo_UPD:
            ExpandLaneOp(MBBI);
            break;

        case ARM::VTBL2Pseudo:
            ExpandVTBL(MBBI, ARM::VTBL2, false, 2);
            break;
        case ARM::VTBL3Pseudo:
            ExpandVTBL(MBBI, ARM::VTBL3, false, 3);
            break;
        case ARM::VTBL4Pseudo:
            ExpandVTBL(MBBI, ARM::VTBL4, false, 4);
            break;
        case ARM::VTBX2Pseudo:
            ExpandVTBL(MBBI, ARM::VTBX2, true, 2);
            break;
        case ARM::VTBX3Pseudo:
            ExpandVTBL(MBBI, ARM::VTBX3, true, 3);
            break;
        case ARM::VTBX4Pseudo:
            ExpandVTBL(MBBI, ARM::VTBX4, true, 4);
            break;
        }

        Modified |= ModifiedOp;
        MBBI = NMBBI;
    }

    return Modified;
}
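
The MOVi32imm/t2MOVi32imm case above splits a 32-bit immediate into a MOVW/MOVT pair that writes the low and high halves of the destination. A standalone check of exactly that split (illustration only):

#include <cassert>
#include <cstdio>

int main() {
  unsigned imm = 0xDEADBEEFu;
  unsigned lo16 = imm & 0xFFFFu;           // Operand of MOVW (MOVi16)
  unsigned hi16 = (imm >> 16) & 0xFFFFu;   // Operand of MOVT (MOVTi16)
  assert(((hi16 << 16) | lo16) == imm);    // The pair reassembles imm.
  std::printf("movw #0x%04x, movt #0x%04x\n", lo16, hi16);
}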
Example No. 21
bool MipsInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, 
                                  MachineBasicBlock *&TBB,
                                  MachineBasicBlock *&FBB,
                                  SmallVectorImpl<MachineOperand> &Cond,
                                  bool AllowModify) const 
{
  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.end();
  if (I == MBB.begin() || !isUnpredicatedTerminator(--I))
    return false;
  
  // Get the last instruction in the block.
  MachineInstr *LastInst = I;
  
  // If there is only one terminator instruction, process it.
  unsigned LastOpc = LastInst->getOpcode();
  if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
    if (!LastInst->getDesc().isBranch())
      return true;

    // Unconditional branch
    if (LastOpc == Mips::J) {
      TBB = LastInst->getOperand(0).getMBB();
      return false;
    }

    Mips::CondCode BranchCode = GetCondFromBranchOpc(LastInst->getOpcode());
    if (BranchCode == Mips::COND_INVALID)
      return true;  // Can't handle indirect branch.

    // Conditional branch
    // Block ends with fall-through condbranch.
    if (BranchCode != Mips::COND_INVALID) {
      int LastNumOp = LastInst->getNumOperands();

      TBB = LastInst->getOperand(LastNumOp-1).getMBB();
      Cond.push_back(MachineOperand::CreateImm(BranchCode));

      for (int i=0; i<LastNumOp-1; i++) {
        Cond.push_back(LastInst->getOperand(i));
      }

      return false;
    }
  }
  
  // Get the instruction before it if it is a terminator.
  MachineInstr *SecondLastInst = I;
  
  // If there are three terminators, we don't know what sort of block this is.
  if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
    return true;

  // If the block ends with Mips::J and a Mips::BNE/Mips::BEQ, handle it.
  unsigned SecondLastOpc    = SecondLastInst->getOpcode();
  Mips::CondCode BranchCode = GetCondFromBranchOpc(SecondLastOpc);

  if (BranchCode != Mips::COND_INVALID && LastOpc == Mips::J) {
    int SecondNumOp = SecondLastInst->getNumOperands();

    TBB = SecondLastInst->getOperand(SecondNumOp-1).getMBB();
    Cond.push_back(MachineOperand::CreateImm(BranchCode));

    for (int i=0; i<SecondNumOp-1; i++) {
      Cond.push_back(SecondLastInst->getOperand(i));
    }

    FBB = LastInst->getOperand(0).getMBB();
    return false;
  }
  
  // If the block ends with two unconditional branches, handle it. The last 
  // one is not executed, so remove it.
  if ((SecondLastOpc == Mips::J) && (LastOpc == Mips::J)) {
    TBB = SecondLastInst->getOperand(0).getMBB();
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return false;
  }

  // Otherwise, can't handle this.
  return true;
}
Example No. 22
/// SinkInstruction - Determine whether it is safe to sink the specified machine
/// instruction out of its current block into a successor.
bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
  // Don't sink insert_subreg, subreg_to_reg, reg_sequence. These are meant to
  // be close to the source to make it easier to coalesce.
  if (AvoidsSinking(MI, MRI))
    return false;

  // Check if it's safe to move the instruction.
  if (!MI->isSafeToMove(TII, AA, SawStore))
    return false;

  // FIXME: This should include support for sinking instructions within the
  // block they are currently in to shorten the live ranges.  We often get
  // instructions sunk into the top of a large block, but it would be better to
  // also sink them down before their first use in the block.  This xform has to
  // be careful not to *increase* register pressure though, e.g. sinking
  // "x = y + z" down if it kills y and z would increase the live ranges of y
  // and z and only shrink the live range of x.

  bool BreakPHIEdge = false;
  MachineBasicBlock *ParentBlock = MI->getParent();
  MachineBasicBlock *SuccToSinkTo = FindSuccToSinkTo(MI, ParentBlock, BreakPHIEdge);

  // If there are no outputs, it must have side-effects.
  if (SuccToSinkTo == 0)
    return false;

  // If the instruction to move defines a dead physical register which is live
  // when leaving the basic block, don't move it because it could turn into a
  // "zombie" define of that preg. E.g., EFLAGS. (<rdar://problem/8030636>)
  for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) {
    const MachineOperand &MO = MI->getOperand(I);
    if (!MO.isReg()) continue;
    unsigned Reg = MO.getReg();
    if (Reg == 0 || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
    if (SuccToSinkTo->isLiveIn(Reg))
      return false;
  }

  DEBUG(dbgs() << "Sink instr " << *MI << "\tinto block " << *SuccToSinkTo);

  // If the block has multiple predecessors, this would introduce computation on
  // a path where it doesn't already exist.  We could split the critical edge,
  // but for now we just punt.
  if (SuccToSinkTo->pred_size() > 1) {
    // We cannot sink a load across a critical edge - there may be stores in
    // other code paths.
    bool TryBreak = false;
    bool store = true;
    if (!MI->isSafeToMove(TII, AA, store)) {
      DEBUG(dbgs() << " *** NOTE: Won't sink load along critical edge.\n");
      TryBreak = true;
    }

    // We don't want to sink across a critical edge if we don't dominate the
    // successor. We could be introducing calculations to new code paths.
    if (!TryBreak && !DT->dominates(ParentBlock, SuccToSinkTo)) {
      DEBUG(dbgs() << " *** NOTE: Critical edge found\n");
      TryBreak = true;
    }

    // Don't sink instructions into a loop.
    if (!TryBreak && LI->isLoopHeader(SuccToSinkTo)) {
      DEBUG(dbgs() << " *** NOTE: Loop header found\n");
      TryBreak = true;
    }

    // Otherwise we are OK with sinking along a critical edge.
    if (!TryBreak)
      DEBUG(dbgs() << "Sinking along critical edge.\n");
    else {
      MachineBasicBlock *NewSucc =
        SplitCriticalEdge(MI, ParentBlock, SuccToSinkTo, BreakPHIEdge);
      if (!NewSucc) {
        DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to "
                        "break critical edge\n");
        return false;
      } else {
        DEBUG(dbgs() << " *** Splitting critical edge:"
              " BB#" << ParentBlock->getNumber()
              << " -- BB#" << NewSucc->getNumber()
              << " -- BB#" << SuccToSinkTo->getNumber() << '\n');
        SuccToSinkTo = NewSucc;
        ++NumSplit;
        BreakPHIEdge = false;
      }
    }
  }

  if (BreakPHIEdge) {
    // BreakPHIEdge is true if all the uses are in the successor MBB being
    // sunk into and they are all PHI nodes. In this case, machine-sink must
    // break the critical edge first.
    MachineBasicBlock *NewSucc = SplitCriticalEdge(MI, ParentBlock,
                                                   SuccToSinkTo, BreakPHIEdge);
    if (!NewSucc) {
      DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to "
            "break critical edge\n");
      return false;
    }

    DEBUG(dbgs() << " *** Splitting critical edge:"
          " BB#" << ParentBlock->getNumber()
          << " -- BB#" << NewSucc->getNumber()
          << " -- BB#" << SuccToSinkTo->getNumber() << '\n');
    SuccToSinkTo = NewSucc;
    ++NumSplit;
  }

  // Determine where to insert into. Skip phi nodes.
  MachineBasicBlock::iterator InsertPos = SuccToSinkTo->begin();
  while (InsertPos != SuccToSinkTo->end() && InsertPos->isPHI())
    ++InsertPos;

  // collect matching debug values.
  SmallVector<MachineInstr *, 2> DbgValuesToSink;
  collectDebugValues(MI, DbgValuesToSink);

  // Move the instruction.
  SuccToSinkTo->splice(InsertPos, ParentBlock, MI,
                       ++MachineBasicBlock::iterator(MI));

  // Move debug values.
  for (SmallVector<MachineInstr *, 2>::iterator DBI = DbgValuesToSink.begin(),
         DBE = DbgValuesToSink.end(); DBI != DBE; ++DBI) {
    MachineInstr *DbgMI = *DBI;
    SuccToSinkTo->splice(InsertPos, ParentBlock,  DbgMI,
                         ++MachineBasicBlock::iterator(DbgMI));
  }

  // Conservatively, clear any kill flags, since it's possible that they are no
  // longer correct.
  MI->clearKillInfo();

  return true;
}
Example No. 23
bool HexagonGenMux::genMuxInBlock(MachineBasicBlock &B) {
    bool Changed = false;
    InstrIndexMap I2X;
    DefUseInfoMap DUM;
    buildMaps(B, I2X, DUM);

    typedef DenseMap<unsigned,CondsetInfo> CondsetMap;
    CondsetMap CM;
    MuxInfoList ML;

    MachineBasicBlock::iterator NextI, End = B.end();
    for (MachineBasicBlock::iterator I = B.begin(); I != End; I = NextI) {
        MachineInstr *MI = &*I;
        NextI = std::next(I);
        unsigned Opc = MI->getOpcode();
        if (!isCondTransfer(Opc))
            continue;
        unsigned DR = MI->getOperand(0).getReg();
        if (isRegPair(DR))
            continue;

        unsigned PR = MI->getOperand(1).getReg();
        unsigned Idx = I2X.lookup(MI);
        CondsetMap::iterator F = CM.find(DR);
        bool IfTrue = HII->isPredicatedTrue(Opc);

        // If there is no record of a conditional transfer for this register,
        // or the predicate register differs, create a new record for it.
        if (F != CM.end() && F->second.PredR != PR) {
            CM.erase(F);
            F = CM.end();
        }
        if (F == CM.end()) {
            auto It = CM.insert(std::make_pair(DR, CondsetInfo()));
            F = It.first;
            F->second.PredR = PR;
        }
        CondsetInfo &CI = F->second;
        if (IfTrue)
            CI.TrueX = Idx;
        else
            CI.FalseX = Idx;
        if (CI.TrueX == UINT_MAX || CI.FalseX == UINT_MAX)
            continue;

        // There is now a complete definition of DR, i.e. we have the predicate
        // register, the definition if-true, and definition if-false.

        // First, check if both definitions are far enough from the definition
        // of the predicate register.
        unsigned MinX = std::min(CI.TrueX, CI.FalseX);
        unsigned MaxX = std::max(CI.TrueX, CI.FalseX);
        unsigned SearchX = (MaxX > 4) ? MaxX-4 : 0;
        bool NearDef = false;
        for (unsigned X = SearchX; X < MaxX; ++X) {
            const DefUseInfo &DU = DUM.lookup(X);
            if (!DU.Defs[PR])
                continue;
            NearDef = true;
            break;
        }
        if (NearDef)
            continue;

        // The predicate register is not defined in the last few instructions.
        // Check if the conversion to MUX is possible (either "up", i.e. at the
        // place of the earlier partial definition, or "down", where the later
        // definition is located). Examine all defs and uses between these two
        // definitions.
        // SR1, SR2 - source registers from the first and the second definition.
        MachineBasicBlock::iterator It1 = B.begin(), It2 = B.begin();
        std::advance(It1, MinX);
        std::advance(It2, MaxX);
        MachineInstr *Def1 = It1, *Def2 = It2;
        MachineOperand *Src1 = &Def1->getOperand(2), *Src2 = &Def2->getOperand(2);
        unsigned SR1 = Src1->isReg() ? Src1->getReg() : 0;
        unsigned SR2 = Src2->isReg() ? Src2->getReg() : 0;
        bool Failure = false, CanUp = true, CanDown = true;
        for (unsigned X = MinX+1; X < MaxX; X++) {
            const DefUseInfo &DU = DUM.lookup(X);
            if (DU.Defs[PR] || DU.Defs[DR] || DU.Uses[DR]) {
                Failure = true;
                break;
            }
            if (CanDown && DU.Defs[SR1])
                CanDown = false;
            if (CanUp && DU.Defs[SR2])
                CanUp = false;
        }
        if (Failure || (!CanUp && !CanDown))
            continue;

        MachineOperand *SrcT = (MinX == CI.TrueX) ? Src1 : Src2;
        MachineOperand *SrcF = (MinX == CI.FalseX) ? Src1 : Src2;
        // Prefer "down", since this will move the MUX farther away from the
        // predicate definition.
        MachineBasicBlock::iterator At = CanDown ? Def2 : Def1;
        ML.push_back(MuxInfo(At, DR, PR, SrcT, SrcF, Def1, Def2));
    }

    for (unsigned I = 0, N = ML.size(); I < N; ++I) {
        MuxInfo &MX = ML[I];
        MachineBasicBlock &B = *MX.At->getParent();
        DebugLoc DL = MX.At->getDebugLoc();
        unsigned MxOpc = getMuxOpcode(*MX.SrcT, *MX.SrcF);
        if (!MxOpc)
            continue;
        BuildMI(B, MX.At, DL, HII->get(MxOpc), MX.DefR)
        .addReg(MX.PredR)
        .addOperand(*MX.SrcT)
        .addOperand(*MX.SrcF);
        B.erase(MX.Def1);
        B.erase(MX.Def2);
        Changed = true;
    }

    return Changed;
}
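
The payoff of the pass is that two conditional transfers of the same register under complementary predicate senses collapse into one mux. A scalar model of that semantics (muxCombine is invented and assumes the two transfers have opposite senses, which the TrueX/FalseX bookkeeping above guarantees):

#include <cstdio>

struct CondTransfer { bool ifTrue; int src; };  // "if (p) DR = src" / "if (!p) DR = src"

static int muxCombine(bool p, CondTransfer a, CondTransfer b) {
  int srcT = a.ifTrue ? a.src : b.src;          // Source taken when p is true.
  int srcF = a.ifTrue ? b.src : a.src;          // Source taken when p is false.
  return p ? srcT : srcF;                       // DR = mux(p, srcT, srcF)
}

int main() {
  CondTransfer t{true, 10}, f{false, 20};
  std::printf("%d %d\n", muxCombine(true, t, f), muxCombine(false, t, f)); // 10 20
}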
Example No. 24
void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
                                   MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  assert((MBBI->getOpcode() == ARM::tBX_RET ||
          MBBI->getOpcode() == ARM::tPOP_RET) &&
         "Can only insert epilog into returning blocks");
  DebugLoc dl = MBBI->getDebugLoc();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  const Thumb1RegisterInfo *RegInfo =
    static_cast<const Thumb1RegisterInfo*>(MF.getTarget().getRegisterInfo());
  const Thumb1InstrInfo &TII =
    *static_cast<const Thumb1InstrInfo*>(MF.getTarget().getInstrInfo());

  unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
  int NumBytes = (int)MFI->getStackSize();
  const unsigned *CSRegs = RegInfo->getCalleeSavedRegs();
  unsigned FramePtr = RegInfo->getFrameRegister(MF);

  if (!AFI->hasStackFrame()) {
    if (NumBytes != 0)
      emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes);
  } else {
    // Unwind MBBI to point to first LDR / VLDRD.
    if (MBBI != MBB.begin()) {
      do
        --MBBI;
      while (MBBI != MBB.begin() && isCSRestore(MBBI, CSRegs));
      if (!isCSRestore(MBBI, CSRegs))
        ++MBBI;
    }

    // Move SP to start of FP callee save spill area.
    NumBytes -= (AFI->getGPRCalleeSavedArea1Size() +
                 AFI->getGPRCalleeSavedArea2Size() +
                 AFI->getDPRCalleeSavedAreaSize());

    if (AFI->shouldRestoreSPFromFP()) {
      NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
      // Reset SP based on frame pointer only if the stack frame extends beyond
      // frame pointer stack slot, the target is ELF and the function has FP, or
      // the target uses var sized objects.
      if (NumBytes) {
        assert(MF.getRegInfo().isPhysRegUsed(ARM::R4) &&
               "No scratch register to restore SP from FP!");
        emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
                                  TII, *RegInfo);
        AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
                               ARM::SP)
          .addReg(ARM::R4));
      } else
        AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
                               ARM::SP)
          .addReg(FramePtr));
    } else {
      if (MBBI->getOpcode() == ARM::tBX_RET &&
          &MBB.front() != MBBI &&
          prior(MBBI)->getOpcode() == ARM::tPOP) {
        MachineBasicBlock::iterator PMBBI = prior(MBBI);
        emitSPUpdate(MBB, PMBBI, TII, dl, *RegInfo, NumBytes);
      } else
        emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes);
    }
  }

  if (VARegSaveSize) {
    // Unlike T2 and ARM mode, the T1 pop instruction cannot restore
    // to LR, and we can't pop the value directly to the PC since
    // we need to update the SP after popping the value. Therefore, we
    // pop the old LR into R3 as a temporary.

    // Move back past the callee-saved register restoration
    while (MBBI != MBB.end() && isCSRestore(MBBI, CSRegs))
      ++MBBI;
    // Epilogue for vararg functions: pop LR to R3 and branch off it.
    AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)))
      .addReg(ARM::R3, RegState::Define);

    emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, VARegSaveSize);

    AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg))
      .addReg(ARM::R3, RegState::Kill));
    // Erase the old tBX_RET instruction.
    MBB.erase(MBBI);
  }
}
Example No. 25
void ARM64FrameLowering::emitEpilogue(MachineFunction &MF,
                                      MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  assert(MBBI->isReturn() && "Can only insert epilog into returning blocks");
  MachineFrameInfo *MFI = MF.getFrameInfo();
  const ARM64InstrInfo *TII =
      static_cast<const ARM64InstrInfo *>(MF.getTarget().getInstrInfo());
  const ARM64RegisterInfo *RegInfo =
      static_cast<const ARM64RegisterInfo *>(MF.getTarget().getRegisterInfo());
  DebugLoc DL = MBBI->getDebugLoc();
  unsigned RetOpcode = MBBI->getOpcode();

  int NumBytes = MFI->getStackSize();
  const ARM64FunctionInfo *AFI = MF.getInfo<ARM64FunctionInfo>();

  // Initial and residual are named for consistency with the prologue. Note that
  // in the epilogue, the residual adjustment is executed first.
  uint64_t ArgumentPopSize = 0;
  if (RetOpcode == ARM64::TCRETURNdi || RetOpcode == ARM64::TCRETURNri) {
    MachineOperand &StackAdjust = MBBI->getOperand(1);

    // For a tail-call in a callee-pops-arguments environment, some or all of
    // the stack may actually be in use for the call's arguments; this is
    // calculated during LowerCall and consumed here...
    ArgumentPopSize = StackAdjust.getImm();
  } else {
    // ... otherwise the amount to pop is *all* of the argument space,
    // conveniently stored in the MachineFunctionInfo by
    // LowerFormalArguments. This will, of course, be zero for the C calling
    // convention.
    ArgumentPopSize = AFI->getArgumentStackToRestore();
  }

  // The stack frame should look like this:
  //
  //      ----------------------                     ---
  //      |                    |                      |
  //      | BytesInStackArgArea|              CalleeArgStackSize
  //      | (NumReusableBytes) |                (of tail call)
  //      |                    |                     ---
  //      |                    |                      |
  //      ---------------------|        ---           |
  //      |                    |         |            |
  //      |   CalleeSavedReg   |         |            |
  //      | (NumRestores * 16) |         |            |
  //      |                    |         |            |
  //      ---------------------|         |         NumBytes
  //      |                    |     StackSize  (StackAdjustUp)
  //      |   LocalStackSize   |         |            |
  //      | (covering callee   |         |            |
  //      |       args)        |         |            |
  //      |                    |         |            |
  //      ----------------------        ---          ---
  //
  // So NumBytes = StackSize + BytesInStackArgArea - CalleeArgStackSize
  //             = StackSize + ArgumentPopSize
  //
  // ARM64TargetLowering::LowerCall figures out ArgumentPopSize and keeps
  // it as the 2nd argument of ARM64ISD::TC_RETURN.
  NumBytes += ArgumentPopSize;

  unsigned NumRestores = 0;
  // Move past the restores of the callee-saved registers.
  MachineBasicBlock::iterator LastPopI = MBBI;
  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
  if (LastPopI != MBB.begin()) {
    do {
      ++NumRestores;
      --LastPopI;
    } while (LastPopI != MBB.begin() && isCSRestore(LastPopI, CSRegs));
    if (!isCSRestore(LastPopI, CSRegs)) {
      ++LastPopI;
      --NumRestores;
    }
  }
  NumBytes -= NumRestores * 16;
  assert(NumBytes >= 0 && "Negative stack allocation size!?");

  if (!hasFP(MF)) {
    // If this was a redzone leaf function, we don't need to restore the
    // stack pointer.
    if (!canUseRedZone(MF))
      emitFrameOffset(MBB, LastPopI, DL, ARM64::SP, ARM64::SP, NumBytes, TII);
    return;
  }

  // Restore the original stack pointer.
  // FIXME: Rather than doing the math here, we should instead just use
  // non-post-indexed loads for the restores if we aren't actually going to
  // be able to save any instructions.
  if (NumBytes || MFI->hasVarSizedObjects())
    emitFrameOffset(MBB, LastPopI, DL, ARM64::SP, ARM64::FP,
                    -(NumRestores - 1) * 16, TII, MachineInstr::NoFlags);
}
Example No. 26
bool MipsExpandPseudo::expandAtomicCmpSwap(MachineBasicBlock &BB,
                                           MachineBasicBlock::iterator I,
                                           MachineBasicBlock::iterator &NMBBI) {

  const unsigned Size =
      I->getOpcode() == Mips::ATOMIC_CMP_SWAP_I32_POSTRA ? 4 : 8;
  MachineFunction *MF = BB.getParent();

  const bool ArePtrs64bit = STI->getABI().ArePtrs64bit();
  DebugLoc DL = I->getDebugLoc();

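  // Pick the load-linked/store-conditional and branch opcodes that match the
  // target ISA level, microMIPS mode, and pointer width.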
  unsigned LL, SC, ZERO, BNE, BEQ, MOVE;

  if (Size == 4) {
    if (STI->inMicroMipsMode()) {
      LL = STI->hasMips32r6() ? Mips::LL_MMR6 : Mips::LL_MM;
      SC = STI->hasMips32r6() ? Mips::SC_MMR6 : Mips::SC_MM;
      BNE = STI->hasMips32r6() ? Mips::BNEC_MMR6 : Mips::BNE_MM;
      BEQ = STI->hasMips32r6() ? Mips::BEQC_MMR6 : Mips::BEQ_MM;
    } else {
      LL = STI->hasMips32r6()
               ? (ArePtrs64bit ? Mips::LL64_R6 : Mips::LL_R6)
               : (ArePtrs64bit ? Mips::LL64 : Mips::LL);
      SC = STI->hasMips32r6()
               ? (ArePtrs64bit ? Mips::SC64_R6 : Mips::SC_R6)
               : (ArePtrs64bit ? Mips::SC64 : Mips::SC);
      BNE = Mips::BNE;
      BEQ = Mips::BEQ;
    }

    ZERO = Mips::ZERO;
    MOVE = Mips::OR;
  } else {
    LL = STI->hasMips64r6() ? Mips::LLD_R6 : Mips::LLD;
    SC = STI->hasMips64r6() ? Mips::SCD_R6 : Mips::SCD;
    ZERO = Mips::ZERO_64;
    BNE = Mips::BNE64;
    BEQ = Mips::BEQ64;
    MOVE = Mips::OR64;
  }

  unsigned Dest = I->getOperand(0).getReg();
  unsigned Ptr = I->getOperand(1).getReg();
  unsigned OldVal = I->getOperand(2).getReg();
  unsigned NewVal = I->getOperand(3).getReg();
  unsigned Scratch = I->getOperand(4).getReg();

  // Insert the new blocks after the current block.
  const BasicBlock *LLVM_BB = BB.getBasicBlock();
  MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MachineFunction::iterator It = ++BB.getIterator();
  MF->insert(It, loop1MBB);
  MF->insert(It, loop2MBB);
  MF->insert(It, exitMBB);

  // Transfer the remainder of BB and its successor edges to exitMBB.
  exitMBB->splice(exitMBB->begin(), &BB,
                  std::next(MachineBasicBlock::iterator(I)), BB.end());
  exitMBB->transferSuccessorsAndUpdatePHIs(&BB);

  //  thisMBB:
  //    ...
  //    fallthrough --> loop1MBB
  BB.addSuccessor(loop1MBB, BranchProbability::getOne());
  loop1MBB->addSuccessor(exitMBB);
  loop1MBB->addSuccessor(loop2MBB);
  loop1MBB->normalizeSuccProbs();
  loop2MBB->addSuccessor(loop1MBB);
  loop2MBB->addSuccessor(exitMBB);
  loop2MBB->normalizeSuccProbs();

  // loop1MBB:
  //   ll dest, 0(ptr)
  //   bne dest, oldval, exitMBB
  BuildMI(loop1MBB, DL, TII->get(LL), Dest).addReg(Ptr).addImm(0);
  BuildMI(loop1MBB, DL, TII->get(BNE))
    .addReg(Dest, RegState::Kill).addReg(OldVal).addMBB(exitMBB);

  // loop2MBB:
  //   move scratch, NewVal
  //   sc Scratch, Scratch, 0(ptr)
  //   beq Scratch, $0, loop1MBB
  BuildMI(loop2MBB, DL, TII->get(MOVE), Scratch).addReg(NewVal).addReg(ZERO);
  BuildMI(loop2MBB, DL, TII->get(SC), Scratch)
    .addReg(Scratch).addReg(Ptr).addImm(0);
  BuildMI(loop2MBB, DL, TII->get(BEQ))
    .addReg(Scratch, RegState::Kill).addReg(ZERO).addMBB(loop1MBB);

  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *loop1MBB);
  computeAndAddLiveIns(LiveRegs, *loop2MBB);
  computeAndAddLiveIns(LiveRegs, *exitMBB);

  NMBBI = BB.end();
  I->eraseFromParent();
  return true;
}
Example No. 27
MachineBasicBlock*
MSP430TargetLowering::EmitShiftInstr(MachineInstr *MI,
                                     MachineBasicBlock *BB) const {
  MachineFunction *F = BB->getParent();
  MachineRegisterInfo &RI = F->getRegInfo();
  DebugLoc dl = MI->getDebugLoc();
  const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();

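  // MSP430 only has shift-by-one instructions, so a shift by a variable
  // amount is lowered into a loop that shifts a single bit per iteration.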
  unsigned Opc;
  const TargetRegisterClass * RC;
  switch (MI->getOpcode()) {
  default:
    assert(0 && "Invalid shift opcode!");
  case MSP430::Shl8:
   Opc = MSP430::SHL8r1;
   RC = MSP430::GR8RegisterClass;
   break;
  case MSP430::Shl16:
   Opc = MSP430::SHL16r1;
   RC = MSP430::GR16RegisterClass;
   break;
  case MSP430::Sra8:
   Opc = MSP430::SAR8r1;
   RC = MSP430::GR8RegisterClass;
   break;
  case MSP430::Sra16:
   Opc = MSP430::SAR16r1;
   RC = MSP430::GR16RegisterClass;
   break;
  case MSP430::Srl8:
   Opc = MSP430::SAR8r1c;
   RC = MSP430::GR8RegisterClass;
   break;
  case MSP430::Srl16:
   Opc = MSP430::SAR16r1c;
   RC = MSP430::GR16RegisterClass;
   break;
  }

  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction::iterator I = BB;
  ++I;

  // Create the loop block and the remainder block that follows the shift.
  MachineBasicBlock *LoopBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *RemBB  = F->CreateMachineBasicBlock(LLVM_BB);

  F->insert(I, LoopBB);
  F->insert(I, RemBB);

  // Update machine-CFG edges by transferring all successors of the current
  // block to the block containing instructions after shift.
  RemBB->splice(RemBB->begin(), BB,
                llvm::next(MachineBasicBlock::iterator(MI)),
                BB->end());
  RemBB->transferSuccessorsAndUpdatePHIs(BB);

  // Add edges BB => LoopBB => RemBB, BB => RemBB, LoopBB => LoopBB
  BB->addSuccessor(LoopBB);
  BB->addSuccessor(RemBB);
  LoopBB->addSuccessor(RemBB);
  LoopBB->addSuccessor(LoopBB);

  unsigned ShiftAmtReg = RI.createVirtualRegister(MSP430::GR8RegisterClass);
  unsigned ShiftAmtReg2 = RI.createVirtualRegister(MSP430::GR8RegisterClass);
  unsigned ShiftReg = RI.createVirtualRegister(RC);
  unsigned ShiftReg2 = RI.createVirtualRegister(RC);
  unsigned ShiftAmtSrcReg = MI->getOperand(2).getReg();
  unsigned SrcReg = MI->getOperand(1).getReg();
  unsigned DstReg = MI->getOperand(0).getReg();

  // BB:
  // cmp 0, N
  // je RemBB
  BuildMI(BB, dl, TII.get(MSP430::CMP8ri))
    .addReg(ShiftAmtSrcReg).addImm(0);
  BuildMI(BB, dl, TII.get(MSP430::JCC))
    .addMBB(RemBB)
    .addImm(MSP430CC::COND_E);

  // LoopBB:
  // ShiftReg = phi [%SrcReg, BB], [%ShiftReg2, LoopBB]
  // ShiftAmt = phi [%N, BB],      [%ShiftAmt2, LoopBB]
  // ShiftReg2 = shift ShiftReg
  // ShiftAmt2 = ShiftAmt - 1;
  BuildMI(LoopBB, dl, TII.get(MSP430::PHI), ShiftReg)
    .addReg(SrcReg).addMBB(BB)
    .addReg(ShiftReg2).addMBB(LoopBB);
  BuildMI(LoopBB, dl, TII.get(MSP430::PHI), ShiftAmtReg)
    .addReg(ShiftAmtSrcReg).addMBB(BB)
    .addReg(ShiftAmtReg2).addMBB(LoopBB);
  BuildMI(LoopBB, dl, TII.get(Opc), ShiftReg2)
    .addReg(ShiftReg);
  BuildMI(LoopBB, dl, TII.get(MSP430::SUB8ri), ShiftAmtReg2)
    .addReg(ShiftAmtReg).addImm(1);
  BuildMI(LoopBB, dl, TII.get(MSP430::JCC))
    .addMBB(LoopBB)
    .addImm(MSP430CC::COND_NE);

  // RemBB:
  // DstReg = phi [%SrcReg, BB], [%ShiftReg2, LoopBB]
  BuildMI(*RemBB, RemBB->begin(), dl, TII.get(MSP430::PHI), DstReg)
    .addReg(SrcReg).addMBB(BB)
    .addReg(ShiftReg2).addMBB(LoopBB);

  MI->eraseFromParent();   // The pseudo instruction is gone now.
  return RemBB;
}
Example No. 28
bool MipsExpandPseudo::expandAtomicBinOpSubword(
    MachineBasicBlock &BB, MachineBasicBlock::iterator I,
    MachineBasicBlock::iterator &NMBBI) {

  MachineFunction *MF = BB.getParent();

  const bool ArePtrs64bit = STI->getABI().ArePtrs64bit();
  DebugLoc DL = I->getDebugLoc();

  unsigned LL, SC;
  unsigned BEQ = Mips::BEQ;
  unsigned SEOp = Mips::SEH;

  if (STI->inMicroMipsMode()) {
      LL = STI->hasMips32r6() ? Mips::LL_MMR6 : Mips::LL_MM;
      SC = STI->hasMips32r6() ? Mips::SC_MMR6 : Mips::SC_MM;
      BEQ = STI->hasMips32r6() ? Mips::BEQC_MMR6 : Mips::BEQ_MM;
  } else {
    LL = STI->hasMips32r6() ? (ArePtrs64bit ? Mips::LL64_R6 : Mips::LL_R6)
                            : (ArePtrs64bit ? Mips::LL64 : Mips::LL);
    SC = STI->hasMips32r6() ? (ArePtrs64bit ? Mips::SC64_R6 : Mips::SC_R6)
                            : (ArePtrs64bit ? Mips::SC64 : Mips::SC);
  }

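  // Decode the pseudo: pick the binary opcode (if any), note whether this is
  // a swap, and select the sign-extension opcode for the byte-sized variants.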
  bool IsSwap = false;
  bool IsNand = false;

  unsigned Opcode = 0;
  switch (I->getOpcode()) {
  case Mips::ATOMIC_LOAD_NAND_I8_POSTRA:
    SEOp = Mips::SEB;
    LLVM_FALLTHROUGH;
  case Mips::ATOMIC_LOAD_NAND_I16_POSTRA:
    IsNand = true;
    break;
  case Mips::ATOMIC_SWAP_I8_POSTRA:
    SEOp = Mips::SEB;
    LLVM_FALLTHROUGH;
  case Mips::ATOMIC_SWAP_I16_POSTRA:
    IsSwap = true;
    break;
  case Mips::ATOMIC_LOAD_ADD_I8_POSTRA:
    SEOp = Mips::SEB;
    LLVM_FALLTHROUGH;
  case Mips::ATOMIC_LOAD_ADD_I16_POSTRA:
    Opcode = Mips::ADDu;
    break;
  case Mips::ATOMIC_LOAD_SUB_I8_POSTRA:
    SEOp = Mips::SEB;
    LLVM_FALLTHROUGH;
  case Mips::ATOMIC_LOAD_SUB_I16_POSTRA:
    Opcode = Mips::SUBu;
    break;
  case Mips::ATOMIC_LOAD_AND_I8_POSTRA:
    SEOp = Mips::SEB;
    LLVM_FALLTHROUGH;
  case Mips::ATOMIC_LOAD_AND_I16_POSTRA:
    Opcode = Mips::AND;
    break;
  case Mips::ATOMIC_LOAD_OR_I8_POSTRA:
    SEOp = Mips::SEB;
    LLVM_FALLTHROUGH;
  case Mips::ATOMIC_LOAD_OR_I16_POSTRA:
    Opcode = Mips::OR;
    break;
  case Mips::ATOMIC_LOAD_XOR_I8_POSTRA:
    SEOp = Mips::SEB;
    LLVM_FALLTHROUGH;
  case Mips::ATOMIC_LOAD_XOR_I16_POSTRA:
    Opcode = Mips::XOR;
    break;
  default:
    llvm_unreachable("Unknown subword atomic pseudo for expansion!");
  }

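  // This pseudo is expanded after register allocation, so every scratch value
  // it needs arrives as an explicit, pre-allocated operand.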
  unsigned Dest = I->getOperand(0).getReg();
  unsigned Ptr = I->getOperand(1).getReg();
  unsigned Incr = I->getOperand(2).getReg();
  unsigned Mask = I->getOperand(3).getReg();
  unsigned Mask2 = I->getOperand(4).getReg();
  unsigned ShiftAmnt = I->getOperand(5).getReg();
  unsigned OldVal = I->getOperand(6).getReg();
  unsigned BinOpRes = I->getOperand(7).getReg();
  unsigned StoreVal = I->getOperand(8).getReg();

  const BasicBlock *LLVM_BB = BB.getBasicBlock();
  MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MachineFunction::iterator It = ++BB.getIterator();
  MF->insert(It, loopMBB);
  MF->insert(It, sinkMBB);
  MF->insert(It, exitMBB);

  exitMBB->splice(exitMBB->begin(), &BB, std::next(I), BB.end());
  exitMBB->transferSuccessorsAndUpdatePHIs(&BB);

  BB.addSuccessor(loopMBB, BranchProbability::getOne());
  loopMBB->addSuccessor(sinkMBB);
  loopMBB->addSuccessor(loopMBB);
  loopMBB->normalizeSuccProbs();

  BuildMI(loopMBB, DL, TII->get(LL), OldVal).addReg(Ptr).addImm(0);
  if (IsNand) {
    //  and andres, oldval, incr2
    //  nor binopres, $0, andres
    //  and newval, binopres, mask
    BuildMI(loopMBB, DL, TII->get(Mips::AND), BinOpRes)
        .addReg(OldVal)
        .addReg(Incr);
    BuildMI(loopMBB, DL, TII->get(Mips::NOR), BinOpRes)
        .addReg(Mips::ZERO)
        .addReg(BinOpRes);
    BuildMI(loopMBB, DL, TII->get(Mips::AND), BinOpRes)
        .addReg(BinOpRes)
        .addReg(Mask);
  } else if (!IsSwap) {
    //  <binop> binopres, oldval, incr2
    //  and newval, binopres, mask
    BuildMI(loopMBB, DL, TII->get(Opcode), BinOpRes)
        .addReg(OldVal)
        .addReg(Incr);
    BuildMI(loopMBB, DL, TII->get(Mips::AND), BinOpRes)
        .addReg(BinOpRes)
        .addReg(Mask);
  } else { // atomic.swap
    //  and newval, incr2, mask
    BuildMI(loopMBB, DL, TII->get(Mips::AND), BinOpRes)
        .addReg(Incr)
        .addReg(Mask);
  }

  // and StoreVal, OldVal, Mask2
  // or StoreVal, StoreVal, BinOpRes
  // StoreVal<tied1> = sc StoreVal, 0(Ptr)
  // beq StoreVal, zero, loopMBB
  BuildMI(loopMBB, DL, TII->get(Mips::AND), StoreVal)
    .addReg(OldVal).addReg(Mask2);
  BuildMI(loopMBB, DL, TII->get(Mips::OR), StoreVal)
    .addReg(StoreVal).addReg(BinOpRes);
  BuildMI(loopMBB, DL, TII->get(SC), StoreVal)
    .addReg(StoreVal).addReg(Ptr).addImm(0);
  BuildMI(loopMBB, DL, TII->get(BEQ))
    .addReg(StoreVal).addReg(Mips::ZERO).addMBB(loopMBB);

  //  sinkMBB:
  //    and     maskedoldval1,oldval,mask
  //    srl     srlres,maskedoldval1,shiftamt
  //    sign_extend dest,srlres

  sinkMBB->addSuccessor(exitMBB, BranchProbability::getOne());

  BuildMI(sinkMBB, DL, TII->get(Mips::AND), Dest)
    .addReg(OldVal).addReg(Mask);
  BuildMI(sinkMBB, DL, TII->get(Mips::SRLV), Dest)
      .addReg(Dest).addReg(ShiftAmnt);

  if (STI->hasMips32r2()) {
    BuildMI(sinkMBB, DL, TII->get(SEOp), Dest).addReg(Dest);
  } else {
    const unsigned ShiftImm = SEOp == Mips::SEH ? 16 : 24;
    BuildMI(sinkMBB, DL, TII->get(Mips::SLL), Dest)
        .addReg(Dest, RegState::Kill)
        .addImm(ShiftImm);
    BuildMI(sinkMBB, DL, TII->get(Mips::SRA), Dest)
        .addReg(Dest, RegState::Kill)
        .addImm(ShiftImm);
  }

  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *loopMBB);
  computeAndAddLiveIns(LiveRegs, *sinkMBB);
  computeAndAddLiveIns(LiveRegs, *exitMBB);

  NMBBI = BB.end();
  I->eraseFromParent();

  return true;
}
Example No. 29
bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) {
    MF = &mf;
    TII = MF->getSubtarget().getInstrInfo();
    TRI = MF->getSubtarget().getRegisterInfo();
    LiveRegs = nullptr;
    assert(NumRegs == RC->getNumRegs() && "Bad regclass");

    DEBUG(dbgs() << "********** FIX EXECUTION DEPENDENCIES: "
          << TRI->getRegClassName(RC) << " **********\n");

    // If no relevant registers are used in the function, we can skip it
    // completely.
    bool anyregs = false;
    for (TargetRegisterClass::const_iterator I = RC->begin(), E = RC->end();
            I != E; ++I)
        if (MF->getRegInfo().isPhysRegUsed(*I)) {
            anyregs = true;
            break;
        }
    if (!anyregs) return false;

    // Initialize the AliasMap on the first use.
    if (AliasMap.empty()) {
        // Given a PhysReg, AliasMap[PhysReg] returns a list of indices into RC and
        // therefore the LiveRegs array.
        AliasMap.resize(TRI->getNumRegs());
        for (unsigned i = 0, e = RC->getNumRegs(); i != e; ++i)
            for (MCRegAliasIterator AI(RC->getRegister(i), TRI, true);
                    AI.isValid(); ++AI)
                AliasMap[*AI].push_back(i);
    }

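    // Visit the blocks in reverse post-order so that every block is processed
    // after its predecessors whenever the CFG has no back-edges; blocks that
    // are entered through an unprocessed back-edge are collected for a second
    // pass below.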
    MachineBasicBlock *Entry = MF->begin();
    ReversePostOrderTraversal<MachineBasicBlock*> RPOT(Entry);
    SmallVector<MachineBasicBlock*, 16> Loops;
    for (ReversePostOrderTraversal<MachineBasicBlock*>::rpo_iterator
            MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) {
        MachineBasicBlock *MBB = *MBBI;
        enterBasicBlock(MBB);
        if (SeenUnknownBackEdge)
            Loops.push_back(MBB);
        for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
                ++I)
            visitInstr(I);
        processUndefReads(MBB);
        leaveBasicBlock(MBB);
    }

    // Visit all the loop blocks again in order to merge DomainValues from
    // back-edges.
    for (unsigned i = 0, e = Loops.size(); i != e; ++i) {
        MachineBasicBlock *MBB = Loops[i];
        enterBasicBlock(MBB);
        for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
                ++I)
            if (!I->isDebugValue())
                processDefs(I, false);
        processUndefReads(MBB);
        leaveBasicBlock(MBB);
    }

    // Clear the LiveOuts vectors and collapse any remaining DomainValues.
    for (ReversePostOrderTraversal<MachineBasicBlock*>::rpo_iterator
            MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) {
        LiveOutMap::const_iterator FI = LiveOuts.find(*MBBI);
        if (FI == LiveOuts.end() || !FI->second)
            continue;
        for (unsigned i = 0, e = NumRegs; i != e; ++i)
            if (FI->second[i].Value)
                release(FI->second[i].Value);
        delete[] FI->second;
    }
    LiveOuts.clear();
    UndefReads.clear();
    Avail.clear();
    Allocator.DestroyAll();

    return false;
}
Example No. 30
bool
AArch64InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
                                MachineBasicBlock *&TBB,
                                MachineBasicBlock *&FBB,
                                SmallVectorImpl<MachineOperand> &Cond,
                                bool AllowModify) const {
  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.end();
  if (I == MBB.begin())
    return false;
  --I;
  while (I->isDebugValue()) {
    if (I == MBB.begin())
      return false;
    --I;
  }
  if (!isUnpredicatedTerminator(I))
    return false;

  // Get the last instruction in the block.
  MachineInstr *LastInst = I;

  // If there is only one terminator instruction, process it.
  unsigned LastOpc = LastInst->getOpcode();
  if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
    if (LastOpc == AArch64::Bimm) {
      TBB = LastInst->getOperand(0).getMBB();
      return false;
    }
    if (isCondBranch(LastOpc)) {
      classifyCondBranch(LastInst, TBB, Cond);
      return false;
    }
    return true;  // Can't handle indirect branch.
  }

  // Get the instruction before it if it is a terminator.
  MachineInstr *SecondLastInst = I;
  unsigned SecondLastOpc = SecondLastInst->getOpcode();

  // If AllowModify is true and the block ends with two or more unconditional
  // branches, delete all but the first unconditional branch.
  if (AllowModify && LastOpc == AArch64::Bimm) {
    while (SecondLastOpc == AArch64::Bimm) {
      LastInst->eraseFromParent();
      LastInst = SecondLastInst;
      LastOpc = LastInst->getOpcode();
      if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
        // Return now; the only terminator is an unconditional branch.
        TBB = LastInst->getOperand(0).getMBB();
        return false;
      } else {
        SecondLastInst = I;
        SecondLastOpc = SecondLastInst->getOpcode();
      }
    }
  }

  // If there are three terminators, we don't know what sort of block this is.
  if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
    return true;

  // If the block ends with a B and a Bcc, handle it.
  if (LastOpc == AArch64::Bimm) {
    if (SecondLastOpc == AArch64::Bcc) {
      TBB = SecondLastInst->getOperand(1).getMBB();
      Cond.push_back(MachineOperand::CreateImm(AArch64::Bcc));
      Cond.push_back(SecondLastInst->getOperand(0));
      FBB = LastInst->getOperand(0).getMBB();
      return false;
    } else if (isCondBranch(SecondLastOpc)) {
      classifyCondBranch(SecondLastInst, TBB, Cond);
      FBB = LastInst->getOperand(0).getMBB();
      return false;
    }
  }

  // If the block ends with two unconditional branches, handle it.  The second
  // one is not executed, so remove it.
  if (SecondLastOpc == AArch64::Bimm && LastOpc == AArch64::Bimm) {
    TBB = SecondLastInst->getOperand(0).getMBB();
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return false;
  }

  // Otherwise, can't handle this.
  return true;
}