Example #1
void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
                                       int64_t Offset) const {

  MachineBasicBlock *MBB = MI.getParent();
  MachineFunction *MF = MBB->getParent();
  const SISubtarget &Subtarget = MF->getSubtarget<SISubtarget>();
  const SIInstrInfo *TII = Subtarget.getInstrInfo();

#ifndef NDEBUG
  // FIXME: Is it possible to be storing a frame index to itself?
  bool SeenFI = false;
  for (const MachineOperand &MO: MI.operands()) {
    if (MO.isFI()) {
      if (SeenFI)
        llvm_unreachable("should not see multiple frame indices");

      SeenFI = true;
    }
  }
#endif

  MachineOperand *FIOp = TII->getNamedOperand(MI, AMDGPU::OpName::vaddr);
  assert(FIOp && FIOp->isFI() && "frame index must be address operand");

  assert(TII->isMUBUF(MI));

  MachineOperand *OffsetOp = TII->getNamedOperand(MI, AMDGPU::OpName::offset);
  int64_t NewOffset = OffsetOp->getImm() + Offset;
  assert(isUInt<12>(NewOffset) && "offset should be legal");

  FIOp->ChangeToRegister(BaseReg, false);
  OffsetOp->setImm(NewOffset);
}
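
The assert on the combined offset encodes a contract with the caller rather than a local check. A minimal sketch of that contract (not from the source), assuming a TargetRegisterInfo pointer TRI plus MI, BaseReg and Offset are in scope, and assuming the isFrameOffsetLegal hook of the same LLVM vintage:

  // Illustrative only: the pass rewriting frame references is expected to
  // validate the offset first, which is what keeps the "offset should be
  // legal" assert above from firing.
  if (TRI->isFrameOffsetLegal(&MI, BaseReg, Offset))
    TRI->resolveFrameIndex(MI, BaseReg, Offset);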
Example #2
unsigned
SPUInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
                                 int &FrameIndex) const {
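  // Every STQD* case below stores operand 0; when operand 1 is an immediate
  // offset and operand 2 is a frame index, the instruction is a direct store
  // to a stack slot, so report the stored register and the slot.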
  switch (MI->getOpcode()) {
  default: break;
  case SPU::STQDv16i8:
  case SPU::STQDv8i16:
  case SPU::STQDv4i32:
  case SPU::STQDv4f32:
  case SPU::STQDv2f64:
  case SPU::STQDr128:
  case SPU::STQDr64:
  case SPU::STQDr32:
  case SPU::STQDr16:
  case SPU::STQDr8: {
    const MachineOperand &MOp1 = MI->getOperand(1);
    const MachineOperand &MOp2 = MI->getOperand(2);
    if (MOp1.isImm() && MOp2.isFI()) {
      FrameIndex = MOp2.getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  }
  }
  return 0;
}
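
A hedged usage sketch (not from the source), assuming a TargetInstrInfo pointer TII and a candidate const MachineInstr *MI are in scope; a caller probes the hook and treats a zero return as "not a simple stack-slot store":

  int FrameIndex;
  if (unsigned StoredReg = TII->isStoreToStackSlot(MI, FrameIndex)) {
    // MI stores StoredReg directly into stack slot FrameIndex; for the STQD
    // cases above, StoredReg is operand 0 and FrameIndex comes from operand 2.
  }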
Example #3
void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
                                       int64_t Offset) const {

  MachineBasicBlock *MBB = MI.getParent();
  MachineFunction *MF = MBB->getParent();
  const SISubtarget &Subtarget = MF->getSubtarget<SISubtarget>();
  const SIInstrInfo *TII = Subtarget.getInstrInfo();

#ifndef NDEBUG
  // FIXME: Is it possible to be storing a frame index to itself?
  bool SeenFI = false;
  for (const MachineOperand &MO: MI.operands()) {
    if (MO.isFI()) {
      if (SeenFI)
        llvm_unreachable("should not see multiple frame indices");

      SeenFI = true;
    }
  }
#endif

  MachineOperand *FIOp = TII->getNamedOperand(MI, AMDGPU::OpName::vaddr);
  assert(FIOp && FIOp->isFI() && "frame index must be address operand");

  assert(TII->isMUBUF(MI));

  MachineOperand *OffsetOp = TII->getNamedOperand(MI, AMDGPU::OpName::offset);
  int64_t NewOffset = OffsetOp->getImm() + Offset;
  if (isUInt<12>(NewOffset)) {
    // If we have a legal offset, fold it directly into the instruction.
    FIOp->ChangeToRegister(BaseReg, false);
    OffsetOp->setImm(NewOffset);
    return;
  }

  // The offset is not legal, so we must insert an add of the offset.
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned NewReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  DebugLoc DL = MI.getDebugLoc();

  assert(Offset != 0 && "Non-zero offset expected");

  unsigned UnusedCarry = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
  unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);

  // If the instruction already had an immediate offset, only the requested
  // new offset is added here, because the original immediate is left in
  // place on the instruction.
  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
    .addImm(Offset);
  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ADD_I32_e64), NewReg)
    .addReg(UnusedCarry, RegState::Define | RegState::Dead)
    .addReg(OffsetReg, RegState::Kill)
    .addReg(BaseReg);

  FIOp->ChangeToRegister(NewReg, false);
}
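
A small worked example (hypothetical numbers, not from the source) of when this fallback path is taken; the only assumption is the unsigned 12-bit range that isUInt<12> checks above:

  int64_t ExistingImm = 4000; // hypothetical immediate already on the offset operand
  int64_t Requested = 200;    // hypothetical Offset passed to resolveFrameIndex
  // 4000 + 200 = 4200 does not fit in an unsigned 12-bit field (max 4095), so
  // the s_mov_b32 + v_add sequence is emitted instead of folding the offset.
  bool Fits = isUInt<12>(ExistingImm + Requested); // evaluates to false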
Example #4
void SIFoldOperands::foldInstOperand(MachineInstr &MI,
                                     MachineOperand &OpToFold) const {
  // We need to mutate the operands of new mov instructions to add implicit
  // uses of EXEC, but adding them invalidates the use_iterator, so defer
  // this.
  SmallVector<MachineInstr *, 4> CopiesToReplace;
  SmallVector<FoldCandidate, 4> FoldList;
  MachineOperand &Dst = MI.getOperand(0);

  bool FoldingImm = OpToFold.isImm() || OpToFold.isFI();
  if (FoldingImm) {
    unsigned NumLiteralUses = 0;
    MachineOperand *NonInlineUse = nullptr;
    int NonInlineUseOpNo = -1;

    MachineRegisterInfo::use_iterator NextUse, NextInstUse;
    for (MachineRegisterInfo::use_iterator
           Use = MRI->use_begin(Dst.getReg()), E = MRI->use_end();
         Use != E; Use = NextUse) {
      NextUse = std::next(Use);
      MachineInstr *UseMI = Use->getParent();
      unsigned OpNo = Use.getOperandNo();

      // Folding the immediate may reveal operations that can be constant
      // folded or replaced with a copy. This can happen for example after
      // frame indices are lowered to constants or from splitting 64-bit
      // constants.
      //
      // We may also encounter cases where one or both operands are
      // immediates materialized into a register, which would ordinarily not
      // be folded due to multiple uses or operand constraints.

      if (OpToFold.isImm() && tryConstantFoldOp(*MRI, TII, UseMI, &OpToFold)) {
        DEBUG(dbgs() << "Constant folded " << *UseMI << '\n');

        // Some constant-folding cases change the same immediate's use into a
        // new instruction, e.g. and x, 0 -> 0, so re-visit the user; the same
        // constant-folded instruction could also have a second use operand.
        NextUse = MRI->use_begin(Dst.getReg());
        continue;
      }

      // Try to fold any inline immediate uses, and then only fold other
      // constants if they have one use.
      //
      // The legality of the inline immediate must be checked based on the use
      // operand, not the defining instruction, because 32-bit instructions
      // with 32-bit inline immediate sources may be used to materialize
      // constants used in 16-bit operands.
      //
      // e.g. it is unsafe to fold:
      //  s_mov_b32 s0, 1.0    // materializes 0x3f800000
      //  v_add_f16 v0, v1, s0 // 1.0 f16 inline immediate sees 0x00003c00

      // Folding immediates with more than one use will increase program size.
      // FIXME: This will also reduce register usage, which may be better
      // in some cases. A better heuristic is needed.
      if (isInlineConstantIfFolded(TII, *UseMI, OpNo, OpToFold)) {
        foldOperand(OpToFold, UseMI, OpNo, FoldList, CopiesToReplace);
      } else {
        if (++NumLiteralUses == 1) {
          NonInlineUse = &*Use;
          NonInlineUseOpNo = OpNo;
        }
      }
    }

    if (NumLiteralUses == 1) {
      MachineInstr *UseMI = NonInlineUse->getParent();
      foldOperand(OpToFold, UseMI, NonInlineUseOpNo, FoldList, CopiesToReplace);
    }
  } else {
    // Folding register.
    for (MachineRegisterInfo::use_iterator
           Use = MRI->use_begin(Dst.getReg()), E = MRI->use_end();
         Use != E; ++Use) {
      MachineInstr *UseMI = Use->getParent();

      foldOperand(OpToFold, UseMI, Use.getOperandNo(),
                  FoldList, CopiesToReplace);
    }
  }

  MachineFunction *MF = MI.getParent()->getParent();
  // Make sure we add EXEC uses to any new v_mov instructions created.
  for (MachineInstr *Copy : CopiesToReplace)
    Copy->addImplicitDefUseOperands(*MF);

  for (FoldCandidate &Fold : FoldList) {
    if (updateOperand(Fold, *TRI)) {
      // Clear kill flags.
      if (Fold.isReg()) {
        assert(Fold.OpToFold && Fold.OpToFold->isReg());
        // FIXME: Probably shouldn't bother trying to fold if not an
        // SGPR. PeepholeOptimizer can eliminate redundant VGPR->VGPR
        // copies.
        MRI->clearKillFlags(Fold.OpToFold->getReg());
      }
      DEBUG(dbgs() << "Folded source from " << MI << " into OpNo " <<
            static_cast<int>(Fold.UseOpNo) << " of " << *Fold.UseMI << '\n');
      tryFoldInst(TII, Fold.UseMI);
    } else if (Fold.isCommuted()) {
      // Restore the instruction's original operand order if the fold failed.
      TII->commuteInstruction(*Fold.UseMI, false);
    }
  }
}
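
As a closing gloss on the inline-immediate comment inside the loop above, restating only the bit patterns already given there: 1.0 encoded as an f32 immediate is 0x3f800000, while 1.0 as an f16 immediate is 0x3c00. Because the two encodings differ, the legality check in isInlineConstantIfFolded is driven by the use operand rather than by the 32-bit defining move.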