예제 #1
0
void SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
                               int Index,
                               RegScavenger *RS) const {
  MachineFunction *MF = MI->getParent()->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock *MBB = MI->getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
  const SISubtarget &ST =  MF->getSubtarget<SISubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const DebugLoc &DL = MI->getDebugLoc();

  unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
  unsigned SuperReg = MI->getOperand(0).getReg();
  bool IsKill = MI->getOperand(0).isKill();

  // SubReg carries the "Kill" flag when SubReg == SuperReg.
  unsigned SubKillState = getKillRegState((NumSubRegs == 1) && IsKill);
  for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
    unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    unsigned SubReg = NumSubRegs == 1 ?
      SuperReg : getSubReg(SuperReg, getSubRegFromChannel(i));

    struct SIMachineFunctionInfo::SpilledReg Spill =
      MFI->getSpilledReg(MF, Index, i);
    if (Spill.hasReg()) {
      if (SuperReg == AMDGPU::M0) {
        assert(NumSubRegs == 1);
        unsigned CopyM0
          = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), CopyM0)
          .addReg(SuperReg, getKillRegState(IsKill));

        // The real spill now kills the temp copy.
        SubReg = SuperReg = CopyM0;
        IsKill = true;
      }

      BuildMI(*MBB, MI, DL,
              TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
              Spill.VGPR)
        .addReg(SubReg, getKillRegState(IsKill))
        .addImm(Spill.Lane);

      // FIXME: Since this spills to another register instead of an actual
      // frame index, we should delete the frame index when all references to
      // it are fixed.
    } else {
      // Spill SGPR to a frame index.
      // FIXME we should use S_STORE_DWORD here for VI.
      MachineInstrBuilder Mov
        = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
        .addReg(SubReg, SubKillState);


      // There could be undef components of a spilled super register.
      // TODO: Can we detect this and skip the spill?
      if (NumSubRegs > 1) {
        // The last implicit use of the SuperReg carries the "Kill" flag.
        unsigned SuperKillState = 0;
        if (i + 1 == e)
          SuperKillState |= getKillRegState(IsKill);
        Mov.addReg(SuperReg, RegState::Implicit | SuperKillState);
      }

      unsigned Size = FrameInfo.getObjectSize(Index);
      unsigned Align = FrameInfo.getObjectAlignment(Index);
      MachinePointerInfo PtrInfo
        = MachinePointerInfo::getFixedStack(*MF, Index);
      MachineMemOperand *MMO
        = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
                                   Size, Align);
      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE))
        .addReg(TmpReg, RegState::Kill)         // src
        .addFrameIndex(Index)                   // vaddr
        .addReg(MFI->getScratchRSrcReg())       // srrsrc
        .addReg(MFI->getScratchWaveOffsetReg()) // soffset
        .addImm(i * 4)                          // offset
        .addMemOperand(MMO);
    }
  }

  MI->eraseFromParent();
  MFI->addToSpilledSGPRs(NumSubRegs);
}
예제 #2
0
void SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
                                 int Index,
                                 RegScavenger *RS) const {
  MachineFunction *MF = MI->getParent()->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock *MBB = MI->getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
  const SISubtarget &ST =  MF->getSubtarget<SISubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const DebugLoc &DL = MI->getDebugLoc();

  unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
  unsigned SuperReg = MI->getOperand(0).getReg();

  // m0 is not allowed as with readlane/writelane, so a temporary SGPR and
  // extra copy is needed.
  bool IsM0 = (SuperReg == AMDGPU::M0);
  if (IsM0) {
    assert(NumSubRegs == 1);
    SuperReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
  }

  for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
    unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    unsigned SubReg = NumSubRegs == 1 ?
      SuperReg : getSubReg(SuperReg, getSubRegFromChannel(i));

    SIMachineFunctionInfo::SpilledReg Spill
      = MFI->getSpilledReg(MF, Index, i);

    if (Spill.hasReg()) {
      BuildMI(*MBB, MI, DL,
              TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
              SubReg)
        .addReg(Spill.VGPR)
        .addImm(Spill.Lane)
        .addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
    } else {
      // Restore SGPR from a stack slot.
      // FIXME: We should use S_LOAD_DWORD here for VI.

      unsigned Align = FrameInfo.getObjectAlignment(Index);
      unsigned Size = FrameInfo.getObjectSize(Index);

      MachinePointerInfo PtrInfo
        = MachinePointerInfo::getFixedStack(*MF, Index);

      MachineMemOperand *MMO = MF->getMachineMemOperand(
        PtrInfo, MachineMemOperand::MOLoad, Size, Align);

      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpReg)
        .addFrameIndex(Index)                   // vaddr
        .addReg(MFI->getScratchRSrcReg())       // srsrc
        .addReg(MFI->getScratchWaveOffsetReg()) // soffset
        .addImm(i * 4)                          // offset
        .addMemOperand(MMO);
      BuildMI(*MBB, MI, DL,
              TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
        .addReg(TmpReg, RegState::Kill)
        .addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
    }
  }

  if (IsM0 && SuperReg != AMDGPU::M0) {
    BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
      .addReg(SuperReg);
  }

  MI->eraseFromParent();
}
예제 #3
0
void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
                                        int SPAdj, unsigned FIOperandNum,
                                        RegScavenger *RS) const {
  MachineFunction *MF = MI->getParent()->getParent();
  MachineBasicBlock *MBB = MI->getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo *FrameInfo = MF->getFrameInfo();
  const SIInstrInfo *TII =
      static_cast<const SIInstrInfo *>(MF->getSubtarget().getInstrInfo());
  DebugLoc DL = MI->getDebugLoc();

  MachineOperand &FIOp = MI->getOperand(FIOperandNum);
  int Index = MI->getOperand(FIOperandNum).getIndex();

  switch (MI->getOpcode()) {
    // SGPR register spill
    case AMDGPU::SI_SPILL_S512_SAVE:
    case AMDGPU::SI_SPILL_S256_SAVE:
    case AMDGPU::SI_SPILL_S128_SAVE:
    case AMDGPU::SI_SPILL_S64_SAVE:
    case AMDGPU::SI_SPILL_S32_SAVE: {
      unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());

      for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
        unsigned SubReg = getPhysRegSubReg(MI->getOperand(0).getReg(),
                                           &AMDGPU::SGPR_32RegClass, i);
        struct SIMachineFunctionInfo::SpilledReg Spill =
            MFI->getSpilledReg(MF, Index, i);

        if (Spill.VGPR == AMDGPU::NoRegister) {
           LLVMContext &Ctx = MF->getFunction()->getContext();
           Ctx.emitError("Ran out of VGPRs for spilling SGPR");
        }

        BuildMI(*MBB, MI, DL,
                TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
                Spill.VGPR)
                .addReg(SubReg)
                .addImm(Spill.Lane);

      }
      MI->eraseFromParent();
      break;
    }

    // SGPR register restore
    case AMDGPU::SI_SPILL_S512_RESTORE:
    case AMDGPU::SI_SPILL_S256_RESTORE:
    case AMDGPU::SI_SPILL_S128_RESTORE:
    case AMDGPU::SI_SPILL_S64_RESTORE:
    case AMDGPU::SI_SPILL_S32_RESTORE: {
      unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());

      for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
        unsigned SubReg = getPhysRegSubReg(MI->getOperand(0).getReg(),
                                           &AMDGPU::SGPR_32RegClass, i);
        struct SIMachineFunctionInfo::SpilledReg Spill =
            MFI->getSpilledReg(MF, Index, i);

        if (Spill.VGPR == AMDGPU::NoRegister) {
           LLVMContext &Ctx = MF->getFunction()->getContext();
           Ctx.emitError("Ran out of VGPRs for spilling SGPR");
        }

        BuildMI(*MBB, MI, DL,
                TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
                SubReg)
                .addReg(Spill.VGPR)
                .addImm(Spill.Lane)
                .addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
      }

      // TODO: only do this when it is needed
      switch (MF->getSubtarget<AMDGPUSubtarget>().getGeneration()) {
      case AMDGPUSubtarget::SOUTHERN_ISLANDS:
        // "VALU writes SGPR" -> "SMRD reads that SGPR" needs "S_NOP 3" on SI
        TII->insertNOPs(MI, 3);
        break;
      case AMDGPUSubtarget::SEA_ISLANDS:
        break;
      default: // VOLCANIC_ISLANDS and later
        // "VALU writes SGPR -> VMEM reads that SGPR" needs "S_NOP 4" on VI
        // and later. This also applies to VALUs which write VCC, but we're
        // unlikely to see VMEM use VCC.
        TII->insertNOPs(MI, 4);
      }

      MI->eraseFromParent();
      break;
    }

    // VGPR register spill
    case AMDGPU::SI_SPILL_V512_SAVE:
    case AMDGPU::SI_SPILL_V256_SAVE:
    case AMDGPU::SI_SPILL_V128_SAVE:
    case AMDGPU::SI_SPILL_V96_SAVE:
    case AMDGPU::SI_SPILL_V64_SAVE:
    case AMDGPU::SI_SPILL_V32_SAVE:
      buildScratchLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
            TII->getNamedOperand(*MI, AMDGPU::OpName::src)->getReg(),
            TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_rsrc)->getReg(),
            TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(),
             FrameInfo->getObjectOffset(Index), RS);
      MI->eraseFromParent();
      break;
    case AMDGPU::SI_SPILL_V32_RESTORE:
    case AMDGPU::SI_SPILL_V64_RESTORE:
    case AMDGPU::SI_SPILL_V96_RESTORE:
    case AMDGPU::SI_SPILL_V128_RESTORE:
    case AMDGPU::SI_SPILL_V256_RESTORE:
    case AMDGPU::SI_SPILL_V512_RESTORE: {
      buildScratchLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
            TII->getNamedOperand(*MI, AMDGPU::OpName::dst)->getReg(),
            TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_rsrc)->getReg(),
            TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(),
            FrameInfo->getObjectOffset(Index), RS);
      MI->eraseFromParent();
      break;
    }

    default: {
      int64_t Offset = FrameInfo->getObjectOffset(Index);
      FIOp.ChangeToImmediate(Offset);
      if (!TII->isImmOperandLegal(MI, FIOperandNum, FIOp)) {
        unsigned TmpReg = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, SPAdj);
        BuildMI(*MBB, MI, MI->getDebugLoc(),
                TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
                .addImm(Offset);
        FIOp.ChangeToRegister(TmpReg, false, false, true);
      }
    }
  }
}
예제 #4
0
bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
  TII = static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
  TRI =
      static_cast<const SIRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  bool HaveKill = false;
  bool NeedWQM = false;
  bool NeedFlat = false;
  unsigned Depth = 0;

  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
       BI != BE; ++BI) {

    MachineBasicBlock &MBB = *BI;
    MachineBasicBlock::iterator I, Next;
    for (I = MBB.begin(); I != MBB.end(); I = Next) {
      Next = std::next(I);

      MachineInstr &MI = *I;
      if (TII->isWQM(MI) || TII->isDS(MI))
        NeedWQM = true;

      // Flat uses m0 in case it needs to access LDS.
      if (TII->isFLAT(MI))
        NeedFlat = true;

      switch (MI.getOpcode()) {
        default: break;
        case AMDGPU::SI_IF:
          ++Depth;
          If(MI);
          break;

        case AMDGPU::SI_ELSE:
          Else(MI);
          break;

        case AMDGPU::SI_BREAK:
          Break(MI);
          break;

        case AMDGPU::SI_IF_BREAK:
          IfBreak(MI);
          break;

        case AMDGPU::SI_ELSE_BREAK:
          ElseBreak(MI);
          break;

        case AMDGPU::SI_LOOP:
          ++Depth;
          Loop(MI);
          break;

        case AMDGPU::SI_END_CF:
          if (--Depth == 0 && HaveKill) {
            SkipIfDead(MI);
            HaveKill = false;
          }
          EndCf(MI);
          break;

        case AMDGPU::SI_KILL:
          if (Depth == 0)
            SkipIfDead(MI);
          else
            HaveKill = true;
          Kill(MI);
          break;

        case AMDGPU::S_BRANCH:
          Branch(MI);
          break;

        case AMDGPU::SI_INDIRECT_SRC_V1:
        case AMDGPU::SI_INDIRECT_SRC_V2:
        case AMDGPU::SI_INDIRECT_SRC_V4:
        case AMDGPU::SI_INDIRECT_SRC_V8:
        case AMDGPU::SI_INDIRECT_SRC_V16:
          IndirectSrc(MI);
          break;

        case AMDGPU::SI_INDIRECT_DST_V1:
        case AMDGPU::SI_INDIRECT_DST_V2:
        case AMDGPU::SI_INDIRECT_DST_V4:
        case AMDGPU::SI_INDIRECT_DST_V8:
        case AMDGPU::SI_INDIRECT_DST_V16:
          IndirectDst(MI);
          break;
      }
    }
  }

  if (NeedWQM && MFI->getShaderType() == ShaderType::PIXEL) {
    MachineBasicBlock &MBB = MF.front();
    BuildMI(MBB, MBB.getFirstNonPHI(), DebugLoc(), TII->get(AMDGPU::S_WQM_B64),
            AMDGPU::EXEC).addReg(AMDGPU::EXEC);
  }

  if (NeedFlat && MFI->IsKernel) {
    // TODO: What to use with function calls?
    // We will need to Initialize the flat scratch register pair.
    if (NeedFlat)
      MFI->setHasFlatInstructions(true);
  }

  return true;
}
예제 #5
0
void SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
                                 int Index,
                                 RegScavenger *RS) const {
  MachineFunction *MF = MI->getParent()->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock *MBB = MI->getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
  const SISubtarget &ST =  MF->getSubtarget<SISubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const DebugLoc &DL = MI->getDebugLoc();

  unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
  unsigned SuperReg = MI->getOperand(0).getReg();
  bool SpillToSMEM = ST.hasScalarStores() && EnableSpillSGPRToSMEM;

  assert(SuperReg != AMDGPU::M0 && "m0 should never spill");

  unsigned OffsetReg = AMDGPU::M0;
  unsigned M0CopyReg = AMDGPU::NoRegister;

  if (SpillToSMEM) {
    if (RS->isRegUsed(AMDGPU::M0)) {
      M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
        .addReg(AMDGPU::M0);
    }
  }

  // SubReg carries the "Kill" flag when SubReg == SuperReg.
  int64_t FrOffset = FrameInfo.getObjectOffset(Index);

  const unsigned EltSize = 4;

  for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
    unsigned SubReg = NumSubRegs == 1 ?
      SuperReg : getSubReg(SuperReg, getSubRegFromChannel(i));

    if (SpillToSMEM) {
      unsigned Align = FrameInfo.getObjectAlignment(Index);
      MachinePointerInfo PtrInfo
        = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
      MachineMemOperand *MMO
        = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
                                   EltSize, MinAlign(Align, EltSize * i));

      // Add i * 4 offset
      int64_t Offset = ST.getWavefrontSize() * (FrOffset + 4 * i);
      if (Offset != 0) {
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
          .addReg(MFI->getScratchWaveOffsetReg())
          .addImm(Offset);
      } else {
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
          .addReg(MFI->getScratchWaveOffsetReg());
      }

      auto MIB =
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_BUFFER_LOAD_DWORD_SGPR), SubReg)
        .addReg(MFI->getScratchRSrcReg()) // sbase
        .addReg(OffsetReg, RegState::Kill)                // soff
        .addImm(0)                        // glc
        .addMemOperand(MMO);

      if (NumSubRegs > 1)
        MIB.addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);

      continue;
    }

    SIMachineFunctionInfo::SpilledReg Spill
      = MFI->getSpilledReg(MF, Index, i);

    if (Spill.hasReg()) {
      auto MIB =
        BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
                SubReg)
        .addReg(Spill.VGPR)
        .addImm(Spill.Lane);

      if (NumSubRegs > 1)
        MIB.addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
    } else {
      // Restore SGPR from a stack slot.
      // FIXME: We should use S_LOAD_DWORD here for VI.
      unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
      unsigned Align = FrameInfo.getObjectAlignment(Index);

      MachinePointerInfo PtrInfo
        = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);

      MachineMemOperand *MMO = MF->getMachineMemOperand(PtrInfo,
        MachineMemOperand::MOLoad, EltSize,
        MinAlign(Align, EltSize * i));

      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpReg)
        .addFrameIndex(Index)                   // vaddr
        .addReg(MFI->getScratchRSrcReg())       // srsrc
        .addReg(MFI->getScratchWaveOffsetReg()) // soffset
        .addImm(i * 4)                          // offset
        .addMemOperand(MMO);

      auto MIB =
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
        .addReg(TmpReg, RegState::Kill);

      if (NumSubRegs > 1)
        MIB.addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
    }
  }

  if (M0CopyReg != AMDGPU::NoRegister) {
    BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
      .addReg(M0CopyReg, RegState::Kill);
  }

  MI->eraseFromParent();
}
예제 #6
0
void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
                                        int SPAdj, unsigned FIOperandNum,
                                        RegScavenger *RS) const {
  MachineFunction *MF = MI->getParent()->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock *MBB = MI->getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
  const SISubtarget &ST =  MF->getSubtarget<SISubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  DebugLoc DL = MI->getDebugLoc();

  MachineOperand &FIOp = MI->getOperand(FIOperandNum);
  int Index = MI->getOperand(FIOperandNum).getIndex();

  switch (MI->getOpcode()) {
    // SGPR register spill
    case AMDGPU::SI_SPILL_S512_SAVE:
    case AMDGPU::SI_SPILL_S256_SAVE:
    case AMDGPU::SI_SPILL_S128_SAVE:
    case AMDGPU::SI_SPILL_S64_SAVE:
    case AMDGPU::SI_SPILL_S32_SAVE: {
      spillSGPR(MI, Index, RS);
      break;
    }

    // SGPR register restore
    case AMDGPU::SI_SPILL_S512_RESTORE:
    case AMDGPU::SI_SPILL_S256_RESTORE:
    case AMDGPU::SI_SPILL_S128_RESTORE:
    case AMDGPU::SI_SPILL_S64_RESTORE:
    case AMDGPU::SI_SPILL_S32_RESTORE: {
      restoreSGPR(MI, Index, RS);
      break;
    }

    // VGPR register spill
    case AMDGPU::SI_SPILL_V512_SAVE:
    case AMDGPU::SI_SPILL_V256_SAVE:
    case AMDGPU::SI_SPILL_V128_SAVE:
    case AMDGPU::SI_SPILL_V96_SAVE:
    case AMDGPU::SI_SPILL_V64_SAVE:
    case AMDGPU::SI_SPILL_V32_SAVE: {
      const MachineOperand *VData = TII->getNamedOperand(*MI,
                                                         AMDGPU::OpName::vdata);
      buildSpillLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
            Index,
            VData->getReg(), VData->isKill(),
            TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
            TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),
            TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
            *MI->memoperands_begin(),
            RS);
      MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(MI->getOpcode()));
      MI->eraseFromParent();
      break;
    }
    case AMDGPU::SI_SPILL_V32_RESTORE:
    case AMDGPU::SI_SPILL_V64_RESTORE:
    case AMDGPU::SI_SPILL_V96_RESTORE:
    case AMDGPU::SI_SPILL_V128_RESTORE:
    case AMDGPU::SI_SPILL_V256_RESTORE:
    case AMDGPU::SI_SPILL_V512_RESTORE: {
      const MachineOperand *VData = TII->getNamedOperand(*MI,
                                                         AMDGPU::OpName::vdata);

      buildSpillLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
            Index,
            VData->getReg(), VData->isKill(),
            TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
            TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),
            TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
            *MI->memoperands_begin(),
            RS);
      MI->eraseFromParent();
      break;
    }

    default: {
      if (TII->isMUBUF(*MI)) {
        // Disable offen so we don't need a 0 vgpr base.
        assert(static_cast<int>(FIOperandNum) ==
               AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                          AMDGPU::OpName::vaddr));

        int64_t Offset = FrameInfo.getObjectOffset(Index);
        int64_t OldImm
          = TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
        int64_t NewOffset = OldImm + Offset;

        if (isUInt<12>(NewOffset) &&
            buildMUBUFOffsetLoadStore(TII, FrameInfo, MI, Index, NewOffset)) {
          MI->eraseFromParent();
          break;
        }
      }

      int64_t Offset = FrameInfo.getObjectOffset(Index);
      FIOp.ChangeToImmediate(Offset);
      if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) {
        unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
        BuildMI(*MBB, MI, MI->getDebugLoc(),
                TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
                .addImm(Offset);
        FIOp.ChangeToRegister(TmpReg, false, false, true);
      }
    }
  }
}
예제 #7
0
void SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
                                 int Index,
                                 RegScavenger *RS) const {
  MachineFunction *MF = MI->getParent()->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock *MBB = MI->getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
  const SISubtarget &ST =  MF->getSubtarget<SISubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const DebugLoc &DL = MI->getDebugLoc();

  unsigned SuperReg = MI->getOperand(0).getReg();
  bool SpillToSMEM = ST.hasScalarStores() && EnableSpillSGPRToSMEM;

  assert(SuperReg != AMDGPU::M0 && "m0 should never spill");

  unsigned OffsetReg = AMDGPU::M0;
  unsigned M0CopyReg = AMDGPU::NoRegister;

  if (SpillToSMEM) {
    if (RS->isRegUsed(AMDGPU::M0)) {
      M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
        .addReg(AMDGPU::M0);
    }
  }

  unsigned EltSize = 4;
  unsigned ScalarLoadOp;

  const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
  if (SpillToSMEM && isSGPRClass(RC)) {
    // XXX - if private_element_size is larger than 4 it might be useful to be
    // able to spill wider vmem spills.
    std::tie(EltSize, ScalarLoadOp) = getSpillEltSize(RC->getSize(), false);
  }

  ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
  unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();

  // SubReg carries the "Kill" flag when SubReg == SuperReg.
  int64_t FrOffset = FrameInfo.getObjectOffset(Index);

  for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
    unsigned SubReg = NumSubRegs == 1 ?
      SuperReg : getSubReg(SuperReg, SplitParts[i]);

    if (SpillToSMEM) {
      // FIXME: Size may be > 4 but extra bytes wasted.
      unsigned Align = FrameInfo.getObjectAlignment(Index);
      MachinePointerInfo PtrInfo
        = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
      MachineMemOperand *MMO
        = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
                                   EltSize, MinAlign(Align, EltSize * i));

      // Add i * 4 offset
      int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
      if (Offset != 0) {
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
          .addReg(MFI->getScratchWaveOffsetReg())
          .addImm(Offset);
      } else {
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
          .addReg(MFI->getScratchWaveOffsetReg());
      }

      auto MIB =
        BuildMI(*MBB, MI, DL, TII->get(ScalarLoadOp), SubReg)
        .addReg(MFI->getScratchRSrcReg()) // sbase
        .addReg(OffsetReg, RegState::Kill)                // soff
        .addImm(0)                        // glc
        .addMemOperand(MMO);

      if (NumSubRegs > 1)
        MIB.addReg(SuperReg, RegState::ImplicitDefine);

      continue;
    }

    SIMachineFunctionInfo::SpilledReg Spill
      = MFI->getSpilledReg(MF, Index, i);

    if (Spill.hasReg()) {
      auto MIB =
        BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
                SubReg)
        .addReg(Spill.VGPR)
        .addImm(Spill.Lane);

      if (NumSubRegs > 1)
        MIB.addReg(SuperReg, RegState::ImplicitDefine);
    } else {
      // Restore SGPR from a stack slot.
      // FIXME: We should use S_LOAD_DWORD here for VI.
      unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
      unsigned Align = FrameInfo.getObjectAlignment(Index);

      MachinePointerInfo PtrInfo
        = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);

      MachineMemOperand *MMO = MF->getMachineMemOperand(PtrInfo,
        MachineMemOperand::MOLoad, EltSize,
        MinAlign(Align, EltSize * i));

      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpReg)
        .addFrameIndex(Index)                   // vaddr
        .addReg(MFI->getScratchRSrcReg())       // srsrc
        .addReg(MFI->getScratchWaveOffsetReg()) // soffset
        .addImm(i * 4)                          // offset
        .addMemOperand(MMO);

      auto MIB =
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
        .addReg(TmpReg, RegState::Kill);

      if (NumSubRegs > 1)
        MIB.addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
    }
  }

  if (M0CopyReg != AMDGPU::NoRegister) {
    BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
      .addReg(M0CopyReg, RegState::Kill);
  }

  MI->eraseFromParent();
}
예제 #8
0
void SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
                               int Index,
                               RegScavenger *RS) const {
  MachineBasicBlock *MBB = MI->getParent();
  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const SISubtarget &ST =  MF->getSubtarget<SISubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();

  unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
  unsigned SuperReg = MI->getOperand(0).getReg();
  bool IsKill = MI->getOperand(0).isKill();
  const DebugLoc &DL = MI->getDebugLoc();

  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo &FrameInfo = MF->getFrameInfo();

  bool SpillToSMEM = ST.hasScalarStores() && EnableSpillSGPRToSMEM;

  assert(SuperReg != AMDGPU::M0 && "m0 should never spill");

  const unsigned EltSize = 4;
  unsigned OffsetReg = AMDGPU::M0;
  unsigned M0CopyReg = AMDGPU::NoRegister;

  if (SpillToSMEM) {
    if (RS->isRegUsed(AMDGPU::M0)) {
      M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
        .addReg(AMDGPU::M0);
    }
  }

  // SubReg carries the "Kill" flag when SubReg == SuperReg.
  unsigned SubKillState = getKillRegState((NumSubRegs == 1) && IsKill);
  for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
    unsigned SubReg = NumSubRegs == 1 ?
      SuperReg : getSubReg(SuperReg, getSubRegFromChannel(i));

    if (SpillToSMEM) {
      int64_t FrOffset = FrameInfo.getObjectOffset(Index);
      unsigned Align = FrameInfo.getObjectAlignment(Index);
      MachinePointerInfo PtrInfo
        = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
      MachineMemOperand *MMO
        = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
                                   EltSize, MinAlign(Align, EltSize * i));

      // Add i * 4 wave offset.
      //
      // SMEM instructions only support a single offset, so increment the wave
      // offset.

      int64_t Offset = ST.getWavefrontSize() * (FrOffset + 4 * i);
      if (Offset != 0) {
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
          .addReg(MFI->getScratchWaveOffsetReg())
          .addImm(Offset);
      } else {
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
          .addReg(MFI->getScratchWaveOffsetReg());
      }

      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_BUFFER_STORE_DWORD_SGPR))
        .addReg(SubReg, getKillRegState(IsKill)) // sdata
        .addReg(MFI->getScratchRSrcReg())        // sbase
        .addReg(OffsetReg, RegState::Kill)       // soff
        .addImm(0)                               // glc
        .addMemOperand(MMO);

      continue;
    }

    struct SIMachineFunctionInfo::SpilledReg Spill =
      MFI->getSpilledReg(MF, Index, i);
    if (Spill.hasReg()) {
      BuildMI(*MBB, MI, DL,
              TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
              Spill.VGPR)
        .addReg(SubReg, getKillRegState(IsKill))
        .addImm(Spill.Lane);

      // FIXME: Since this spills to another register instead of an actual
      // frame index, we should delete the frame index when all references to
      // it are fixed.
    } else {
      // Spill SGPR to a frame index.
      // TODO: Should VI try to spill to VGPR and then spill to SMEM?
      unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
      // TODO: Should VI try to spill to VGPR and then spill to SMEM?

      MachineInstrBuilder Mov
        = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
        .addReg(SubReg, SubKillState);


      // There could be undef components of a spilled super register.
      // TODO: Can we detect this and skip the spill?
      if (NumSubRegs > 1) {
        // The last implicit use of the SuperReg carries the "Kill" flag.
        unsigned SuperKillState = 0;
        if (i + 1 == e)
          SuperKillState |= getKillRegState(IsKill);
        Mov.addReg(SuperReg, RegState::Implicit | SuperKillState);
      }

      unsigned Align = FrameInfo.getObjectAlignment(Index);
      MachinePointerInfo PtrInfo
        = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
      MachineMemOperand *MMO
        = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
                                   EltSize, MinAlign(Align, EltSize * i));
      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE))
        .addReg(TmpReg, RegState::Kill)         // src
        .addFrameIndex(Index)                   // vaddr
        .addReg(MFI->getScratchRSrcReg())       // srrsrc
        .addReg(MFI->getScratchWaveOffsetReg()) // soffset
        .addImm(i * 4)                          // offset
        .addMemOperand(MMO);
    }
  }

  if (M0CopyReg != AMDGPU::NoRegister) {
    BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
      .addReg(M0CopyReg, RegState::Kill);
  }

  MI->eraseFromParent();
  MFI->addToSpilledSGPRs(NumSubRegs);
}
예제 #9
0
void SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
                               int Index,
                               RegScavenger *RS) const {
  MachineBasicBlock *MBB = MI->getParent();
  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const SISubtarget &ST =  MF->getSubtarget<SISubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();

  unsigned SuperReg = MI->getOperand(0).getReg();
  bool IsKill = MI->getOperand(0).isKill();
  const DebugLoc &DL = MI->getDebugLoc();

  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo &FrameInfo = MF->getFrameInfo();

  bool SpillToSMEM = ST.hasScalarStores() && EnableSpillSGPRToSMEM;

  assert(SuperReg != AMDGPU::M0 && "m0 should never spill");

  unsigned OffsetReg = AMDGPU::M0;
  unsigned M0CopyReg = AMDGPU::NoRegister;

  if (SpillToSMEM) {
    if (RS->isRegUsed(AMDGPU::M0)) {
      M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
        .addReg(AMDGPU::M0);
    }
  }

  unsigned ScalarStoreOp;
  unsigned EltSize = 4;
  const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
  if (SpillToSMEM && isSGPRClass(RC)) {
    // XXX - if private_element_size is larger than 4 it might be useful to be
    // able to spill wider vmem spills.
    std::tie(EltSize, ScalarStoreOp) = getSpillEltSize(RC->getSize(), true);
  }

  ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
  unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();

  // SubReg carries the "Kill" flag when SubReg == SuperReg.
  unsigned SubKillState = getKillRegState((NumSubRegs == 1) && IsKill);
  for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
    unsigned SubReg = NumSubRegs == 1 ?
      SuperReg : getSubReg(SuperReg, SplitParts[i]);

    if (SpillToSMEM) {
      int64_t FrOffset = FrameInfo.getObjectOffset(Index);

      // The allocated memory size is really the wavefront size * the frame
      // index size. The widest register class is 64 bytes, so a 4-byte scratch
      // allocation is enough to spill this in a single stack object.
      //
      // FIXME: Frame size/offsets are computed earlier than this, so the extra
      // space is still unnecessarily allocated.

      unsigned Align = FrameInfo.getObjectAlignment(Index);
      MachinePointerInfo PtrInfo
        = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
      MachineMemOperand *MMO
        = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
                                   EltSize, MinAlign(Align, EltSize * i));

      // SMEM instructions only support a single offset, so increment the wave
      // offset.

      int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
      if (Offset != 0) {
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
          .addReg(MFI->getScratchWaveOffsetReg())
          .addImm(Offset);
      } else {
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
          .addReg(MFI->getScratchWaveOffsetReg());
      }

      BuildMI(*MBB, MI, DL, TII->get(ScalarStoreOp))
        .addReg(SubReg, getKillRegState(IsKill)) // sdata
        .addReg(MFI->getScratchRSrcReg())        // sbase
        .addReg(OffsetReg, RegState::Kill)       // soff
        .addImm(0)                               // glc
        .addMemOperand(MMO);

      continue;
    }

    struct SIMachineFunctionInfo::SpilledReg Spill =
      MFI->getSpilledReg(MF, Index, i);
    if (Spill.hasReg()) {
      BuildMI(*MBB, MI, DL,
              TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
              Spill.VGPR)
        .addReg(SubReg, getKillRegState(IsKill))
        .addImm(Spill.Lane);

      // FIXME: Since this spills to another register instead of an actual
      // frame index, we should delete the frame index when all references to
      // it are fixed.
    } else {
      // Spill SGPR to a frame index.
      // TODO: Should VI try to spill to VGPR and then spill to SMEM?
      unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
      // TODO: Should VI try to spill to VGPR and then spill to SMEM?

      MachineInstrBuilder Mov
        = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
        .addReg(SubReg, SubKillState);


      // There could be undef components of a spilled super register.
      // TODO: Can we detect this and skip the spill?
      if (NumSubRegs > 1) {
        // The last implicit use of the SuperReg carries the "Kill" flag.
        unsigned SuperKillState = 0;
        if (i + 1 == e)
          SuperKillState |= getKillRegState(IsKill);
        Mov.addReg(SuperReg, RegState::Implicit | SuperKillState);
      }

      unsigned Align = FrameInfo.getObjectAlignment(Index);
      MachinePointerInfo PtrInfo
        = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
      MachineMemOperand *MMO
        = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
                                   EltSize, MinAlign(Align, EltSize * i));
      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE))
        .addReg(TmpReg, RegState::Kill)         // src
        .addFrameIndex(Index)                   // vaddr
        .addReg(MFI->getScratchRSrcReg())       // srrsrc
        .addReg(MFI->getScratchWaveOffsetReg()) // soffset
        .addImm(i * 4)                          // offset
        .addMemOperand(MMO);
    }
  }

  if (M0CopyReg != AMDGPU::NoRegister) {
    BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
      .addReg(M0CopyReg, RegState::Kill);
  }

  MI->eraseFromParent();
  MFI->addToSpilledSGPRs(NumSubRegs);
}
예제 #10
0
bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
  TII = static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
  TRI =
      static_cast<const SIRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  bool HaveKill = false;
  bool NeedM0 = false;
  bool NeedWQM = false;
  bool NeedFlat = false;
  unsigned Depth = 0;

  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
       BI != BE; ++BI) {

    MachineBasicBlock &MBB = *BI;
    MachineBasicBlock::iterator I, Next;
    for (I = MBB.begin(); I != MBB.end(); I = Next) {
      Next = std::next(I);

      MachineInstr &MI = *I;
      if (TII->isDS(MI.getOpcode())) {
        NeedM0 = true;
        NeedWQM = true;
      }

      // Flat uses m0 in case it needs to access LDS.
      if (TII->isFLAT(MI.getOpcode())) {
        NeedM0 = true;
        NeedFlat = true;
      }

      switch (MI.getOpcode()) {
        default: break;
        case AMDGPU::SI_IF:
          ++Depth;
          If(MI);
          break;

        case AMDGPU::SI_ELSE:
          Else(MI);
          break;

        case AMDGPU::SI_BREAK:
          Break(MI);
          break;

        case AMDGPU::SI_IF_BREAK:
          IfBreak(MI);
          break;

        case AMDGPU::SI_ELSE_BREAK:
          ElseBreak(MI);
          break;

        case AMDGPU::SI_LOOP:
          ++Depth;
          Loop(MI);
          break;

        case AMDGPU::SI_END_CF:
          if (--Depth == 0 && HaveKill) {
            SkipIfDead(MI);
            HaveKill = false;
          }
          EndCf(MI);
          break;

        case AMDGPU::SI_KILL:
          if (Depth == 0)
            SkipIfDead(MI);
          else
            HaveKill = true;
          Kill(MI);
          break;

        case AMDGPU::S_BRANCH:
          Branch(MI);
          break;

        case AMDGPU::SI_INDIRECT_SRC:
          IndirectSrc(MI);
          break;

        case AMDGPU::SI_INDIRECT_DST_V1:
        case AMDGPU::SI_INDIRECT_DST_V2:
        case AMDGPU::SI_INDIRECT_DST_V4:
        case AMDGPU::SI_INDIRECT_DST_V8:
        case AMDGPU::SI_INDIRECT_DST_V16:
          IndirectDst(MI);
          break;

        case AMDGPU::V_INTERP_P1_F32:
        case AMDGPU::V_INTERP_P2_F32:
        case AMDGPU::V_INTERP_MOV_F32:
          NeedWQM = true;
          break;
      }
    }
  }

  if (NeedM0) {
    MachineBasicBlock &MBB = MF.front();
    // Initialize M0 to a value that won't cause LDS access to be discarded
    // due to offset clamping
    InitM0ForLDS(MBB.getFirstNonPHI());
  }

  if (NeedWQM && MFI->getShaderType() == ShaderType::PIXEL) {
    MachineBasicBlock &MBB = MF.front();
    BuildMI(MBB, MBB.getFirstNonPHI(), DebugLoc(), TII->get(AMDGPU::S_WQM_B64),
            AMDGPU::EXEC).addReg(AMDGPU::EXEC);
  }

  // FIXME: This seems inappropriate to do here.
  if (NeedFlat && MFI->IsKernel) {
    // Insert the prologue initializing the SGPRs pointing to the scratch space
    // for flat accesses.
    const MachineFrameInfo *FrameInfo = MF.getFrameInfo();

    // TODO: What to use with function calls?

    // FIXME: This is reporting stack size that is used in a scratch buffer
    // rather than registers as well.
    uint64_t StackSizeBytes = FrameInfo->getStackSize();

    int IndirectBegin
      = static_cast<const AMDGPUInstrInfo*>(TII)->getIndirectIndexBegin(MF);
    // Convert register index to 256-byte unit.
    uint64_t StackOffset = IndirectBegin < 0 ? 0 : (4 * IndirectBegin / 256);

    assert((StackSizeBytes < 0xffff) && StackOffset < 0xffff &&
           "Stack limits should be smaller than 16-bits");

    // Initialize the flat scratch register pair.
    // TODO: Can we use one s_mov_b64 here?

    // Offset is in units of 256-bytes.
    MachineBasicBlock &MBB = MF.front();
    DebugLoc NoDL;
    MachineBasicBlock::iterator Start = MBB.getFirstNonPHI();
    const MCInstrDesc &SMovK = TII->get(AMDGPU::S_MOVK_I32);

    BuildMI(MBB, Start, NoDL, SMovK, AMDGPU::FLAT_SCR_LO)
      .addImm(StackOffset);

    // Documentation says size is "per-thread scratch size in bytes"
    BuildMI(MBB, Start, NoDL, SMovK, AMDGPU::FLAT_SCR_HI)
      .addImm(StackSizeBytes);
  }

  return true;
}
예제 #11
0
void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
                                        int SPAdj, unsigned FIOperandNum,
                                        RegScavenger *RS) const {
  MachineFunction *MF = MI->getParent()->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock *MBB = MI->getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo *FrameInfo = MF->getFrameInfo();
  const SISubtarget &ST =  MF->getSubtarget<SISubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  DebugLoc DL = MI->getDebugLoc();

  MachineOperand &FIOp = MI->getOperand(FIOperandNum);
  int Index = MI->getOperand(FIOperandNum).getIndex();

  switch (MI->getOpcode()) {
    // SGPR register spill
    case AMDGPU::SI_SPILL_S512_SAVE:
    case AMDGPU::SI_SPILL_S256_SAVE:
    case AMDGPU::SI_SPILL_S128_SAVE:
    case AMDGPU::SI_SPILL_S64_SAVE:
    case AMDGPU::SI_SPILL_S32_SAVE: {
      unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
      unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

      unsigned SuperReg = MI->getOperand(0).getReg();
      bool IsKill = MI->getOperand(0).isKill();
      // SubReg carries the "Kill" flag when SubReg == SuperReg.
      unsigned SubKillState = getKillRegState((NumSubRegs == 1) && IsKill);
      for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
        unsigned SubReg = getPhysRegSubReg(SuperReg,
                                           &AMDGPU::SGPR_32RegClass, i);

        struct SIMachineFunctionInfo::SpilledReg Spill =
            MFI->getSpilledReg(MF, Index, i);

        if (Spill.hasReg()) {
          BuildMI(*MBB, MI, DL,
                  TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
                  Spill.VGPR)
                  .addReg(SubReg, getKillRegState(IsKill))
                  .addImm(Spill.Lane);

          // FIXME: Since this spills to another register instead of an actual
          // frame index, we should delete the frame index when all references to
          // it are fixed.
        } else {
          // Spill SGPR to a frame index.
          // FIXME we should use S_STORE_DWORD here for VI.
          MachineInstrBuilder Mov
            = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
            .addReg(SubReg, SubKillState);


          // There could be undef components of a spilled super register.
          // TODO: Can we detect this and skip the spill?
          if (NumSubRegs > 1) {
            // The last implicit use of the SuperReg carries the "Kill" flag.
            unsigned SuperKillState = 0;
            if (i + 1 == e)
              SuperKillState |= getKillRegState(IsKill);
            Mov.addReg(SuperReg, RegState::Implicit | SuperKillState);
          }

          unsigned Size = FrameInfo->getObjectSize(Index);
          unsigned Align = FrameInfo->getObjectAlignment(Index);
          MachinePointerInfo PtrInfo
              = MachinePointerInfo::getFixedStack(*MF, Index);
          MachineMemOperand *MMO
              = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
                                         Size, Align);
          BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE))
                  .addReg(TmpReg, RegState::Kill)         // src
                  .addFrameIndex(Index)                   // frame_idx
                  .addReg(MFI->getScratchRSrcReg())       // scratch_rsrc
                  .addReg(MFI->getScratchWaveOffsetReg()) // scratch_offset
                  .addImm(i * 4)                          // offset
                  .addMemOperand(MMO);
        }
      }
      MI->eraseFromParent();
      break;
    }

    // SGPR register restore
    case AMDGPU::SI_SPILL_S512_RESTORE:
    case AMDGPU::SI_SPILL_S256_RESTORE:
    case AMDGPU::SI_SPILL_S128_RESTORE:
    case AMDGPU::SI_SPILL_S64_RESTORE:
    case AMDGPU::SI_SPILL_S32_RESTORE: {
      unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
      unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

      for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
        unsigned SubReg = getPhysRegSubReg(MI->getOperand(0).getReg(),
                                           &AMDGPU::SGPR_32RegClass, i);
        struct SIMachineFunctionInfo::SpilledReg Spill =
            MFI->getSpilledReg(MF, Index, i);

        if (Spill.hasReg()) {
          BuildMI(*MBB, MI, DL,
                  TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
                  SubReg)
                  .addReg(Spill.VGPR)
                  .addImm(Spill.Lane)
                  .addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
        } else {
          // Restore SGPR from a stack slot.
          // FIXME: We should use S_LOAD_DWORD here for VI.

          unsigned Align = FrameInfo->getObjectAlignment(Index);
          unsigned Size = FrameInfo->getObjectSize(Index);

          MachinePointerInfo PtrInfo
              = MachinePointerInfo::getFixedStack(*MF, Index);

          MachineMemOperand *MMO = MF->getMachineMemOperand(
              PtrInfo, MachineMemOperand::MOLoad, Size, Align);

          BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpReg)
                  .addFrameIndex(Index)                   // frame_idx
                  .addReg(MFI->getScratchRSrcReg())       // scratch_rsrc
                  .addReg(MFI->getScratchWaveOffsetReg()) // scratch_offset
                  .addImm(i * 4)                          // offset
                  .addMemOperand(MMO);
          BuildMI(*MBB, MI, DL,
                  TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
                  .addReg(TmpReg, RegState::Kill)
                  .addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
        }
      }

      MI->eraseFromParent();
      break;
    }

    // VGPR register spill
    case AMDGPU::SI_SPILL_V512_SAVE:
    case AMDGPU::SI_SPILL_V256_SAVE:
    case AMDGPU::SI_SPILL_V128_SAVE:
    case AMDGPU::SI_SPILL_V96_SAVE:
    case AMDGPU::SI_SPILL_V64_SAVE:
    case AMDGPU::SI_SPILL_V32_SAVE:
      buildScratchLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
            TII->getNamedOperand(*MI, AMDGPU::OpName::src),
            TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_rsrc)->getReg(),
            TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(),
            FrameInfo->getObjectOffset(Index) +
            TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(), RS);
      MI->eraseFromParent();
      break;
    case AMDGPU::SI_SPILL_V32_RESTORE:
    case AMDGPU::SI_SPILL_V64_RESTORE:
    case AMDGPU::SI_SPILL_V96_RESTORE:
    case AMDGPU::SI_SPILL_V128_RESTORE:
    case AMDGPU::SI_SPILL_V256_RESTORE:
    case AMDGPU::SI_SPILL_V512_RESTORE: {
      buildScratchLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
            TII->getNamedOperand(*MI, AMDGPU::OpName::dst),
            TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_rsrc)->getReg(),
            TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(),
            FrameInfo->getObjectOffset(Index) +
            TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(), RS);
      MI->eraseFromParent();
      break;
    }

    default: {
      int64_t Offset = FrameInfo->getObjectOffset(Index);
      FIOp.ChangeToImmediate(Offset);
      if (!TII->isImmOperandLegal(MI, FIOperandNum, FIOp)) {
        unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
        BuildMI(*MBB, MI, MI->getDebugLoc(),
                TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
                .addImm(Offset);
        FIOp.ChangeToRegister(TmpReg, false, false, true);
      }
    }
  }
}
예제 #12
0
bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
  TII = static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
  TRI =
      static_cast<const SIRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  bool HaveKill = false;
  bool NeedFlat = false;
  unsigned Depth = 0;

  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
       BI != BE; ++BI) {

    MachineBasicBlock *EmptyMBBAtEnd = NULL;
    MachineBasicBlock &MBB = *BI;
    MachineBasicBlock::iterator I, Next;
    bool ExecModified = false;

    for (I = MBB.begin(); I != MBB.end(); I = Next) {
      Next = std::next(I);

      MachineInstr &MI = *I;

      // Flat uses m0 in case it needs to access LDS.
      if (TII->isFLAT(MI))
        NeedFlat = true;

      for (const auto &Def : I->defs()) {
        if (Def.isReg() && Def.isDef() && Def.getReg() == AMDGPU::EXEC) {
          ExecModified = true;
          break;
        }
      }

      switch (MI.getOpcode()) {
        default: break;
        case AMDGPU::SI_IF:
          ++Depth;
          If(MI);
          break;

        case AMDGPU::SI_ELSE:
          Else(MI, ExecModified);
          break;

        case AMDGPU::SI_BREAK:
          Break(MI);
          break;

        case AMDGPU::SI_IF_BREAK:
          IfBreak(MI);
          break;

        case AMDGPU::SI_ELSE_BREAK:
          ElseBreak(MI);
          break;

        case AMDGPU::SI_LOOP:
          ++Depth;
          Loop(MI);
          break;

        case AMDGPU::SI_END_CF:
          if (--Depth == 0 && HaveKill) {
            SkipIfDead(MI);
            HaveKill = false;
          }
          EndCf(MI);
          break;

        case AMDGPU::SI_KILL:
          if (Depth == 0)
            SkipIfDead(MI);
          else
            HaveKill = true;
          Kill(MI);
          break;

        case AMDGPU::S_BRANCH:
          Branch(MI);
          break;

        case AMDGPU::SI_INDIRECT_SRC_V1:
        case AMDGPU::SI_INDIRECT_SRC_V2:
        case AMDGPU::SI_INDIRECT_SRC_V4:
        case AMDGPU::SI_INDIRECT_SRC_V8:
        case AMDGPU::SI_INDIRECT_SRC_V16:
          IndirectSrc(MI);
          break;

        case AMDGPU::SI_INDIRECT_DST_V1:
        case AMDGPU::SI_INDIRECT_DST_V2:
        case AMDGPU::SI_INDIRECT_DST_V4:
        case AMDGPU::SI_INDIRECT_DST_V8:
        case AMDGPU::SI_INDIRECT_DST_V16:
          IndirectDst(MI);
          break;

        case AMDGPU::S_ENDPGM: {
          if (MF.getInfo<SIMachineFunctionInfo>()->returnsVoid())
            break;

          // Graphics shaders returning non-void shouldn't contain S_ENDPGM,
          // because external bytecode will be appended at the end.
          if (BI != --MF.end() || I != MBB.getFirstTerminator()) {
            // S_ENDPGM is not the last instruction. Add an empty block at
            // the end and jump there.
            if (!EmptyMBBAtEnd) {
              EmptyMBBAtEnd = MF.CreateMachineBasicBlock();
              MF.insert(MF.end(), EmptyMBBAtEnd);
            }

            MBB.addSuccessor(EmptyMBBAtEnd);
            BuildMI(*BI, I, MI.getDebugLoc(), TII->get(AMDGPU::S_BRANCH))
                    .addMBB(EmptyMBBAtEnd);
          }

          I->eraseFromParent();
          break;
        }
      }
    }
  }

  if (NeedFlat && MFI->IsKernel) {
    // TODO: What to use with function calls?
    // We will need to Initialize the flat scratch register pair.
    if (NeedFlat)
      MFI->setHasFlatInstructions(true);
  }

  return true;
}
예제 #13
0
void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
                                        int SPAdj, unsigned FIOperandNum,
                                        RegScavenger *RS) const {
  MachineFunction *MF = MI->getParent()->getParent();
  MachineBasicBlock *MBB = MI->getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo *FrameInfo = MF->getFrameInfo();
  const SIInstrInfo *TII = static_cast<const SIInstrInfo*>(ST.getInstrInfo());
  DebugLoc DL = MI->getDebugLoc();

  MachineOperand &FIOp = MI->getOperand(FIOperandNum);
  int Index = MI->getOperand(FIOperandNum).getIndex();

  switch (MI->getOpcode()) {
    // SGPR register spill
    case AMDGPU::SI_SPILL_S512_SAVE:
    case AMDGPU::SI_SPILL_S256_SAVE:
    case AMDGPU::SI_SPILL_S128_SAVE:
    case AMDGPU::SI_SPILL_S64_SAVE:
    case AMDGPU::SI_SPILL_S32_SAVE: {
      unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());

      for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
        unsigned SubReg = getPhysRegSubReg(MI->getOperand(0).getReg(),
                                           &AMDGPU::SGPR_32RegClass, i);
        struct SIMachineFunctionInfo::SpilledReg Spill =
            MFI->getSpilledReg(MF, Index, i);

        if (Spill.VGPR == AMDGPU::NoRegister) {
           LLVMContext &Ctx = MF->getFunction()->getContext();
           Ctx.emitError("Ran out of VGPRs for spilling SGPR");
        }

        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill.VGPR)
                .addReg(SubReg)
                .addImm(Spill.Lane);

      }
      MI->eraseFromParent();
      break;
    }

    // SGPR register restore
    case AMDGPU::SI_SPILL_S512_RESTORE:
    case AMDGPU::SI_SPILL_S256_RESTORE:
    case AMDGPU::SI_SPILL_S128_RESTORE:
    case AMDGPU::SI_SPILL_S64_RESTORE:
    case AMDGPU::SI_SPILL_S32_RESTORE: {
      unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());

      for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
        unsigned SubReg = getPhysRegSubReg(MI->getOperand(0).getReg(),
                                           &AMDGPU::SGPR_32RegClass, i);
        bool isM0 = SubReg == AMDGPU::M0;
        struct SIMachineFunctionInfo::SpilledReg Spill =
            MFI->getSpilledReg(MF, Index, i);

        if (Spill.VGPR == AMDGPU::NoRegister) {
           LLVMContext &Ctx = MF->getFunction()->getContext();
           Ctx.emitError("Ran out of VGPRs for spilling SGPR");
        }

        if (isM0) {
          SubReg = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0);
        }

        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READLANE_B32), SubReg)
                .addReg(Spill.VGPR)
                .addImm(Spill.Lane);
        if (isM0) {
          BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
                  .addReg(SubReg);
        }
      }
      TII->insertNOPs(MI, 3);
      MI->eraseFromParent();
      break;
    }

    // VGPR register spill
    case AMDGPU::SI_SPILL_V512_SAVE:
    case AMDGPU::SI_SPILL_V256_SAVE:
    case AMDGPU::SI_SPILL_V128_SAVE:
    case AMDGPU::SI_SPILL_V96_SAVE:
    case AMDGPU::SI_SPILL_V64_SAVE:
    case AMDGPU::SI_SPILL_V32_SAVE: {
      unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
      unsigned SrcReg = MI->getOperand(0).getReg();
      int64_t Offset = FrameInfo->getObjectOffset(Index);
      unsigned Size = NumSubRegs * 4;
      unsigned TmpReg = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);

      for (unsigned i = 0, e = NumSubRegs; i != e; ++i) {
        unsigned SubReg = NumSubRegs > 1 ?
            getPhysRegSubReg(SrcReg, &AMDGPU::VGPR_32RegClass, i) :
            SrcReg;
        Offset += (i * 4);
        MFI->LDSWaveSpillSize = std::max((unsigned)Offset + 4, (unsigned)MFI->LDSWaveSpillSize);

        unsigned AddrReg = TII->calculateLDSSpillAddress(*MBB, MI, RS, TmpReg,
                                                         Offset, Size);

        if (AddrReg == AMDGPU::NoRegister) {
           LLVMContext &Ctx = MF->getFunction()->getContext();
           Ctx.emitError("Ran out of VGPRs for spilling VGPRS");
           AddrReg = AMDGPU::VGPR0;
        }

        // Store the value in LDS
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::DS_WRITE_B32))
                .addImm(0) // gds
                .addReg(AddrReg, RegState::Kill) // addr
                .addReg(SubReg) // data0
                .addImm(0); // offset
      }

      MI->eraseFromParent();
      break;
    }
    case AMDGPU::SI_SPILL_V32_RESTORE:
    case AMDGPU::SI_SPILL_V64_RESTORE:
    case AMDGPU::SI_SPILL_V128_RESTORE:
    case AMDGPU::SI_SPILL_V256_RESTORE:
    case AMDGPU::SI_SPILL_V512_RESTORE: {
      unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
      unsigned DstReg = MI->getOperand(0).getReg();
      int64_t Offset = FrameInfo->getObjectOffset(Index);
      unsigned Size = NumSubRegs * 4;
      unsigned TmpReg = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);

      // FIXME: We could use DS_READ_B64 here to optimize for larger registers.
      for (unsigned i = 0, e = NumSubRegs; i != e; ++i) {
        unsigned SubReg = NumSubRegs > 1 ?
            getPhysRegSubReg(DstReg, &AMDGPU::VGPR_32RegClass, i) :
            DstReg;

        Offset += (i * 4);
        unsigned AddrReg = TII->calculateLDSSpillAddress(*MBB, MI, RS, TmpReg,
                                                          Offset, Size);
        if (AddrReg == AMDGPU::NoRegister) {
           LLVMContext &Ctx = MF->getFunction()->getContext();
           Ctx.emitError("Ran out of VGPRs for spilling VGPRs");
           AddrReg = AMDGPU::VGPR0;
        }

        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::DS_READ_B32), SubReg)
                .addImm(0) // gds
                .addReg(AddrReg, RegState::Kill) // addr
                .addImm(0); //offset
      }
      MI->eraseFromParent();
      break;
    }

    default: {
      int64_t Offset = FrameInfo->getObjectOffset(Index);
      FIOp.ChangeToImmediate(Offset);
      if (!TII->isImmOperandLegal(MI, FIOperandNum, FIOp)) {
        unsigned TmpReg = RS->scavengeRegister(&AMDGPU::VReg_32RegClass, MI, SPAdj);
        BuildMI(*MBB, MI, MI->getDebugLoc(),
                TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
                .addImm(Offset);
        FIOp.ChangeToRegister(TmpReg, false);
      }
    }
  }
}