Exemple #1
0
void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
                                        int SPAdj, unsigned FIOperandNum,
                                        RegScavenger *RS) const {
  MachineFunction *MF = MI->getParent()->getParent();
  MachineBasicBlock *MBB = MI->getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo *FrameInfo = MF->getFrameInfo();
  const SIInstrInfo *TII =
      static_cast<const SIInstrInfo *>(MF->getSubtarget().getInstrInfo());
  DebugLoc DL = MI->getDebugLoc();

  MachineOperand &FIOp = MI->getOperand(FIOperandNum);
  int Index = MI->getOperand(FIOperandNum).getIndex();

  switch (MI->getOpcode()) {
    // SGPR register spill
    case AMDGPU::SI_SPILL_S512_SAVE:
    case AMDGPU::SI_SPILL_S256_SAVE:
    case AMDGPU::SI_SPILL_S128_SAVE:
    case AMDGPU::SI_SPILL_S64_SAVE:
    case AMDGPU::SI_SPILL_S32_SAVE: {
      unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());

      for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
        unsigned SubReg = getPhysRegSubReg(MI->getOperand(0).getReg(),
                                           &AMDGPU::SGPR_32RegClass, i);
        struct SIMachineFunctionInfo::SpilledReg Spill =
            MFI->getSpilledReg(MF, Index, i);

        if (Spill.VGPR == AMDGPU::NoRegister) {
           LLVMContext &Ctx = MF->getFunction()->getContext();
           Ctx.emitError("Ran out of VGPRs for spilling SGPR");
        }

        BuildMI(*MBB, MI, DL,
                TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
                Spill.VGPR)
                .addReg(SubReg)
                .addImm(Spill.Lane);

      }
      MI->eraseFromParent();
      break;
    }

    // SGPR register restore
    case AMDGPU::SI_SPILL_S512_RESTORE:
    case AMDGPU::SI_SPILL_S256_RESTORE:
    case AMDGPU::SI_SPILL_S128_RESTORE:
    case AMDGPU::SI_SPILL_S64_RESTORE:
    case AMDGPU::SI_SPILL_S32_RESTORE: {
      unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());

      for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
        unsigned SubReg = getPhysRegSubReg(MI->getOperand(0).getReg(),
                                           &AMDGPU::SGPR_32RegClass, i);
        struct SIMachineFunctionInfo::SpilledReg Spill =
            MFI->getSpilledReg(MF, Index, i);

        if (Spill.VGPR == AMDGPU::NoRegister) {
           LLVMContext &Ctx = MF->getFunction()->getContext();
           Ctx.emitError("Ran out of VGPRs for spilling SGPR");
        }

        BuildMI(*MBB, MI, DL,
                TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
                SubReg)
                .addReg(Spill.VGPR)
                .addImm(Spill.Lane)
                .addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
      }

      // TODO: only do this when it is needed
      switch (MF->getSubtarget<AMDGPUSubtarget>().getGeneration()) {
      case AMDGPUSubtarget::SOUTHERN_ISLANDS:
        // "VALU writes SGPR" -> "SMRD reads that SGPR" needs "S_NOP 3" on SI
        TII->insertNOPs(MI, 3);
        break;
      case AMDGPUSubtarget::SEA_ISLANDS:
        break;
      default: // VOLCANIC_ISLANDS and later
        // "VALU writes SGPR -> VMEM reads that SGPR" needs "S_NOP 4" on VI
        // and later. This also applies to VALUs which write VCC, but we're
        // unlikely to see VMEM use VCC.
        TII->insertNOPs(MI, 4);
      }

      MI->eraseFromParent();
      break;
    }

    // VGPR register spill
    case AMDGPU::SI_SPILL_V512_SAVE:
    case AMDGPU::SI_SPILL_V256_SAVE:
    case AMDGPU::SI_SPILL_V128_SAVE:
    case AMDGPU::SI_SPILL_V96_SAVE:
    case AMDGPU::SI_SPILL_V64_SAVE:
    case AMDGPU::SI_SPILL_V32_SAVE:
      buildScratchLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
            TII->getNamedOperand(*MI, AMDGPU::OpName::src)->getReg(),
            TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_rsrc)->getReg(),
            TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(),
             FrameInfo->getObjectOffset(Index), RS);
      MI->eraseFromParent();
      break;
    case AMDGPU::SI_SPILL_V32_RESTORE:
    case AMDGPU::SI_SPILL_V64_RESTORE:
    case AMDGPU::SI_SPILL_V96_RESTORE:
    case AMDGPU::SI_SPILL_V128_RESTORE:
    case AMDGPU::SI_SPILL_V256_RESTORE:
    case AMDGPU::SI_SPILL_V512_RESTORE: {
      buildScratchLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
            TII->getNamedOperand(*MI, AMDGPU::OpName::dst)->getReg(),
            TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_rsrc)->getReg(),
            TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(),
            FrameInfo->getObjectOffset(Index), RS);
      MI->eraseFromParent();
      break;
    }

    default: {
      int64_t Offset = FrameInfo->getObjectOffset(Index);
      FIOp.ChangeToImmediate(Offset);
      if (!TII->isImmOperandLegal(MI, FIOperandNum, FIOp)) {
        unsigned TmpReg = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, SPAdj);
        BuildMI(*MBB, MI, MI->getDebugLoc(),
                TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
                .addImm(Offset);
        FIOp.ChangeToRegister(TmpReg, false, false, true);
      }
    }
  }
}
Exemple #2
0
void SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
                                 int Index,
                                 RegScavenger *RS) const {
  MachineFunction *MF = MI->getParent()->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock *MBB = MI->getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
  const SISubtarget &ST =  MF->getSubtarget<SISubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const DebugLoc &DL = MI->getDebugLoc();

  unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
  unsigned SuperReg = MI->getOperand(0).getReg();

  // m0 is not allowed as with readlane/writelane, so a temporary SGPR and
  // extra copy is needed.
  bool IsM0 = (SuperReg == AMDGPU::M0);
  if (IsM0) {
    assert(NumSubRegs == 1);
    SuperReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
  }

  for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
    unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    unsigned SubReg = NumSubRegs == 1 ?
      SuperReg : getSubReg(SuperReg, getSubRegFromChannel(i));

    SIMachineFunctionInfo::SpilledReg Spill
      = MFI->getSpilledReg(MF, Index, i);

    if (Spill.hasReg()) {
      BuildMI(*MBB, MI, DL,
              TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
              SubReg)
        .addReg(Spill.VGPR)
        .addImm(Spill.Lane)
        .addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
    } else {
      // Restore SGPR from a stack slot.
      // FIXME: We should use S_LOAD_DWORD here for VI.

      unsigned Align = FrameInfo.getObjectAlignment(Index);
      unsigned Size = FrameInfo.getObjectSize(Index);

      MachinePointerInfo PtrInfo
        = MachinePointerInfo::getFixedStack(*MF, Index);

      MachineMemOperand *MMO = MF->getMachineMemOperand(
        PtrInfo, MachineMemOperand::MOLoad, Size, Align);

      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpReg)
        .addFrameIndex(Index)                   // vaddr
        .addReg(MFI->getScratchRSrcReg())       // srsrc
        .addReg(MFI->getScratchWaveOffsetReg()) // soffset
        .addImm(i * 4)                          // offset
        .addMemOperand(MMO);
      BuildMI(*MBB, MI, DL,
              TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
        .addReg(TmpReg, RegState::Kill)
        .addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
    }
  }

  if (IsM0 && SuperReg != AMDGPU::M0) {
    BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
      .addReg(SuperReg);
  }

  MI->eraseFromParent();
}
Exemple #3
0
void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
                                        int SPAdj, unsigned FIOperandNum,
                                        RegScavenger *RS) const {
  MachineFunction *MF = MI->getParent()->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock *MBB = MI->getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo *FrameInfo = MF->getFrameInfo();
  const SIInstrInfo *TII =
      static_cast<const SIInstrInfo *>(MF->getSubtarget().getInstrInfo());
  DebugLoc DL = MI->getDebugLoc();

  MachineOperand &FIOp = MI->getOperand(FIOperandNum);
  int Index = MI->getOperand(FIOperandNum).getIndex();

  switch (MI->getOpcode()) {
    // SGPR register spill
    case AMDGPU::SI_SPILL_S512_SAVE:
    case AMDGPU::SI_SPILL_S256_SAVE:
    case AMDGPU::SI_SPILL_S128_SAVE:
    case AMDGPU::SI_SPILL_S64_SAVE:
    case AMDGPU::SI_SPILL_S32_SAVE: {
      unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
      unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

      for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
        unsigned SubReg = getPhysRegSubReg(MI->getOperand(0).getReg(),
                                           &AMDGPU::SGPR_32RegClass, i);
        struct SIMachineFunctionInfo::SpilledReg Spill =
            MFI->getSpilledReg(MF, Index, i);

        if (Spill.hasReg()) {
          BuildMI(*MBB, MI, DL,
                  TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
                  Spill.VGPR)
                  .addReg(SubReg)
                  .addImm(Spill.Lane);

          // FIXME: Since this spills to another register instead of an actual
          // frame index, we should delete the frame index when all references to
          // it are fixed.
        } else {
          // Spill SGPR to a frame index.
          // FIXME we should use S_STORE_DWORD here for VI.
          BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
                  .addReg(SubReg);

          unsigned Size = FrameInfo->getObjectSize(Index);
          unsigned Align = FrameInfo->getObjectAlignment(Index);
          MachinePointerInfo PtrInfo
              = MachinePointerInfo::getFixedStack(*MF, Index);
          MachineMemOperand *MMO
              = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
                                         Size, Align);
          BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE))
                  .addReg(TmpReg)                         // src
                  .addFrameIndex(Index)                   // frame_idx
                  .addReg(MFI->getScratchRSrcReg())       // scratch_rsrc
                  .addReg(MFI->getScratchWaveOffsetReg()) // scratch_offset
                  .addImm(i * 4)                          // offset
                  .addMemOperand(MMO);
        }
      }
      MI->eraseFromParent();
      break;
    }

    // SGPR register restore
    case AMDGPU::SI_SPILL_S512_RESTORE:
    case AMDGPU::SI_SPILL_S256_RESTORE:
    case AMDGPU::SI_SPILL_S128_RESTORE:
    case AMDGPU::SI_SPILL_S64_RESTORE:
    case AMDGPU::SI_SPILL_S32_RESTORE: {
      unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
      unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

      for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
        unsigned SubReg = getPhysRegSubReg(MI->getOperand(0).getReg(),
                                           &AMDGPU::SGPR_32RegClass, i);
        struct SIMachineFunctionInfo::SpilledReg Spill =
            MFI->getSpilledReg(MF, Index, i);

        if (Spill.hasReg()) {
          BuildMI(*MBB, MI, DL,
                  TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
                  SubReg)
                  .addReg(Spill.VGPR)
                  .addImm(Spill.Lane)
                  .addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
        } else {
          // Restore SGPR from a stack slot.
          // FIXME: We should use S_LOAD_DWORD here for VI.

          unsigned Align = FrameInfo->getObjectAlignment(Index);
          unsigned Size = FrameInfo->getObjectSize(Index);

          MachinePointerInfo PtrInfo
              = MachinePointerInfo::getFixedStack(*MF, Index);

          MachineMemOperand *MMO = MF->getMachineMemOperand(
              PtrInfo, MachineMemOperand::MOLoad, Size, Align);

          BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpReg)
                  .addFrameIndex(Index)                   // frame_idx
                  .addReg(MFI->getScratchRSrcReg())       // scratch_rsrc
                  .addReg(MFI->getScratchWaveOffsetReg()) // scratch_offset
                  .addImm(i * 4)                          // offset
                  .addMemOperand(MMO);
          BuildMI(*MBB, MI, DL,
                  TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
                  .addReg(TmpReg, RegState::Kill)
                  .addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
        }
      }

      // TODO: only do this when it is needed
      switch (MF->getSubtarget<AMDGPUSubtarget>().getGeneration()) {
      case AMDGPUSubtarget::SOUTHERN_ISLANDS:
        // "VALU writes SGPR" -> "SMRD reads that SGPR" needs 4 wait states
        // ("S_NOP 3") on SI
        TII->insertWaitStates(MI, 4);
        break;
      case AMDGPUSubtarget::SEA_ISLANDS:
        break;
      default: // VOLCANIC_ISLANDS and later
        // "VALU writes SGPR -> VMEM reads that SGPR" needs 5 wait states
        // ("S_NOP 4") on VI and later. This also applies to VALUs which write
        // VCC, but we're unlikely to see VMEM use VCC.
        TII->insertWaitStates(MI, 5);
      }

      MI->eraseFromParent();
      break;
    }

    // VGPR register spill
    case AMDGPU::SI_SPILL_V512_SAVE:
    case AMDGPU::SI_SPILL_V256_SAVE:
    case AMDGPU::SI_SPILL_V128_SAVE:
    case AMDGPU::SI_SPILL_V96_SAVE:
    case AMDGPU::SI_SPILL_V64_SAVE:
    case AMDGPU::SI_SPILL_V32_SAVE:
      buildScratchLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
            TII->getNamedOperand(*MI, AMDGPU::OpName::src)->getReg(),
            TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_rsrc)->getReg(),
            TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(),
            FrameInfo->getObjectOffset(Index) +
            TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(), RS);
      MI->eraseFromParent();
      break;
    case AMDGPU::SI_SPILL_V32_RESTORE:
    case AMDGPU::SI_SPILL_V64_RESTORE:
    case AMDGPU::SI_SPILL_V96_RESTORE:
    case AMDGPU::SI_SPILL_V128_RESTORE:
    case AMDGPU::SI_SPILL_V256_RESTORE:
    case AMDGPU::SI_SPILL_V512_RESTORE: {
      buildScratchLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
            TII->getNamedOperand(*MI, AMDGPU::OpName::dst)->getReg(),
            TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_rsrc)->getReg(),
            TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(),
            FrameInfo->getObjectOffset(Index) +
            TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(), RS);
      MI->eraseFromParent();
      break;
    }

    default: {
      int64_t Offset = FrameInfo->getObjectOffset(Index);
      FIOp.ChangeToImmediate(Offset);
      if (!TII->isImmOperandLegal(MI, FIOperandNum, FIOp)) {
        unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
        BuildMI(*MBB, MI, MI->getDebugLoc(),
                TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
                .addImm(Offset);
        FIOp.ChangeToRegister(TmpReg, false, false, true);
      }
    }
  }
}
Exemple #4
0
void SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
                                 int Index,
                                 RegScavenger *RS) const {
  MachineFunction *MF = MI->getParent()->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock *MBB = MI->getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
  const SISubtarget &ST =  MF->getSubtarget<SISubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const DebugLoc &DL = MI->getDebugLoc();

  unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
  unsigned SuperReg = MI->getOperand(0).getReg();
  bool SpillToSMEM = ST.hasScalarStores() && EnableSpillSGPRToSMEM;

  assert(SuperReg != AMDGPU::M0 && "m0 should never spill");

  unsigned OffsetReg = AMDGPU::M0;
  unsigned M0CopyReg = AMDGPU::NoRegister;

  if (SpillToSMEM) {
    if (RS->isRegUsed(AMDGPU::M0)) {
      M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
        .addReg(AMDGPU::M0);
    }
  }

  // SubReg carries the "Kill" flag when SubReg == SuperReg.
  int64_t FrOffset = FrameInfo.getObjectOffset(Index);

  const unsigned EltSize = 4;

  for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
    unsigned SubReg = NumSubRegs == 1 ?
      SuperReg : getSubReg(SuperReg, getSubRegFromChannel(i));

    if (SpillToSMEM) {
      unsigned Align = FrameInfo.getObjectAlignment(Index);
      MachinePointerInfo PtrInfo
        = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
      MachineMemOperand *MMO
        = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
                                   EltSize, MinAlign(Align, EltSize * i));

      // Add i * 4 offset
      int64_t Offset = ST.getWavefrontSize() * (FrOffset + 4 * i);
      if (Offset != 0) {
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
          .addReg(MFI->getScratchWaveOffsetReg())
          .addImm(Offset);
      } else {
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
          .addReg(MFI->getScratchWaveOffsetReg());
      }

      auto MIB =
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_BUFFER_LOAD_DWORD_SGPR), SubReg)
        .addReg(MFI->getScratchRSrcReg()) // sbase
        .addReg(OffsetReg, RegState::Kill)                // soff
        .addImm(0)                        // glc
        .addMemOperand(MMO);

      if (NumSubRegs > 1)
        MIB.addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);

      continue;
    }

    SIMachineFunctionInfo::SpilledReg Spill
      = MFI->getSpilledReg(MF, Index, i);

    if (Spill.hasReg()) {
      auto MIB =
        BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
                SubReg)
        .addReg(Spill.VGPR)
        .addImm(Spill.Lane);

      if (NumSubRegs > 1)
        MIB.addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
    } else {
      // Restore SGPR from a stack slot.
      // FIXME: We should use S_LOAD_DWORD here for VI.
      unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
      unsigned Align = FrameInfo.getObjectAlignment(Index);

      MachinePointerInfo PtrInfo
        = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);

      MachineMemOperand *MMO = MF->getMachineMemOperand(PtrInfo,
        MachineMemOperand::MOLoad, EltSize,
        MinAlign(Align, EltSize * i));

      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpReg)
        .addFrameIndex(Index)                   // vaddr
        .addReg(MFI->getScratchRSrcReg())       // srsrc
        .addReg(MFI->getScratchWaveOffsetReg()) // soffset
        .addImm(i * 4)                          // offset
        .addMemOperand(MMO);

      auto MIB =
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
        .addReg(TmpReg, RegState::Kill);

      if (NumSubRegs > 1)
        MIB.addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
    }
  }

  if (M0CopyReg != AMDGPU::NoRegister) {
    BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
      .addReg(M0CopyReg, RegState::Kill);
  }

  MI->eraseFromParent();
}
Exemple #5
0
void SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
                               int Index,
                               RegScavenger *RS) const {
  MachineFunction *MF = MI->getParent()->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock *MBB = MI->getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
  const SISubtarget &ST =  MF->getSubtarget<SISubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const DebugLoc &DL = MI->getDebugLoc();

  unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
  unsigned SuperReg = MI->getOperand(0).getReg();
  bool IsKill = MI->getOperand(0).isKill();

  // SubReg carries the "Kill" flag when SubReg == SuperReg.
  unsigned SubKillState = getKillRegState((NumSubRegs == 1) && IsKill);
  for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
    unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    unsigned SubReg = NumSubRegs == 1 ?
      SuperReg : getSubReg(SuperReg, getSubRegFromChannel(i));

    struct SIMachineFunctionInfo::SpilledReg Spill =
      MFI->getSpilledReg(MF, Index, i);
    if (Spill.hasReg()) {
      if (SuperReg == AMDGPU::M0) {
        assert(NumSubRegs == 1);
        unsigned CopyM0
          = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), CopyM0)
          .addReg(SuperReg, getKillRegState(IsKill));

        // The real spill now kills the temp copy.
        SubReg = SuperReg = CopyM0;
        IsKill = true;
      }

      BuildMI(*MBB, MI, DL,
              TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
              Spill.VGPR)
        .addReg(SubReg, getKillRegState(IsKill))
        .addImm(Spill.Lane);

      // FIXME: Since this spills to another register instead of an actual
      // frame index, we should delete the frame index when all references to
      // it are fixed.
    } else {
      // Spill SGPR to a frame index.
      // FIXME we should use S_STORE_DWORD here for VI.
      MachineInstrBuilder Mov
        = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
        .addReg(SubReg, SubKillState);


      // There could be undef components of a spilled super register.
      // TODO: Can we detect this and skip the spill?
      if (NumSubRegs > 1) {
        // The last implicit use of the SuperReg carries the "Kill" flag.
        unsigned SuperKillState = 0;
        if (i + 1 == e)
          SuperKillState |= getKillRegState(IsKill);
        Mov.addReg(SuperReg, RegState::Implicit | SuperKillState);
      }

      unsigned Size = FrameInfo.getObjectSize(Index);
      unsigned Align = FrameInfo.getObjectAlignment(Index);
      MachinePointerInfo PtrInfo
        = MachinePointerInfo::getFixedStack(*MF, Index);
      MachineMemOperand *MMO
        = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
                                   Size, Align);
      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE))
        .addReg(TmpReg, RegState::Kill)         // src
        .addFrameIndex(Index)                   // vaddr
        .addReg(MFI->getScratchRSrcReg())       // srrsrc
        .addReg(MFI->getScratchWaveOffsetReg()) // soffset
        .addImm(i * 4)                          // offset
        .addMemOperand(MMO);
    }
  }

  MI->eraseFromParent();
  MFI->addToSpilledSGPRs(NumSubRegs);
}
Exemple #6
0
void SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
                               int Index,
                               RegScavenger *RS) const {
  MachineBasicBlock *MBB = MI->getParent();
  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const SISubtarget &ST =  MF->getSubtarget<SISubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();

  unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
  unsigned SuperReg = MI->getOperand(0).getReg();
  bool IsKill = MI->getOperand(0).isKill();
  const DebugLoc &DL = MI->getDebugLoc();

  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo &FrameInfo = MF->getFrameInfo();

  bool SpillToSMEM = ST.hasScalarStores() && EnableSpillSGPRToSMEM;

  assert(SuperReg != AMDGPU::M0 && "m0 should never spill");

  const unsigned EltSize = 4;
  unsigned OffsetReg = AMDGPU::M0;
  unsigned M0CopyReg = AMDGPU::NoRegister;

  if (SpillToSMEM) {
    if (RS->isRegUsed(AMDGPU::M0)) {
      M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
        .addReg(AMDGPU::M0);
    }
  }

  // SubReg carries the "Kill" flag when SubReg == SuperReg.
  unsigned SubKillState = getKillRegState((NumSubRegs == 1) && IsKill);
  for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
    unsigned SubReg = NumSubRegs == 1 ?
      SuperReg : getSubReg(SuperReg, getSubRegFromChannel(i));

    if (SpillToSMEM) {
      int64_t FrOffset = FrameInfo.getObjectOffset(Index);
      unsigned Align = FrameInfo.getObjectAlignment(Index);
      MachinePointerInfo PtrInfo
        = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
      MachineMemOperand *MMO
        = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
                                   EltSize, MinAlign(Align, EltSize * i));

      // Add i * 4 wave offset.
      //
      // SMEM instructions only support a single offset, so increment the wave
      // offset.

      int64_t Offset = ST.getWavefrontSize() * (FrOffset + 4 * i);
      if (Offset != 0) {
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
          .addReg(MFI->getScratchWaveOffsetReg())
          .addImm(Offset);
      } else {
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
          .addReg(MFI->getScratchWaveOffsetReg());
      }

      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_BUFFER_STORE_DWORD_SGPR))
        .addReg(SubReg, getKillRegState(IsKill)) // sdata
        .addReg(MFI->getScratchRSrcReg())        // sbase
        .addReg(OffsetReg, RegState::Kill)       // soff
        .addImm(0)                               // glc
        .addMemOperand(MMO);

      continue;
    }

    struct SIMachineFunctionInfo::SpilledReg Spill =
      MFI->getSpilledReg(MF, Index, i);
    if (Spill.hasReg()) {
      BuildMI(*MBB, MI, DL,
              TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
              Spill.VGPR)
        .addReg(SubReg, getKillRegState(IsKill))
        .addImm(Spill.Lane);

      // FIXME: Since this spills to another register instead of an actual
      // frame index, we should delete the frame index when all references to
      // it are fixed.
    } else {
      // Spill SGPR to a frame index.
      // TODO: Should VI try to spill to VGPR and then spill to SMEM?
      unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
      // TODO: Should VI try to spill to VGPR and then spill to SMEM?

      MachineInstrBuilder Mov
        = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
        .addReg(SubReg, SubKillState);


      // There could be undef components of a spilled super register.
      // TODO: Can we detect this and skip the spill?
      if (NumSubRegs > 1) {
        // The last implicit use of the SuperReg carries the "Kill" flag.
        unsigned SuperKillState = 0;
        if (i + 1 == e)
          SuperKillState |= getKillRegState(IsKill);
        Mov.addReg(SuperReg, RegState::Implicit | SuperKillState);
      }

      unsigned Align = FrameInfo.getObjectAlignment(Index);
      MachinePointerInfo PtrInfo
        = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
      MachineMemOperand *MMO
        = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
                                   EltSize, MinAlign(Align, EltSize * i));
      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE))
        .addReg(TmpReg, RegState::Kill)         // src
        .addFrameIndex(Index)                   // vaddr
        .addReg(MFI->getScratchRSrcReg())       // srrsrc
        .addReg(MFI->getScratchWaveOffsetReg()) // soffset
        .addImm(i * 4)                          // offset
        .addMemOperand(MMO);
    }
  }

  if (M0CopyReg != AMDGPU::NoRegister) {
    BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
      .addReg(M0CopyReg, RegState::Kill);
  }

  MI->eraseFromParent();
  MFI->addToSpilledSGPRs(NumSubRegs);
}
Exemple #7
0
void SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
                                 int Index,
                                 RegScavenger *RS) const {
  MachineFunction *MF = MI->getParent()->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock *MBB = MI->getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
  const SISubtarget &ST =  MF->getSubtarget<SISubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const DebugLoc &DL = MI->getDebugLoc();

  unsigned SuperReg = MI->getOperand(0).getReg();
  bool SpillToSMEM = ST.hasScalarStores() && EnableSpillSGPRToSMEM;

  assert(SuperReg != AMDGPU::M0 && "m0 should never spill");

  unsigned OffsetReg = AMDGPU::M0;
  unsigned M0CopyReg = AMDGPU::NoRegister;

  if (SpillToSMEM) {
    if (RS->isRegUsed(AMDGPU::M0)) {
      M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
        .addReg(AMDGPU::M0);
    }
  }

  unsigned EltSize = 4;
  unsigned ScalarLoadOp;

  const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
  if (SpillToSMEM && isSGPRClass(RC)) {
    // XXX - if private_element_size is larger than 4 it might be useful to be
    // able to spill wider vmem spills.
    std::tie(EltSize, ScalarLoadOp) = getSpillEltSize(RC->getSize(), false);
  }

  ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
  unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();

  // SubReg carries the "Kill" flag when SubReg == SuperReg.
  int64_t FrOffset = FrameInfo.getObjectOffset(Index);

  for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
    unsigned SubReg = NumSubRegs == 1 ?
      SuperReg : getSubReg(SuperReg, SplitParts[i]);

    if (SpillToSMEM) {
      // FIXME: Size may be > 4 but extra bytes wasted.
      unsigned Align = FrameInfo.getObjectAlignment(Index);
      MachinePointerInfo PtrInfo
        = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
      MachineMemOperand *MMO
        = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
                                   EltSize, MinAlign(Align, EltSize * i));

      // Add i * 4 offset
      int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
      if (Offset != 0) {
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
          .addReg(MFI->getScratchWaveOffsetReg())
          .addImm(Offset);
      } else {
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
          .addReg(MFI->getScratchWaveOffsetReg());
      }

      auto MIB =
        BuildMI(*MBB, MI, DL, TII->get(ScalarLoadOp), SubReg)
        .addReg(MFI->getScratchRSrcReg()) // sbase
        .addReg(OffsetReg, RegState::Kill)                // soff
        .addImm(0)                        // glc
        .addMemOperand(MMO);

      if (NumSubRegs > 1)
        MIB.addReg(SuperReg, RegState::ImplicitDefine);

      continue;
    }

    SIMachineFunctionInfo::SpilledReg Spill
      = MFI->getSpilledReg(MF, Index, i);

    if (Spill.hasReg()) {
      auto MIB =
        BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
                SubReg)
        .addReg(Spill.VGPR)
        .addImm(Spill.Lane);

      if (NumSubRegs > 1)
        MIB.addReg(SuperReg, RegState::ImplicitDefine);
    } else {
      // Restore SGPR from a stack slot.
      // FIXME: We should use S_LOAD_DWORD here for VI.
      unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
      unsigned Align = FrameInfo.getObjectAlignment(Index);

      MachinePointerInfo PtrInfo
        = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);

      MachineMemOperand *MMO = MF->getMachineMemOperand(PtrInfo,
        MachineMemOperand::MOLoad, EltSize,
        MinAlign(Align, EltSize * i));

      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpReg)
        .addFrameIndex(Index)                   // vaddr
        .addReg(MFI->getScratchRSrcReg())       // srsrc
        .addReg(MFI->getScratchWaveOffsetReg()) // soffset
        .addImm(i * 4)                          // offset
        .addMemOperand(MMO);

      auto MIB =
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
        .addReg(TmpReg, RegState::Kill);

      if (NumSubRegs > 1)
        MIB.addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
    }
  }

  if (M0CopyReg != AMDGPU::NoRegister) {
    BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
      .addReg(M0CopyReg, RegState::Kill);
  }

  MI->eraseFromParent();
}
Exemple #8
0
void SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
                               int Index,
                               RegScavenger *RS) const {
  MachineBasicBlock *MBB = MI->getParent();
  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const SISubtarget &ST =  MF->getSubtarget<SISubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();

  unsigned SuperReg = MI->getOperand(0).getReg();
  bool IsKill = MI->getOperand(0).isKill();
  const DebugLoc &DL = MI->getDebugLoc();

  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo &FrameInfo = MF->getFrameInfo();

  bool SpillToSMEM = ST.hasScalarStores() && EnableSpillSGPRToSMEM;

  assert(SuperReg != AMDGPU::M0 && "m0 should never spill");

  unsigned OffsetReg = AMDGPU::M0;
  unsigned M0CopyReg = AMDGPU::NoRegister;

  if (SpillToSMEM) {
    if (RS->isRegUsed(AMDGPU::M0)) {
      M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
        .addReg(AMDGPU::M0);
    }
  }

  unsigned ScalarStoreOp;
  unsigned EltSize = 4;
  const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
  if (SpillToSMEM && isSGPRClass(RC)) {
    // XXX - if private_element_size is larger than 4 it might be useful to be
    // able to spill wider vmem spills.
    std::tie(EltSize, ScalarStoreOp) = getSpillEltSize(RC->getSize(), true);
  }

  ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
  unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();

  // SubReg carries the "Kill" flag when SubReg == SuperReg.
  unsigned SubKillState = getKillRegState((NumSubRegs == 1) && IsKill);
  for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
    unsigned SubReg = NumSubRegs == 1 ?
      SuperReg : getSubReg(SuperReg, SplitParts[i]);

    if (SpillToSMEM) {
      int64_t FrOffset = FrameInfo.getObjectOffset(Index);

      // The allocated memory size is really the wavefront size * the frame
      // index size. The widest register class is 64 bytes, so a 4-byte scratch
      // allocation is enough to spill this in a single stack object.
      //
      // FIXME: Frame size/offsets are computed earlier than this, so the extra
      // space is still unnecessarily allocated.

      unsigned Align = FrameInfo.getObjectAlignment(Index);
      MachinePointerInfo PtrInfo
        = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
      MachineMemOperand *MMO
        = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
                                   EltSize, MinAlign(Align, EltSize * i));

      // SMEM instructions only support a single offset, so increment the wave
      // offset.

      int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
      if (Offset != 0) {
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
          .addReg(MFI->getScratchWaveOffsetReg())
          .addImm(Offset);
      } else {
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
          .addReg(MFI->getScratchWaveOffsetReg());
      }

      BuildMI(*MBB, MI, DL, TII->get(ScalarStoreOp))
        .addReg(SubReg, getKillRegState(IsKill)) // sdata
        .addReg(MFI->getScratchRSrcReg())        // sbase
        .addReg(OffsetReg, RegState::Kill)       // soff
        .addImm(0)                               // glc
        .addMemOperand(MMO);

      continue;
    }

    struct SIMachineFunctionInfo::SpilledReg Spill =
      MFI->getSpilledReg(MF, Index, i);
    if (Spill.hasReg()) {
      BuildMI(*MBB, MI, DL,
              TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
              Spill.VGPR)
        .addReg(SubReg, getKillRegState(IsKill))
        .addImm(Spill.Lane);

      // FIXME: Since this spills to another register instead of an actual
      // frame index, we should delete the frame index when all references to
      // it are fixed.
    } else {
      // Spill SGPR to a frame index.
      // TODO: Should VI try to spill to VGPR and then spill to SMEM?
      unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
      // TODO: Should VI try to spill to VGPR and then spill to SMEM?

      MachineInstrBuilder Mov
        = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
        .addReg(SubReg, SubKillState);


      // There could be undef components of a spilled super register.
      // TODO: Can we detect this and skip the spill?
      if (NumSubRegs > 1) {
        // The last implicit use of the SuperReg carries the "Kill" flag.
        unsigned SuperKillState = 0;
        if (i + 1 == e)
          SuperKillState |= getKillRegState(IsKill);
        Mov.addReg(SuperReg, RegState::Implicit | SuperKillState);
      }

      unsigned Align = FrameInfo.getObjectAlignment(Index);
      MachinePointerInfo PtrInfo
        = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
      MachineMemOperand *MMO
        = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
                                   EltSize, MinAlign(Align, EltSize * i));
      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE))
        .addReg(TmpReg, RegState::Kill)         // src
        .addFrameIndex(Index)                   // vaddr
        .addReg(MFI->getScratchRSrcReg())       // srrsrc
        .addReg(MFI->getScratchWaveOffsetReg()) // soffset
        .addImm(i * 4)                          // offset
        .addMemOperand(MMO);
    }
  }

  if (M0CopyReg != AMDGPU::NoRegister) {
    BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
      .addReg(M0CopyReg, RegState::Kill);
  }

  MI->eraseFromParent();
  MFI->addToSpilledSGPRs(NumSubRegs);
}
void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
                                        int SPAdj, unsigned FIOperandNum,
                                        RegScavenger *RS) const {
  MachineFunction *MF = MI->getParent()->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock *MBB = MI->getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo *FrameInfo = MF->getFrameInfo();
  const SISubtarget &ST =  MF->getSubtarget<SISubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  DebugLoc DL = MI->getDebugLoc();

  MachineOperand &FIOp = MI->getOperand(FIOperandNum);
  int Index = MI->getOperand(FIOperandNum).getIndex();

  switch (MI->getOpcode()) {
    // SGPR register spill
    case AMDGPU::SI_SPILL_S512_SAVE:
    case AMDGPU::SI_SPILL_S256_SAVE:
    case AMDGPU::SI_SPILL_S128_SAVE:
    case AMDGPU::SI_SPILL_S64_SAVE:
    case AMDGPU::SI_SPILL_S32_SAVE: {
      unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
      unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

      unsigned SuperReg = MI->getOperand(0).getReg();
      bool IsKill = MI->getOperand(0).isKill();
      // SubReg carries the "Kill" flag when SubReg == SuperReg.
      unsigned SubKillState = getKillRegState((NumSubRegs == 1) && IsKill);
      for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
        unsigned SubReg = getPhysRegSubReg(SuperReg,
                                           &AMDGPU::SGPR_32RegClass, i);

        struct SIMachineFunctionInfo::SpilledReg Spill =
            MFI->getSpilledReg(MF, Index, i);

        if (Spill.hasReg()) {
          BuildMI(*MBB, MI, DL,
                  TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
                  Spill.VGPR)
                  .addReg(SubReg, getKillRegState(IsKill))
                  .addImm(Spill.Lane);

          // FIXME: Since this spills to another register instead of an actual
          // frame index, we should delete the frame index when all references to
          // it are fixed.
        } else {
          // Spill SGPR to a frame index.
          // FIXME we should use S_STORE_DWORD here for VI.
          MachineInstrBuilder Mov
            = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
            .addReg(SubReg, SubKillState);


          // There could be undef components of a spilled super register.
          // TODO: Can we detect this and skip the spill?
          if (NumSubRegs > 1) {
            // The last implicit use of the SuperReg carries the "Kill" flag.
            unsigned SuperKillState = 0;
            if (i + 1 == e)
              SuperKillState |= getKillRegState(IsKill);
            Mov.addReg(SuperReg, RegState::Implicit | SuperKillState);
          }

          unsigned Size = FrameInfo->getObjectSize(Index);
          unsigned Align = FrameInfo->getObjectAlignment(Index);
          MachinePointerInfo PtrInfo
              = MachinePointerInfo::getFixedStack(*MF, Index);
          MachineMemOperand *MMO
              = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
                                         Size, Align);
          BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE))
                  .addReg(TmpReg, RegState::Kill)         // src
                  .addFrameIndex(Index)                   // frame_idx
                  .addReg(MFI->getScratchRSrcReg())       // scratch_rsrc
                  .addReg(MFI->getScratchWaveOffsetReg()) // scratch_offset
                  .addImm(i * 4)                          // offset
                  .addMemOperand(MMO);
        }
      }
      MI->eraseFromParent();
      break;
    }

    // SGPR register restore
    case AMDGPU::SI_SPILL_S512_RESTORE:
    case AMDGPU::SI_SPILL_S256_RESTORE:
    case AMDGPU::SI_SPILL_S128_RESTORE:
    case AMDGPU::SI_SPILL_S64_RESTORE:
    case AMDGPU::SI_SPILL_S32_RESTORE: {
      unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
      unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

      for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
        unsigned SubReg = getPhysRegSubReg(MI->getOperand(0).getReg(),
                                           &AMDGPU::SGPR_32RegClass, i);
        struct SIMachineFunctionInfo::SpilledReg Spill =
            MFI->getSpilledReg(MF, Index, i);

        if (Spill.hasReg()) {
          BuildMI(*MBB, MI, DL,
                  TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
                  SubReg)
                  .addReg(Spill.VGPR)
                  .addImm(Spill.Lane)
                  .addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
        } else {
          // Restore SGPR from a stack slot.
          // FIXME: We should use S_LOAD_DWORD here for VI.

          unsigned Align = FrameInfo->getObjectAlignment(Index);
          unsigned Size = FrameInfo->getObjectSize(Index);

          MachinePointerInfo PtrInfo
              = MachinePointerInfo::getFixedStack(*MF, Index);

          MachineMemOperand *MMO = MF->getMachineMemOperand(
              PtrInfo, MachineMemOperand::MOLoad, Size, Align);

          BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpReg)
                  .addFrameIndex(Index)                   // frame_idx
                  .addReg(MFI->getScratchRSrcReg())       // scratch_rsrc
                  .addReg(MFI->getScratchWaveOffsetReg()) // scratch_offset
                  .addImm(i * 4)                          // offset
                  .addMemOperand(MMO);
          BuildMI(*MBB, MI, DL,
                  TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
                  .addReg(TmpReg, RegState::Kill)
                  .addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
        }
      }

      MI->eraseFromParent();
      break;
    }

    // VGPR register spill
    case AMDGPU::SI_SPILL_V512_SAVE:
    case AMDGPU::SI_SPILL_V256_SAVE:
    case AMDGPU::SI_SPILL_V128_SAVE:
    case AMDGPU::SI_SPILL_V96_SAVE:
    case AMDGPU::SI_SPILL_V64_SAVE:
    case AMDGPU::SI_SPILL_V32_SAVE:
      buildScratchLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
            TII->getNamedOperand(*MI, AMDGPU::OpName::src),
            TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_rsrc)->getReg(),
            TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(),
            FrameInfo->getObjectOffset(Index) +
            TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(), RS);
      MI->eraseFromParent();
      break;
    case AMDGPU::SI_SPILL_V32_RESTORE:
    case AMDGPU::SI_SPILL_V64_RESTORE:
    case AMDGPU::SI_SPILL_V96_RESTORE:
    case AMDGPU::SI_SPILL_V128_RESTORE:
    case AMDGPU::SI_SPILL_V256_RESTORE:
    case AMDGPU::SI_SPILL_V512_RESTORE: {
      buildScratchLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
            TII->getNamedOperand(*MI, AMDGPU::OpName::dst),
            TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_rsrc)->getReg(),
            TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(),
            FrameInfo->getObjectOffset(Index) +
            TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(), RS);
      MI->eraseFromParent();
      break;
    }

    default: {
      int64_t Offset = FrameInfo->getObjectOffset(Index);
      FIOp.ChangeToImmediate(Offset);
      if (!TII->isImmOperandLegal(MI, FIOperandNum, FIOp)) {
        unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
        BuildMI(*MBB, MI, MI->getDebugLoc(),
                TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
                .addImm(Offset);
        FIOp.ChangeToRegister(TmpReg, false, false, true);
      }
    }
  }
}
void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
                                        int SPAdj, unsigned FIOperandNum,
                                        RegScavenger *RS) const {
  MachineFunction *MF = MI->getParent()->getParent();
  MachineBasicBlock *MBB = MI->getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo *FrameInfo = MF->getFrameInfo();
  const SIInstrInfo *TII = static_cast<const SIInstrInfo*>(ST.getInstrInfo());
  DebugLoc DL = MI->getDebugLoc();

  MachineOperand &FIOp = MI->getOperand(FIOperandNum);
  int Index = MI->getOperand(FIOperandNum).getIndex();

  switch (MI->getOpcode()) {
    // SGPR register spill
    case AMDGPU::SI_SPILL_S512_SAVE:
    case AMDGPU::SI_SPILL_S256_SAVE:
    case AMDGPU::SI_SPILL_S128_SAVE:
    case AMDGPU::SI_SPILL_S64_SAVE:
    case AMDGPU::SI_SPILL_S32_SAVE: {
      unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());

      for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
        unsigned SubReg = getPhysRegSubReg(MI->getOperand(0).getReg(),
                                           &AMDGPU::SGPR_32RegClass, i);
        struct SIMachineFunctionInfo::SpilledReg Spill =
            MFI->getSpilledReg(MF, Index, i);

        if (Spill.VGPR == AMDGPU::NoRegister) {
           LLVMContext &Ctx = MF->getFunction()->getContext();
           Ctx.emitError("Ran out of VGPRs for spilling SGPR");
        }

        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill.VGPR)
                .addReg(SubReg)
                .addImm(Spill.Lane);

      }
      MI->eraseFromParent();
      break;
    }

    // SGPR register restore
    case AMDGPU::SI_SPILL_S512_RESTORE:
    case AMDGPU::SI_SPILL_S256_RESTORE:
    case AMDGPU::SI_SPILL_S128_RESTORE:
    case AMDGPU::SI_SPILL_S64_RESTORE:
    case AMDGPU::SI_SPILL_S32_RESTORE: {
      unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());

      for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
        unsigned SubReg = getPhysRegSubReg(MI->getOperand(0).getReg(),
                                           &AMDGPU::SGPR_32RegClass, i);
        bool isM0 = SubReg == AMDGPU::M0;
        struct SIMachineFunctionInfo::SpilledReg Spill =
            MFI->getSpilledReg(MF, Index, i);

        if (Spill.VGPR == AMDGPU::NoRegister) {
           LLVMContext &Ctx = MF->getFunction()->getContext();
           Ctx.emitError("Ran out of VGPRs for spilling SGPR");
        }

        if (isM0) {
          SubReg = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0);
        }

        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READLANE_B32), SubReg)
                .addReg(Spill.VGPR)
                .addImm(Spill.Lane);
        if (isM0) {
          BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
                  .addReg(SubReg);
        }
      }
      TII->insertNOPs(MI, 3);
      MI->eraseFromParent();
      break;
    }

    // VGPR register spill
    case AMDGPU::SI_SPILL_V512_SAVE:
    case AMDGPU::SI_SPILL_V256_SAVE:
    case AMDGPU::SI_SPILL_V128_SAVE:
    case AMDGPU::SI_SPILL_V96_SAVE:
    case AMDGPU::SI_SPILL_V64_SAVE:
    case AMDGPU::SI_SPILL_V32_SAVE: {
      unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
      unsigned SrcReg = MI->getOperand(0).getReg();
      int64_t Offset = FrameInfo->getObjectOffset(Index);
      unsigned Size = NumSubRegs * 4;
      unsigned TmpReg = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);

      for (unsigned i = 0, e = NumSubRegs; i != e; ++i) {
        unsigned SubReg = NumSubRegs > 1 ?
            getPhysRegSubReg(SrcReg, &AMDGPU::VGPR_32RegClass, i) :
            SrcReg;
        Offset += (i * 4);
        MFI->LDSWaveSpillSize = std::max((unsigned)Offset + 4, (unsigned)MFI->LDSWaveSpillSize);

        unsigned AddrReg = TII->calculateLDSSpillAddress(*MBB, MI, RS, TmpReg,
                                                         Offset, Size);

        if (AddrReg == AMDGPU::NoRegister) {
           LLVMContext &Ctx = MF->getFunction()->getContext();
           Ctx.emitError("Ran out of VGPRs for spilling VGPRS");
           AddrReg = AMDGPU::VGPR0;
        }

        // Store the value in LDS
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::DS_WRITE_B32))
                .addImm(0) // gds
                .addReg(AddrReg, RegState::Kill) // addr
                .addReg(SubReg) // data0
                .addImm(0); // offset
      }

      MI->eraseFromParent();
      break;
    }
    case AMDGPU::SI_SPILL_V32_RESTORE:
    case AMDGPU::SI_SPILL_V64_RESTORE:
    case AMDGPU::SI_SPILL_V128_RESTORE:
    case AMDGPU::SI_SPILL_V256_RESTORE:
    case AMDGPU::SI_SPILL_V512_RESTORE: {
      unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
      unsigned DstReg = MI->getOperand(0).getReg();
      int64_t Offset = FrameInfo->getObjectOffset(Index);
      unsigned Size = NumSubRegs * 4;
      unsigned TmpReg = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);

      // FIXME: We could use DS_READ_B64 here to optimize for larger registers.
      for (unsigned i = 0, e = NumSubRegs; i != e; ++i) {
        unsigned SubReg = NumSubRegs > 1 ?
            getPhysRegSubReg(DstReg, &AMDGPU::VGPR_32RegClass, i) :
            DstReg;

        Offset += (i * 4);
        unsigned AddrReg = TII->calculateLDSSpillAddress(*MBB, MI, RS, TmpReg,
                                                          Offset, Size);
        if (AddrReg == AMDGPU::NoRegister) {
           LLVMContext &Ctx = MF->getFunction()->getContext();
           Ctx.emitError("Ran out of VGPRs for spilling VGPRs");
           AddrReg = AMDGPU::VGPR0;
        }

        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::DS_READ_B32), SubReg)
                .addImm(0) // gds
                .addReg(AddrReg, RegState::Kill) // addr
                .addImm(0); //offset
      }
      MI->eraseFromParent();
      break;
    }

    default: {
      int64_t Offset = FrameInfo->getObjectOffset(Index);
      FIOp.ChangeToImmediate(Offset);
      if (!TII->isImmOperandLegal(MI, FIOperandNum, FIOp)) {
        unsigned TmpReg = RS->scavengeRegister(&AMDGPU::VReg_32RegClass, MI, SPAdj);
        BuildMI(*MBB, MI, MI->getDebugLoc(),
                TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
                .addImm(Offset);
        FIOp.ChangeToRegister(TmpReg, false);
      }
    }
  }
}