void SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
                                 int Index,
                                 RegScavenger *RS) const {
  MachineFunction *MF = MI->getParent()->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock *MBB = MI->getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
  const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const DebugLoc &DL = MI->getDebugLoc();

  unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
  unsigned SuperReg = MI->getOperand(0).getReg();

  // m0 cannot be used directly with readlane/writelane, so a temporary SGPR
  // and an extra copy are needed.
  bool IsM0 = (SuperReg == AMDGPU::M0);
  if (IsM0) {
    assert(NumSubRegs == 1);
    SuperReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
  }

  for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
    unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    unsigned SubReg = NumSubRegs == 1 ?
      SuperReg : getSubReg(SuperReg, getSubRegFromChannel(i));

    SIMachineFunctionInfo::SpilledReg Spill = MFI->getSpilledReg(MF, Index, i);
    if (Spill.hasReg()) {
      BuildMI(*MBB, MI, DL,
              TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32), SubReg)
        .addReg(Spill.VGPR)
        .addImm(Spill.Lane)
        .addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
    } else {
      // Restore SGPR from a stack slot.
      // FIXME: We should use S_LOAD_DWORD here for VI.
      unsigned Align = FrameInfo.getObjectAlignment(Index);
      unsigned Size = FrameInfo.getObjectSize(Index);

      MachinePointerInfo PtrInfo
        = MachinePointerInfo::getFixedStack(*MF, Index);
      MachineMemOperand *MMO = MF->getMachineMemOperand(
        PtrInfo, MachineMemOperand::MOLoad, Size, Align);

      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpReg)
        .addFrameIndex(Index)                   // vaddr
        .addReg(MFI->getScratchRSrcReg())       // srsrc
        .addReg(MFI->getScratchWaveOffsetReg()) // soffset
        .addImm(i * 4)                          // offset
        .addMemOperand(MMO);

      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
        .addReg(TmpReg, RegState::Kill)
        .addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
    }
  }

  if (IsM0 && SuperReg != AMDGPU::M0) {
    BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
      .addReg(SuperReg);
  }

  MI->eraseFromParent();
}
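// The readlane path above recovers each 32-bit sub-register from a specific
// lane of a spill VGPR recorded as the (Spill.VGPR, Spill.Lane) pair. A
// minimal standalone model of that lane bookkeeping, assuming a 64-lane
// wavefront; writeLane/readLane are illustrative names, not LLVM API:

#include <array>
#include <cassert>
#include <cstdint>

// V_WRITELANE_B32-like: park a 32-bit SGPR value in one lane of a spill VGPR.
inline void writeLane(std::array<uint32_t, 64> &VGPR, unsigned Lane,
                      uint32_t Val) {
  assert(Lane < VGPR.size() && "lane index must fit the wavefront");
  VGPR[Lane] = Val;
}

// V_READLANE_B32-like: recover that value from the same lane on restore.
inline uint32_t readLane(const std::array<uint32_t, 64> &VGPR, unsigned Lane) {
  assert(Lane < VGPR.size());
  return VGPR[Lane];
}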
void SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
                                 int Index,
                                 RegScavenger *RS) const {
  MachineFunction *MF = MI->getParent()->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock *MBB = MI->getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
  const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const DebugLoc &DL = MI->getDebugLoc();

  unsigned SuperReg = MI->getOperand(0).getReg();
  bool SpillToSMEM = ST.hasScalarStores() && EnableSpillSGPRToSMEM;

  assert(SuperReg != AMDGPU::M0 && "m0 should never spill");

  unsigned OffsetReg = AMDGPU::M0;
  unsigned M0CopyReg = AMDGPU::NoRegister;

  if (SpillToSMEM) {
    if (RS->isRegUsed(AMDGPU::M0)) {
      M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
        .addReg(AMDGPU::M0);
    }
  }

  unsigned EltSize = 4;
  unsigned ScalarLoadOp;

  const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
  if (SpillToSMEM && isSGPRClass(RC)) {
    // XXX - if private_element_size is larger than 4 it might be useful to be
    // able to spill wider vmem spills.
    std::tie(EltSize, ScalarLoadOp) = getSpillEltSize(RC->getSize(), false);
  }

  ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
  unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();

  // SubReg carries the "Kill" flag when SubReg == SuperReg.
  int64_t FrOffset = FrameInfo.getObjectOffset(Index);

  for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
    unsigned SubReg = NumSubRegs == 1 ?
      SuperReg : getSubReg(SuperReg, SplitParts[i]);

    if (SpillToSMEM) {
      // FIXME: Size may be > 4 but extra bytes wasted.
      unsigned Align = FrameInfo.getObjectAlignment(Index);
      MachinePointerInfo PtrInfo
        = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
      MachineMemOperand *MMO
        = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
                                   EltSize, MinAlign(Align, EltSize * i));

      // Add i * 4 offset
      int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
      if (Offset != 0) {
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
          .addReg(MFI->getScratchWaveOffsetReg())
          .addImm(Offset);
      } else {
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
          .addReg(MFI->getScratchWaveOffsetReg());
      }

      auto MIB =
        BuildMI(*MBB, MI, DL, TII->get(ScalarLoadOp), SubReg)
        .addReg(MFI->getScratchRSrcReg())  // sbase
        .addReg(OffsetReg, RegState::Kill) // soff
        .addImm(0)                         // glc
        .addMemOperand(MMO);

      if (NumSubRegs > 1)
        MIB.addReg(SuperReg, RegState::ImplicitDefine);

      continue;
    }

    SIMachineFunctionInfo::SpilledReg Spill = MFI->getSpilledReg(MF, Index, i);
    if (Spill.hasReg()) {
      auto MIB =
        BuildMI(*MBB, MI, DL,
                TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32), SubReg)
        .addReg(Spill.VGPR)
        .addImm(Spill.Lane);

      if (NumSubRegs > 1)
        MIB.addReg(SuperReg, RegState::ImplicitDefine);
    } else {
      // Restore SGPR from a stack slot.
      // FIXME: We should use S_LOAD_DWORD here for VI.
      unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
      unsigned Align = FrameInfo.getObjectAlignment(Index);

      MachinePointerInfo PtrInfo
        = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);

      MachineMemOperand *MMO
        = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
                                   EltSize, MinAlign(Align, EltSize * i));

      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpReg)
        .addFrameIndex(Index)                   // vaddr
        .addReg(MFI->getScratchRSrcReg())       // srsrc
        .addReg(MFI->getScratchWaveOffsetReg()) // soffset
        .addImm(i * 4)                          // offset
        .addMemOperand(MMO);

      auto MIB =
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
        .addReg(TmpReg, RegState::Kill);

      if (NumSubRegs > 1)
        MIB.addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
    }
  }

  if (M0CopyReg != AMDGPU::NoRegister) {
    BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
      .addReg(M0CopyReg, RegState::Kill);
  }

  MI->eraseFromParent();
}
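// A small standalone sketch of the offset arithmetic in the SpillToSMEM path
// above, under the assumption that frame object offsets are per-lane and the
// scalar load needs a flat byte offset into the wave's scratch region. The
// helper name and the example constants are illustrative only:

#include <cstdint>
#include <cstdio>

int64_t smemRestoreOffset(int64_t FrameObjectOffset, unsigned WavefrontSize,
                          unsigned EltSize, unsigned SubRegIdx) {
  // Scale the per-lane frame offset by the wavefront size, then step by
  // EltSize bytes for each sub-register being reloaded.
  return int64_t(WavefrontSize) * FrameObjectOffset +
         int64_t(EltSize) * SubRegIdx;
}

int main() {
  // e.g. a 128-bit SGPR tuple (4 dwords) spilled at frame offset 8 on a
  // 64-lane wave: sub-registers land 4 bytes apart starting at byte 512.
  for (unsigned i = 0; i < 4; ++i)
    std::printf("sub%u -> byte offset %lld\n", i,
                (long long)smemRestoreOffset(8, 64, 4, i));
  return 0;
}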
void SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
                                 int Index,
                                 RegScavenger *RS) const {
  MachineFunction *MF = MI->getParent()->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock *MBB = MI->getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
  const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const DebugLoc &DL = MI->getDebugLoc();

  unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
  unsigned SuperReg = MI->getOperand(0).getReg();
  bool SpillToSMEM = ST.hasScalarStores() && EnableSpillSGPRToSMEM;

  assert(SuperReg != AMDGPU::M0 && "m0 should never spill");

  unsigned OffsetReg = AMDGPU::M0;
  unsigned M0CopyReg = AMDGPU::NoRegister;

  if (SpillToSMEM) {
    if (RS->isRegUsed(AMDGPU::M0)) {
      M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
        .addReg(AMDGPU::M0);
    }
  }

  // SubReg carries the "Kill" flag when SubReg == SuperReg.
  int64_t FrOffset = FrameInfo.getObjectOffset(Index);

  const unsigned EltSize = 4;

  for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
    unsigned SubReg = NumSubRegs == 1 ?
      SuperReg : getSubReg(SuperReg, getSubRegFromChannel(i));

    if (SpillToSMEM) {
      unsigned Align = FrameInfo.getObjectAlignment(Index);
      MachinePointerInfo PtrInfo
        = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
      MachineMemOperand *MMO
        = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
                                   EltSize, MinAlign(Align, EltSize * i));

      // Add i * 4 offset
      int64_t Offset = ST.getWavefrontSize() * (FrOffset + 4 * i);
      if (Offset != 0) {
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
          .addReg(MFI->getScratchWaveOffsetReg())
          .addImm(Offset);
      } else {
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
          .addReg(MFI->getScratchWaveOffsetReg());
      }

      auto MIB =
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_BUFFER_LOAD_DWORD_SGPR),
                SubReg)
        .addReg(MFI->getScratchRSrcReg())  // sbase
        .addReg(OffsetReg, RegState::Kill) // soff
        .addImm(0)                         // glc
        .addMemOperand(MMO);

      if (NumSubRegs > 1)
        MIB.addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);

      continue;
    }

    SIMachineFunctionInfo::SpilledReg Spill = MFI->getSpilledReg(MF, Index, i);
    if (Spill.hasReg()) {
      auto MIB =
        BuildMI(*MBB, MI, DL,
                TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32), SubReg)
        .addReg(Spill.VGPR)
        .addImm(Spill.Lane);

      if (NumSubRegs > 1)
        MIB.addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
    } else {
      // Restore SGPR from a stack slot.
      // FIXME: We should use S_LOAD_DWORD here for VI.
      unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
      unsigned Align = FrameInfo.getObjectAlignment(Index);

      MachinePointerInfo PtrInfo
        = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);

      MachineMemOperand *MMO
        = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
                                   EltSize, MinAlign(Align, EltSize * i));

      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpReg)
        .addFrameIndex(Index)                   // vaddr
        .addReg(MFI->getScratchRSrcReg())       // srsrc
        .addReg(MFI->getScratchWaveOffsetReg()) // soffset
        .addImm(i * 4)                          // offset
        .addMemOperand(MMO);

      auto MIB =
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
        .addReg(TmpReg, RegState::Kill);

      if (NumSubRegs > 1)
        MIB.addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
    }
  }

  if (M0CopyReg != AMDGPU::NoRegister) {
    BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
      .addReg(M0CopyReg, RegState::Kill);
  }

  MI->eraseFromParent();
}
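// Note that this variant folds the per-sub-register step into the wave
// scaling, WavefrontSize * (FrOffset + 4 * i), whereas the previous variant
// adds the step after scaling, (WavefrontSize * FrOffset) + EltSize * i. A
// quick standalone check of how the two expressions diverge once i > 0
// (the constants below are illustrative only):

#include <cstdint>
#include <cstdio>

int main() {
  const int64_t WaveSize = 64, EltSize = 4, FrOffset = 8;
  for (int64_t i = 0; i < 4; ++i)
    std::printf("i=%lld: scaled-then-stepped=%lld folded=%lld\n",
                (long long)i,
                (long long)(WaveSize * FrOffset + EltSize * i),
                (long long)(WaveSize * (FrOffset + 4 * i)));
  return 0;
}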
void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
                                         int SPAdj, unsigned FIOperandNum,
                                         RegScavenger *RS) const {
  MachineFunction *MF = MI->getParent()->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock *MBB = MI->getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
  const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  DebugLoc DL = MI->getDebugLoc();

  MachineOperand &FIOp = MI->getOperand(FIOperandNum);
  int Index = MI->getOperand(FIOperandNum).getIndex();

  switch (MI->getOpcode()) {
    // SGPR register spill
    case AMDGPU::SI_SPILL_S512_SAVE:
    case AMDGPU::SI_SPILL_S256_SAVE:
    case AMDGPU::SI_SPILL_S128_SAVE:
    case AMDGPU::SI_SPILL_S64_SAVE:
    case AMDGPU::SI_SPILL_S32_SAVE: {
      unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
      unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
      unsigned SuperReg = MI->getOperand(0).getReg();
      bool IsKill = MI->getOperand(0).isKill();

      // SubReg carries the "Kill" flag when SubReg == SuperReg.
      unsigned SubKillState = getKillRegState((NumSubRegs == 1) && IsKill);
      for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
        unsigned SubReg = NumSubRegs == 1 ?
          SuperReg : getSubReg(SuperReg, getSubRegFromChannel(i));

        struct SIMachineFunctionInfo::SpilledReg Spill =
          MFI->getSpilledReg(MF, Index, i);
        if (Spill.hasReg()) {
          BuildMI(*MBB, MI, DL,
                  TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
                  Spill.VGPR)
            .addReg(SubReg, getKillRegState(IsKill))
            .addImm(Spill.Lane);

          // FIXME: Since this spills to another register instead of an actual
          // frame index, we should delete the frame index when all references
          // to it are fixed.
        } else {
          // Spill SGPR to a frame index.
          // FIXME: We should use S_STORE_DWORD here for VI.
          MachineInstrBuilder Mov
            = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
            .addReg(SubReg, SubKillState);

          // There could be undef components of a spilled super register.
          // TODO: Can we detect this and skip the spill?
          if (NumSubRegs > 1) {
            // The last implicit use of the SuperReg carries the "Kill" flag.
            unsigned SuperKillState = 0;
            if (i + 1 == e)
              SuperKillState |= getKillRegState(IsKill);
            Mov.addReg(SuperReg, RegState::Implicit | SuperKillState);
          }

          unsigned Size = FrameInfo.getObjectSize(Index);
          unsigned Align = FrameInfo.getObjectAlignment(Index);
          MachinePointerInfo PtrInfo
            = MachinePointerInfo::getFixedStack(*MF, Index);
          MachineMemOperand *MMO
            = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
                                       Size, Align);
          BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE))
            .addReg(TmpReg, RegState::Kill)         // src
            .addFrameIndex(Index)                   // frame_idx
            .addReg(MFI->getScratchRSrcReg())       // scratch_rsrc
            .addReg(MFI->getScratchWaveOffsetReg()) // scratch_offset
            .addImm(i * 4)                          // offset
            .addMemOperand(MMO);
        }
      }
      MI->eraseFromParent();
      MFI->addToSpilledSGPRs(NumSubRegs);
      break;
    }

    // SGPR register restore
    case AMDGPU::SI_SPILL_S512_RESTORE:
    case AMDGPU::SI_SPILL_S256_RESTORE:
    case AMDGPU::SI_SPILL_S128_RESTORE:
    case AMDGPU::SI_SPILL_S64_RESTORE:
    case AMDGPU::SI_SPILL_S32_RESTORE: {
      unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
      unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
      unsigned SuperReg = MI->getOperand(0).getReg();

      for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
        unsigned SubReg = NumSubRegs == 1 ?
          SuperReg : getSubReg(SuperReg, getSubRegFromChannel(i));

        SIMachineFunctionInfo::SpilledReg Spill =
          MFI->getSpilledReg(MF, Index, i);

        if (Spill.hasReg()) {
          BuildMI(*MBB, MI, DL,
                  TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32), SubReg)
            .addReg(Spill.VGPR)
            .addImm(Spill.Lane)
            .addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
        } else {
          // Restore SGPR from a stack slot.
          // FIXME: We should use S_LOAD_DWORD here for VI.
          unsigned Align = FrameInfo.getObjectAlignment(Index);
          unsigned Size = FrameInfo.getObjectSize(Index);

          MachinePointerInfo PtrInfo
            = MachinePointerInfo::getFixedStack(*MF, Index);

          MachineMemOperand *MMO = MF->getMachineMemOperand(
            PtrInfo, MachineMemOperand::MOLoad, Size, Align);

          BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpReg)
            .addFrameIndex(Index)                   // frame_idx
            .addReg(MFI->getScratchRSrcReg())       // scratch_rsrc
            .addReg(MFI->getScratchWaveOffsetReg()) // scratch_offset
            .addImm(i * 4)                          // offset
            .addMemOperand(MMO);

          BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
            .addReg(TmpReg, RegState::Kill)
            .addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
        }
      }

      MI->eraseFromParent();
      break;
    }

    // VGPR register spill
    case AMDGPU::SI_SPILL_V512_SAVE:
    case AMDGPU::SI_SPILL_V256_SAVE:
    case AMDGPU::SI_SPILL_V128_SAVE:
    case AMDGPU::SI_SPILL_V96_SAVE:
    case AMDGPU::SI_SPILL_V64_SAVE:
    case AMDGPU::SI_SPILL_V32_SAVE:
      buildScratchLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
            TII->getNamedOperand(*MI, AMDGPU::OpName::src),
            TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_rsrc)->getReg(),
            TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(),
            FrameInfo.getObjectOffset(Index) +
            TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(), RS);
      // Record spill statistics before the pseudo is erased; the opcode must
      // not be read after eraseFromParent().
      MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(MI->getOpcode()));
      MI->eraseFromParent();
      break;

    case AMDGPU::SI_SPILL_V32_RESTORE:
    case AMDGPU::SI_SPILL_V64_RESTORE:
    case AMDGPU::SI_SPILL_V96_RESTORE:
    case AMDGPU::SI_SPILL_V128_RESTORE:
    case AMDGPU::SI_SPILL_V256_RESTORE:
    case AMDGPU::SI_SPILL_V512_RESTORE: {
      buildScratchLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
            TII->getNamedOperand(*MI, AMDGPU::OpName::dst),
            TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_rsrc)->getReg(),
            TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(),
            FrameInfo.getObjectOffset(Index) +
            TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(), RS);
      MI->eraseFromParent();
      break;
    }

    default: {
      int64_t Offset = FrameInfo.getObjectOffset(Index);
      FIOp.ChangeToImmediate(Offset);
      if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) {
        unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
        BuildMI(*MBB, MI, MI->getDebugLoc(),
                TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
          .addImm(Offset);
        FIOp.ChangeToRegister(TmpReg, false, false, true);
      }
    }
  }
}
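// The spill pseudos handled above are selected by register width, and
// getNumSubRegsForSpillOp presumably recovers the number of 32-bit
// sub-registers from the opcode. A hypothetical standalone equivalent keyed
// on the bit width in the pseudo's name (illustrative only, not the LLVM
// helper):

#include <cassert>

unsigned subRegCountForSpillWidth(unsigned Bits) {
  assert(Bits % 32 == 0 && "spill pseudos cover whole 32-bit sub-registers");
  return Bits / 32; // e.g. SI_SPILL_S512_SAVE -> 512 / 32 = 16 dwords
}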