static void writeSPToMemory(unsigned SrcReg, MachineFunction &MF,
                            MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator &InsertAddr,
                            MachineBasicBlock::iterator &InsertStore,
                            const DebugLoc &DL) {
  const char *ES = "__stack_pointer";
  auto *SPSymbol = MF.createExternalSymbolName(ES);
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const TargetRegisterClass *PtrRC =
      MRI.getTargetRegisterInfo()->getPointerRegClass(MF);
  unsigned Zero = MRI.createVirtualRegister(PtrRC);
  const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();

  BuildMI(MBB, InsertAddr, DL, TII->get(WebAssembly::CONST_I32), Zero)
      .addImm(0);
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      MachinePointerInfo(MF.getPSVManager().getExternalSymbolCallEntry(ES)),
      MachineMemOperand::MOStore, 4, 4);
  BuildMI(MBB, InsertStore, DL, TII->get(WebAssembly::STORE_I32))
      .addImm(2)  // p2align
      .addExternalSymbol(SPSymbol)
      .addReg(Zero)
      .addReg(SrcReg)
      .addMemOperand(MMO);
}
void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF,
                                            MachineBasicBlock &MBB) const {
  // TODO: Do ".setMIFlag(MachineInstr::FrameSetup)" on emitted instructions
  auto &MFI = MF.getFrameInfo();
  assert(MFI.getCalleeSavedInfo().empty() &&
         "WebAssembly should not have callee-saved registers");

  if (!needsSP(MF, MFI))
    return;
  uint64_t StackSize = MFI.getStackSize();

  const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
  auto &MRI = MF.getRegInfo();

  auto InsertPt = MBB.begin();
  DebugLoc DL;

  const TargetRegisterClass *PtrRC =
      MRI.getTargetRegisterInfo()->getPointerRegClass(MF);
  unsigned Zero = MRI.createVirtualRegister(PtrRC);
  unsigned SPReg = MRI.createVirtualRegister(PtrRC);
  const char *ES = "__stack_pointer";
  auto *SPSymbol = MF.createExternalSymbolName(ES);
  BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), Zero)
      .addImm(0);
  MachineMemOperand *LoadMMO = MF.getMachineMemOperand(
      MachinePointerInfo(MF.getPSVManager().getExternalSymbolCallEntry(ES)),
      MachineMemOperand::MOLoad, 4, 4);
  // Load the SP value.
  BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::LOAD_I32),
          StackSize ? SPReg : (unsigned)WebAssembly::SP32)
      .addImm(2)     // p2align
      .addExternalSymbol(SPSymbol)
      .addReg(Zero)  // addr
      .addMemOperand(LoadMMO);

  if (StackSize) {
    // Subtract the frame size
    unsigned OffsetReg = MRI.createVirtualRegister(PtrRC);
    BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg)
        .addImm(StackSize);
    BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::SUB_I32),
            WebAssembly::SP32)
        .addReg(SPReg)
        .addReg(OffsetReg);
  }
  if (hasFP(MF)) {
    // Unlike most conventional targets (where FP points to the saved FP),
    // FP points to the bottom of the fixed-size locals, so we can use positive
    // offsets in load/store instructions.
    BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::COPY), WebAssembly::FP32)
        .addReg(WebAssembly::SP32);
  }
  if (StackSize && needsSPWriteback(MF, MFI)) {
    writeSPToMemory(WebAssembly::SP32, MF, MBB, InsertPt, InsertPt, DL);
  }
}
static MachineMemOperand *getFrameIndexMMO(MachineBasicBlock &MBB,
                                           int FrameIndex, unsigned flags) {
  MachineFunction *MF = MBB.getParent();
  const MachineFrameInfo &MFI = *MF->getFrameInfo();
  MachineMemOperand *MMO = MF->getMachineMemOperand(
      MachinePointerInfo::getFixedStack(FrameIndex), flags,
      MFI.getObjectSize(FrameIndex), MFI.getObjectAlignment(FrameIndex));
  return MMO;
}
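// Hedged usage sketch (not from the source): the helper above packages the
// MachinePointerInfo/size/alignment boilerplate that the surrounding
// loadRegFromStackSlot/storeRegToStackSlot bodies repeat inline, so a frame
// load could be emitted as:
//
//   MachineMemOperand *MMO =
//       getFrameIndexMMO(MBB, FrameIndex, MachineMemOperand::MOLoad);
//   BuildMI(MBB, I, DL, get(XCore::LDWFI), DestReg)
//       .addFrameIndex(FrameIndex)
//       .addImm(0)
//       .addMemOperand(MMO);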
void XCoreInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator I,
                                          unsigned DestReg, int FrameIndex,
                                          const TargetRegisterClass *RC,
                                          const TargetRegisterInfo *TRI) const {
  DebugLoc DL;
  if (I != MBB.end() && !I->isDebugValue())
    DL = I->getDebugLoc();
  MachineFunction *MF = MBB.getParent();
  const MachineFrameInfo &MFI = *MF->getFrameInfo();
  MachineMemOperand *MMO = MF->getMachineMemOperand(
      MachinePointerInfo::getFixedStack(*MF, FrameIndex),
      MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIndex),
      MFI.getObjectAlignment(FrameIndex));
  BuildMI(MBB, I, DL, get(XCore::LDWFI), DestReg)
      .addFrameIndex(FrameIndex)
      .addImm(0)
      .addMemOperand(MMO);
}
void XCoreInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator I,
                                         unsigned SrcReg, bool isKill,
                                         int FrameIndex,
                                         const TargetRegisterClass *RC,
                                         const TargetRegisterInfo *TRI) const {
  DebugLoc DL;
  if (I != MBB.end() && !I->isDebugValue())
    DL = I->getDebugLoc();
  MachineFunction *MF = MBB.getParent();
  const MachineFrameInfo &MFI = *MF->getFrameInfo();
  MachineMemOperand *MMO = MF->getMachineMemOperand(
      MachinePointerInfo::getFixedStack(*MF, FrameIndex),
      MachineMemOperand::MOStore, MFI.getObjectSize(FrameIndex),
      MFI.getObjectAlignment(FrameIndex));
  BuildMI(MBB, I, DL, get(XCore::STWFI))
      .addReg(SrcReg, getKillRegState(isKill))
      .addFrameIndex(FrameIndex)
      .addImm(0)
      .addMemOperand(MMO);
}
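// Hedged sketch of how a spiller is expected to drive the two hooks above
// (variable names assumed; illustration only, not the source's code):
//
//   // Create a slot sized and aligned for the register class, then spill
//   // around the interfering range and reload before the next use.
//   int FI = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(),
//                                                       RC->getAlignment());
//   TII->storeRegToStackSlot(MBB, SpillPt, Reg, /*isKill=*/true, FI, RC, TRI);
//   ...
//   TII->loadRegFromStackSlot(MBB, ReloadPt, Reg, FI, RC, TRI);
//
// The fixed-stack MachineMemOperand attached in both hooks is what lets later
// passes reason about the slot's size, alignment, and aliasing.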
void SparcInstrInfo::
storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                    unsigned SrcReg, bool isKill, int FI,
                    const TargetRegisterClass *RC,
                    const TargetRegisterInfo *TRI) const {
  DebugLoc DL;
  if (I != MBB.end())
    DL = I->getDebugLoc();

  MachineFunction *MF = MBB.getParent();
  const MachineFrameInfo &MFI = *MF->getFrameInfo();
  MachineMemOperand *MMO = MF->getMachineMemOperand(
      MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore,
      MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));

  // On the order of operands here: think "[FrameIdx + 0] = SrcReg".
  if (RC == &SP::I64RegsRegClass)
    BuildMI(MBB, I, DL, get(SP::STXri)).addFrameIndex(FI).addImm(0)
        .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
  else if (RC == &SP::IntRegsRegClass)
    BuildMI(MBB, I, DL, get(SP::STri)).addFrameIndex(FI).addImm(0)
        .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
  else if (RC == &SP::IntPairRegClass)
    BuildMI(MBB, I, DL, get(SP::STDri)).addFrameIndex(FI).addImm(0)
        .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
  else if (RC == &SP::FPRegsRegClass)
    BuildMI(MBB, I, DL, get(SP::STFri)).addFrameIndex(FI).addImm(0)
        .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
  else if (SP::DFPRegsRegClass.hasSubClassEq(RC))
    BuildMI(MBB, I, DL, get(SP::STDFri)).addFrameIndex(FI).addImm(0)
        .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
  else if (SP::QFPRegsRegClass.hasSubClassEq(RC))
    // Use STQFri irrespective of its legality. If STQ is not legal, it will
    // be lowered into two STDs in eliminateFrameIndex.
    BuildMI(MBB, I, DL, get(SP::STQFri)).addFrameIndex(FI).addImm(0)
        .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
  else
    llvm_unreachable("Can't store this register to stack slot");
}
void SparcInstrInfo::
loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                     unsigned DestReg, int FI,
                     const TargetRegisterClass *RC,
                     const TargetRegisterInfo *TRI) const {
  DebugLoc DL;
  if (I != MBB.end())
    DL = I->getDebugLoc();

  MachineFunction *MF = MBB.getParent();
  const MachineFrameInfo &MFI = *MF->getFrameInfo();
  MachineMemOperand *MMO = MF->getMachineMemOperand(
      MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad,
      MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));

  if (RC == &SP::I64RegsRegClass)
    BuildMI(MBB, I, DL, get(SP::LDXri), DestReg).addFrameIndex(FI).addImm(0)
        .addMemOperand(MMO);
  else if (RC == &SP::IntRegsRegClass)
    BuildMI(MBB, I, DL, get(SP::LDri), DestReg).addFrameIndex(FI).addImm(0)
        .addMemOperand(MMO);
  else if (RC == &SP::IntPairRegClass)
    BuildMI(MBB, I, DL, get(SP::LDDri), DestReg).addFrameIndex(FI).addImm(0)
        .addMemOperand(MMO);
  else if (RC == &SP::FPRegsRegClass)
    BuildMI(MBB, I, DL, get(SP::LDFri), DestReg).addFrameIndex(FI).addImm(0)
        .addMemOperand(MMO);
  else if (SP::DFPRegsRegClass.hasSubClassEq(RC))
    BuildMI(MBB, I, DL, get(SP::LDDFri), DestReg).addFrameIndex(FI).addImm(0)
        .addMemOperand(MMO);
  else if (SP::QFPRegsRegClass.hasSubClassEq(RC))
    // Use LDQFri irrespective of its legality. If LDQ is not legal, it will
    // be lowered into two LDDs in eliminateFrameIndex.
    BuildMI(MBB, I, DL, get(SP::LDQFri), DestReg).addFrameIndex(FI).addImm(0)
        .addMemOperand(MMO);
  else
    llvm_unreachable("Can't load this register from stack slot");
}
void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF,
                                            MachineBasicBlock &MBB) const {
  // TODO: Do ".setMIFlag(MachineInstr::FrameSetup)" on emitted instructions
  auto &MFI = MF.getFrameInfo();
  assert(MFI.getCalleeSavedInfo().empty() &&
         "WebAssembly should not have callee-saved registers");

  if (!needsSP(MF, MFI))
    return;
  uint64_t StackSize = MFI.getStackSize();

  const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
  auto &MRI = MF.getRegInfo();

  auto InsertPt = MBB.begin();
  while (InsertPt != MBB.end() && WebAssembly::isArgument(*InsertPt))
    ++InsertPt;
  DebugLoc DL;

  const TargetRegisterClass *PtrRC =
      MRI.getTargetRegisterInfo()->getPointerRegClass(MF);
  unsigned SPReg = WebAssembly::SP32;
  if (StackSize)
    SPReg = MRI.createVirtualRegister(PtrRC);

  const char *ES = "__stack_pointer";
  auto *SPSymbol = MF.createExternalSymbolName(ES);
  if (MF.getSubtarget<WebAssemblySubtarget>()
        .getTargetTriple().isOSBinFormatELF()) {
    unsigned Zero = MRI.createVirtualRegister(PtrRC);

    BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), Zero)
        .addImm(0);
    MachineMemOperand *LoadMMO = MF.getMachineMemOperand(
        MachinePointerInfo(MF.getPSVManager().getExternalSymbolCallEntry(ES)),
        MachineMemOperand::MOLoad, 4, 4);
    // Load the SP value.
    BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::LOAD_I32), SPReg)
        .addImm(2)     // p2align
        .addExternalSymbol(SPSymbol)
        .addReg(Zero)  // addr
        .addMemOperand(LoadMMO);
  } else {
    BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::GET_GLOBAL_I32), SPReg)
        .addExternalSymbol(SPSymbol);
  }

  bool HasBP = hasBP(MF);
  if (HasBP) {
    auto FI = MF.getInfo<WebAssemblyFunctionInfo>();
    unsigned BasePtr = MRI.createVirtualRegister(PtrRC);
    FI->setBasePointerVreg(BasePtr);
    BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::COPY), BasePtr)
        .addReg(SPReg);
  }
  if (StackSize) {
    // Subtract the frame size
    unsigned OffsetReg = MRI.createVirtualRegister(PtrRC);
    BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg)
        .addImm(StackSize);
    BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::SUB_I32),
            WebAssembly::SP32)
        .addReg(SPReg)
        .addReg(OffsetReg);
  }
  if (HasBP) {
    unsigned BitmaskReg = MRI.createVirtualRegister(PtrRC);
    unsigned Alignment = MFI.getMaxAlignment();
    assert((1u << countTrailingZeros(Alignment)) == Alignment &&
           "Alignment must be a power of 2");
    BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), BitmaskReg)
        .addImm((int)~(Alignment - 1));
    BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::AND_I32),
            WebAssembly::SP32)
        .addReg(WebAssembly::SP32)
        .addReg(BitmaskReg);
  }
  if (hasFP(MF)) {
    // Unlike most conventional targets (where FP points to the saved FP),
    // FP points to the bottom of the fixed-size locals, so we can use positive
    // offsets in load/store instructions.
    BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::COPY), WebAssembly::FP32)
        .addReg(WebAssembly::SP32);
  }
  if (StackSize && needsSPWriteback(MF, MFI)) {
    writeSPToMemory(WebAssembly::SP32, MF, MBB, InsertPt, InsertPt, DL);
  }
}
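// A minimal standalone sketch (plain C++, assumed example values) of the SP
// arithmetic the prologue above emits as CONST_I32/SUB_I32/AND_I32: subtract
// the frame size, then round down to the maximum stack alignment when a base
// pointer is needed.
#include <cassert>
#include <cstdint>

int main() {
  uint32_t SP = 65536;      // value loaded from __stack_pointer
  uint32_t StackSize = 48;  // MFI.getStackSize()
  uint32_t MaxAlign = 16;   // MFI.getMaxAlignment(), a power of 2
  // SUB_I32 followed by AND_I32 with the bitmask ~(Alignment - 1):
  uint32_t NewSP = (SP - StackSize) & ~(MaxAlign - 1);
  assert(NewSP == 65488 && NewSP % MaxAlign == 0);
  return 0;
}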
void SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
                               int Index,
                               RegScavenger *RS) const {
  MachineBasicBlock *MBB = MI->getParent();
  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();

  unsigned SuperReg = MI->getOperand(0).getReg();
  bool IsKill = MI->getOperand(0).isKill();
  const DebugLoc &DL = MI->getDebugLoc();

  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo &FrameInfo = MF->getFrameInfo();

  bool SpillToSMEM = ST.hasScalarStores() && EnableSpillSGPRToSMEM;

  assert(SuperReg != AMDGPU::M0 && "m0 should never spill");

  unsigned OffsetReg = AMDGPU::M0;
  unsigned M0CopyReg = AMDGPU::NoRegister;

  if (SpillToSMEM) {
    if (RS->isRegUsed(AMDGPU::M0)) {
      M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
          .addReg(AMDGPU::M0);
    }
  }

  unsigned ScalarStoreOp;
  unsigned EltSize = 4;
  const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
  if (SpillToSMEM && isSGPRClass(RC)) {
    // XXX - if private_element_size is larger than 4 it might be useful to be
    // able to spill wider vmem spills.
    std::tie(EltSize, ScalarStoreOp) = getSpillEltSize(RC->getSize(), true);
  }

  ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
  unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();

  // SubReg carries the "Kill" flag when SubReg == SuperReg.
  unsigned SubKillState = getKillRegState((NumSubRegs == 1) && IsKill);
  for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
    unsigned SubReg = NumSubRegs == 1 ?
      SuperReg : getSubReg(SuperReg, SplitParts[i]);

    if (SpillToSMEM) {
      int64_t FrOffset = FrameInfo.getObjectOffset(Index);

      // The allocated memory size is really the wavefront size * the frame
      // index size. The widest register class is 64 bytes, so a 4-byte scratch
      // allocation is enough to spill this in a single stack object.
      //
      // FIXME: Frame size/offsets are computed earlier than this, so the extra
      // space is still unnecessarily allocated.

      unsigned Align = FrameInfo.getObjectAlignment(Index);
      MachinePointerInfo PtrInfo
        = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
      MachineMemOperand *MMO
        = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
                                   EltSize, MinAlign(Align, EltSize * i));

      // SMEM instructions only support a single offset, so increment the wave
      // offset.
      int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
      if (Offset != 0) {
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
            .addReg(MFI->getScratchWaveOffsetReg())
            .addImm(Offset);
      } else {
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
            .addReg(MFI->getScratchWaveOffsetReg());
      }

      BuildMI(*MBB, MI, DL, TII->get(ScalarStoreOp))
          .addReg(SubReg, getKillRegState(IsKill)) // sdata
          .addReg(MFI->getScratchRSrcReg())        // sbase
          .addReg(OffsetReg, RegState::Kill)       // soff
          .addImm(0)                               // glc
          .addMemOperand(MMO);

      continue;
    }

    struct SIMachineFunctionInfo::SpilledReg Spill =
      MFI->getSpilledReg(MF, Index, i);
    if (Spill.hasReg()) {
      BuildMI(*MBB, MI, DL,
              TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
              Spill.VGPR)
          .addReg(SubReg, getKillRegState(IsKill))
          .addImm(Spill.Lane);

      // FIXME: Since this spills to another register instead of an actual
      // frame index, we should delete the frame index when all references to
      // it are fixed.
    } else {
      // Spill SGPR to a frame index.
      // TODO: Should VI try to spill to VGPR and then spill to SMEM?
      unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

      MachineInstrBuilder Mov =
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
            .addReg(SubReg, SubKillState);

      // There could be undef components of a spilled super register.
      // TODO: Can we detect this and skip the spill?
      if (NumSubRegs > 1) {
        // The last implicit use of the SuperReg carries the "Kill" flag.
        unsigned SuperKillState = 0;
        if (i + 1 == e)
          SuperKillState |= getKillRegState(IsKill);
        Mov.addReg(SuperReg, RegState::Implicit | SuperKillState);
      }

      unsigned Align = FrameInfo.getObjectAlignment(Index);
      MachinePointerInfo PtrInfo
        = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
      MachineMemOperand *MMO
        = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
                                   EltSize, MinAlign(Align, EltSize * i));
      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE))
          .addReg(TmpReg, RegState::Kill)          // src
          .addFrameIndex(Index)                    // vaddr
          .addReg(MFI->getScratchRSrcReg())        // srsrc
          .addReg(MFI->getScratchWaveOffsetReg())  // soffset
          .addImm(i * 4)                           // offset
          .addMemOperand(MMO);
    }
  }

  if (M0CopyReg != AMDGPU::NoRegister) {
    BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
        .addReg(M0CopyReg, RegState::Kill);
  }

  MI->eraseFromParent();
  MFI->addToSpilledSGPRs(NumSubRegs);
}
void SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
                                 int Index,
                                 RegScavenger *RS) const {
  MachineFunction *MF = MI->getParent()->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock *MBB = MI->getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
  const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const DebugLoc &DL = MI->getDebugLoc();

  unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
  unsigned SuperReg = MI->getOperand(0).getReg();

  // m0 is not allowed as an operand of readlane/writelane, so a temporary
  // SGPR and an extra copy are needed.
  bool IsM0 = (SuperReg == AMDGPU::M0);
  if (IsM0) {
    assert(NumSubRegs == 1);
    SuperReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
  }

  for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
    unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    unsigned SubReg = NumSubRegs == 1 ?
      SuperReg : getSubReg(SuperReg, getSubRegFromChannel(i));

    SIMachineFunctionInfo::SpilledReg Spill = MFI->getSpilledReg(MF, Index, i);
    if (Spill.hasReg()) {
      BuildMI(*MBB, MI, DL,
              TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
              SubReg)
          .addReg(Spill.VGPR)
          .addImm(Spill.Lane)
          .addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
    } else {
      // Restore SGPR from a stack slot.
      // FIXME: We should use S_LOAD_DWORD here for VI.
      unsigned Align = FrameInfo.getObjectAlignment(Index);
      unsigned Size = FrameInfo.getObjectSize(Index);

      MachinePointerInfo PtrInfo
        = MachinePointerInfo::getFixedStack(*MF, Index);
      MachineMemOperand *MMO = MF->getMachineMemOperand(
          PtrInfo, MachineMemOperand::MOLoad, Size, Align);

      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpReg)
          .addFrameIndex(Index)                    // vaddr
          .addReg(MFI->getScratchRSrcReg())        // srsrc
          .addReg(MFI->getScratchWaveOffsetReg())  // soffset
          .addImm(i * 4)                           // offset
          .addMemOperand(MMO);
      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
          .addReg(TmpReg, RegState::Kill)
          .addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
    }
  }

  if (IsM0 && SuperReg != AMDGPU::M0) {
    BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
        .addReg(SuperReg);
  }

  MI->eraseFromParent();
}
void SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
                               int Index,
                               RegScavenger *RS) const {
  MachineFunction *MF = MI->getParent()->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock *MBB = MI->getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
  const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const DebugLoc &DL = MI->getDebugLoc();

  unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
  unsigned SuperReg = MI->getOperand(0).getReg();
  bool IsKill = MI->getOperand(0).isKill();

  // SubReg carries the "Kill" flag when SubReg == SuperReg.
  unsigned SubKillState = getKillRegState((NumSubRegs == 1) && IsKill);
  for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
    unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    unsigned SubReg = NumSubRegs == 1 ?
      SuperReg : getSubReg(SuperReg, getSubRegFromChannel(i));

    struct SIMachineFunctionInfo::SpilledReg Spill =
      MFI->getSpilledReg(MF, Index, i);
    if (Spill.hasReg()) {
      if (SuperReg == AMDGPU::M0) {
        assert(NumSubRegs == 1);
        unsigned CopyM0 =
          MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);

        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), CopyM0)
            .addReg(SuperReg, getKillRegState(IsKill));

        // The real spill now kills the temp copy.
        SubReg = SuperReg = CopyM0;
        IsKill = true;
      }

      BuildMI(*MBB, MI, DL,
              TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
              Spill.VGPR)
          .addReg(SubReg, getKillRegState(IsKill))
          .addImm(Spill.Lane);

      // FIXME: Since this spills to another register instead of an actual
      // frame index, we should delete the frame index when all references to
      // it are fixed.
    } else {
      // Spill SGPR to a frame index.
      // FIXME: We should use S_STORE_DWORD here for VI.
      MachineInstrBuilder Mov =
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
            .addReg(SubReg, SubKillState);

      // There could be undef components of a spilled super register.
      // TODO: Can we detect this and skip the spill?
      if (NumSubRegs > 1) {
        // The last implicit use of the SuperReg carries the "Kill" flag.
        unsigned SuperKillState = 0;
        if (i + 1 == e)
          SuperKillState |= getKillRegState(IsKill);
        Mov.addReg(SuperReg, RegState::Implicit | SuperKillState);
      }

      unsigned Size = FrameInfo.getObjectSize(Index);
      unsigned Align = FrameInfo.getObjectAlignment(Index);
      MachinePointerInfo PtrInfo
        = MachinePointerInfo::getFixedStack(*MF, Index);
      MachineMemOperand *MMO
        = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
                                   Size, Align);
      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE))
          .addReg(TmpReg, RegState::Kill)          // src
          .addFrameIndex(Index)                    // vaddr
          .addReg(MFI->getScratchRSrcReg())        // srsrc
          .addReg(MFI->getScratchWaveOffsetReg())  // soffset
          .addImm(i * 4)                           // offset
          .addMemOperand(MMO);
    }
  }

  MI->eraseFromParent();
  MFI->addToSpilledSGPRs(NumSubRegs);
}
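// A minimal standalone model (assumed semantics, not LLVM code) of the
// writelane/readlane spill used above: a VGPR holds one dword per lane of the
// wave, and each spilled SGPR dword is parked in the lane chosen by
// Spill.Lane, so no memory traffic is needed.
#include <array>
#include <cassert>
#include <cstdint>

int main() {
  std::array<uint32_t, 64> VGPR{}; // one dword per lane of a 64-wide wave
  const unsigned Lane = 5;         // Spill.Lane assigned by getSpilledReg
  const uint32_t SGPRVal = 0xDEADBEEF;

  VGPR[Lane] = SGPRVal;            // V_WRITELANE_B32: spill
  uint32_t Restored = VGPR[Lane];  // V_READLANE_B32: restore
  assert(Restored == SGPRVal);
  return 0;
}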
void SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
                                 int Index,
                                 RegScavenger *RS) const {
  MachineFunction *MF = MI->getParent()->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock *MBB = MI->getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
  const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const DebugLoc &DL = MI->getDebugLoc();

  unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
  unsigned SuperReg = MI->getOperand(0).getReg();
  bool SpillToSMEM = ST.hasScalarStores() && EnableSpillSGPRToSMEM;

  assert(SuperReg != AMDGPU::M0 && "m0 should never spill");

  unsigned OffsetReg = AMDGPU::M0;
  unsigned M0CopyReg = AMDGPU::NoRegister;

  if (SpillToSMEM) {
    if (RS->isRegUsed(AMDGPU::M0)) {
      M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
          .addReg(AMDGPU::M0);
    }
  }

  int64_t FrOffset = FrameInfo.getObjectOffset(Index);
  const unsigned EltSize = 4;

  for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
    unsigned SubReg = NumSubRegs == 1 ?
      SuperReg : getSubReg(SuperReg, getSubRegFromChannel(i));

    if (SpillToSMEM) {
      unsigned Align = FrameInfo.getObjectAlignment(Index);
      MachinePointerInfo PtrInfo
        = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
      MachineMemOperand *MMO
        = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
                                   EltSize, MinAlign(Align, EltSize * i));

      // Add the element offset (i * 4) to the frame offset, scaled by the
      // wavefront size.
      int64_t Offset = ST.getWavefrontSize() * (FrOffset + 4 * i);
      if (Offset != 0) {
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
            .addReg(MFI->getScratchWaveOffsetReg())
            .addImm(Offset);
      } else {
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
            .addReg(MFI->getScratchWaveOffsetReg());
      }

      auto MIB =
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_BUFFER_LOAD_DWORD_SGPR),
                SubReg)
            .addReg(MFI->getScratchRSrcReg())  // sbase
            .addReg(OffsetReg, RegState::Kill) // soff
            .addImm(0)                         // glc
            .addMemOperand(MMO);

      if (NumSubRegs > 1)
        MIB.addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);

      continue;
    }

    SIMachineFunctionInfo::SpilledReg Spill = MFI->getSpilledReg(MF, Index, i);
    if (Spill.hasReg()) {
      auto MIB =
        BuildMI(*MBB, MI, DL,
                TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
                SubReg)
            .addReg(Spill.VGPR)
            .addImm(Spill.Lane);

      if (NumSubRegs > 1)
        MIB.addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
    } else {
      // Restore SGPR from a stack slot.
      // FIXME: We should use S_LOAD_DWORD here for VI.
      unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
      unsigned Align = FrameInfo.getObjectAlignment(Index);

      MachinePointerInfo PtrInfo
        = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);

      MachineMemOperand *MMO
        = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
                                   EltSize, MinAlign(Align, EltSize * i));

      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpReg)
          .addFrameIndex(Index)                    // vaddr
          .addReg(MFI->getScratchRSrcReg())        // srsrc
          .addReg(MFI->getScratchWaveOffsetReg())  // soffset
          .addImm(i * 4)                           // offset
          .addMemOperand(MMO);

      auto MIB =
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
            .addReg(TmpReg, RegState::Kill);

      if (NumSubRegs > 1)
        MIB.addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
    }
  }

  if (M0CopyReg != AMDGPU::NoRegister) {
    BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
        .addReg(M0CopyReg, RegState::Kill);
  }

  MI->eraseFromParent();
}
void SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
                               int Index,
                               RegScavenger *RS) const {
  MachineBasicBlock *MBB = MI->getParent();
  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();

  unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
  unsigned SuperReg = MI->getOperand(0).getReg();
  bool IsKill = MI->getOperand(0).isKill();
  const DebugLoc &DL = MI->getDebugLoc();

  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo &FrameInfo = MF->getFrameInfo();

  bool SpillToSMEM = ST.hasScalarStores() && EnableSpillSGPRToSMEM;

  assert(SuperReg != AMDGPU::M0 && "m0 should never spill");

  const unsigned EltSize = 4;

  unsigned OffsetReg = AMDGPU::M0;
  unsigned M0CopyReg = AMDGPU::NoRegister;

  if (SpillToSMEM) {
    if (RS->isRegUsed(AMDGPU::M0)) {
      M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
          .addReg(AMDGPU::M0);
    }
  }

  // SubReg carries the "Kill" flag when SubReg == SuperReg.
  unsigned SubKillState = getKillRegState((NumSubRegs == 1) && IsKill);
  for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
    unsigned SubReg = NumSubRegs == 1 ?
      SuperReg : getSubReg(SuperReg, getSubRegFromChannel(i));

    if (SpillToSMEM) {
      int64_t FrOffset = FrameInfo.getObjectOffset(Index);
      unsigned Align = FrameInfo.getObjectAlignment(Index);
      MachinePointerInfo PtrInfo
        = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
      MachineMemOperand *MMO
        = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
                                   EltSize, MinAlign(Align, EltSize * i));

      // Add the element offset (i * 4) to the wave offset. SMEM instructions
      // only support a single offset, so increment the wave offset.
      int64_t Offset = ST.getWavefrontSize() * (FrOffset + 4 * i);
      if (Offset != 0) {
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
            .addReg(MFI->getScratchWaveOffsetReg())
            .addImm(Offset);
      } else {
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
            .addReg(MFI->getScratchWaveOffsetReg());
      }

      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_BUFFER_STORE_DWORD_SGPR))
          .addReg(SubReg, getKillRegState(IsKill)) // sdata
          .addReg(MFI->getScratchRSrcReg())        // sbase
          .addReg(OffsetReg, RegState::Kill)       // soff
          .addImm(0)                               // glc
          .addMemOperand(MMO);

      continue;
    }

    struct SIMachineFunctionInfo::SpilledReg Spill =
      MFI->getSpilledReg(MF, Index, i);
    if (Spill.hasReg()) {
      BuildMI(*MBB, MI, DL,
              TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
              Spill.VGPR)
          .addReg(SubReg, getKillRegState(IsKill))
          .addImm(Spill.Lane);

      // FIXME: Since this spills to another register instead of an actual
      // frame index, we should delete the frame index when all references to
      // it are fixed.
    } else {
      // Spill SGPR to a frame index.
      // TODO: Should VI try to spill to VGPR and then spill to SMEM?
      unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

      MachineInstrBuilder Mov =
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
            .addReg(SubReg, SubKillState);

      // There could be undef components of a spilled super register.
      // TODO: Can we detect this and skip the spill?
      if (NumSubRegs > 1) {
        // The last implicit use of the SuperReg carries the "Kill" flag.
        unsigned SuperKillState = 0;
        if (i + 1 == e)
          SuperKillState |= getKillRegState(IsKill);
        Mov.addReg(SuperReg, RegState::Implicit | SuperKillState);
      }

      unsigned Align = FrameInfo.getObjectAlignment(Index);
      MachinePointerInfo PtrInfo
        = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
      MachineMemOperand *MMO
        = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
                                   EltSize, MinAlign(Align, EltSize * i));
      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE))
          .addReg(TmpReg, RegState::Kill)          // src
          .addFrameIndex(Index)                    // vaddr
          .addReg(MFI->getScratchRSrcReg())        // srsrc
          .addReg(MFI->getScratchWaveOffsetReg())  // soffset
          .addImm(i * 4)                           // offset
          .addMemOperand(MMO);
    }
  }

  if (M0CopyReg != AMDGPU::NoRegister) {
    BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
        .addReg(M0CopyReg, RegState::Kill);
  }

  MI->eraseFromParent();
  MFI->addToSpilledSGPRs(NumSubRegs);
}
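// A worked example (standalone C++, assumed numbers) of the SMEM offset
// computed above: scratch is wave-interleaved, so a per-lane byte offset is
// scaled by the wavefront size to get the wave-relative byte offset.
#include <cassert>
#include <cstdint>

int main() {
  const int64_t WavefrontSize = 64; // ST.getWavefrontSize()
  const int64_t FrOffset = 8;       // FrameInfo.getObjectOffset(Index)
  const int64_t i = 1;              // second dword of the spilled tuple
  const int64_t Offset = WavefrontSize * (FrOffset + 4 * i);
  assert(Offset == 768);            // 64 * (8 + 4)
  return 0;
}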
void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
                                         unsigned LoadStoreOp,
                                         int Index,
                                         unsigned ValueReg,
                                         bool IsKill,
                                         unsigned ScratchRsrcReg,
                                         unsigned ScratchOffsetReg,
                                         int64_t InstOffset,
                                         MachineMemOperand *MMO,
                                         RegScavenger *RS) const {
  MachineBasicBlock *MBB = MI->getParent();
  MachineFunction *MF = MI->getParent()->getParent();
  const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const MachineFrameInfo &MFI = MF->getFrameInfo();

  const MCInstrDesc &Desc = TII->get(LoadStoreOp);
  const DebugLoc &DL = MI->getDebugLoc();
  bool IsStore = Desc.mayStore();

  bool RanOutOfSGPRs = false;
  bool Scavenged = false;
  unsigned SOffset = ScratchOffsetReg;

  const TargetRegisterClass *RC = getRegClassForReg(MF->getRegInfo(), ValueReg);
  unsigned NumSubRegs = AMDGPU::getRegBitWidth(RC->getID()) / 32;
  unsigned Size = NumSubRegs * 4;
  int64_t Offset = InstOffset + MFI.getObjectOffset(Index);
  const int64_t OriginalImmOffset = Offset;

  unsigned Align = MFI.getObjectAlignment(Index);
  const MachinePointerInfo &BasePtrInfo = MMO->getPointerInfo();

  if (!isUInt<12>(Offset + Size)) {
    SOffset = AMDGPU::NoRegister;

    // We don't have access to the register scavenger if this function is
    // called during PEI::scavengeFrameVirtualRegs().
    if (RS)
      SOffset = RS->FindUnusedReg(&AMDGPU::SGPR_32RegClass);

    if (SOffset == AMDGPU::NoRegister) {
      // There are no free SGPRs, and we are in the process of spilling VGPRs
      // too. Since we need a VGPR in order to spill SGPRs (this is true on
      // SI/CI, and on VI it remains true until we implement spilling using
      // scalar stores), we have no way to free up an SGPR. Our solution here
      // is to add the offset directly to the ScratchOffset register, and then
      // subtract the offset after the spill to return ScratchOffset to its
      // original value.
      RanOutOfSGPRs = true;
      SOffset = ScratchOffsetReg;
    } else {
      Scavenged = true;
    }

    BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), SOffset)
        .addReg(ScratchOffsetReg)
        .addImm(Offset);

    Offset = 0;
  }

  const unsigned EltSize = 4;

  for (unsigned i = 0, e = NumSubRegs; i != e; ++i, Offset += EltSize) {
    unsigned SubReg = NumSubRegs == 1 ?
      ValueReg : getSubReg(ValueReg, getSubRegFromChannel(i));

    unsigned SOffsetRegState = 0;
    unsigned SrcDstRegState = getDefRegState(!IsStore);
    if (i + 1 == e) {
      SOffsetRegState |= getKillRegState(Scavenged);
      // The last implicit use carries the "Kill" flag.
      SrcDstRegState |= getKillRegState(IsKill);
    }

    MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(EltSize * i);
    MachineMemOperand *NewMMO
      = MF->getMachineMemOperand(PInfo, MMO->getFlags(),
                                 EltSize, MinAlign(Align, EltSize * i));

    auto MIB = BuildMI(*MBB, MI, DL, Desc)
        .addReg(SubReg, getDefRegState(!IsStore) | getKillRegState(IsKill))
        .addReg(ScratchRsrcReg)
        .addReg(SOffset, SOffsetRegState)
        .addImm(Offset)
        .addImm(0) // glc
        .addImm(0) // slc
        .addImm(0) // tfe
        .addMemOperand(NewMMO);

    if (NumSubRegs > 1)
      MIB.addReg(ValueReg, RegState::Implicit | SrcDstRegState);
  }

  if (RanOutOfSGPRs) {
    // Subtract the offset we added to the ScratchOffset register.
    BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), ScratchOffsetReg)
        .addReg(ScratchOffsetReg)
        .addImm(OriginalImmOffset);
  }
}
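// A standalone sketch (assumed numbers) of the isUInt<12> guard above: the
// MUBUF immediate offset field is 12 bits, so when the last element's offset
// would exceed 4095 the whole base offset is folded into SOffset via
// S_ADD_U32 and the per-element immediates restart at 0.
#include <cassert>
#include <cstdint>

// Stand-in for llvm::isUInt<12> so the sketch compiles on its own.
static bool fitsInU12(int64_t X) { return X >= 0 && X < (1 << 12); }

int main() {
  const unsigned NumSubRegs = 512 / 32; // a 512-bit tuple is 16 dwords
  const unsigned Size = NumSubRegs * 4; // 64 bytes of scratch
  const int64_t Offset = 0 + 4064;      // InstOffset + getObjectOffset(Index)
  assert(!fitsInU12(Offset + Size));    // 4128 > 4095: fold into SOffset
  return 0;
}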
void SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
                                 int Index,
                                 RegScavenger *RS) const {
  MachineFunction *MF = MI->getParent()->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock *MBB = MI->getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
  const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const DebugLoc &DL = MI->getDebugLoc();

  unsigned SuperReg = MI->getOperand(0).getReg();
  bool SpillToSMEM = ST.hasScalarStores() && EnableSpillSGPRToSMEM;

  assert(SuperReg != AMDGPU::M0 && "m0 should never spill");

  unsigned OffsetReg = AMDGPU::M0;
  unsigned M0CopyReg = AMDGPU::NoRegister;

  if (SpillToSMEM) {
    if (RS->isRegUsed(AMDGPU::M0)) {
      M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
          .addReg(AMDGPU::M0);
    }
  }

  unsigned EltSize = 4;
  unsigned ScalarLoadOp;

  const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
  if (SpillToSMEM && isSGPRClass(RC)) {
    // XXX - if private_element_size is larger than 4 it might be useful to be
    // able to spill wider vmem spills.
    std::tie(EltSize, ScalarLoadOp) = getSpillEltSize(RC->getSize(), false);
  }

  ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
  unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();

  int64_t FrOffset = FrameInfo.getObjectOffset(Index);

  for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
    unsigned SubReg = NumSubRegs == 1 ?
      SuperReg : getSubReg(SuperReg, SplitParts[i]);

    if (SpillToSMEM) {
      // FIXME: Size may be > 4, in which case the extra bytes are wasted.
      unsigned Align = FrameInfo.getObjectAlignment(Index);
      MachinePointerInfo PtrInfo
        = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
      MachineMemOperand *MMO
        = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
                                   EltSize, MinAlign(Align, EltSize * i));

      // Add the element offset (EltSize * i) to the wave-scaled frame offset.
      int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
      if (Offset != 0) {
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
            .addReg(MFI->getScratchWaveOffsetReg())
            .addImm(Offset);
      } else {
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
            .addReg(MFI->getScratchWaveOffsetReg());
      }

      auto MIB =
        BuildMI(*MBB, MI, DL, TII->get(ScalarLoadOp), SubReg)
            .addReg(MFI->getScratchRSrcReg())  // sbase
            .addReg(OffsetReg, RegState::Kill) // soff
            .addImm(0)                         // glc
            .addMemOperand(MMO);

      if (NumSubRegs > 1)
        MIB.addReg(SuperReg, RegState::ImplicitDefine);

      continue;
    }

    SIMachineFunctionInfo::SpilledReg Spill = MFI->getSpilledReg(MF, Index, i);
    if (Spill.hasReg()) {
      auto MIB =
        BuildMI(*MBB, MI, DL,
                TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
                SubReg)
            .addReg(Spill.VGPR)
            .addImm(Spill.Lane);

      if (NumSubRegs > 1)
        MIB.addReg(SuperReg, RegState::ImplicitDefine);
    } else {
      // Restore SGPR from a stack slot.
      // FIXME: We should use S_LOAD_DWORD here for VI.
      unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
      unsigned Align = FrameInfo.getObjectAlignment(Index);

      MachinePointerInfo PtrInfo
        = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);

      MachineMemOperand *MMO
        = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
                                   EltSize, MinAlign(Align, EltSize * i));

      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpReg)
          .addFrameIndex(Index)                    // vaddr
          .addReg(MFI->getScratchRSrcReg())        // srsrc
          .addReg(MFI->getScratchWaveOffsetReg())  // soffset
          .addImm(i * 4)                           // offset
          .addMemOperand(MMO);

      auto MIB =
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
            .addReg(TmpReg, RegState::Kill);

      if (NumSubRegs > 1)
        MIB.addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
    }
  }

  if (M0CopyReg != AMDGPU::NoRegister) {
    BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
        .addReg(M0CopyReg, RegState::Kill);
  }

  MI->eraseFromParent();
}
void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
                                         int SPAdj, unsigned FIOperandNum,
                                         RegScavenger *RS) const {
  MachineFunction *MF = MI->getParent()->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock *MBB = MI->getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo *FrameInfo = MF->getFrameInfo();
  const SIInstrInfo *TII =
      static_cast<const SIInstrInfo *>(MF->getSubtarget().getInstrInfo());
  DebugLoc DL = MI->getDebugLoc();

  MachineOperand &FIOp = MI->getOperand(FIOperandNum);
  int Index = MI->getOperand(FIOperandNum).getIndex();

  switch (MI->getOpcode()) {
  // SGPR register spill
  case AMDGPU::SI_SPILL_S512_SAVE:
  case AMDGPU::SI_SPILL_S256_SAVE:
  case AMDGPU::SI_SPILL_S128_SAVE:
  case AMDGPU::SI_SPILL_S64_SAVE:
  case AMDGPU::SI_SPILL_S32_SAVE: {
    unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
    unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

    for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
      unsigned SubReg = getPhysRegSubReg(MI->getOperand(0).getReg(),
                                         &AMDGPU::SGPR_32RegClass, i);
      struct SIMachineFunctionInfo::SpilledReg Spill =
        MFI->getSpilledReg(MF, Index, i);

      if (Spill.hasReg()) {
        BuildMI(*MBB, MI, DL,
                TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
                Spill.VGPR)
            .addReg(SubReg)
            .addImm(Spill.Lane);

        // FIXME: Since this spills to another register instead of an actual
        // frame index, we should delete the frame index when all references to
        // it are fixed.
      } else {
        // Spill SGPR to a frame index.
        // FIXME: We should use S_STORE_DWORD here for VI.
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
            .addReg(SubReg);

        unsigned Size = FrameInfo->getObjectSize(Index);
        unsigned Align = FrameInfo->getObjectAlignment(Index);
        MachinePointerInfo PtrInfo
          = MachinePointerInfo::getFixedStack(*MF, Index);
        MachineMemOperand *MMO
          = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
                                     Size, Align);
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE))
            .addReg(TmpReg)                          // src
            .addFrameIndex(Index)                    // frame_idx
            .addReg(MFI->getScratchRSrcReg())        // scratch_rsrc
            .addReg(MFI->getScratchWaveOffsetReg())  // scratch_offset
            .addImm(i * 4)                           // offset
            .addMemOperand(MMO);
      }
    }
    MI->eraseFromParent();
    break;
  }

  // SGPR register restore
  case AMDGPU::SI_SPILL_S512_RESTORE:
  case AMDGPU::SI_SPILL_S256_RESTORE:
  case AMDGPU::SI_SPILL_S128_RESTORE:
  case AMDGPU::SI_SPILL_S64_RESTORE:
  case AMDGPU::SI_SPILL_S32_RESTORE: {
    unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
    unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

    for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
      unsigned SubReg = getPhysRegSubReg(MI->getOperand(0).getReg(),
                                         &AMDGPU::SGPR_32RegClass, i);
      struct SIMachineFunctionInfo::SpilledReg Spill =
        MFI->getSpilledReg(MF, Index, i);

      if (Spill.hasReg()) {
        BuildMI(*MBB, MI, DL,
                TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
                SubReg)
            .addReg(Spill.VGPR)
            .addImm(Spill.Lane)
            .addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
      } else {
        // Restore SGPR from a stack slot.
        // FIXME: We should use S_LOAD_DWORD here for VI.
        unsigned Align = FrameInfo->getObjectAlignment(Index);
        unsigned Size = FrameInfo->getObjectSize(Index);
        MachinePointerInfo PtrInfo
          = MachinePointerInfo::getFixedStack(*MF, Index);
        MachineMemOperand *MMO = MF->getMachineMemOperand(
            PtrInfo, MachineMemOperand::MOLoad, Size, Align);
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpReg)
            .addFrameIndex(Index)                    // frame_idx
            .addReg(MFI->getScratchRSrcReg())        // scratch_rsrc
            .addReg(MFI->getScratchWaveOffsetReg())  // scratch_offset
            .addImm(i * 4)                           // offset
            .addMemOperand(MMO);
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
            .addReg(TmpReg, RegState::Kill)
            .addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
      }
    }

    // TODO: only do this when it is needed
    switch (MF->getSubtarget<AMDGPUSubtarget>().getGeneration()) {
    case AMDGPUSubtarget::SOUTHERN_ISLANDS:
      // "VALU writes SGPR" -> "SMRD reads that SGPR" needs 4 wait states
      // ("S_NOP 3") on SI
      TII->insertWaitStates(MI, 4);
      break;
    case AMDGPUSubtarget::SEA_ISLANDS:
      break;
    default: // VOLCANIC_ISLANDS and later
      // "VALU writes SGPR -> VMEM reads that SGPR" needs 5 wait states
      // ("S_NOP 4") on VI and later. This also applies to VALUs which write
      // VCC, but we're unlikely to see VMEM use VCC.
      TII->insertWaitStates(MI, 5);
    }

    MI->eraseFromParent();
    break;
  }

  // VGPR register spill
  case AMDGPU::SI_SPILL_V512_SAVE:
  case AMDGPU::SI_SPILL_V256_SAVE:
  case AMDGPU::SI_SPILL_V128_SAVE:
  case AMDGPU::SI_SPILL_V96_SAVE:
  case AMDGPU::SI_SPILL_V64_SAVE:
  case AMDGPU::SI_SPILL_V32_SAVE:
    buildScratchLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
          TII->getNamedOperand(*MI, AMDGPU::OpName::src)->getReg(),
          TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_rsrc)->getReg(),
          TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(),
          FrameInfo->getObjectOffset(Index) +
          TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(), RS);
    MI->eraseFromParent();
    break;

  case AMDGPU::SI_SPILL_V32_RESTORE:
  case AMDGPU::SI_SPILL_V64_RESTORE:
  case AMDGPU::SI_SPILL_V96_RESTORE:
  case AMDGPU::SI_SPILL_V128_RESTORE:
  case AMDGPU::SI_SPILL_V256_RESTORE:
  case AMDGPU::SI_SPILL_V512_RESTORE: {
    buildScratchLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
          TII->getNamedOperand(*MI, AMDGPU::OpName::dst)->getReg(),
          TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_rsrc)->getReg(),
          TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(),
          FrameInfo->getObjectOffset(Index) +
          TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(), RS);
    MI->eraseFromParent();
    break;
  }

  default: {
    int64_t Offset = FrameInfo->getObjectOffset(Index);
    FIOp.ChangeToImmediate(Offset);
    if (!TII->isImmOperandLegal(MI, FIOperandNum, FIOp)) {
      unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
      BuildMI(*MBB, MI, MI->getDebugLoc(),
              TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
          .addImm(Offset);
      FIOp.ChangeToRegister(TmpReg, false, false, true);
    }
  }
  }
}
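// A standalone restatement of the hazard table used in the restore path above
// (wait-state values copied from the code; the enum is illustrative, not
// LLVM's AMDGPUSubtarget::Generation):
enum class Gen { SOUTHERN_ISLANDS, SEA_ISLANDS, VOLCANIC_ISLANDS };

static unsigned sgprHazardWaitStates(Gen G) {
  switch (G) {
  case Gen::SOUTHERN_ISLANDS:
    return 4; // "VALU writes SGPR" -> "SMRD reads it": S_NOP 3
  case Gen::SEA_ISLANDS:
    return 0; // no wait states required
  case Gen::VOLCANIC_ISLANDS:
    return 5; // "VALU writes SGPR" -> "VMEM reads it": S_NOP 4
  }
  return 0;
}

int main() { return sgprHazardWaitStates(Gen::SEA_ISLANDS); }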
void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
                                         int SPAdj, unsigned FIOperandNum,
                                         RegScavenger *RS) const {
  MachineFunction *MF = MI->getParent()->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock *MBB = MI->getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo *FrameInfo = MF->getFrameInfo();
  const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  DebugLoc DL = MI->getDebugLoc();

  MachineOperand &FIOp = MI->getOperand(FIOperandNum);
  int Index = MI->getOperand(FIOperandNum).getIndex();

  switch (MI->getOpcode()) {
  // SGPR register spill
  case AMDGPU::SI_SPILL_S512_SAVE:
  case AMDGPU::SI_SPILL_S256_SAVE:
  case AMDGPU::SI_SPILL_S128_SAVE:
  case AMDGPU::SI_SPILL_S64_SAVE:
  case AMDGPU::SI_SPILL_S32_SAVE: {
    unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
    unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    unsigned SuperReg = MI->getOperand(0).getReg();
    bool IsKill = MI->getOperand(0).isKill();

    // SubReg carries the "Kill" flag when SubReg == SuperReg.
    unsigned SubKillState = getKillRegState((NumSubRegs == 1) && IsKill);
    for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
      unsigned SubReg = getPhysRegSubReg(SuperReg,
                                         &AMDGPU::SGPR_32RegClass, i);
      struct SIMachineFunctionInfo::SpilledReg Spill =
        MFI->getSpilledReg(MF, Index, i);

      if (Spill.hasReg()) {
        BuildMI(*MBB, MI, DL,
                TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
                Spill.VGPR)
            .addReg(SubReg, getKillRegState(IsKill))
            .addImm(Spill.Lane);

        // FIXME: Since this spills to another register instead of an actual
        // frame index, we should delete the frame index when all references to
        // it are fixed.
      } else {
        // Spill SGPR to a frame index.
        // FIXME: We should use S_STORE_DWORD here for VI.
        MachineInstrBuilder Mov =
          BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
              .addReg(SubReg, SubKillState);

        // There could be undef components of a spilled super register.
        // TODO: Can we detect this and skip the spill?
        if (NumSubRegs > 1) {
          // The last implicit use of the SuperReg carries the "Kill" flag.
          unsigned SuperKillState = 0;
          if (i + 1 == e)
            SuperKillState |= getKillRegState(IsKill);
          Mov.addReg(SuperReg, RegState::Implicit | SuperKillState);
        }

        unsigned Size = FrameInfo->getObjectSize(Index);
        unsigned Align = FrameInfo->getObjectAlignment(Index);
        MachinePointerInfo PtrInfo
          = MachinePointerInfo::getFixedStack(*MF, Index);
        MachineMemOperand *MMO
          = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
                                     Size, Align);
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE))
            .addReg(TmpReg, RegState::Kill)          // src
            .addFrameIndex(Index)                    // frame_idx
            .addReg(MFI->getScratchRSrcReg())        // scratch_rsrc
            .addReg(MFI->getScratchWaveOffsetReg())  // scratch_offset
            .addImm(i * 4)                           // offset
            .addMemOperand(MMO);
      }
    }
    MI->eraseFromParent();
    break;
  }

  // SGPR register restore
  case AMDGPU::SI_SPILL_S512_RESTORE:
  case AMDGPU::SI_SPILL_S256_RESTORE:
  case AMDGPU::SI_SPILL_S128_RESTORE:
  case AMDGPU::SI_SPILL_S64_RESTORE:
  case AMDGPU::SI_SPILL_S32_RESTORE: {
    unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
    unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

    for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
      unsigned SubReg = getPhysRegSubReg(MI->getOperand(0).getReg(),
                                         &AMDGPU::SGPR_32RegClass, i);
      struct SIMachineFunctionInfo::SpilledReg Spill =
        MFI->getSpilledReg(MF, Index, i);

      if (Spill.hasReg()) {
        BuildMI(*MBB, MI, DL,
                TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
                SubReg)
            .addReg(Spill.VGPR)
            .addImm(Spill.Lane)
            .addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
      } else {
        // Restore SGPR from a stack slot.
        // FIXME: We should use S_LOAD_DWORD here for VI.
        unsigned Align = FrameInfo->getObjectAlignment(Index);
        unsigned Size = FrameInfo->getObjectSize(Index);
        MachinePointerInfo PtrInfo
          = MachinePointerInfo::getFixedStack(*MF, Index);
        MachineMemOperand *MMO = MF->getMachineMemOperand(
            PtrInfo, MachineMemOperand::MOLoad, Size, Align);
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpReg)
            .addFrameIndex(Index)                    // frame_idx
            .addReg(MFI->getScratchRSrcReg())        // scratch_rsrc
            .addReg(MFI->getScratchWaveOffsetReg())  // scratch_offset
            .addImm(i * 4)                           // offset
            .addMemOperand(MMO);
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
            .addReg(TmpReg, RegState::Kill)
            .addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
      }
    }
    MI->eraseFromParent();
    break;
  }

  // VGPR register spill
  case AMDGPU::SI_SPILL_V512_SAVE:
  case AMDGPU::SI_SPILL_V256_SAVE:
  case AMDGPU::SI_SPILL_V128_SAVE:
  case AMDGPU::SI_SPILL_V96_SAVE:
  case AMDGPU::SI_SPILL_V64_SAVE:
  case AMDGPU::SI_SPILL_V32_SAVE:
    buildScratchLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
          TII->getNamedOperand(*MI, AMDGPU::OpName::src)->getReg(),
          TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_rsrc)->getReg(),
          TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(),
          FrameInfo->getObjectOffset(Index) +
          TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(), RS);
    MI->eraseFromParent();
    break;

  case AMDGPU::SI_SPILL_V32_RESTORE:
  case AMDGPU::SI_SPILL_V64_RESTORE:
  case AMDGPU::SI_SPILL_V96_RESTORE:
  case AMDGPU::SI_SPILL_V128_RESTORE:
  case AMDGPU::SI_SPILL_V256_RESTORE:
  case AMDGPU::SI_SPILL_V512_RESTORE: {
    buildScratchLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
          TII->getNamedOperand(*MI, AMDGPU::OpName::dst)->getReg(),
          TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_rsrc)->getReg(),
          TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(),
          FrameInfo->getObjectOffset(Index) +
          TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(), RS);
    MI->eraseFromParent();
    break;
  }

  default: {
    int64_t Offset = FrameInfo->getObjectOffset(Index);
    FIOp.ChangeToImmediate(Offset);
    if (!TII->isImmOperandLegal(MI, FIOperandNum, FIOp)) {
      unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
      BuildMI(*MBB, MI, MI->getDebugLoc(),
              TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
          .addImm(Offset);
      FIOp.ChangeToRegister(TmpReg, false, false, true);
    }
  }
  }
}