static void foldOperand(MachineOperand &OpToFold, MachineInstr *UseMI,
                        unsigned UseOpIdx,
                        std::vector<FoldCandidate> &FoldList,
                        SmallVectorImpl<MachineInstr *> &CopiesToReplace,
                        const SIInstrInfo *TII, const SIRegisterInfo &TRI,
                        MachineRegisterInfo &MRI) {
  const MachineOperand &UseOp = UseMI->getOperand(UseOpIdx);

  // FIXME: Fold operands with subregs.
  if (UseOp.isReg() && ((UseOp.getSubReg() && OpToFold.isReg()) ||
      UseOp.isImplicit())) {
    return;
  }

  bool FoldingImm = OpToFold.isImm();
  APInt Imm;

  if (FoldingImm) {
    unsigned UseReg = UseOp.getReg();
    const TargetRegisterClass *UseRC
      = TargetRegisterInfo::isVirtualRegister(UseReg) ?
      MRI.getRegClass(UseReg) :
      TRI.getPhysRegClass(UseReg);

    Imm = APInt(64, OpToFold.getImm());

    const MCInstrDesc &FoldDesc = TII->get(OpToFold.getParent()->getOpcode());
    const TargetRegisterClass *FoldRC =
      TRI.getRegClass(FoldDesc.OpInfo[0].RegClass);

    // Split 64-bit constants into 32-bits for folding.
    if (FoldRC->getSize() == 8 && UseOp.getSubReg()) {
      if (UseRC->getSize() != 8)
        return;

      if (UseOp.getSubReg() == AMDGPU::sub0) {
        Imm = Imm.getLoBits(32);
      } else {
        assert(UseOp.getSubReg() == AMDGPU::sub1);
        Imm = Imm.getHiBits(32);
      }
    }

    // In order to fold immediates into copies, we need to change the
    // copy to a MOV.
    if (UseMI->getOpcode() == AMDGPU::COPY) {
      unsigned DestReg = UseMI->getOperand(0).getReg();
      const TargetRegisterClass *DestRC
        = TargetRegisterInfo::isVirtualRegister(DestReg) ?
        MRI.getRegClass(DestReg) :
        TRI.getPhysRegClass(DestReg);

      unsigned MovOp = TII->getMovOpcode(DestRC);
      if (MovOp == AMDGPU::COPY)
        return;

      UseMI->setDesc(TII->get(MovOp));
      CopiesToReplace.push_back(UseMI);
    }
  }

  // Special case for REG_SEQUENCE: We can't fold literals into
  // REG_SEQUENCE instructions, so we have to fold them into the
  // uses of REG_SEQUENCE.
  if (UseMI->getOpcode() == AMDGPU::REG_SEQUENCE) {
    unsigned RegSeqDstReg = UseMI->getOperand(0).getReg();
    unsigned RegSeqDstSubReg = UseMI->getOperand(UseOpIdx + 1).getImm();

    for (MachineRegisterInfo::use_iterator
           RSUse = MRI.use_begin(RegSeqDstReg), RSE = MRI.use_end();
         RSUse != RSE; ++RSUse) {

      MachineInstr *RSUseMI = RSUse->getParent();
      if (RSUse->getSubReg() != RegSeqDstSubReg)
        continue;

      foldOperand(OpToFold, RSUseMI, RSUse.getOperandNo(), FoldList,
                  CopiesToReplace, TII, TRI, MRI);
    }
    return;
  }

  const MCInstrDesc &UseDesc = UseMI->getDesc();

  // Don't fold into target independent nodes.  Target independent opcodes
  // don't have defined register classes.
  if (UseDesc.isVariadic() ||
      UseDesc.OpInfo[UseOpIdx].RegClass == -1)
    return;

  if (FoldingImm) {
    MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue());
    tryAddToFoldList(FoldList, UseMI, UseOpIdx, &ImmOp, TII);
    return;
  }

  tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold, TII);

  // FIXME: We could try to change the instruction from 64-bit to 32-bit
  // to enable more folding opportunities.  The shrink operands pass
  // already does this.
  return;
}
void SIFoldOperands::foldOperand(
  MachineOperand &OpToFold,
  MachineInstr *UseMI,
  unsigned UseOpIdx,
  SmallVectorImpl<FoldCandidate> &FoldList,
  SmallVectorImpl<MachineInstr *> &CopiesToReplace) const {
  const MachineOperand &UseOp = UseMI->getOperand(UseOpIdx);

  if (!isUseSafeToFold(TII, *UseMI, UseOp))
    return;

  // FIXME: Fold operands with subregs.
  if (UseOp.isReg() && OpToFold.isReg()) {
    if (UseOp.isImplicit() || UseOp.getSubReg() != AMDGPU::NoSubRegister)
      return;

    // Don't fold subregister extracts into tied operands; only fold a full
    // copy, since a subregister use tied to a full register def doesn't
    // really make sense. e.g. don't fold:
    //
    // %vreg1 = COPY %vreg0:sub1
    // %vreg2<tied3> = V_MAC_{F16, F32} %vreg3, %vreg4, %vreg1<tied0>
    //
    // into
    //
    // %vreg2<tied3> = V_MAC_{F16, F32} %vreg3, %vreg4, %vreg0:sub1<tied0>
    if (UseOp.isTied() && OpToFold.getSubReg() != AMDGPU::NoSubRegister)
      return;
  }

  // Special case for REG_SEQUENCE: We can't fold literals into
  // REG_SEQUENCE instructions, so we have to fold them into the
  // uses of REG_SEQUENCE.
  if (UseMI->isRegSequence()) {
    unsigned RegSeqDstReg = UseMI->getOperand(0).getReg();
    // REG_SEQUENCE operands come in (register, subregister index) pairs, so
    // the subregister index for this use is the immediate that follows it.
    unsigned RegSeqDstSubReg = UseMI->getOperand(UseOpIdx + 1).getImm();

    for (MachineRegisterInfo::use_iterator
           RSUse = MRI->use_begin(RegSeqDstReg), RSE = MRI->use_end();
         RSUse != RSE; ++RSUse) {

      MachineInstr *RSUseMI = RSUse->getParent();
      if (RSUse->getSubReg() != RegSeqDstSubReg)
        continue;

      foldOperand(OpToFold, RSUseMI, RSUse.getOperandNo(), FoldList,
                  CopiesToReplace);
    }

    return;
  }

  bool FoldingImm = OpToFold.isImm();

  // In order to fold immediates into copies, we need to change the
  // copy to a MOV.
  if (FoldingImm && UseMI->isCopy()) {
    unsigned DestReg = UseMI->getOperand(0).getReg();
    const TargetRegisterClass *DestRC
      = TargetRegisterInfo::isVirtualRegister(DestReg) ?
      MRI->getRegClass(DestReg) :
      TRI->getPhysRegClass(DestReg);

    unsigned MovOp = TII->getMovOpcode(DestRC);
    if (MovOp == AMDGPU::COPY)
      return;

    UseMI->setDesc(TII->get(MovOp));
    CopiesToReplace.push_back(UseMI);
  } else {
    const MCInstrDesc &UseDesc = UseMI->getDesc();

    // Don't fold into target independent nodes.  Target independent opcodes
    // don't have defined register classes.
    if (UseDesc.isVariadic() ||
        UseDesc.OpInfo[UseOpIdx].RegClass == -1)
      return;
  }

  if (!FoldingImm) {
    tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold, TII);

    // FIXME: We could try to change the instruction from 64-bit to 32-bit
    // to enable more folding opportunities.  The shrink operands pass
    // already does this.
    return;
  }

  const MCInstrDesc &FoldDesc = OpToFold.getParent()->getDesc();
  const TargetRegisterClass *FoldRC =
    TRI->getRegClass(FoldDesc.OpInfo[0].RegClass);

  // Split 64-bit constants into 32-bits for folding.
  if (UseOp.getSubReg() && AMDGPU::getRegBitWidth(FoldRC->getID()) == 64) {
    unsigned UseReg = UseOp.getReg();
    const TargetRegisterClass *UseRC
      = TargetRegisterInfo::isVirtualRegister(UseReg) ?
      MRI->getRegClass(UseReg) :
      TRI->getPhysRegClass(UseReg);

    if (AMDGPU::getRegBitWidth(UseRC->getID()) != 64)
      return;

    APInt Imm(64, OpToFold.getImm());
    if (UseOp.getSubReg() == AMDGPU::sub0) {
      Imm = Imm.getLoBits(32);
    } else {
      assert(UseOp.getSubReg() == AMDGPU::sub1);
      Imm = Imm.getHiBits(32);
    }

    MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue());
    tryAddToFoldList(FoldList, UseMI, UseOpIdx, &ImmOp, TII);
    return;
  }

  tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold, TII);
}
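
// Illustrative sketch only, not part of the pass: a standalone helper showing
// the same lo/hi split that the immediate-folding path above performs with
// APInt, i.e. a use of sub0 receives the low 32 bits of a 64-bit constant and
// a use of sub1 receives the high 32 bits. The helper name
// splitImm64ForSubRegs is hypothetical and exists only for this example.
//
// #include "llvm/ADT/APInt.h"
// #include <cstdint>
// #include <utility>
//
// static std::pair<uint32_t, uint32_t> splitImm64ForSubRegs(int64_t Value) {
//   llvm::APInt Imm(64, static_cast<uint64_t>(Value));
//   // getLoBits/getHiBits keep the 64-bit width; truncate after extraction.
//   uint32_t Lo = static_cast<uint32_t>(Imm.getLoBits(32).getZExtValue()); // sub0
//   uint32_t Hi = static_cast<uint32_t>(Imm.getHiBits(32).getZExtValue()); // sub1
//   return {Lo, Hi};
// }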