/// HandlePhysRegUse - Turn previous partial def's into read/mod/writes. Add
/// implicit defs to a machine instruction if there was an earlier def of its
/// super-register.
void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr *MI) {
  MachineInstr *LastDef = PhysRegDef[Reg];
  // If there was a previous use or a "full" def all is well.
  if (!LastDef && !PhysRegUse[Reg]) {
    // Otherwise, the last sub-register def implicitly defines this register.
    // e.g.
    // AH =
    // AL = ... <imp-def EAX>, <imp-kill AH>
    //    = AH
    // ...
    //    = EAX
    // All of the sub-registers must have been defined before the use of Reg!
    SmallSet<unsigned, 4> PartDefRegs;
    MachineInstr *LastPartialDef = FindLastPartialDef(Reg, PartDefRegs);
    // If LastPartialDef is NULL, it must be using a livein register.
    if (LastPartialDef) {
      LastPartialDef->addOperand(MachineOperand::CreateReg(Reg, true/*IsDef*/,
                                                           true/*IsImp*/));
      PhysRegDef[Reg] = LastPartialDef;
      SmallSet<unsigned, 8> Processed;
      for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
           unsigned SubReg = *SubRegs; ++SubRegs) {
        if (Processed.count(SubReg))
          continue;
        if (PartDefRegs.count(SubReg))
          continue;
        // This part of Reg was defined before the last partial def. It's
        // killed here.
        LastPartialDef->addOperand(MachineOperand::CreateReg(SubReg,
                                                             false/*IsDef*/,
                                                             true/*IsImp*/));
        PhysRegDef[SubReg] = LastPartialDef;
        for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
          Processed.insert(*SS);
      }
    }
  } else if (LastDef && !PhysRegUse[Reg] &&
             !LastDef->findRegisterDefOperand(Reg))
    // Last def defines the super register, add an implicit def of reg.
    LastDef->addOperand(MachineOperand::CreateReg(Reg, true/*IsDef*/,
                                                  true/*IsImp*/));

  // Remember this use.
  PhysRegUse[Reg] = MI;
  for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
       unsigned SubReg = *SubRegs; ++SubRegs)
    PhysRegUse[SubReg] = MI;
}
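// The scenario in the comment above (a use of EAX whose value was assembled
// from separate AH/AL defs) boils down to appending implicit operands to the
// last partial def. A minimal standalone sketch of just that operand-append
// step follows; it is illustrative only, not part of LiveVariables, and the
// helper name is hypothetical.
#include "llvm/CodeGen/MachineInstr.h"
using namespace llvm;

static void noteSuperRegAssembledAt(MachineInstr *LastPartialDef,
                                    unsigned SuperReg, unsigned EarlierSubReg) {
  // The last partial def now also (implicitly) defines the full register.
  LastPartialDef->addOperand(MachineOperand::CreateReg(SuperReg,
                                                       true /*IsDef*/,
                                                       true /*IsImp*/));
  // A sub-register defined earlier is read-modify-written here; record it as
  // an implicit use (the kill flag is added later via addRegisterKilled).
  LastPartialDef->addOperand(MachineOperand::CreateReg(EarlierSubReg,
                                                       false /*IsDef*/,
                                                       true /*IsImp*/));
}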
// Get the subreg type that is most likely to be coalesced
// for an SPR register that will be used in VDUP32d pseudo.
unsigned A15SDOptimizer::getPrefSPRLane(unsigned SReg) {
  if (!TRI->isVirtualRegister(SReg))
    return getDPRLaneFromSPR(SReg);

  MachineInstr *MI = MRI->getVRegDef(SReg);
  if (!MI) return ARM::ssub_0;
  MachineOperand *MO = MI->findRegisterDefOperand(SReg);

  if (!MO) return ARM::ssub_0;
  assert(MO->isReg() && "Non-register operand found!");

  if (MI->isCopy() && usesRegClass(MI->getOperand(1),
                                   &ARM::SPRRegClass)) {
    SReg = MI->getOperand(1).getReg();
  }

  if (TargetRegisterInfo::isVirtualRegister(SReg)) {
    if (MO->getSubReg() == ARM::ssub_1) return ARM::ssub_1;
    return ARM::ssub_0;
  }
  return getDPRLaneFromSPR(SReg);
}
bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) {
  MachineInstr *LastDef = PhysRegDef[Reg];
  MachineInstr *LastUse = PhysRegUse[Reg];
  if (!LastDef && !LastUse)
    return false;

  MachineInstr *LastRefOrPartRef = LastUse ? LastUse : LastDef;
  unsigned LastRefOrPartRefDist = DistanceMap[LastRefOrPartRef];
  // The whole register is used.
  // AL =
  // AH =
  //
  //    = AX
  //    = AL, AX<imp-use, kill>
  // AX =
  //
  // Or whole register is defined, but not used at all.
  // AX<dead> =
  // ...
  // AX =
  //
  // Or whole register is defined, but only partly used.
  // AX<dead> = AL<imp-def>
  //    = AL<kill>
  // AX =
  MachineInstr *LastPartDef = nullptr;
  unsigned LastPartDefDist = 0;
  SmallSet<unsigned, 8> PartUses;
  for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
    unsigned SubReg = *SubRegs;
    MachineInstr *Def = PhysRegDef[SubReg];
    if (Def && Def != LastDef) {
      // There was a def of this sub-register in between. This is a partial
      // def, keep track of the last one.
      unsigned Dist = DistanceMap[Def];
      if (Dist > LastPartDefDist) {
        LastPartDefDist = Dist;
        LastPartDef = Def;
      }
      continue;
    }
    if (MachineInstr *Use = PhysRegUse[SubReg]) {
      for (MCSubRegIterator SS(SubReg, TRI, /*IncludeSelf=*/true); SS.isValid();
           ++SS)
        PartUses.insert(*SS);
      unsigned Dist = DistanceMap[Use];
      if (Dist > LastRefOrPartRefDist) {
        LastRefOrPartRefDist = Dist;
        LastRefOrPartRef = Use;
      }
    }
  }

  if (!PhysRegUse[Reg]) {
    // Partial uses. Mark register def dead and add implicit def of
    // sub-registers which are used.
    // EAX<dead> = op AL<imp-def>
    // That is, EAX def is dead but AL def extends past it.
    PhysRegDef[Reg]->addRegisterDead(Reg, TRI, true);
    for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
      unsigned SubReg = *SubRegs;
      if (!PartUses.count(SubReg))
        continue;
      bool NeedDef = true;
      if (PhysRegDef[Reg] == PhysRegDef[SubReg]) {
        MachineOperand *MO = PhysRegDef[Reg]->findRegisterDefOperand(SubReg);
        if (MO) {
          NeedDef = false;
          assert(!MO->isDead());
        }
      }
      if (NeedDef)
        PhysRegDef[Reg]->addOperand(MachineOperand::CreateReg(SubReg,
                                                              true/*IsDef*/,
                                                              true/*IsImp*/));
      MachineInstr *LastSubRef = FindLastRefOrPartRef(SubReg);
      if (LastSubRef)
        LastSubRef->addRegisterKilled(SubReg, TRI, true);
      else {
        LastRefOrPartRef->addRegisterKilled(SubReg, TRI, true);
        for (MCSubRegIterator SS(SubReg, TRI, /*IncludeSelf=*/true);
             SS.isValid(); ++SS)
          PhysRegUse[*SS] = LastRefOrPartRef;
      }
      for (MCSubRegIterator SS(SubReg, TRI); SS.isValid(); ++SS)
        PartUses.erase(*SS);
    }
  } else if (LastRefOrPartRef == PhysRegDef[Reg] && LastRefOrPartRef != MI) {
    if (LastPartDef)
      // The last partial def kills the register.
      LastPartDef->addOperand(MachineOperand::CreateReg(Reg, false/*IsDef*/,
                                                        true/*IsImp*/,
                                                        true/*IsKill*/));
    else {
      MachineOperand *MO =
        LastRefOrPartRef->findRegisterDefOperand(Reg, false, TRI);
      bool NeedEC = MO->isEarlyClobber() && MO->getReg() != Reg;
      // If the last reference is the last def, then it's not used at all.
      // That is, unless we are currently processing the last reference itself.
      LastRefOrPartRef->addRegisterDead(Reg, TRI, true);
      if (NeedEC) {
        // If we are adding a subreg def and the superreg def is marked early
        // clobber, add an early clobber marker to the subreg def.
        MO = LastRefOrPartRef->findRegisterDefOperand(Reg);
        if (MO)
          MO->setIsEarlyClobber();
      }
    }
  } else
    LastRefOrPartRef->addRegisterKilled(Reg, TRI, true);
  return true;
}
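// HandlePhysRegKill's "dead whole-register def" branch relies on a small
// query/update pattern: look up the def operand, remember its early-clobber
// bit, mark the def dead, then re-apply the marker. A simplified standalone
// sketch of that pattern; the helper name is hypothetical, and the instruction,
// register and TargetRegisterInfo are assumed to be supplied by the caller.
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;

static void markWholeRegDefDead(MachineInstr *Def, unsigned Reg,
                                const TargetRegisterInfo *TRI) {
  // Remember whether the existing def (possibly of a super-register) is
  // early-clobber before the operand list is rewritten.
  MachineOperand *MO = Def->findRegisterDefOperand(Reg, false /*isDead*/, TRI);
  bool WasEarlyClobber = MO && MO->isEarlyClobber();

  // Mark the def of Reg dead, adding an implicit dead def if the instruction
  // only defines Reg through a wider register.
  Def->addRegisterDead(Reg, TRI, true /*AddIfNotFound*/);

  // Re-apply the early-clobber marker to whatever def operand now names Reg.
  if (WasEarlyClobber)
    if (MachineOperand *NewMO = Def->findRegisterDefOperand(Reg))
      NewMO->setIsEarlyClobber();
}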
MachineBasicBlock *
R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
                                                MachineBasicBlock *BB) const {
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;

  switch (MI->getOpcode()) {
  default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
  case AMDGPU::TGID_X:
    addLiveIn(MI, MF, MRI, TII, AMDGPU::T1_X);
    break;
  case AMDGPU::TGID_Y:
    addLiveIn(MI, MF, MRI, TII, AMDGPU::T1_Y);
    break;
  case AMDGPU::TGID_Z:
    addLiveIn(MI, MF, MRI, TII, AMDGPU::T1_Z);
    break;
  case AMDGPU::TIDIG_X:
    addLiveIn(MI, MF, MRI, TII, AMDGPU::T0_X);
    break;
  case AMDGPU::TIDIG_Y:
    addLiveIn(MI, MF, MRI, TII, AMDGPU::T0_Y);
    break;
  case AMDGPU::TIDIG_Z:
    addLiveIn(MI, MF, MRI, TII, AMDGPU::T0_Z);
    break;
  case AMDGPU::NGROUPS_X:
    lowerImplicitParameter(MI, *BB, MRI, 0);
    break;
  case AMDGPU::NGROUPS_Y:
    lowerImplicitParameter(MI, *BB, MRI, 1);
    break;
  case AMDGPU::NGROUPS_Z:
    lowerImplicitParameter(MI, *BB, MRI, 2);
    break;
  case AMDGPU::GLOBAL_SIZE_X:
    lowerImplicitParameter(MI, *BB, MRI, 3);
    break;
  case AMDGPU::GLOBAL_SIZE_Y:
    lowerImplicitParameter(MI, *BB, MRI, 4);
    break;
  case AMDGPU::GLOBAL_SIZE_Z:
    lowerImplicitParameter(MI, *BB, MRI, 5);
    break;
  case AMDGPU::LOCAL_SIZE_X:
    lowerImplicitParameter(MI, *BB, MRI, 6);
    break;
  case AMDGPU::LOCAL_SIZE_Y:
    lowerImplicitParameter(MI, *BB, MRI, 7);
    break;
  case AMDGPU::LOCAL_SIZE_Z:
    lowerImplicitParameter(MI, *BB, MRI, 8);
    break;
  case AMDGPU::CLAMP_R600:
    MI->getOperand(0).addTargetFlag(MO_FLAG_CLAMP);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1));
    break;
  case AMDGPU::FABS_R600:
    MI->getOperand(1).addTargetFlag(MO_FLAG_ABS);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1));
    break;
  case AMDGPU::FNEG_R600:
    MI->getOperand(1).addTargetFlag(MO_FLAG_NEG);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1));
    break;
  case AMDGPU::R600_LOAD_CONST: {
    int64_t RegIndex = MI->getOperand(1).getImm();
    unsigned ConstantReg = AMDGPU::R600_CReg32RegClass.getRegister(RegIndex);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::COPY))
        .addOperand(MI->getOperand(0))
        .addReg(ConstantReg);
    break;
  }
  case AMDGPU::LOAD_INPUT: {
    int64_t RegIndex = MI->getOperand(1).getImm();
    addLiveIn(MI, MF, MRI, TII,
              AMDGPU::R600_TReg32RegClass.getRegister(RegIndex));
    break;
  }
  case AMDGPU::MASK_WRITE: {
    unsigned maskedRegister = MI->getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr *defInstr = MRI.getVRegDef(maskedRegister);
    MachineOperand *def = defInstr->findRegisterDefOperand(maskedRegister);
    def->addTargetFlag(MO_FLAG_MASK);
    // Return early so the instruction is not erased.
    return BB;
  }
  case AMDGPU::RAT_WRITE_CACHELESS_eg: {
    // Convert to DWORD address.
    unsigned NewAddr =
        MRI.createVirtualRegister(AMDGPU::R600_TReg32_XRegisterClass);
    unsigned ShiftValue =
        MRI.createVirtualRegister(AMDGPU::R600_TReg32RegisterClass);

    // XXX In theory, we should be able to pass ShiftValue directly to
    // the LSHR_eg instruction as an inline literal, but I tried doing it
    // this way and it didn't produce the correct results.
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV), ShiftValue)
        .addReg(AMDGPU::ALU_LITERAL_X)
        .addImm(2);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::LSHR_eg), NewAddr)
        .addOperand(MI->getOperand(1))
        .addReg(ShiftValue);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
        .addOperand(MI->getOperand(0))
        .addReg(NewAddr);
    break;
  }
  case AMDGPU::STORE_OUTPUT: {
    int64_t OutputIndex = MI->getOperand(1).getImm();
    unsigned OutputReg = AMDGPU::R600_TReg32RegClass.getRegister(OutputIndex);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::COPY), OutputReg)
        .addOperand(MI->getOperand(0));
    if (!MRI.isLiveOut(OutputReg)) {
      MRI.addLiveOut(OutputReg);
    }
    break;
  }
  case AMDGPU::RESERVE_REG: {
    R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
    int64_t ReservedIndex = MI->getOperand(0).getImm();
    unsigned ReservedReg =
        AMDGPU::R600_TReg32RegClass.getRegister(ReservedIndex);
    MFI->ReservedRegs.push_back(ReservedReg);
    break;
  }
  case AMDGPU::TXD: {
    unsigned t0 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);
    unsigned t1 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);

    BuildMI(*BB, I, BB->findDebugLoc(I),
            TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0)
        .addOperand(MI->getOperand(3))
        .addOperand(MI->getOperand(4))
        .addOperand(MI->getOperand(5));
    BuildMI(*BB, I, BB->findDebugLoc(I),
            TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1)
        .addOperand(MI->getOperand(2))
        .addOperand(MI->getOperand(4))
        .addOperand(MI->getOperand(5));
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addOperand(MI->getOperand(4))
        .addOperand(MI->getOperand(5))
        .addReg(t0, RegState::Implicit)
        .addReg(t1, RegState::Implicit);
    break;
  }
  case AMDGPU::TXD_SHADOW: {
    unsigned t0 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);
    unsigned t1 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);

    BuildMI(*BB, I, BB->findDebugLoc(I),
            TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0)
        .addOperand(MI->getOperand(3))
        .addOperand(MI->getOperand(4))
        .addOperand(MI->getOperand(5));
    BuildMI(*BB, I, BB->findDebugLoc(I),
            TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1)
        .addOperand(MI->getOperand(2))
        .addOperand(MI->getOperand(4))
        .addOperand(MI->getOperand(5));
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addOperand(MI->getOperand(4))
        .addOperand(MI->getOperand(5))
        .addReg(t0, RegState::Implicit)
        .addReg(t1, RegState::Implicit);
    break;
  }
  }

  MI->eraseFromParent();
  return BB;
}
bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
  bool Modified = false;

  // Yes, CPSR could be livein.
  bool LiveCPSR = MBB.isLiveIn(ARM::CPSR);
  MachineInstr *BundleMI = 0;

  CPSRDef = 0;
  HighLatencyCPSR = false;

  // Check predecessors for the latest CPSRDef.
  for (MachineBasicBlock::pred_iterator
       I = MBB.pred_begin(), E = MBB.pred_end(); I != E; ++I) {
    const MBBInfo &PInfo = BlockInfo[(*I)->getNumber()];
    if (!PInfo.Visited) {
      // Since blocks are visited in RPO, this must be a back-edge.
      continue;
    }
    if (PInfo.HighLatencyCPSR) {
      HighLatencyCPSR = true;
      break;
    }
  }

  // If this BB loops back to itself, conservatively avoid narrowing the
  // first instruction that does partial flag update.
  bool IsSelfLoop = MBB.isSuccessor(&MBB);
  MachineBasicBlock::instr_iterator MII = MBB.instr_begin(), E = MBB.instr_end();
  MachineBasicBlock::instr_iterator NextMII;
  for (; MII != E; MII = NextMII) {
    NextMII = llvm::next(MII);

    MachineInstr *MI = &*MII;
    if (MI->isBundle()) {
      BundleMI = MI;
      continue;
    }
    if (MI->isDebugValue())
      continue;

    LiveCPSR = UpdateCPSRUse(*MI, LiveCPSR);

    // Does NextMII belong to the same bundle as MI?
    bool NextInSameBundle = NextMII != E && NextMII->isBundledWithPred();

    if (ReduceMI(MBB, MI, LiveCPSR, IsSelfLoop)) {
      Modified = true;
      MachineBasicBlock::instr_iterator I = prior(NextMII);
      MI = &*I;
      // Removing and reinserting the first instruction in a bundle will break
      // up the bundle. Fix the bundling if it was broken.
      if (NextInSameBundle && !NextMII->isBundledWithPred())
        NextMII->bundleWithPred();
    }

    if (!NextInSameBundle && MI->isInsideBundle()) {
      // FIXME: Since post-ra scheduler operates on bundles, the CPSR kill
      // marker is only on the BUNDLE instruction. Process the BUNDLE
      // instruction as we finish with the bundled instruction to work around
      // the inconsistency.
      if (BundleMI->killsRegister(ARM::CPSR))
        LiveCPSR = false;
      MachineOperand *MO = BundleMI->findRegisterDefOperand(ARM::CPSR);
      if (MO && !MO->isDead())
        LiveCPSR = true;
    }

    bool DefCPSR = false;
    LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR, DefCPSR);
    if (MI->isCall()) {
      // Calls don't really set CPSR.
      CPSRDef = 0;
      HighLatencyCPSR = false;
      IsSelfLoop = false;
    } else if (DefCPSR) {
      // This is the last CPSR defining instruction.
      CPSRDef = MI;
      HighLatencyCPSR = isHighLatencyCPSR(CPSRDef);
      IsSelfLoop = false;
    }
  }

  MBBInfo &Info = BlockInfo[MBB.getNumber()];
  Info.HighLatencyCPSR = HighLatencyCPSR;
  Info.Visited = true;
  return Modified;
}
bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
  bool Modified = false;

  // Yes, CPSR could be livein.
  bool LiveCPSR = MBB.isLiveIn(ARM::CPSR);
  MachineInstr *CPSRDef = 0;
  MachineInstr *BundleMI = 0;

  // If this BB loops back to itself, conservatively avoid narrowing the
  // first instruction that does partial flag update.
  bool IsSelfLoop = MBB.isSuccessor(&MBB);
  MachineBasicBlock::instr_iterator MII = MBB.instr_begin(), E = MBB.instr_end();
  MachineBasicBlock::instr_iterator NextMII;
  for (; MII != E; MII = NextMII) {
    NextMII = llvm::next(MII);

    MachineInstr *MI = &*MII;
    if (MI->isBundle()) {
      BundleMI = MI;
      continue;
    }

    LiveCPSR = UpdateCPSRUse(*MI, LiveCPSR);

    unsigned Opcode = MI->getOpcode();
    DenseMap<unsigned, unsigned>::iterator OPI = ReduceOpcodeMap.find(Opcode);
    if (OPI != ReduceOpcodeMap.end()) {
      const ReduceEntry &Entry = ReduceTable[OPI->second];
      // Ignore "special" cases for now.
      if (Entry.Special) {
        if (ReduceSpecial(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) {
          Modified = true;
          MachineBasicBlock::instr_iterator I = prior(NextMII);
          MI = &*I;
        }
        goto ProcessNext;
      }

      // Try to transform to a 16-bit two-address instruction.
      if (Entry.NarrowOpc2 &&
          ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) {
        Modified = true;
        MachineBasicBlock::instr_iterator I = prior(NextMII);
        MI = &*I;
        goto ProcessNext;
      }

      // Try to transform to a 16-bit non-two-address instruction.
      if (Entry.NarrowOpc1 &&
          ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) {
        Modified = true;
        MachineBasicBlock::instr_iterator I = prior(NextMII);
        MI = &*I;
      }
    }

  ProcessNext:
    if (NextMII != E && MI->isInsideBundle() && !NextMII->isInsideBundle()) {
      // FIXME: Since post-ra scheduler operates on bundles, the CPSR kill
      // marker is only on the BUNDLE instruction. Process the BUNDLE
      // instruction as we finish with the bundled instruction to work around
      // the inconsistency.
      if (BundleMI->killsRegister(ARM::CPSR))
        LiveCPSR = false;
      MachineOperand *MO = BundleMI->findRegisterDefOperand(ARM::CPSR);
      if (MO && !MO->isDead())
        LiveCPSR = true;
    }

    bool DefCPSR = false;
    LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR, DefCPSR);
    if (MI->isCall()) {
      // Calls don't really set CPSR.
      CPSRDef = 0;
      IsSelfLoop = false;
    } else if (DefCPSR) {
      // This is the last CPSR defining instruction.
      CPSRDef = MI;
      IsSelfLoop = false;
    }
  }

  return Modified;
}
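// Both versions of ReduceMBB above depend on the same erase-safe iteration
// idiom: capture the next instruction iterator before the current one may be
// deleted and replaced by a narrower encoding. A minimal sketch of that idiom
// only, where walkAndRewrite and tryRewrite are hypothetical names standing in
// for the pass and its ReduceMI/ReduceSpecial calls.
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
using namespace llvm;

static bool walkAndRewrite(MachineBasicBlock &MBB,
                           bool (*tryRewrite)(MachineInstr *)) {
  bool Modified = false;
  MachineBasicBlock::instr_iterator MII = MBB.instr_begin(),
                                    E = MBB.instr_end();
  MachineBasicBlock::instr_iterator NextMII;
  for (; MII != E; MII = NextMII) {
    // Grab the successor first; MII may be erased by the rewrite.
    NextMII = llvm::next(MII);
    MachineInstr *MI = &*MII;
    if (tryRewrite(MI)) {
      Modified = true;
      // The replacement was inserted just before NextMII; re-point MI at it
      // so any post-processing sees the new instruction.
      MI = &*prior(NextMII);
    }
  }
  return Modified;
}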