MachineBasicBlock::iterator
Filler::findDelayInstr(MachineBasicBlock &MBB,
                       MachineBasicBlock::iterator slot) {
  SmallSet<unsigned, 32> RegDefs;
  SmallSet<unsigned, 32> RegUses;
  bool sawLoad = false;
  bool sawStore = false;

  if (slot == MBB.begin())
    return MBB.end();

  if (slot->getOpcode() == SP::RET || slot->getOpcode() == SP::TLS_CALL)
    return MBB.end();

  if (slot->getOpcode() == SP::RETL) {
    MachineBasicBlock::iterator J = slot;
    --J;

    if (J->getOpcode() == SP::RESTORErr
        || J->getOpcode() == SP::RESTOREri) {
      // change retl to ret.
      slot->setDesc(TM.getInstrInfo()->get(SP::RET));
      return J;
    }
  }

  // Call's delay filler can def some of call's uses.
  if (slot->isCall())
    insertCallDefsUses(slot, RegDefs, RegUses);
  else
    insertDefsUses(slot, RegDefs, RegUses);

  bool done = false;

  MachineBasicBlock::iterator I = slot;

  while (!done) {
    done = (I == MBB.begin());

    if (!done)
      --I;

    // skip debug value
    if (I->isDebugValue())
      continue;

    if (I->hasUnmodeledSideEffects() || I->isInlineAsm() || I->isPosition() ||
        I->hasDelaySlot() || I->isBundledWithSucc())
      break;

    if (delayHasHazard(I, sawLoad, sawStore, RegDefs, RegUses)) {
      insertDefsUses(I, RegDefs, RegUses);
      continue;
    }

    return I;
  }
  return MBB.end();
}
MachineBasicBlock::iterator
Filler::findDelayInstr(MachineBasicBlock &MBB,
                       MachineBasicBlock::iterator slot) {
  SmallSet<unsigned, 32> RegDefs;
  SmallSet<unsigned, 32> RegUses;
  bool sawLoad = false;
  bool sawStore = false;
  MachineBasicBlock::iterator I = slot;

  if (slot->getOpcode() == SP::RET)
    return MBB.end();

  if (slot->getOpcode() == SP::RETL) {
    --I;
    if (I->getOpcode() != SP::RESTORErr)
      return MBB.end();
    // change retl to ret
    slot->setDesc(TII->get(SP::RET));
    return I;
  }

  // Call's delay filler can def some of call's uses.
  if (slot->isCall())
    insertCallUses(slot, RegUses);
  else
    insertDefsUses(slot, RegDefs, RegUses);

  bool done = false;

  while (!done) {
    done = (I == MBB.begin());

    if (!done)
      --I;

    // skip debug value
    if (I->isDebugValue())
      continue;

    if (I->hasUnmodeledSideEffects() || I->isInlineAsm() || I->isLabel() ||
        I->hasDelaySlot() || isDelayFiller(MBB, I))
      break;

    if (delayHasHazard(I, sawLoad, sawStore, RegDefs, RegUses)) {
      insertDefsUses(I, RegDefs, RegUses);
      continue;
    }

    return I;
  }
  return MBB.end();
}
static bool hasUnknownSideEffects(MachineBasicBlock::iterator &I) {
  if (!I->hasUnmodeledSideEffects())
    return false;

  unsigned op = I->getOpcode();
  if (op == MBlaze::ADDK   || op == MBlaze::ADDIK   ||
      op == MBlaze::ADDC   || op == MBlaze::ADDIC   ||
      op == MBlaze::ADDKC  || op == MBlaze::ADDIKC  ||
      op == MBlaze::RSUBK  || op == MBlaze::RSUBIK  ||
      op == MBlaze::RSUBC  || op == MBlaze::RSUBIC  ||
      op == MBlaze::RSUBKC || op == MBlaze::RSUBIKC)
    return false;

  return true;
}
bool SILoadStoreOptimizer::findMatchingInst(CombineInfo &CI) {
  MachineBasicBlock *MBB = CI.I->getParent();
  MachineBasicBlock::iterator E = MBB->end();
  MachineBasicBlock::iterator MBBI = CI.I;

  const unsigned Opc = CI.I->getOpcode();
  const InstClassEnum InstClass = getInstClass(Opc);

  if (InstClass == UNKNOWN) {
    return false;
  }

  const unsigned Regs = getRegs(Opc);

  unsigned AddrOpName[5] = {0};
  int AddrIdx[5];
  const MachineOperand *AddrReg[5];
  unsigned NumAddresses = 0;

  if (Regs & ADDR) {
    AddrOpName[NumAddresses++] = AMDGPU::OpName::addr;
  }

  if (Regs & SBASE) {
    AddrOpName[NumAddresses++] = AMDGPU::OpName::sbase;
  }

  if (Regs & SRSRC) {
    AddrOpName[NumAddresses++] = AMDGPU::OpName::srsrc;
  }

  if (Regs & SOFFSET) {
    AddrOpName[NumAddresses++] = AMDGPU::OpName::soffset;
  }

  if (Regs & VADDR) {
    AddrOpName[NumAddresses++] = AMDGPU::OpName::vaddr;
  }

  for (unsigned i = 0; i < NumAddresses; i++) {
    AddrIdx[i] = AMDGPU::getNamedOperandIdx(CI.I->getOpcode(), AddrOpName[i]);
    AddrReg[i] = &CI.I->getOperand(AddrIdx[i]);

    // We only ever merge operations with the same base address register, so
    // don't bother scanning forward if there are no other uses.
    if (AddrReg[i]->isReg() &&
        (TargetRegisterInfo::isPhysicalRegister(AddrReg[i]->getReg()) ||
         MRI->hasOneNonDBGUse(AddrReg[i]->getReg())))
      return false;
  }

  ++MBBI;

  DenseSet<unsigned> RegDefsToMove;
  DenseSet<unsigned> PhysRegUsesToMove;
  addDefsUsesToList(*CI.I, RegDefsToMove, PhysRegUsesToMove);

  for (; MBBI != E; ++MBBI) {
    const bool IsDS = (InstClass == DS_READ) || (InstClass == DS_WRITE);

    if ((getInstClass(MBBI->getOpcode()) != InstClass) ||
        (IsDS && (MBBI->getOpcode() != Opc))) {
      // This is not a matching DS instruction, but we can keep looking as
      // long as one of these conditions are met:
      // 1. It is safe to move I down past MBBI.
      // 2. It is safe to move MBBI down past the instruction that I will
      //    be merged into.

      if (MBBI->hasUnmodeledSideEffects()) {
        // We can't re-order this instruction with respect to other memory
        // operations, so we fail both conditions mentioned above.
        return false;
      }

      if (MBBI->mayLoadOrStore() &&
          (!memAccessesCanBeReordered(*CI.I, *MBBI, TII, AA) ||
           !canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, TII, AA))) {
        // We fail condition #1, but we may still be able to satisfy condition
        // #2. Add this instruction to the move list and then we will check
        // if condition #2 holds once we have selected the matching instruction.
        CI.InstsToMove.push_back(&*MBBI);
        addDefsUsesToList(*MBBI, RegDefsToMove, PhysRegUsesToMove);
        continue;
      }

      // When we match I with another DS instruction we will be moving I down
      // to the location of the matched instruction, so any uses of I will
      // need to be moved down as well.
      addToListsIfDependent(*MBBI, RegDefsToMove, PhysRegUsesToMove,
                            CI.InstsToMove);
      continue;
    }

    // Don't merge volatiles.
    if (MBBI->hasOrderedMemoryRef())
      return false;

    // Handle a case like
    //   DS_WRITE_B32 addr, v, idx0
    //   w = DS_READ_B32 addr, idx0
    //   DS_WRITE_B32 addr, f(w), idx1
    // where the DS_READ_B32 ends up in InstsToMove and therefore prevents
    // merging of the two writes.
    if (addToListsIfDependent(*MBBI, RegDefsToMove, PhysRegUsesToMove,
                              CI.InstsToMove))
      continue;

    bool Match = true;
    for (unsigned i = 0; i < NumAddresses; i++) {
      const MachineOperand &AddrRegNext = MBBI->getOperand(AddrIdx[i]);

      if (AddrReg[i]->isImm() || AddrRegNext.isImm()) {
        if (AddrReg[i]->isImm() != AddrRegNext.isImm() ||
            AddrReg[i]->getImm() != AddrRegNext.getImm()) {
          Match = false;
          break;
        }
        continue;
      }

      // Check same base pointer. Be careful of subregisters, which can occur
      // with vectors of pointers.
      if (AddrReg[i]->getReg() != AddrRegNext.getReg() ||
          AddrReg[i]->getSubReg() != AddrRegNext.getSubReg()) {
        Match = false;
        break;
      }
    }

    if (Match) {
      int OffsetIdx = AMDGPU::getNamedOperandIdx(CI.I->getOpcode(),
                                                 AMDGPU::OpName::offset);
      CI.Offset0 = CI.I->getOperand(OffsetIdx).getImm();
      CI.Width0 = getOpcodeWidth(*CI.I);
      CI.Offset1 = MBBI->getOperand(OffsetIdx).getImm();
      CI.Width1 = getOpcodeWidth(*MBBI);
      CI.Paired = MBBI;

      if ((CI.InstClass == DS_READ) || (CI.InstClass == DS_WRITE)) {
        CI.Offset0 &= 0xffff;
        CI.Offset1 &= 0xffff;
      } else {
        CI.GLC0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::glc)->getImm();
        CI.GLC1 = TII->getNamedOperand(*MBBI, AMDGPU::OpName::glc)->getImm();
        if (CI.InstClass != S_BUFFER_LOAD_IMM) {
          CI.SLC0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::slc)->getImm();
          CI.SLC1 = TII->getNamedOperand(*MBBI, AMDGPU::OpName::slc)->getImm();
        }
      }

      // Check both offsets fit in the reduced range.
      // We also need to go through the list of instructions that we plan to
      // move and make sure they are all safe to move down past the merged
      // instruction.
      if (widthsFit(*STM, CI) && offsetsCanBeCombined(CI))
        if (canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, TII, AA))
          return true;
    }

    // We've found a load/store that we couldn't merge for some reason.
    // We could potentially keep looking, but we'd need to make sure that
    // it was safe to move I and also all the instructions in InstsToMove
    // down past this instruction.
    // check if we can move I across MBBI and if we can move all I's users
    if (!memAccessesCanBeReordered(*CI.I, *MBBI, TII, AA) ||
        !canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, TII, AA))
      break;
  }
  return false;
}
MachineBasicBlock::iterator
SILoadStoreOptimizer::findMatchingDSInst(MachineBasicBlock::iterator I,
                                         unsigned EltSize,
                                         SmallVectorImpl<MachineInstr*> &InstsToMove) {
  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineBasicBlock::iterator MBBI = I;
  ++MBBI;

  SmallVector<const MachineOperand *, 8> DefsToMove;
  addDefsToList(*I, DefsToMove);

  for ( ; MBBI != E; ++MBBI) {
    if (MBBI->getOpcode() != I->getOpcode()) {
      // This is not a matching DS instruction, but we can keep looking as
      // long as one of these conditions are met:
      // 1. It is safe to move I down past MBBI.
      // 2. It is safe to move MBBI down past the instruction that I will
      //    be merged into.

      if (MBBI->hasUnmodeledSideEffects())
        // We can't re-order this instruction with respect to other memory
        // operations, so we fail both conditions mentioned above.
        return E;

      if (MBBI->mayLoadOrStore() &&
          !TII->areMemAccessesTriviallyDisjoint(*I, *MBBI, AA)) {
        // We fail condition #1, but we may still be able to satisfy condition
        // #2. Add this instruction to the move list and then we will check
        // if condition #2 holds once we have selected the matching instruction.
        InstsToMove.push_back(&*MBBI);
        addDefsToList(*MBBI, DefsToMove);
        continue;
      }

      // When we match I with another DS instruction we will be moving I down
      // to the location of the matched instruction, so any uses of I will
      // need to be moved down as well.
      for (const MachineOperand *Def : DefsToMove) {
        bool ReadDef = MBBI->readsVirtualRegister(Def->getReg());
        // If ReadDef is true, then there is a use of Def between I
        // and the instruction that I will potentially be merged with. We
        // will need to move this instruction after the merged instructions.
        if (ReadDef) {
          InstsToMove.push_back(&*MBBI);
          addDefsToList(*MBBI, DefsToMove);
          break;
        }
      }
      continue;
    }

    // Don't merge volatiles.
    if (MBBI->hasOrderedMemoryRef())
      return E;

    int AddrIdx = AMDGPU::getNamedOperandIdx(I->getOpcode(),
                                             AMDGPU::OpName::addr);
    const MachineOperand &AddrReg0 = I->getOperand(AddrIdx);
    const MachineOperand &AddrReg1 = MBBI->getOperand(AddrIdx);

    // Check same base pointer. Be careful of subregisters, which can occur
    // with vectors of pointers.
    if (AddrReg0.getReg() == AddrReg1.getReg() &&
        AddrReg0.getSubReg() == AddrReg1.getSubReg()) {
      int OffsetIdx = AMDGPU::getNamedOperandIdx(I->getOpcode(),
                                                 AMDGPU::OpName::offset);
      unsigned Offset0 = I->getOperand(OffsetIdx).getImm() & 0xffff;
      unsigned Offset1 = MBBI->getOperand(OffsetIdx).getImm() & 0xffff;

      // Check both offsets fit in the reduced range.
      // We also need to go through the list of instructions that we plan to
      // move and make sure they are all safe to move down past the merged
      // instruction.
      if (offsetsCanBeCombined(Offset0, Offset1, EltSize) &&
          canMoveInstsAcrossMemOp(*MBBI, InstsToMove, TII, AA))
        return MBBI;
    }

    // We've found a load/store that we couldn't merge for some reason.
    // We could potentially keep looking, but we'd need to make sure that
    // it was safe to move I and also all the instructions in InstsToMove
    // down past this instruction.
    // FIXME: This is too conservative.
    break;
  }
  return E;
}
bool SILoadStoreOptimizer::findMatchingDSInst(CombineInfo &CI) {
  MachineBasicBlock::iterator E = CI.I->getParent()->end();
  MachineBasicBlock::iterator MBBI = CI.I;
  ++MBBI;

  SmallVector<const MachineOperand *, 8> DefsToMove;
  addDefsToList(*CI.I, DefsToMove);

  for ( ; MBBI != E; ++MBBI) {
    if (MBBI->getOpcode() != CI.I->getOpcode()) {
      // This is not a matching DS instruction, but we can keep looking as
      // long as one of these conditions are met:
      // 1. It is safe to move I down past MBBI.
      // 2. It is safe to move MBBI down past the instruction that I will
      //    be merged into.

      if (MBBI->hasUnmodeledSideEffects())
        // We can't re-order this instruction with respect to other memory
        // operations, so we fail both conditions mentioned above.
        return false;

      if (MBBI->mayLoadOrStore() &&
          !memAccessesCanBeReordered(*CI.I, *MBBI, TII, AA)) {
        // We fail condition #1, but we may still be able to satisfy condition
        // #2. Add this instruction to the move list and then we will check
        // if condition #2 holds once we have selected the matching instruction.
        CI.InstsToMove.push_back(&*MBBI);
        addDefsToList(*MBBI, DefsToMove);
        continue;
      }

      // When we match I with another DS instruction we will be moving I down
      // to the location of the matched instruction, so any uses of I will
      // need to be moved down as well.
      addToListsIfDependent(*MBBI, DefsToMove, CI.InstsToMove);
      continue;
    }

    // Don't merge volatiles.
    if (MBBI->hasOrderedMemoryRef())
      return false;

    // Handle a case like
    //   DS_WRITE_B32 addr, v, idx0
    //   w = DS_READ_B32 addr, idx0
    //   DS_WRITE_B32 addr, f(w), idx1
    // where the DS_READ_B32 ends up in InstsToMove and therefore prevents
    // merging of the two writes.
    if (addToListsIfDependent(*MBBI, DefsToMove, CI.InstsToMove))
      continue;

    int AddrIdx = AMDGPU::getNamedOperandIdx(CI.I->getOpcode(),
                                             AMDGPU::OpName::addr);
    const MachineOperand &AddrReg0 = CI.I->getOperand(AddrIdx);
    const MachineOperand &AddrReg1 = MBBI->getOperand(AddrIdx);

    // Check same base pointer. Be careful of subregisters, which can occur
    // with vectors of pointers.
    if (AddrReg0.getReg() == AddrReg1.getReg() &&
        AddrReg0.getSubReg() == AddrReg1.getSubReg()) {
      int OffsetIdx = AMDGPU::getNamedOperandIdx(CI.I->getOpcode(),
                                                 AMDGPU::OpName::offset);
      CI.Offset0 = CI.I->getOperand(OffsetIdx).getImm() & 0xffff;
      CI.Offset1 = MBBI->getOperand(OffsetIdx).getImm() & 0xffff;
      CI.Paired = MBBI;

      // Check both offsets fit in the reduced range.
      // We also need to go through the list of instructions that we plan to
      // move and make sure they are all safe to move down past the merged
      // instruction.
      if (offsetsCanBeCombined(CI))
        if (canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, TII, AA))
          return true;
    }

    // We've found a load/store that we couldn't merge for some reason.
    // We could potentially keep looking, but we'd need to make sure that
    // it was safe to move I and also all the instructions in InstsToMove
    // down past this instruction.
    // check if we can move I across MBBI and if we can move all I's users
    if (!memAccessesCanBeReordered(*CI.I, *MBBI, TII, AA) ||
        !canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, TII, AA))
      break;
  }
  return false;
}