MachineBasicBlock::iterator SILoadStoreOptimizer::findMatchingDSInst(
    MachineBasicBlock::iterator I, unsigned EltSize,
    SmallVectorImpl<MachineInstr *> &InstsToMove) {
  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineBasicBlock::iterator MBBI = I;
  ++MBBI;

  SmallVector<const MachineOperand *, 8> DefsToMove;
  addDefsToList(*I, DefsToMove);

  for ( ; MBBI != E; ++MBBI) {
    if (MBBI->getOpcode() != I->getOpcode()) {
      // This is not a matching DS instruction, but we can keep looking as
      // long as one of these conditions is met:
      // 1. It is safe to move I down past MBBI.
      // 2. It is safe to move MBBI down past the instruction that I will
      //    be merged into.

      if (MBBI->hasUnmodeledSideEffects())
        // We can't re-order this instruction with respect to other memory
        // operations, so we fail both conditions mentioned above.
        return E;

      if (MBBI->mayLoadOrStore() &&
          !TII->areMemAccessesTriviallyDisjoint(*I, *MBBI, AA)) {
        // We fail condition #1, but we may still be able to satisfy condition
        // #2. Add this instruction to the move list and then we will check
        // if condition #2 holds once we have selected the matching instruction.
        InstsToMove.push_back(&*MBBI);
        addDefsToList(*MBBI, DefsToMove);
        continue;
      }

      // When we match I with another DS instruction we will be moving I down
      // to the location of the matched instruction; any uses of I will need to
      // be moved down as well.
      for (const MachineOperand *Def : DefsToMove) {
        bool ReadDef = MBBI->readsVirtualRegister(Def->getReg());
        // If ReadDef is true, then there is a use of Def between I
        // and the instruction that I will potentially be merged with. We
        // will need to move this instruction after the merged instructions.
        if (ReadDef) {
          InstsToMove.push_back(&*MBBI);
          addDefsToList(*MBBI, DefsToMove);
          break;
        }
      }
      continue;
    }

    // Don't merge volatiles.
    if (MBBI->hasOrderedMemoryRef())
      return E;

    int AddrIdx = AMDGPU::getNamedOperandIdx(I->getOpcode(),
                                             AMDGPU::OpName::addr);
    const MachineOperand &AddrReg0 = I->getOperand(AddrIdx);
    const MachineOperand &AddrReg1 = MBBI->getOperand(AddrIdx);

    // Check same base pointer. Be careful of subregisters, which can occur
    // with vectors of pointers.
    if (AddrReg0.getReg() == AddrReg1.getReg() &&
        AddrReg0.getSubReg() == AddrReg1.getSubReg()) {
      int OffsetIdx = AMDGPU::getNamedOperandIdx(I->getOpcode(),
                                                 AMDGPU::OpName::offset);
      unsigned Offset0 = I->getOperand(OffsetIdx).getImm() & 0xffff;
      unsigned Offset1 = MBBI->getOperand(OffsetIdx).getImm() & 0xffff;

      // Check both offsets fit in the reduced range.
      // We also need to go through the list of instructions that we plan to
      // move and make sure they are all safe to move down past the merged
      // instruction.
      if (offsetsCanBeCombined(Offset0, Offset1, EltSize) &&
          canMoveInstsAcrossMemOp(*MBBI, InstsToMove, TII, AA))
        return MBBI;
    }

    // We've found a load/store that we couldn't merge for some reason.
    // We could potentially keep looking, but we'd need to make sure that
    // it was safe to move I and also all the instructions in InstsToMove
    // down past this instruction.
    // FIXME: This is too conservative.
    break;
  }
  return E;
}
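Every version of the scan gates the final merge on canMoveInstsAcrossMemOp, whose body is not part of this excerpt. The sketch below is only an assumption inferred from the call sites (the ...Sketch suffix, the exact signature, and the use of ArrayRef are mine, not the in-tree implementation), and it presumes the MachineInstr, SIInstrInfo, and AliasAnalysis declarations already included by the surrounding file.

// Hypothetical sketch, not the in-tree helper: prove that every instruction
// queued in InstsToMove can be reordered past the memory operation MemOp that
// the list will be moved across.
static bool canMoveInstsAcrossMemOpSketch(MachineInstr &MemOp,
                                          ArrayRef<MachineInstr *> InstsToMove,
                                          const SIInstrInfo *TII,
                                          AliasAnalysis *AA) {
  assert(MemOp.mayLoadOrStore() && "expected a memory operation");

  for (MachineInstr *InstToMove : InstsToMove) {
    // Instructions that do not touch memory impose no ordering constraint.
    if (!InstToMove->mayLoadOrStore())
      continue;
    // Two loads can always be swapped; if a store is involved, the accesses
    // must be provably disjoint.
    if ((MemOp.mayStore() || InstToMove->mayStore()) &&
        !TII->areMemAccessesTriviallyDisjoint(MemOp, *InstToMove, AA))
      return false;
  }
  return true;
}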
bool SILoadStoreOptimizer::findMatchingInst(CombineInfo &CI) {
  MachineBasicBlock *MBB = CI.I->getParent();
  MachineBasicBlock::iterator E = MBB->end();
  MachineBasicBlock::iterator MBBI = CI.I;

  const unsigned Opc = CI.I->getOpcode();
  const InstClassEnum InstClass = getInstClass(Opc);

  if (InstClass == UNKNOWN) {
    return false;
  }

  const unsigned Regs = getRegs(Opc);

  unsigned AddrOpName[5] = {0};
  int AddrIdx[5];
  const MachineOperand *AddrReg[5];
  unsigned NumAddresses = 0;

  if (Regs & ADDR) {
    AddrOpName[NumAddresses++] = AMDGPU::OpName::addr;
  }

  if (Regs & SBASE) {
    AddrOpName[NumAddresses++] = AMDGPU::OpName::sbase;
  }

  if (Regs & SRSRC) {
    AddrOpName[NumAddresses++] = AMDGPU::OpName::srsrc;
  }

  if (Regs & SOFFSET) {
    AddrOpName[NumAddresses++] = AMDGPU::OpName::soffset;
  }

  if (Regs & VADDR) {
    AddrOpName[NumAddresses++] = AMDGPU::OpName::vaddr;
  }

  for (unsigned i = 0; i < NumAddresses; i++) {
    AddrIdx[i] = AMDGPU::getNamedOperandIdx(CI.I->getOpcode(), AddrOpName[i]);
    AddrReg[i] = &CI.I->getOperand(AddrIdx[i]);

    // We only ever merge operations with the same base address register, so
    // don't bother scanning forward if there are no other uses.
    if (AddrReg[i]->isReg() &&
        (TargetRegisterInfo::isPhysicalRegister(AddrReg[i]->getReg()) ||
         MRI->hasOneNonDBGUse(AddrReg[i]->getReg())))
      return false;
  }

  ++MBBI;

  DenseSet<unsigned> RegDefsToMove;
  DenseSet<unsigned> PhysRegUsesToMove;
  addDefsUsesToList(*CI.I, RegDefsToMove, PhysRegUsesToMove);

  for (; MBBI != E; ++MBBI) {
    const bool IsDS = (InstClass == DS_READ) || (InstClass == DS_WRITE);

    if ((getInstClass(MBBI->getOpcode()) != InstClass) ||
        (IsDS && (MBBI->getOpcode() != Opc))) {
      // This is not a matching DS instruction, but we can keep looking as
      // long as one of these conditions is met:
      // 1. It is safe to move I down past MBBI.
      // 2. It is safe to move MBBI down past the instruction that I will
      //    be merged into.

      if (MBBI->hasUnmodeledSideEffects()) {
        // We can't re-order this instruction with respect to other memory
        // operations, so we fail both conditions mentioned above.
        return false;
      }

      if (MBBI->mayLoadOrStore() &&
          (!memAccessesCanBeReordered(*CI.I, *MBBI, TII, AA) ||
           !canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, TII, AA))) {
        // We fail condition #1, but we may still be able to satisfy condition
        // #2. Add this instruction to the move list and then we will check
        // if condition #2 holds once we have selected the matching instruction.
        CI.InstsToMove.push_back(&*MBBI);
        addDefsUsesToList(*MBBI, RegDefsToMove, PhysRegUsesToMove);
        continue;
      }

      // When we match I with another DS instruction we will be moving I down
      // to the location of the matched instruction; any uses of I will need to
      // be moved down as well.
      addToListsIfDependent(*MBBI, RegDefsToMove, PhysRegUsesToMove,
                            CI.InstsToMove);
      continue;
    }

    // Don't merge volatiles.
    if (MBBI->hasOrderedMemoryRef())
      return false;

    // Handle a case like
    //   DS_WRITE_B32 addr, v, idx0
    //   w = DS_READ_B32 addr, idx0
    //   DS_WRITE_B32 addr, f(w), idx1
    // where the DS_READ_B32 ends up in InstsToMove and therefore prevents
    // merging of the two writes.
    if (addToListsIfDependent(*MBBI, RegDefsToMove, PhysRegUsesToMove,
                              CI.InstsToMove))
      continue;

    bool Match = true;
    for (unsigned i = 0; i < NumAddresses; i++) {
      const MachineOperand &AddrRegNext = MBBI->getOperand(AddrIdx[i]);

      if (AddrReg[i]->isImm() || AddrRegNext.isImm()) {
        if (AddrReg[i]->isImm() != AddrRegNext.isImm() ||
            AddrReg[i]->getImm() != AddrRegNext.getImm()) {
          Match = false;
          break;
        }
        continue;
      }

      // Check same base pointer. Be careful of subregisters, which can occur
      // with vectors of pointers.
      if (AddrReg[i]->getReg() != AddrRegNext.getReg() ||
          AddrReg[i]->getSubReg() != AddrRegNext.getSubReg()) {
        Match = false;
        break;
      }
    }

    if (Match) {
      int OffsetIdx = AMDGPU::getNamedOperandIdx(CI.I->getOpcode(),
                                                 AMDGPU::OpName::offset);
      CI.Offset0 = CI.I->getOperand(OffsetIdx).getImm();
      CI.Width0 = getOpcodeWidth(*CI.I);
      CI.Offset1 = MBBI->getOperand(OffsetIdx).getImm();
      CI.Width1 = getOpcodeWidth(*MBBI);
      CI.Paired = MBBI;

      if ((CI.InstClass == DS_READ) || (CI.InstClass == DS_WRITE)) {
        CI.Offset0 &= 0xffff;
        CI.Offset1 &= 0xffff;
      } else {
        CI.GLC0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::glc)->getImm();
        CI.GLC1 = TII->getNamedOperand(*MBBI, AMDGPU::OpName::glc)->getImm();
        if (CI.InstClass != S_BUFFER_LOAD_IMM) {
          CI.SLC0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::slc)->getImm();
          CI.SLC1 = TII->getNamedOperand(*MBBI, AMDGPU::OpName::slc)->getImm();
        }
      }

      // Check both offsets fit in the reduced range.
      // We also need to go through the list of instructions that we plan to
      // move and make sure they are all safe to move down past the merged
      // instruction.
      if (widthsFit(*STM, CI) && offsetsCanBeCombined(CI))
        if (canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, TII, AA))
          return true;
    }

    // We've found a load/store that we couldn't merge for some reason.
    // We could potentially keep looking, but we'd need to make sure that
    // it was safe to move I and also all the instructions in InstsToMove
    // down past this instruction.
    // Check whether we can move I across MBBI and whether we can move all
    // of I's users.
    if (!memAccessesCanBeReordered(*CI.I, *MBBI, TII, AA) ||
        !canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, TII, AA))
      break;
  }
  return false;
}
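The CombineInfo-based versions replace the direct areMemAccessesTriviallyDisjoint query with a memAccessesCanBeReordered helper, which is also not shown in this excerpt. As a hedged sketch only (the ...Sketch name and the exact parameter types are assumptions, not the in-tree code), such a check could look like:

// Hypothetical sketch, not the in-tree helper: two loads (RAR) can always be
// reordered; any pair involving a store (RAW, WAR, WAW) requires the target
// to prove the accesses are trivially disjoint.
static bool memAccessesCanBeReorderedSketch(MachineInstr &A, MachineInstr &B,
                                            const SIInstrInfo *TII,
                                            AliasAnalysis *AA) {
  return (!A.mayStore() && !B.mayStore()) ||
         TII->areMemAccessesTriviallyDisjoint(A, B, AA);
}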
bool SILoadStoreOptimizer::findMatchingDSInst(CombineInfo &CI) {
  MachineBasicBlock::iterator E = CI.I->getParent()->end();
  MachineBasicBlock::iterator MBBI = CI.I;
  ++MBBI;

  SmallVector<const MachineOperand *, 8> DefsToMove;
  addDefsToList(*CI.I, DefsToMove);

  for ( ; MBBI != E; ++MBBI) {
    if (MBBI->getOpcode() != CI.I->getOpcode()) {
      // This is not a matching DS instruction, but we can keep looking as
      // long as one of these conditions is met:
      // 1. It is safe to move I down past MBBI.
      // 2. It is safe to move MBBI down past the instruction that I will
      //    be merged into.

      if (MBBI->hasUnmodeledSideEffects())
        // We can't re-order this instruction with respect to other memory
        // operations, so we fail both conditions mentioned above.
        return false;

      if (MBBI->mayLoadOrStore() &&
          !memAccessesCanBeReordered(*CI.I, *MBBI, TII, AA)) {
        // We fail condition #1, but we may still be able to satisfy condition
        // #2. Add this instruction to the move list and then we will check
        // if condition #2 holds once we have selected the matching instruction.
        CI.InstsToMove.push_back(&*MBBI);
        addDefsToList(*MBBI, DefsToMove);
        continue;
      }

      // When we match I with another DS instruction we will be moving I down
      // to the location of the matched instruction; any uses of I will need to
      // be moved down as well.
      addToListsIfDependent(*MBBI, DefsToMove, CI.InstsToMove);
      continue;
    }

    // Don't merge volatiles.
    if (MBBI->hasOrderedMemoryRef())
      return false;

    // Handle a case like
    //   DS_WRITE_B32 addr, v, idx0
    //   w = DS_READ_B32 addr, idx0
    //   DS_WRITE_B32 addr, f(w), idx1
    // where the DS_READ_B32 ends up in InstsToMove and therefore prevents
    // merging of the two writes.
    if (addToListsIfDependent(*MBBI, DefsToMove, CI.InstsToMove))
      continue;

    int AddrIdx = AMDGPU::getNamedOperandIdx(CI.I->getOpcode(),
                                             AMDGPU::OpName::addr);
    const MachineOperand &AddrReg0 = CI.I->getOperand(AddrIdx);
    const MachineOperand &AddrReg1 = MBBI->getOperand(AddrIdx);

    // Check same base pointer. Be careful of subregisters, which can occur
    // with vectors of pointers.
    if (AddrReg0.getReg() == AddrReg1.getReg() &&
        AddrReg0.getSubReg() == AddrReg1.getSubReg()) {
      int OffsetIdx = AMDGPU::getNamedOperandIdx(CI.I->getOpcode(),
                                                 AMDGPU::OpName::offset);
      CI.Offset0 = CI.I->getOperand(OffsetIdx).getImm() & 0xffff;
      CI.Offset1 = MBBI->getOperand(OffsetIdx).getImm() & 0xffff;
      CI.Paired = MBBI;

      // Check both offsets fit in the reduced range.
      // We also need to go through the list of instructions that we plan to
      // move and make sure they are all safe to move down past the merged
      // instruction.
      if (offsetsCanBeCombined(CI))
        if (canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, TII, AA))
          return true;
    }

    // We've found a load/store that we couldn't merge for some reason.
    // We could potentially keep looking, but we'd need to make sure that
    // it was safe to move I and also all the instructions in InstsToMove
    // down past this instruction.
    // Check whether we can move I across MBBI and whether we can move all
    // of I's users.
    if (!memAccessesCanBeReordered(*CI.I, *MBBI, TII, AA) ||
        !canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, TII, AA))
      break;
  }
  return false;
}
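This revision folds the inline dependence loop from the oldest version into an addToListsIfDependent call. A sketch of what the operand-list variant of that helper could look like, mirroring the inline loop shown earlier, is below; the ...Sketch name and signature are assumptions, while addDefsToList is the helper already used by the code above.

// Hypothetical sketch, not the in-tree helper: if MI reads any register
// defined by an instruction already slated to move (or by I itself), MI must
// be moved below the merge point as well, and its own defs join the set that
// later instructions are checked against.
static bool addToListsIfDependentSketch(
    MachineInstr &MI, SmallVectorImpl<const MachineOperand *> &Defs,
    SmallVectorImpl<MachineInstr *> &Insts) {
  for (const MachineOperand *Def : Defs) {
    if (MI.readsVirtualRegister(Def->getReg())) {
      Insts.push_back(&MI);
      addDefsToList(MI, Defs); // helper used by the versions above
      return true;
    }
  }
  return false;
}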