MachineBasicBlock::iterator SILoadStoreOptimizer::findMatchingDSInst(
    MachineBasicBlock::iterator I, unsigned EltSize,
    SmallVectorImpl<MachineInstr *> &InstsToMove) {
  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineBasicBlock::iterator MBBI = I;
  ++MBBI;

  SmallVector<const MachineOperand *, 8> DefsToMove;
  addDefsToList(*I, DefsToMove);

  for ( ; MBBI != E; ++MBBI) {
    if (MBBI->getOpcode() != I->getOpcode()) {
      // This is not a matching DS instruction, but we can keep looking as
      // long as one of these conditions is met:
      // 1. It is safe to move I down past MBBI.
      // 2. It is safe to move MBBI down past the instruction that I will
      //    be merged into.

      if (MBBI->hasUnmodeledSideEffects())
        // We can't re-order this instruction with respect to other memory
        // operations, so we fail both conditions mentioned above.
        return E;

      if (MBBI->mayLoadOrStore() &&
          !TII->areMemAccessesTriviallyDisjoint(*I, *MBBI, AA)) {
        // We fail condition #1, but we may still be able to satisfy condition
        // #2. Add this instruction to the move list and then we will check
        // if condition #2 holds once we have selected the matching
        // instruction.
        InstsToMove.push_back(&*MBBI);
        addDefsToList(*MBBI, DefsToMove);
        continue;
      }

      // When we match I with another DS instruction we will be moving I down
      // to the location of the matched instruction, so any uses of I will
      // need to be moved down as well.
      for (const MachineOperand *Def : DefsToMove) {
        bool ReadDef = MBBI->readsVirtualRegister(Def->getReg());
        // If ReadDef is true, then there is a use of Def between I and the
        // instruction that I will potentially be merged with. We will need
        // to move this instruction after the merged instructions.
        if (ReadDef) {
          InstsToMove.push_back(&*MBBI);
          addDefsToList(*MBBI, DefsToMove);
          break;
        }
      }
      continue;
    }

    // Don't merge volatile or otherwise ordered memory accesses.
    if (MBBI->hasOrderedMemoryRef())
      return E;

    int AddrIdx = AMDGPU::getNamedOperandIdx(I->getOpcode(),
                                             AMDGPU::OpName::addr);
    const MachineOperand &AddrReg0 = I->getOperand(AddrIdx);
    const MachineOperand &AddrReg1 = MBBI->getOperand(AddrIdx);

    // Check same base pointer. Be careful of subregisters, which can occur
    // with vectors of pointers.
    if (AddrReg0.getReg() == AddrReg1.getReg() &&
        AddrReg0.getSubReg() == AddrReg1.getSubReg()) {
      int OffsetIdx = AMDGPU::getNamedOperandIdx(I->getOpcode(),
                                                 AMDGPU::OpName::offset);
      unsigned Offset0 = I->getOperand(OffsetIdx).getImm() & 0xffff;
      unsigned Offset1 = MBBI->getOperand(OffsetIdx).getImm() & 0xffff;

      // Check that both offsets fit in the reduced range. We also need to
      // go through the list of instructions that we plan to move and make
      // sure they are all safe to move down past the merged instruction.
      if (offsetsCanBeCombined(Offset0, Offset1, EltSize) &&
          canMoveInstsAcrossMemOp(*MBBI, InstsToMove, TII, AA))
        return MBBI;
    }

    // We've found a load/store that we couldn't merge for some reason.
    // We could potentially keep looking, but we'd need to make sure that
    // it was safe to move I and also all the instructions in InstsToMove
    // down past this instruction.
    // FIXME: This is too conservative.
    break;
  }
  return E;
}
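
// For reference, a minimal sketch of the offset check that
// offsetsCanBeCombined() is assumed to perform for the merged
// ds_read2/ds_write2 forms. This is an illustrative assumption, not
// necessarily the pass's exact implementation: the combined instructions
// encode each offset in units of EltSize in an 8-bit field, so the two
// offsets must be distinct, EltSize-aligned, and small enough once divided
// by EltSize. Uses llvm::isUInt<8> from llvm/Support/MathExtras.h.
static bool offsetsCanBeCombinedSketch(unsigned Offset0, unsigned Offset1,
                                       unsigned EltSize) {
  // Merging two accesses to the same address is never useful.
  if (Offset0 == Offset1)
    return false;

  // The reduced offsets are expressed in units of EltSize, so both byte
  // offsets must be EltSize-aligned.
  if (Offset0 % EltSize != 0 || Offset1 % EltSize != 0)
    return false;

  // Each element offset must fit in the instruction's 8-bit offset field.
  unsigned EltOffset0 = Offset0 / EltSize;
  unsigned EltOffset1 = Offset1 / EltSize;
  return isUInt<8>(EltOffset0) && isUInt<8>(EltOffset1);
}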