MachineBasicBlock::iterator
SILoadStoreOptimizer::findMatchingDSInst(MachineBasicBlock::iterator I,
                                         unsigned EltSize) {
  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineBasicBlock::iterator MBBI = I;
  ++MBBI;

  if (MBBI->getOpcode() != I->getOpcode())
    return E;

  // Don't merge volatiles.
  if (MBBI->hasOrderedMemoryRef())
    return E;

  int AddrIdx = AMDGPU::getNamedOperandIdx(I->getOpcode(), AMDGPU::OpName::addr);
  const MachineOperand &AddrReg0 = I->getOperand(AddrIdx);
  const MachineOperand &AddrReg1 = MBBI->getOperand(AddrIdx);

  // Check same base pointer. Be careful of subregisters, which can occur with
  // vectors of pointers.
  if (AddrReg0.getReg() == AddrReg1.getReg() &&
      AddrReg0.getSubReg() == AddrReg1.getSubReg()) {
    int OffsetIdx = AMDGPU::getNamedOperandIdx(I->getOpcode(),
                                               AMDGPU::OpName::offset);
    unsigned Offset0 = I->getOperand(OffsetIdx).getImm() & 0xffff;
    unsigned Offset1 = MBBI->getOperand(OffsetIdx).getImm() & 0xffff;

    // Check both offsets fit in the reduced range.
    if (offsetsCanBeCombined(Offset0, Offset1, EltSize))
      return MBBI;
  }

  return E;
}
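
// The offsetsCanBeCombined call above decides whether a candidate pair fits
// the merged encoding. Below is a minimal, hedged sketch of that kind of test,
// assuming the combined ds_read2/ds_write2 instruction encodes each offset as
// an 8-bit count of EltSize-byte elements; the helper name carries a "Sketch"
// suffix because the exact rules in the pass may differ (later revisions also
// consider the *st64 variants).
static bool offsetsFitReducedRangeSketch(unsigned Offset0, unsigned Offset1,
                                         unsigned EltSize) {
  // Two accesses to the same offset are not worth merging.
  if (Offset0 == Offset1)
    return false;

  // Both byte offsets must be element-aligned so they can be rescaled.
  if (Offset0 % EltSize != 0 || Offset1 % EltSize != 0)
    return false;

  // After dividing by the element size, each offset must fit in the 8-bit
  // offset0/offset1 field of the combined instruction.
  return Offset0 / EltSize <= 255 && Offset1 / EltSize <= 255;
}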
bool SILoadStoreOptimizer::findMatchingInst(CombineInfo &CI) {
  MachineBasicBlock *MBB = CI.I->getParent();
  MachineBasicBlock::iterator E = MBB->end();
  MachineBasicBlock::iterator MBBI = CI.I;

  const unsigned Opc = CI.I->getOpcode();
  const InstClassEnum InstClass = getInstClass(Opc);

  if (InstClass == UNKNOWN) {
    return false;
  }

  const unsigned Regs = getRegs(Opc);

  unsigned AddrOpName[5] = {0};
  int AddrIdx[5];
  const MachineOperand *AddrReg[5];
  unsigned NumAddresses = 0;

  if (Regs & ADDR) {
    AddrOpName[NumAddresses++] = AMDGPU::OpName::addr;
  }

  if (Regs & SBASE) {
    AddrOpName[NumAddresses++] = AMDGPU::OpName::sbase;
  }

  if (Regs & SRSRC) {
    AddrOpName[NumAddresses++] = AMDGPU::OpName::srsrc;
  }

  if (Regs & SOFFSET) {
    AddrOpName[NumAddresses++] = AMDGPU::OpName::soffset;
  }

  if (Regs & VADDR) {
    AddrOpName[NumAddresses++] = AMDGPU::OpName::vaddr;
  }

  for (unsigned i = 0; i < NumAddresses; i++) {
    AddrIdx[i] = AMDGPU::getNamedOperandIdx(CI.I->getOpcode(), AddrOpName[i]);
    AddrReg[i] = &CI.I->getOperand(AddrIdx[i]);

    // We only ever merge operations with the same base address register, so
    // don't bother scanning forward if there are no other uses.
    if (AddrReg[i]->isReg() &&
        (TargetRegisterInfo::isPhysicalRegister(AddrReg[i]->getReg()) ||
         MRI->hasOneNonDBGUse(AddrReg[i]->getReg())))
      return false;
  }

  ++MBBI;

  DenseSet<unsigned> RegDefsToMove;
  DenseSet<unsigned> PhysRegUsesToMove;
  addDefsUsesToList(*CI.I, RegDefsToMove, PhysRegUsesToMove);

  for (; MBBI != E; ++MBBI) {
    const bool IsDS = (InstClass == DS_READ) || (InstClass == DS_WRITE);

    if ((getInstClass(MBBI->getOpcode()) != InstClass) ||
        (IsDS && (MBBI->getOpcode() != Opc))) {
      // This is not a matching DS instruction, but we can keep looking as
      // long as one of these conditions is met:
      // 1. It is safe to move I down past MBBI.
      // 2. It is safe to move MBBI down past the instruction that I will
      //    be merged into.

      if (MBBI->hasUnmodeledSideEffects()) {
        // We can't re-order this instruction with respect to other memory
        // operations, so we fail both conditions mentioned above.
        return false;
      }

      if (MBBI->mayLoadOrStore() &&
          (!memAccessesCanBeReordered(*CI.I, *MBBI, TII, AA) ||
           !canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, TII, AA))) {
        // We fail condition #1, but we may still be able to satisfy condition
        // #2.  Add this instruction to the move list and then we will check
        // if condition #2 holds once we have selected the matching instruction.
        CI.InstsToMove.push_back(&*MBBI);
        addDefsUsesToList(*MBBI, RegDefsToMove, PhysRegUsesToMove);
        continue;
      }

      // When we match I with another DS instruction we will be moving I down
      // to the location of the matched instruction, so any uses of I will
      // need to be moved down as well.
      addToListsIfDependent(*MBBI, RegDefsToMove, PhysRegUsesToMove,
                            CI.InstsToMove);
      continue;
    }

    // Don't merge volatiles.
    if (MBBI->hasOrderedMemoryRef())
      return false;

    // Handle a case like
    //   DS_WRITE_B32 addr, v, idx0
    //   w = DS_READ_B32 addr, idx0
    //   DS_WRITE_B32 addr, f(w), idx1
    // where the DS_READ_B32 ends up in InstsToMove and therefore prevents
    // merging of the two writes.
    if (addToListsIfDependent(*MBBI, RegDefsToMove, PhysRegUsesToMove,
                              CI.InstsToMove))
      continue;

    bool Match = true;
    for (unsigned i = 0; i < NumAddresses; i++) {
      const MachineOperand &AddrRegNext = MBBI->getOperand(AddrIdx[i]);

      if (AddrReg[i]->isImm() || AddrRegNext.isImm()) {
        if (AddrReg[i]->isImm() != AddrRegNext.isImm() ||
            AddrReg[i]->getImm() != AddrRegNext.getImm()) {
          Match = false;
          break;
        }
        continue;
      }

      // Check same base pointer. Be careful of subregisters, which can occur
      // with vectors of pointers.
      if (AddrReg[i]->getReg() != AddrRegNext.getReg() ||
          AddrReg[i]->getSubReg() != AddrRegNext.getSubReg()) {
        Match = false;
        break;
      }
    }

    if (Match) {
      int OffsetIdx =
          AMDGPU::getNamedOperandIdx(CI.I->getOpcode(), AMDGPU::OpName::offset);
      CI.Offset0 = CI.I->getOperand(OffsetIdx).getImm();
      CI.Width0 = getOpcodeWidth(*CI.I);
      CI.Offset1 = MBBI->getOperand(OffsetIdx).getImm();
      CI.Width1 = getOpcodeWidth(*MBBI);
      CI.Paired = MBBI;

      if ((CI.InstClass == DS_READ) || (CI.InstClass == DS_WRITE)) {
        CI.Offset0 &= 0xffff;
        CI.Offset1 &= 0xffff;
      } else {
        CI.GLC0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::glc)->getImm();
        CI.GLC1 = TII->getNamedOperand(*MBBI, AMDGPU::OpName::glc)->getImm();
        if (CI.InstClass != S_BUFFER_LOAD_IMM) {
          CI.SLC0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::slc)->getImm();
          CI.SLC1 = TII->getNamedOperand(*MBBI, AMDGPU::OpName::slc)->getImm();
        }
      }

      // Check both offsets fit in the reduced range.
      // We also need to go through the list of instructions that we plan to
      // move and make sure they are all safe to move down past the merged
      // instruction.
      if (widthsFit(*STM, CI) && offsetsCanBeCombined(CI))
        if (canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, TII, AA))
          return true;
    }

    // We've found a load/store that we couldn't merge for some reason.
    // We could potentially keep looking, but we'd need to make sure that
    // it was safe to move I and also all the instructions in InstsToMove
    // down past this instruction.
    // Check whether we can move I across MBBI and whether all of I's users
    // can be moved as well.
    if (!memAccessesCanBeReordered(*CI.I, *MBBI, TII, AA) ||
        !canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, TII, AA))
      break;
  }
  return false;
}
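
// findMatchingInst leans on two helpers for legality: memAccessesCanBeReordered
// answers the question for a single pair of memory operations, and
// canMoveInstsAcrossMemOp applies it to everything queued in InstsToMove. A
// hedged sketch of the pairwise rule follows; the "Sketch" suffix marks it as
// illustrative, but the underlying rule (two loads may always swap, anything
// involving a store needs a disjointness proof from the target hook or alias
// analysis) is what the calls above depend on.
static bool memAccessesCanBeReorderedSketch(MachineInstr &A, MachineInstr &B,
                                            const SIInstrInfo *TII,
                                            AliasAnalysis *AA) {
  // RAR (load/load) is always safe to reorder; RAW, WAR and WAW are only safe
  // when the two accesses are provably disjoint.
  return !(A.mayStore() || B.mayStore()) ||
         TII->areMemAccessesTriviallyDisjoint(A, B, AA);
}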
MachineBasicBlock::iterator
SILoadStoreOptimizer::findMatchingDSInst(MachineBasicBlock::iterator I,
                                  unsigned EltSize,
                                  SmallVectorImpl<MachineInstr*> &InstsToMove) {
  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineBasicBlock::iterator MBBI = I;
  ++MBBI;

  SmallVector<const MachineOperand *, 8> DefsToMove;
  addDefsToList(*I, DefsToMove);

  for ( ; MBBI != E; ++MBBI) {

    if (MBBI->getOpcode() != I->getOpcode()) {

      // This is not a matching DS instruction, but we can keep looking as
      // long as one of these conditions is met:
      // 1. It is safe to move I down past MBBI.
      // 2. It is safe to move MBBI down past the instruction that I will
      //    be merged into.

      if (MBBI->hasUnmodeledSideEffects())
        // We can't re-order this instruction with respect to other memory
        // operations, so we fail both conditions mentioned above.
        return E;

      if (MBBI->mayLoadOrStore() &&
          !TII->areMemAccessesTriviallyDisjoint(*I, *MBBI, AA)) {
        // We fail condition #1, but we may still be able to satisfy condition
        // #2.  Add this instruction to the move list and then we will check
        // if condition #2 holds once we have selected the matching instruction.
        InstsToMove.push_back(&*MBBI);
        addDefsToList(*MBBI, DefsToMove);
        continue;
      }

      // When we match I with another DS instruction we will be moving I down
      // to the location of the matched instruction, so any uses of I will
      // need to be moved down as well.
      for (const MachineOperand *Def : DefsToMove) {
        bool ReadDef = MBBI->readsVirtualRegister(Def->getReg());
        // If ReadDef is true, then there is a use of Def between I
        // and the instruction that I will potentially be merged with. We
        // will need to move this instruction after the merged instructions.
        if (ReadDef) {
          InstsToMove.push_back(&*MBBI);
          addDefsToList(*MBBI, DefsToMove);
          break;
        }
      }
      continue;
    }

    // Don't merge volatiles.
    if (MBBI->hasOrderedMemoryRef())
      return E;

    int AddrIdx = AMDGPU::getNamedOperandIdx(I->getOpcode(), AMDGPU::OpName::addr);
    const MachineOperand &AddrReg0 = I->getOperand(AddrIdx);
    const MachineOperand &AddrReg1 = MBBI->getOperand(AddrIdx);

    // Check same base pointer. Be careful of subregisters, which can occur with
    // vectors of pointers.
    if (AddrReg0.getReg() == AddrReg1.getReg() &&
        AddrReg0.getSubReg() == AddrReg1.getSubReg()) {
      int OffsetIdx = AMDGPU::getNamedOperandIdx(I->getOpcode(),
                                                 AMDGPU::OpName::offset);
      unsigned Offset0 = I->getOperand(OffsetIdx).getImm() & 0xffff;
      unsigned Offset1 = MBBI->getOperand(OffsetIdx).getImm() & 0xffff;

      // Check both offsets fit in the reduced range.
      // We also need to go through the list of instructions that we plan to
      // move and make sure they are all safe to move down past the merged
      // instruction.
      if (offsetsCanBeCombined(Offset0, Offset1, EltSize) &&
          canMoveInstsAcrossMemOp(*MBBI, InstsToMove, TII, AA))
        return MBBI;
    }

    // We've found a load/store that we couldn't merge for some reason.
    // We could potentially keep looking, but we'd need to make sure that
    // it was safe to move I and also all the instructions in InstsToMove
    // down past this instruction.
    // FIXME: This is too conservative.
    break;
  }
  return E;
}
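
// For context, a hedged sketch of how the iterator-returning variant above is
// typically driven: an optimizeBlock-style loop scans each basic block, asks
// findMatchingDSInst for a partner, and hands the pair plus the deferred
// InstsToMove list to a merge routine. mergeRead2Pair is the merge entry point
// this pass uses for DS reads, but the method name and signatures shown here
// are illustrative, not the pass's exact code.
bool SILoadStoreOptimizer::optimizeBlockSketch(MachineBasicBlock &MBB) {
  bool Modified = false;
  MachineBasicBlock::iterator E = MBB.end();
  for (MachineBasicBlock::iterator I = MBB.begin(); I != E;) {
    unsigned Opc = I->getOpcode();
    if (Opc == AMDGPU::DS_READ_B32 || Opc == AMDGPU::DS_READ_B64) {
      unsigned EltSize = (Opc == AMDGPU::DS_READ_B64) ? 8 : 4;
      SmallVector<MachineInstr *, 8> InstsToMove;
      MachineBasicBlock::iterator Match =
          findMatchingDSInst(I, EltSize, InstsToMove);
      if (Match != E) {
        Modified = true;
        I = mergeRead2Pair(I, Match, EltSize, InstsToMove);
        continue;
      }
    }
    ++I;
  }
  return Modified;
}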
Example #4
bool SILoadStoreOptimizer::findMatchingDSInst(CombineInfo &CI) {
  MachineBasicBlock::iterator E = CI.I->getParent()->end();
  MachineBasicBlock::iterator MBBI = CI.I;
  ++MBBI;

  SmallVector<const MachineOperand *, 8> DefsToMove;
  addDefsToList(*CI.I, DefsToMove);

  for ( ; MBBI != E; ++MBBI) {
    if (MBBI->getOpcode() != CI.I->getOpcode()) {
      // This is not a matching DS instruction, but we can keep looking as
      // long as one of these conditions is met:
      // 1. It is safe to move I down past MBBI.
      // 2. It is safe to move MBBI down past the instruction that I will
      //    be merged into.

      if (MBBI->hasUnmodeledSideEffects())
        // We can't re-order this instruction with respect to other memory
        // operations, so we fail both conditions mentioned above.
        return false;

      if (MBBI->mayLoadOrStore() &&
          !memAccessesCanBeReordered(*CI.I, *MBBI, TII, AA)) {
        // We fail condition #1, but we may still be able to satisfy condition
        // #2.  Add this instruction to the move list and then we will check
        // if condition #2 holds once we have selected the matching instruction.
        CI.InstsToMove.push_back(&*MBBI);
        addDefsToList(*MBBI, DefsToMove);
        continue;
      }

      // When we match I with another DS instruction we will be moving I down
      // to the location of the matched instruction, so any uses of I will
      // need to be moved down as well.
      addToListsIfDependent(*MBBI, DefsToMove, CI.InstsToMove);
      continue;
    }

    // Don't merge volatiles.
    if (MBBI->hasOrderedMemoryRef())
      return false;

    // Handle a case like
    //   DS_WRITE_B32 addr, v, idx0
    //   w = DS_READ_B32 addr, idx0
    //   DS_WRITE_B32 addr, f(w), idx1
    // where the DS_READ_B32 ends up in InstsToMove and therefore prevents
    // merging of the two writes.
    if (addToListsIfDependent(*MBBI, DefsToMove, CI.InstsToMove))
      continue;

    int AddrIdx = AMDGPU::getNamedOperandIdx(CI.I->getOpcode(),
                                             AMDGPU::OpName::addr);
    const MachineOperand &AddrReg0 = CI.I->getOperand(AddrIdx);
    const MachineOperand &AddrReg1 = MBBI->getOperand(AddrIdx);

    // Check same base pointer. Be careful of subregisters, which can occur with
    // vectors of pointers.
    if (AddrReg0.getReg() == AddrReg1.getReg() &&
        AddrReg0.getSubReg() == AddrReg1.getSubReg()) {
      int OffsetIdx = AMDGPU::getNamedOperandIdx(CI.I->getOpcode(),
                                                 AMDGPU::OpName::offset);
      CI.Offset0 = CI.I->getOperand(OffsetIdx).getImm() & 0xffff;
      CI.Offset1 = MBBI->getOperand(OffsetIdx).getImm() & 0xffff;
      CI.Paired = MBBI;

      // Check both offsets fit in the reduced range.
      // We also need to go through the list of instructions that we plan to
      // move and make sure they are all safe to move down past the merged
      // instruction.
      if (offsetsCanBeCombined(CI))
        if (canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, TII, AA))
          return true;
    }

    // We've found a load/store that we couldn't merge for some reason.
    // We could potentially keep looking, but we'd need to make sure that
    // it was safe to move I and also all the instructions in InstsToMove
    // down past this instruction.
    // Check whether we can move I across MBBI and whether all of I's users
    // can be moved as well.
    if (!memAccessesCanBeReordered(*CI.I, *MBBI, TII, AA) ||
        !canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, TII, AA))
      break;
  }
  return false;
}
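
// This CombineInfo-based variant replaces the dependency scan that the
// previous version spelled out by hand with addToListsIfDependent. Below is a
// minimal sketch of that helper, reconstructed from the inlined loop of the
// earlier variant; the real signature may differ slightly, and the "Sketch"
// suffix marks it as illustrative.
static bool
addToListsIfDependentSketch(MachineInstr &MI,
                            SmallVectorImpl<const MachineOperand *> &Defs,
                            SmallVectorImpl<MachineInstr *> &Insts) {
  for (const MachineOperand *Def : Defs) {
    // If MI reads a value defined between I and its prospective partner, MI
    // will have to be moved below the merged instruction as well, and its own
    // defs become new dependencies to track.
    if (MI.readsVirtualRegister(Def->getReg())) {
      Insts.push_back(&MI);
      addDefsToList(MI, Defs);
      return true;
    }
  }
  return false;
}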