Example #1
void SIRegisterInfo::buildScratchLoadStore(MachineBasicBlock::iterator MI,
                                           unsigned LoadStoreOp,
                                           unsigned Value,
                                           unsigned ScratchRsrcReg,
                                           unsigned ScratchOffset,
                                           int64_t Offset,
                                           RegScavenger *RS) const {

  MachineBasicBlock *MBB = MI->getParent();
  const MachineFunction *MF = MBB->getParent();
  const SIInstrInfo *TII =
      static_cast<const SIInstrInfo *>(MF->getSubtarget().getInstrInfo());
  LLVMContext &Ctx = MF->getFunction()->getContext();
  DebugLoc DL = MI->getDebugLoc();
  bool IsLoad = TII->get(LoadStoreOp).mayLoad();

  bool RanOutOfSGPRs = false;
  unsigned SOffset = ScratchOffset;

  unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
  unsigned Size = NumSubRegs * 4;

  // The MUBUF immediate offset field is only 12 bits wide, so if the spill's
  // highest offset would not be encodable, fold the offset into an SGPR.
  if (!isUInt<12>(Offset + Size)) {
    SOffset = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0);
    if (SOffset == AMDGPU::NoRegister) {
      RanOutOfSGPRs = true;
      SOffset = AMDGPU::SGPR0;
    }
    BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), SOffset)
            .addReg(ScratchOffset)
            .addImm(Offset);
    Offset = 0;
  }

  if (RanOutOfSGPRs)
    Ctx.emitError("Ran out of SGPRs for spilling VGPRs");

  for (unsigned i = 0, e = NumSubRegs; i != e; ++i, Offset += 4) {
    unsigned SubReg = NumSubRegs > 1 ?
        getPhysRegSubReg(Value, &AMDGPU::VGPR_32RegClass, i) :
        Value;

    BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
      .addReg(SubReg, getDefRegState(IsLoad))
      .addReg(ScratchRsrcReg)
      .addReg(SOffset)
      .addImm(Offset)
      .addImm(0) // glc
      .addImm(0) // slc
      .addImm(0) // tfe
      .addReg(Value, RegState::Implicit | getDefRegState(IsLoad))
      .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
  }
}
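
The key constraint above is the 12-bit MUBUF immediate offset. A minimal
standalone sketch of that check, assuming plain integers rather than LLVM
types (fitsInMubufImm is a hypothetical helper, not part of the backend):

#include <cstdint>

// The spill loop emits one load/store per 4-byte subregister, so the largest
// immediate actually used is Offset + Size - 4; the code above conservatively
// requires Offset + Size itself to be a valid 12-bit unsigned value.
static bool fitsInMubufImm(int64_t Offset, unsigned NumSubRegs) {
  const int64_t Size = int64_t(NumSubRegs) * 4; // 4 bytes per subregister
  const int64_t End = Offset + Size;
  return End >= 0 && End < (int64_t(1) << 12);  // isUInt<12>(Offset + Size)
}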
Example #2
MachineInstrBuilder
MipsInstrInfo::genInstrWithNewOpc(unsigned NewOpc,
                                  MachineBasicBlock::iterator I) const {
  MachineInstrBuilder MIB;

  // Certain branches have two forms: e.g. beq $1, $zero, dest vs. beqz $1, dest.
  // Pick the zero form of the branch for readable assembly and for greater
  // branch distance in non-microMIPS mode.
  // FIXME: Certain atomic sequences on mips64 generate 32bit references to
  // Mips::ZERO, which is incorrect. This test should be updated to use
  // Subtarget.getABI().GetZeroReg() when those atomic sequences and others
  // are fixed.
  bool BranchWithZeroOperand =
      (I->isBranch() && !I->isPseudo() && I->getOperand(1).isReg() &&
       (I->getOperand(1).getReg() == Mips::ZERO ||
        I->getOperand(1).getReg() == Mips::ZERO_64));

  if (BranchWithZeroOperand) {
    switch (NewOpc) {
    case Mips::BEQC:
      NewOpc = Mips::BEQZC;
      break;
    case Mips::BNEC:
      NewOpc = Mips::BNEZC;
      break;
    case Mips::BGEC:
      NewOpc = Mips::BGEZC;
      break;
    case Mips::BLTC:
      NewOpc = Mips::BLTZC;
      break;
    }
  }

  MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), get(NewOpc));

  // For MIPSR6, JI*C requires an immediate 0 as an operand. JIALC(64) also
  // requires the removal of its %RA<imp-def> implicit operand, since copying
  // the implicit operands of the instruction we're looking at will give us
  // the correct flags.
  if (NewOpc == Mips::JIC || NewOpc == Mips::JIALC || NewOpc == Mips::JIC64 ||
      NewOpc == Mips::JIALC64) {

    if (NewOpc == Mips::JIALC || NewOpc == Mips::JIALC64)
      MIB->RemoveOperand(0);

    for (unsigned J = 0, E = I->getDesc().getNumOperands(); J < E; ++J) {
      MIB.addOperand(I->getOperand(J));
    }

    MIB.addImm(0);

  } else if (BranchWithZeroOperand) {
    // For MIPSR6 and microMIPS branches with an explicit zero operand, copy
    // the first operand, skip the zero (operand 1), and copy the rest.
    MIB.addOperand(I->getOperand(0));

    for (unsigned J = 2, E = I->getDesc().getNumOperands(); J < E; ++J) {
      MIB.addOperand(I->getOperand(J));
    }
  } else {
    // All other cases copy all other operands.
    for (unsigned J = 0, E = I->getDesc().getNumOperands(); J < E; ++J) {
      MIB.addOperand(I->getOperand(J));
    }
  }

  MIB.copyImplicitOps(*I);

  MIB.setMemRefs(I->memoperands_begin(), I->memoperands_end());
  return MIB;
}
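
The switch above only renames the opcode; the zero operand itself is dropped
by the operand-copy logic that follows. Read on its own, the rewrite is a pure
opcode mapping, sketched below (toZeroForm is a hypothetical helper, not part
of the MIPS backend):

// Maps an R6 compact branch to its zero-operand form where one exists,
// assuming only the opcodes handled by the switch above. Anything else
// passes through unchanged.
static unsigned toZeroForm(unsigned Opc) {
  switch (Opc) {
  case Mips::BEQC: return Mips::BEQZC; // beqc rs, $zero -> beqzc rs
  case Mips::BNEC: return Mips::BNEZC; // bnec rs, $zero -> bnezc rs
  case Mips::BGEC: return Mips::BGEZC; // bgec rs, $zero -> bgezc rs
  case Mips::BLTC: return Mips::BLTZC; // bltc rs, $zero -> bltzc rs
  default:         return Opc;
  }
}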
Example #3
MachineInstrBuilder
MipsInstrInfo::genInstrWithNewOpc(unsigned NewOpc,
                                  MachineBasicBlock::iterator I) const {
  MachineInstrBuilder MIB;

  // Certain branches have two forms: e.g. beq $1, $zero, dest vs. beqz $1, dest.
  // Pick the zero form of the branch for readable assembly and for greater
  // branch distance in non-microMIPS mode.
  // Additionally, MIPSR6 does not permit the use of register $zero for compact
  // branches.
  // FIXME: Certain atomic sequences on mips64 generate 32bit references to
  // Mips::ZERO, which is incorrect. This test should be updated to use
  // Subtarget.getABI().GetZeroReg() when those atomic sequences and others
  // are fixed.
  int ZeroOperandPosition = -1;
  bool BranchWithZeroOperand = false;
  if (I->isBranch() && !I->isPseudo()) {
    auto TRI = I->getParent()->getParent()->getSubtarget().getRegisterInfo();
    ZeroOperandPosition = I->findRegisterUseOperandIdx(Mips::ZERO, false, TRI);
    BranchWithZeroOperand = ZeroOperandPosition != -1;
  }

  if (BranchWithZeroOperand) {
    switch (NewOpc) {
    case Mips::BEQC:
      NewOpc = Mips::BEQZC;
      break;
    case Mips::BNEC:
      NewOpc = Mips::BNEZC;
      break;
    case Mips::BGEC:
      NewOpc = Mips::BGEZC;
      break;
    case Mips::BLTC:
      NewOpc = Mips::BLTZC;
      break;
    case Mips::BEQC64:
      NewOpc = Mips::BEQZC64;
      break;
    case Mips::BNEC64:
      NewOpc = Mips::BNEZC64;
      break;
    }
  }

  MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), get(NewOpc));

  // For MIPSR6, JI*C requires an immediate 0 as an operand. JIALC(64) also
  // requires the removal of its %RA<imp-def> implicit operand, since copying
  // the implicit operands of the instruction we're looking at will give us
  // the correct flags.
  if (NewOpc == Mips::JIC || NewOpc == Mips::JIALC || NewOpc == Mips::JIC64 ||
      NewOpc == Mips::JIALC64) {

    if (NewOpc == Mips::JIALC || NewOpc == Mips::JIALC64)
      MIB->RemoveOperand(0);

    for (unsigned J = 0, E = I->getDesc().getNumOperands(); J < E; ++J) {
      MIB.add(I->getOperand(J));
    }

    MIB.addImm(0);

  } else {
    for (unsigned J = 0, E = I->getDesc().getNumOperands(); J < E; ++J) {
      if (BranchWithZeroOperand && (unsigned)ZeroOperandPosition == J)
        continue;

      MIB.add(I->getOperand(J));
    }
  }

  MIB.copyImplicitOps(*I);

  MIB.setMemRefs(I->memoperands_begin(), I->memoperands_end());
  return MIB;
}
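
Compared with Example #2, this revision no longer assumes the zero register
sits at operand 1: findRegisterUseOperandIdx locates a use of Mips::ZERO at
any operand index (and, via the register-info overlap check, should also catch
Mips::ZERO_64), and the copy loop simply skips that index. That collapses the
three copy paths of the earlier version into two and extends the switch to the
64-bit compact branches.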
MachineBasicBlock::iterator SILoadStoreOptimizer::mergeWrite2Pair(
  MachineBasicBlock::iterator I,
  MachineBasicBlock::iterator Paired,
  unsigned EltSize) {
  MachineBasicBlock *MBB = I->getParent();

  // Be sure to use .addOperand(), and not .addReg() with these. We want to be
  // sure we preserve the subregister index and any register flags set on them.
  const MachineOperand *Addr = TII->getNamedOperand(*I, AMDGPU::OpName::addr);
  const MachineOperand *M0Reg = TII->getNamedOperand(*I, AMDGPU::OpName::m0);
  const MachineOperand *Data0 = TII->getNamedOperand(*I, AMDGPU::OpName::data0);
  const MachineOperand *Data1
    = TII->getNamedOperand(*Paired, AMDGPU::OpName::data0);

  unsigned Offset0
    = TII->getNamedOperand(*I, AMDGPU::OpName::offset)->getImm() & 0xffff;
  unsigned Offset1
    = TII->getNamedOperand(*Paired, AMDGPU::OpName::offset)->getImm() & 0xffff;

  unsigned NewOffset0 = Offset0 / EltSize;
  unsigned NewOffset1 = Offset1 / EltSize;
  unsigned Opc = (EltSize == 4) ? AMDGPU::DS_WRITE2_B32 : AMDGPU::DS_WRITE2_B64;

  // Prefer the st64 form if we can use it, even if we can fit the offset in the
  // non-st64 version. I'm not sure if there's any real reason to do this.
  bool UseST64 = (NewOffset0 % 64 == 0) && (NewOffset1 % 64 == 0);
  if (UseST64) {
    NewOffset0 /= 64;
    NewOffset1 /= 64;
    Opc = (EltSize == 4) ? AMDGPU::DS_WRITE2ST64_B32 : AMDGPU::DS_WRITE2ST64_B64;
  }

  assert(NewOffset0 != NewOffset1 && "Computed offsets must differ");
  assert(isUInt<8>(NewOffset0) && isUInt<8>(NewOffset1) &&
         "Computed offset doesn't fit in the 8-bit offset fields");

  const MCInstrDesc &Write2Desc = TII->get(Opc);
  DebugLoc DL = I->getDebugLoc();

  MachineInstrBuilder Write2
    = BuildMI(*MBB, I, DL, Write2Desc)
      .addOperand(*Addr) // addr
      .addOperand(*Data0) // data0
      .addOperand(*Data1) // data1
      .addImm(NewOffset0) // offset0
      .addImm(NewOffset1) // offset1
      .addImm(0) // gds
      .addOperand(*M0Reg) // m0
      .addMemOperand(*I->memoperands_begin())
      .addMemOperand(*Paired->memoperands_begin());

  // XXX - How do we express subregisters here?
  unsigned OrigRegs[] = { Data0->getReg(), Data1->getReg(), Addr->getReg(),
                          M0Reg->getReg() };

  LIS->RemoveMachineInstrFromMaps(I);
  LIS->RemoveMachineInstrFromMaps(Paired);
  I->eraseFromParent();
  Paired->eraseFromParent();

  // Repair the live intervals of the registers touched by the erased pair.
  LIS->repairIntervalsInRange(MBB, Write2, Write2, OrigRegs);

  DEBUG(dbgs() << "Inserted write2 inst: " << *Write2 << '\n');
  return Write2.getInstr();
}
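
The offset arithmetic above is the heart of the merge: byte offsets are
divided by the element size, the st64 forms divide by a further 64, and both
results must land in the instruction's 8-bit offset0/offset1 fields. A
standalone sketch of that encoding (encodeWrite2Offsets is a hypothetical
helper, for illustration only; the function above only asserts that the pair
fits, so its callers must have checked encodability already):

#include <cstdint>

// Returns false when the pair cannot be encoded. UseST64 selects the
// stride-64 form, whose offsets count 64-element blocks rather than
// single elements.
static bool encodeWrite2Offsets(uint64_t Off0, uint64_t Off1, unsigned EltSize,
                                uint8_t &Enc0, uint8_t &Enc1, bool &UseST64) {
  uint64_t New0 = Off0 / EltSize;
  uint64_t New1 = Off1 / EltSize;
  UseST64 = (New0 % 64 == 0) && (New1 % 64 == 0);
  if (UseST64) {
    New0 /= 64;
    New1 /= 64;
  }
  if (New0 > 0xff || New1 > 0xff || New0 == New1) // 8-bit fields, must differ
    return false;
  Enc0 = uint8_t(New0);
  Enc1 = uint8_t(New1);
  return true;
}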
MachineBasicBlock::iterator SILoadStoreOptimizer::mergeRead2Pair(
  MachineBasicBlock::iterator I,
  MachineBasicBlock::iterator Paired,
  unsigned EltSize) {
  MachineBasicBlock *MBB = I->getParent();

  // Be careful, since the addresses could be subregisters themselves in weird
  // cases, like vectors of pointers.
  const MachineOperand *AddrReg = TII->getNamedOperand(*I, AMDGPU::OpName::addr);
  const MachineOperand *M0Reg = TII->getNamedOperand(*I, AMDGPU::OpName::m0);

  unsigned DestReg0 = TII->getNamedOperand(*I, AMDGPU::OpName::vdst)->getReg();
  unsigned DestReg1
    = TII->getNamedOperand(*Paired, AMDGPU::OpName::vdst)->getReg();

  unsigned Offset0
    = TII->getNamedOperand(*I, AMDGPU::OpName::offset)->getImm() & 0xffff;
  unsigned Offset1
    = TII->getNamedOperand(*Paired, AMDGPU::OpName::offset)->getImm() & 0xffff;

  unsigned NewOffset0 = Offset0 / EltSize;
  unsigned NewOffset1 = Offset1 / EltSize;
  unsigned Opc = (EltSize == 4) ? AMDGPU::DS_READ2_B32 : AMDGPU::DS_READ2_B64;

  // Prefer the st64 form if we can use it, even if we can fit the offset in the
  // non-st64 version. I'm not sure if there's any real reason to do this.
  bool UseST64 = (NewOffset0 % 64 == 0) && (NewOffset1 % 64 == 0);
  if (UseST64) {
    NewOffset0 /= 64;
    NewOffset1 /= 64;
    Opc = (EltSize == 4) ? AMDGPU::DS_READ2ST64_B32 : AMDGPU::DS_READ2ST64_B64;
  }

  assert(NewOffset0 != NewOffset1 && "Computed offsets must differ");
  assert(isUInt<8>(NewOffset0) && isUInt<8>(NewOffset1) &&
         "Computed offset doesn't fit in the 8-bit offset fields");

  const MCInstrDesc &Read2Desc = TII->get(Opc);

  // Both results are returned in one wide register.
  const TargetRegisterClass *SuperRC
    = (EltSize == 4) ? &AMDGPU::VReg_64RegClass : &AMDGPU::VReg_128RegClass;
  unsigned DestReg = MRI->createVirtualRegister(SuperRC);

  DebugLoc DL = I->getDebugLoc();
  MachineInstrBuilder Read2
    = BuildMI(*MBB, I, DL, Read2Desc, DestReg)
      .addOperand(*AddrReg) // addr
      .addImm(NewOffset0) // offset0
      .addImm(NewOffset1) // offset1
      .addImm(0) // gds
      .addOperand(*M0Reg) // m0
      .addMemOperand(*I->memoperands_begin())
      .addMemOperand(*Paired->memoperands_begin());

  LIS->InsertMachineInstrInMaps(Read2);

  // Rewrite defs and uses of the two original destinations to subregisters of
  // the merged result.
  unsigned SubRegIdx0 = (EltSize == 4) ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
  unsigned SubRegIdx1 = (EltSize == 4) ? AMDGPU::sub1 : AMDGPU::sub2_sub3;
  updateRegDefsUses(DestReg0, DestReg, SubRegIdx0);
  updateRegDefsUses(DestReg1, DestReg, SubRegIdx1);

  LIS->RemoveMachineInstrFromMaps(I);
  LIS->RemoveMachineInstrFromMaps(Paired);
  I->eraseFromParent();
  Paired->eraseFromParent();

  LiveInterval &AddrRegLI = LIS->getInterval(AddrReg->getReg());
  LIS->shrinkToUses(&AddrRegLI);

  LiveInterval &M0RegLI = LIS->getInterval(M0Reg->getReg());
  LIS->shrinkToUses(&M0RegLI);

  // Currently m0 is treated as a register class with one member instead of an
  // implicit physical register. We are using the virtual register for the first
  // one, but we still need to update the live range of the now unused second m0
  // virtual register to avoid verifier errors.
  const MachineOperand *PairedM0Reg
    = TII->getNamedOperand(*Paired, AMDGPU::OpName::m0);
  LiveInterval &PairedM0RegLI = LIS->getInterval(PairedM0Reg->getReg());
  LIS->shrinkToUses(&PairedM0RegLI);

  LIS->getInterval(DestReg); // Create new LI

  DEBUG(dbgs() << "Inserted read2: " << *Read2 << '\n');
  return Read2.getInstr();
}
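
After the merge, each original destination becomes a subregister of the wide
result, and updateRegDefsUses rewrites the users accordingly. Roughly, in
pseudo-MIR for the EltSize == 4 case (illustrative only; operand order
abbreviated):

  %r0 = DS_READ_B32 %addr, off0 ...           ; first original load
  %r1 = DS_READ_B32 %addr, off1 ...           ; second original load
    becomes
  %dest = DS_READ2_B32 %addr, off0, off1 ...  ; %dest is a VReg_64
    with uses of %r0 rewritten to %dest.sub0 and uses of %r1 to %dest.sub1;
    for EltSize == 8 the result is a VReg_128 split as sub0_sub1 / sub2_sub3.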