Example No. 1
/// HandlePhysRegUse - Turn previous partial def's into read/mod/writes. Add
/// implicit defs to a machine instruction if there was an earlier def of its
/// super-register.
void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr *MI) {
  MachineInstr *LastDef = PhysRegDef[Reg];
  // If there was a previous use or a "full" def all is well.
  if (!LastDef && !PhysRegUse[Reg]) {
    // Otherwise, the last sub-register def implicitly defines this register.
    // e.g.
    // AH =
    // AL = ... <imp-def EAX>, <imp-kill AH>
    //    = AH
    // ...
    //    = EAX
    // All of the sub-registers must have been defined before the use of Reg!
    SmallSet<unsigned, 4> PartDefRegs;
    MachineInstr *LastPartialDef = FindLastPartialDef(Reg, PartDefRegs);
    // If LastPartialDef is NULL, it must be using a livein register.
    if (LastPartialDef) {
      LastPartialDef->addOperand(MachineOperand::CreateReg(Reg, true/*IsDef*/,
                                                           true/*IsImp*/));
      PhysRegDef[Reg] = LastPartialDef;
      SmallSet<unsigned, 8> Processed;
      for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
           unsigned SubReg = *SubRegs; ++SubRegs) {
        if (Processed.count(SubReg))
          continue;
        if (PartDefRegs.count(SubReg))
          continue;
        // This part of Reg was defined before the last partial def. It's killed
        // here.
        LastPartialDef->addOperand(MachineOperand::CreateReg(SubReg,
                                                             false/*IsDef*/,
                                                             true/*IsImp*/));
        PhysRegDef[SubReg] = LastPartialDef;
        for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
          Processed.insert(*SS);
      }
    }
  } else if (LastDef && !PhysRegUse[Reg] &&
             !LastDef->findRegisterDefOperand(Reg))
    // The last def defines the super-register; add an implicit def of Reg.
    LastDef->addOperand(MachineOperand::CreateReg(Reg,
                                                  true/*IsDef*/, true/*IsImp*/));

  // Remember this use.
  PhysRegUse[Reg] = MI;
  for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
       unsigned SubReg = *SubRegs; ++SubRegs)
    PhysRegUse[SubReg] = MI;
}
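
FindLastPartialDef is called above but not shown. A minimal sketch of what such a helper could look like, assuming it scans PhysRegDef over the sub-registers of Reg, picks the most recent def per DistanceMap, and records which parts of Reg that def already covers (the exact signature and PartDefRegs bookkeeping are inferred from the call site, not confirmed by this snippet):

// Hypothetical sketch: return the last instruction (by DistanceMap order)
// that partially defined Reg via one of its sub-registers, and fill
// PartDefRegs with the sub-registers that def covers.
MachineInstr *LiveVariables::FindLastPartialDef(unsigned Reg,
                                          SmallSet<unsigned, 4> &PartDefRegs) {
  unsigned LastDefReg = 0;
  unsigned LastDefDist = 0;
  MachineInstr *LastDef = 0;
  for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
       unsigned SubReg = *SubRegs; ++SubRegs) {
    MachineInstr *Def = PhysRegDef[SubReg];
    if (!Def)
      continue;
    unsigned Dist = DistanceMap[Def];
    if (Dist > LastDefDist) {
      LastDefReg = SubReg;
      LastDef = Def;
      LastDefDist = Dist;
    }
  }
  if (!LastDef)
    return 0;

  // The def of LastDefReg also covers everything below it.
  PartDefRegs.insert(LastDefReg);
  for (const unsigned *SubRegs = TRI->getSubRegisters(LastDefReg);
       unsigned SubReg = *SubRegs; ++SubRegs)
    PartDefRegs.insert(SubReg);
  return LastDef;
}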
Example No. 2
// Get the subreg type that is most likely to be coalesced
// for an SPR register that will be used in a VDUP32d pseudo.
unsigned A15SDOptimizer::getPrefSPRLane(unsigned SReg) {
  if (!TRI->isVirtualRegister(SReg))
    return getDPRLaneFromSPR(SReg);

  MachineInstr *MI = MRI->getVRegDef(SReg);
  if (!MI) return ARM::ssub_0;
  MachineOperand *MO = MI->findRegisterDefOperand(SReg);
  if (!MO) return ARM::ssub_0;
  assert(MO->isReg() && "Non-register operand found!");

  if (MI->isCopy() && usesRegClass(MI->getOperand(1),
                                    &ARM::SPRRegClass)) {
    SReg = MI->getOperand(1).getReg();
  }

  if (TargetRegisterInfo::isVirtualRegister(SReg)) {
    if (MO->getSubReg() == ARM::ssub_1) return ARM::ssub_1;
    return ARM::ssub_0;
  }
  return getDPRLaneFromSPR(SReg);
}
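
usesRegClass is not shown above. A plausible sketch, assuming it answers whether an operand's register lives in the given class (constrained to it for virtual registers, contained in it for physical ones); the exact semantics are an assumption based on the call site:

// Hypothetical sketch of the usesRegClass helper used above.
bool A15SDOptimizer::usesRegClass(MachineOperand &MO,
                                  const TargetRegisterClass *TRC) {
  if (!MO.isReg())
    return false;
  unsigned Reg = MO.getReg();
  if (TargetRegisterInfo::isVirtualRegister(Reg))
    return MRI->getRegClass(Reg)->hasSuperClassEq(TRC);
  // Physical register: just test class membership.
  return TRC->contains(Reg);
}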
Example No. 3
bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) {
  MachineInstr *LastDef = PhysRegDef[Reg];
  MachineInstr *LastUse = PhysRegUse[Reg];
  if (!LastDef && !LastUse)
    return false;

  MachineInstr *LastRefOrPartRef = LastUse ? LastUse : LastDef;
  unsigned LastRefOrPartRefDist = DistanceMap[LastRefOrPartRef];
  // The whole register is used.
  // AL =
  // AH =
  //
  //    = AX
  //    = AL, AX<imp-use, kill>
  // AX =
  //
  // Or whole register is defined, but not used at all.
  // AX<dead> =
  // ...
  // AX =
  //
  // Or whole register is defined, but only partly used.
  // AX<dead> = AL<imp-def>
  //    = AL<kill>
  // AX =
  MachineInstr *LastPartDef = nullptr;
  unsigned LastPartDefDist = 0;
  SmallSet<unsigned, 8> PartUses;
  for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
    unsigned SubReg = *SubRegs;
    MachineInstr *Def = PhysRegDef[SubReg];
    if (Def && Def != LastDef) {
      // There was a def of this sub-register in between. This is a partial
      // def, keep track of the last one.
      unsigned Dist = DistanceMap[Def];
      if (Dist > LastPartDefDist) {
        LastPartDefDist = Dist;
        LastPartDef = Def;
      }
      continue;
    }
    if (MachineInstr *Use = PhysRegUse[SubReg]) {
      for (MCSubRegIterator SS(SubReg, TRI, /*IncludeSelf=*/true); SS.isValid();
           ++SS)
        PartUses.insert(*SS);
      unsigned Dist = DistanceMap[Use];
      if (Dist > LastRefOrPartRefDist) {
        LastRefOrPartRefDist = Dist;
        LastRefOrPartRef = Use;
      }
    }
  }

  if (!PhysRegUse[Reg]) {
    // Partial uses. Mark register def dead and add implicit def of
    // sub-registers which are used.
    // EAX<dead>  = op  AL<imp-def>
    // That is, the EAX def is dead but the AL def extends past it.
    PhysRegDef[Reg]->addRegisterDead(Reg, TRI, true);
    for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
      unsigned SubReg = *SubRegs;
      if (!PartUses.count(SubReg))
        continue;
      bool NeedDef = true;
      if (PhysRegDef[Reg] == PhysRegDef[SubReg]) {
        MachineOperand *MO = PhysRegDef[Reg]->findRegisterDefOperand(SubReg);
        if (MO) {
          NeedDef = false;
          assert(!MO->isDead());
        }
      }
      if (NeedDef)
        PhysRegDef[Reg]->addOperand(MachineOperand::CreateReg(SubReg,
                                                 true/*IsDef*/, true/*IsImp*/));
      MachineInstr *LastSubRef = FindLastRefOrPartRef(SubReg);
      if (LastSubRef)
        LastSubRef->addRegisterKilled(SubReg, TRI, true);
      else {
        LastRefOrPartRef->addRegisterKilled(SubReg, TRI, true);
        for (MCSubRegIterator SS(SubReg, TRI, /*IncludeSelf=*/true);
             SS.isValid(); ++SS)
          PhysRegUse[*SS] = LastRefOrPartRef;
      }
      for (MCSubRegIterator SS(SubReg, TRI); SS.isValid(); ++SS)
        PartUses.erase(*SS);
    }
  } else if (LastRefOrPartRef == PhysRegDef[Reg] && LastRefOrPartRef != MI) {
    if (LastPartDef)
      // The last partial def kills the register.
      LastPartDef->addOperand(MachineOperand::CreateReg(Reg, false/*IsDef*/,
                                                true/*IsImp*/, true/*IsKill*/));
    else {
      MachineOperand *MO =
        LastRefOrPartRef->findRegisterDefOperand(Reg, false, TRI);
      bool NeedEC = MO->isEarlyClobber() && MO->getReg() != Reg;
      // If the last reference is the last def, then it's not used at all.
      // That is, unless we are currently processing the last reference itself.
      LastRefOrPartRef->addRegisterDead(Reg, TRI, true);
      if (NeedEC) {
        // If we are adding a subreg def and the superreg def is marked early
        // clobber, add an early clobber marker to the subreg def.
        MO = LastRefOrPartRef->findRegisterDefOperand(Reg);
        if (MO)
          MO->setIsEarlyClobber();
      }
    }
  } else
    LastRefOrPartRef->addRegisterKilled(Reg, TRI, true);
  return true;
}
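
FindLastRefOrPartRef (used when killing sub-register uses above) is not shown either. A sketch under the assumption that it mirrors the distance-based scan at the top of HandlePhysRegKill and returns the latest full or partial reference of Reg:

// Hypothetical sketch: walk Reg and its sub-registers and return whichever
// def or use comes last in DistanceMap order.
MachineInstr *LiveVariables::FindLastRefOrPartRef(unsigned Reg) {
  MachineInstr *LastDef = PhysRegDef[Reg];
  MachineInstr *LastUse = PhysRegUse[Reg];
  if (!LastDef && !LastUse)
    return nullptr;

  MachineInstr *LastRefOrPartRef = LastUse ? LastUse : LastDef;
  unsigned LastRefOrPartRefDist = DistanceMap[LastRefOrPartRef];
  for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
    unsigned SubReg = *SubRegs;
    if (MachineInstr *Use = PhysRegUse[SubReg]) {
      unsigned Dist = DistanceMap[Use];
      if (Dist > LastRefOrPartRefDist) {
        LastRefOrPartRefDist = Dist;
        LastRefOrPartRef = Use;
      }
    }
  }
  return LastRefOrPartRef;
}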
Example No. 4
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const
{
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;

  switch (MI->getOpcode()) {
  default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
  case AMDGPU::TGID_X:
    addLiveIn(MI, MF, MRI, TII, AMDGPU::T1_X);
    break;
  case AMDGPU::TGID_Y:
    addLiveIn(MI, MF, MRI, TII, AMDGPU::T1_Y);
    break;
  case AMDGPU::TGID_Z:
    addLiveIn(MI, MF, MRI, TII, AMDGPU::T1_Z);
    break;
  case AMDGPU::TIDIG_X:
    addLiveIn(MI, MF, MRI, TII, AMDGPU::T0_X);
    break;
  case AMDGPU::TIDIG_Y:
    addLiveIn(MI, MF, MRI, TII, AMDGPU::T0_Y);
    break;
  case AMDGPU::TIDIG_Z:
    addLiveIn(MI, MF, MRI, TII, AMDGPU::T0_Z);
    break;
  case AMDGPU::NGROUPS_X:
    lowerImplicitParameter(MI, *BB, MRI, 0);
    break;
  case AMDGPU::NGROUPS_Y:
    lowerImplicitParameter(MI, *BB, MRI, 1);
    break;
  case AMDGPU::NGROUPS_Z:
    lowerImplicitParameter(MI, *BB, MRI, 2);
    break;
  case AMDGPU::GLOBAL_SIZE_X:
    lowerImplicitParameter(MI, *BB, MRI, 3);
    break;
  case AMDGPU::GLOBAL_SIZE_Y:
    lowerImplicitParameter(MI, *BB, MRI, 4);
    break;
  case AMDGPU::GLOBAL_SIZE_Z:
    lowerImplicitParameter(MI, *BB, MRI, 5);
    break;
  case AMDGPU::LOCAL_SIZE_X:
    lowerImplicitParameter(MI, *BB, MRI, 6);
    break;
  case AMDGPU::LOCAL_SIZE_Y:
    lowerImplicitParameter(MI, *BB, MRI, 7);
    break;
  case AMDGPU::LOCAL_SIZE_Z:
    lowerImplicitParameter(MI, *BB, MRI, 8);
    break;

  case AMDGPU::CLAMP_R600:
    MI->getOperand(0).addTargetFlag(MO_FLAG_CLAMP);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
           .addOperand(MI->getOperand(0))
           .addOperand(MI->getOperand(1));
    break;

  case AMDGPU::FABS_R600:
    MI->getOperand(1).addTargetFlag(MO_FLAG_ABS);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
           .addOperand(MI->getOperand(0))
           .addOperand(MI->getOperand(1));
    break;

  case AMDGPU::FNEG_R600:
    MI->getOperand(1).addTargetFlag(MO_FLAG_NEG);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1));
    break;

  case AMDGPU::R600_LOAD_CONST:
    {
      int64_t RegIndex = MI->getOperand(1).getImm();
      unsigned ConstantReg = AMDGPU::R600_CReg32RegClass.getRegister(RegIndex);
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::COPY))
                  .addOperand(MI->getOperand(0))
                  .addReg(ConstantReg);
      break;
    }

  case AMDGPU::LOAD_INPUT:
    {
      int64_t RegIndex = MI->getOperand(1).getImm();
      addLiveIn(MI, MF, MRI, TII,
                AMDGPU::R600_TReg32RegClass.getRegister(RegIndex));
      break;
    }

  case AMDGPU::MASK_WRITE:
    {
      unsigned maskedRegister = MI->getOperand(0).getReg();
      assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
      MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
      MachineOperand * def = defInstr->findRegisterDefOperand(maskedRegister);
      def->addTargetFlag(MO_FLAG_MASK);
      // Return early so the instruction is not erased
      return BB;
    }

  case AMDGPU::RAT_WRITE_CACHELESS_eg:
    {
      // Convert to DWORD address
      unsigned NewAddr = MRI.createVirtualRegister(
                                             AMDGPU::R600_TReg32_XRegisterClass);
      unsigned ShiftValue = MRI.createVirtualRegister(
                                              AMDGPU::R600_TReg32RegisterClass);

      // XXX In theory, we should be able to pass ShiftValue directly to
      // the LSHR_eg instruction as an inline literal, but I tried doing it
      // this way and it didn't produce the correct results.
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV), ShiftValue)
              .addReg(AMDGPU::ALU_LITERAL_X)
              .addImm(2);
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::LSHR_eg), NewAddr)
              .addOperand(MI->getOperand(1))
              .addReg(ShiftValue);
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
              .addOperand(MI->getOperand(0))
              .addReg(NewAddr);
      break;
    }

  case AMDGPU::STORE_OUTPUT:
    {
      int64_t OutputIndex = MI->getOperand(1).getImm();
      unsigned OutputReg = AMDGPU::R600_TReg32RegClass.getRegister(OutputIndex);

      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::COPY), OutputReg)
                  .addOperand(MI->getOperand(0));

      if (!MRI.isLiveOut(OutputReg)) {
        MRI.addLiveOut(OutputReg);
      }
      break;
    }

  case AMDGPU::RESERVE_REG:
    {
      R600MachineFunctionInfo * MFI = MF->getInfo<R600MachineFunctionInfo>();
      int64_t ReservedIndex = MI->getOperand(0).getImm();
      unsigned ReservedReg =
                          AMDGPU::R600_TReg32RegClass.getRegister(ReservedIndex);
      MFI->ReservedRegs.push_back(ReservedReg);
      break;
    }

  case AMDGPU::TXD:
    {
      unsigned t0 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);
      unsigned t1 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);

      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0)
              .addOperand(MI->getOperand(3))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1)
              .addOperand(MI->getOperand(2))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
              .addOperand(MI->getOperand(0))
              .addOperand(MI->getOperand(1))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5))
              .addReg(t0, RegState::Implicit)
              .addReg(t1, RegState::Implicit);
      break;
    }
  case AMDGPU::TXD_SHADOW:
    {
      unsigned t0 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);
      unsigned t1 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);

      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0)
              .addOperand(MI->getOperand(3))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1)
              .addOperand(MI->getOperand(2))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
              .addOperand(MI->getOperand(0))
              .addOperand(MI->getOperand(1))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5))
              .addReg(t0, RegState::Implicit)
              .addReg(t1, RegState::Implicit);
      break;
    }
  }

  MI->eraseFromParent();
  return BB;
}
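
Neither addLiveIn nor lowerImplicitParameter is shown. A hypothetical sketch of lowerImplicitParameter, assuming implicit kernel parameters sit at consecutive dword offsets and are fetched through an eg-family vertex read; the VTX_READ_PARAM_i32_eg opcode and the times-four byte scaling are assumptions, not confirmed by this snippet:

// Hypothetical sketch: materialize the parameter's byte offset into a
// pointer register, then fetch the dword through a vertex read.
MachineBasicBlock::iterator R600TargetLowering::lowerImplicitParameter(
    MachineInstr *MI, MachineBasicBlock &BB, MachineRegisterInfo &MRI,
    unsigned DwordOffset) const {
  MachineBasicBlock::iterator I = *MI;
  unsigned PtrReg = MRI.createVirtualRegister(
                                           AMDGPU::R600_TReg32_XRegisterClass);
  MRI.setRegClass(MI->getOperand(0).getReg(),
                  AMDGPU::R600_TReg32_XRegisterClass);

  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::MOV), PtrReg)
          .addReg(AMDGPU::ALU_LITERAL_X)
          .addImm(DwordOffset * 4); // byte offset of the implicit parameter

  return BuildMI(BB, I, BB.findDebugLoc(I),
                 TII->get(AMDGPU::VTX_READ_PARAM_i32_eg))
          .addOperand(MI->getOperand(0))
          .addReg(PtrReg)
          .addImm(0);
}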
Example No. 5
bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
  bool Modified = false;

  // Yes, CPSR could be livein.
  bool LiveCPSR = MBB.isLiveIn(ARM::CPSR);
  MachineInstr *BundleMI = 0;

  CPSRDef = 0;
  HighLatencyCPSR = false;

  // Check predecessors for the latest CPSRDef.
  for (MachineBasicBlock::pred_iterator
       I = MBB.pred_begin(), E = MBB.pred_end(); I != E; ++I) {
    const MBBInfo &PInfo = BlockInfo[(*I)->getNumber()];
    if (!PInfo.Visited) {
      // Since blocks are visited in RPO, this must be a back-edge.
      continue;
    }
    if (PInfo.HighLatencyCPSR) {
      HighLatencyCPSR = true;
      break;
    }
  }

  // If this BB loops back to itself, conservatively avoid narrowing the
  // first instruction that does partial flag update.
  bool IsSelfLoop = MBB.isSuccessor(&MBB);
  MachineBasicBlock::instr_iterator MII = MBB.instr_begin(),
                                    E = MBB.instr_end();
  MachineBasicBlock::instr_iterator NextMII;
  for (; MII != E; MII = NextMII) {
    NextMII = llvm::next(MII);

    MachineInstr *MI = &*MII;
    if (MI->isBundle()) {
      BundleMI = MI;
      continue;
    }
    if (MI->isDebugValue())
      continue;

    LiveCPSR = UpdateCPSRUse(*MI, LiveCPSR);

    // Does NextMII belong to the same bundle as MI?
    bool NextInSameBundle = NextMII != E && NextMII->isBundledWithPred();

    if (ReduceMI(MBB, MI, LiveCPSR, IsSelfLoop)) {
      Modified = true;
      MachineBasicBlock::instr_iterator I = prior(NextMII);
      MI = &*I;
      // Removing and reinserting the first instruction in a bundle will break
      // up the bundle. Fix the bundling if it was broken.
      if (NextInSameBundle && !NextMII->isBundledWithPred())
        NextMII->bundleWithPred();
    }

    if (!NextInSameBundle && MI->isInsideBundle()) {
      // FIXME: Since post-ra scheduler operates on bundles, the CPSR kill
      // marker is only on the BUNDLE instruction. Process the BUNDLE
      // instruction as we finish with the bundled instruction to work around
      // the inconsistency.
      if (BundleMI->killsRegister(ARM::CPSR))
        LiveCPSR = false;
      MachineOperand *MO = BundleMI->findRegisterDefOperand(ARM::CPSR);
      if (MO && !MO->isDead())
        LiveCPSR = true;
    }

    bool DefCPSR = false;
    LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR, DefCPSR);
    if (MI->isCall()) {
      // Calls don't really set CPSR.
      CPSRDef = 0;
      HighLatencyCPSR = false;
      IsSelfLoop = false;
    } else if (DefCPSR) {
      // This is the last CPSR defining instruction.
      CPSRDef = MI;
      HighLatencyCPSR = isHighLatencyCPSR(CPSRDef);
      IsSelfLoop = false;
    }
  }

  MBBInfo &Info = BlockInfo[MBB.getNumber()];
  Info.HighLatencyCPSR = HighLatencyCPSR;
  Info.Visited = true;
  return Modified;
}
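
isHighLatencyCPSR is referenced but not defined here. A minimal sketch, assuming the point is to flag CPSR defs whose flag result arrives late enough that narrowing a dependent instruction would hurt; the opcode list is an assumption:

// Hypothetical sketch: flag-setting instructions whose CPSR result is slow.
static bool isHighLatencyCPSR(MachineInstr *Def) {
  switch (Def->getOpcode()) {
  case ARM::FMSTAT:
  case ARM::tMUL:
    return true;
  }
  return false;
}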
Example No. 6
bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
  bool Modified = false;

  // Yes, CPSR could be livein.
  bool LiveCPSR = MBB.isLiveIn(ARM::CPSR);
  MachineInstr *CPSRDef = 0;
  MachineInstr *BundleMI = 0;

  // If this BB loops back to itself, conservatively avoid narrowing the
  // first instruction that does partial flag update.
  bool IsSelfLoop = MBB.isSuccessor(&MBB);
  MachineBasicBlock::instr_iterator MII = MBB.instr_begin(), E = MBB.instr_end();
  MachineBasicBlock::instr_iterator NextMII;
  for (; MII != E; MII = NextMII) {
    NextMII = llvm::next(MII);

    MachineInstr *MI = &*MII;
    if (MI->isBundle()) {
      BundleMI = MI;
      continue;
    }

    LiveCPSR = UpdateCPSRUse(*MI, LiveCPSR);

    unsigned Opcode = MI->getOpcode();
    DenseMap<unsigned, unsigned>::iterator OPI = ReduceOpcodeMap.find(Opcode);
    if (OPI != ReduceOpcodeMap.end()) {
      const ReduceEntry &Entry = ReduceTable[OPI->second];
      // Ignore "special" cases for now.
      if (Entry.Special) {
        if (ReduceSpecial(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) {
          Modified = true;
          MachineBasicBlock::instr_iterator I = prior(NextMII);
          MI = &*I;
        }
        goto ProcessNext;
      }

      // Try to transform to a 16-bit two-address instruction.
      if (Entry.NarrowOpc2 &&
          ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) {
        Modified = true;
        MachineBasicBlock::instr_iterator I = prior(NextMII);
        MI = &*I;
        goto ProcessNext;
      }

      // Try to transform to a 16-bit non-two-address instruction.
      if (Entry.NarrowOpc1 &&
          ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) {
        Modified = true;
        MachineBasicBlock::instr_iterator I = prior(NextMII);
        MI = &*I;
      }
    }

  ProcessNext:
    if (NextMII != E && MI->isInsideBundle() && !NextMII->isInsideBundle()) {
      // FIXME: Since post-ra scheduler operates on bundles, the CPSR kill
      // marker is only on the BUNDLE instruction. Process the BUNDLE
      // instruction as we finish with the bundled instruction to work around
      // the inconsistency.
      if (BundleMI->killsRegister(ARM::CPSR))
        LiveCPSR = false;
      MachineOperand *MO = BundleMI->findRegisterDefOperand(ARM::CPSR);
      if (MO && !MO->isDead())
        LiveCPSR = true;
    }

    bool DefCPSR = false;
    LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR, DefCPSR);
    if (MI->isCall()) {
      // Calls don't really set CPSR.
      CPSRDef = 0;
      IsSelfLoop = false;
    } else if (DefCPSR) {
      // This is the last CPSR defining instruction.
      CPSRDef = MI;
      IsSelfLoop = false;
    }
  }

  return Modified;
}
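
Both versions of ReduceMBB lean on UpdateCPSRUse and UpdateCPSRDef, which are not shown. Hedged sketches, assuming they walk the operand list and fold kill/dead flags into the liveness bit; the exact undef handling is an assumption based on how LiveCPSR and DefCPSR are consumed in the loops above:

// Hypothetical sketch: a kill of CPSR ends its live range at this use.
static bool UpdateCPSRUse(MachineInstr &MI, bool LiveCPSR) {
  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI.getOperand(i);
    if (!MO.isReg() || MO.isUndef() || MO.isDef())
      continue;
    if (MO.getReg() != ARM::CPSR)
      continue;
    if (MO.isKill())
      return false;
  }
  return LiveCPSR;
}

// Hypothetical sketch: a non-dead def makes CPSR live; DefCPSR reports that
// this instruction defines CPSR at all, dead or not.
static bool UpdateCPSRDef(MachineInstr &MI, bool LiveCPSR, bool &DefCPSR) {
  bool HasDef = false;
  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI.getOperand(i);
    if (!MO.isReg() || MO.isUndef() || MO.isUse())
      continue;
    if (MO.getReg() != ARM::CPSR)
      continue;
    DefCPSR = true;
    if (!MO.isDead())
      HasDef = true;
  }
  return HasDef || LiveCPSR;
}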