static bool memAccessesCanBeReordered(MachineBasicBlock::iterator A,
                                      MachineBasicBlock::iterator B,
                                      AliasAnalysis *AA) {
  // RAW or WAR - cannot reorder
  // WAW - cannot reorder
  // RAR - safe to reorder
  return !(A->mayStore() || B->mayStore()) || !A->mayAlias(AA, *B, true);
}
static bool memAccessesCanBeReordered(MachineBasicBlock::iterator A,
                                      MachineBasicBlock::iterator B,
                                      const SIInstrInfo *TII,
                                      AliasAnalysis *AA) {
  // RAW or WAR - cannot reorder
  // WAW - cannot reorder
  // RAR - safe to reorder
  return !(A->mayStore() || B->mayStore()) ||
         TII->areMemAccessesTriviallyDisjoint(*A, *B, AA);
}
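
Both overloads answer the same question from different sources of aliasing information. As a minimal sketch of how such a predicate is typically consumed (the canReorderAcross helper below is hypothetical, not part of either pass), a caller walks the instructions between two candidate memory operations and bails out if any of them cannot be reordered with the operation being moved:

static bool canReorderAcross(MachineBasicBlock::iterator Begin,
                             MachineBasicBlock::iterator End,
                             MachineBasicBlock::iterator MemOp,
                             AliasAnalysis *AA) {
  // Hypothetical helper: MemOp may only move across [Begin, End) if it can
  // be reordered with every memory access in that range.
  for (MachineBasicBlock::iterator I = Begin; I != End; ++I) {
    if (!I->mayLoad() && !I->mayStore())
      continue; // Non-memory instructions cannot conflict here.
    if (!memAccessesCanBeReordered(MemOp, I, AA))
      return false;
  }
  return true;
}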
bool Filler::delayHasHazard(MachineBasicBlock::iterator candidate,
                            bool &sawLoad,
                            bool &sawStore,
                            SmallSet<unsigned, 32> &RegDefs,
                            SmallSet<unsigned, 32> &RegUses)
{
  if (candidate->isImplicitDef() || candidate->isKill())
    return true;

  if (candidate->mayLoad()) {
    sawLoad = true;
    if (sawStore)
      return true;
  }

  if (candidate->mayStore()) {
    if (sawStore)
      return true;
    sawStore = true;
    if (sawLoad)
      return true;
  }

  for (unsigned i = 0, e = candidate->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = candidate->getOperand(i);
    if (!MO.isReg())
      continue; // skip

    unsigned Reg = MO.getReg();

    if (MO.isDef()) {
      // check whether Reg is defined or used before delay slot.
      if (IsRegInSet(RegDefs, Reg) || IsRegInSet(RegUses, Reg))
        return true;
    }
    if (MO.isUse()) {
      // check whether Reg is defined before delay slot.
      if (IsRegInSet(RegDefs, Reg))
        return true;
    }
  }

  unsigned Opcode = candidate->getOpcode();
  // LD and LDD may have NOPs inserted afterwards in the case of some LEON
  // processors, so we can't use the delay slot if this feature is switched on.
  if (Subtarget->insertNOPLoad() &&
      Opcode >= SP::LDDArr && Opcode <= SP::LDrr)
    return true;

  // Same as above for FDIV and FSQRT on some LEON processors.
  if (Subtarget->fixAllFDIVSQRT() &&
      Opcode >= SP::FDIVD && Opcode <= SP::FSQRTD)
    return true;

  return false;
}
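
The IsRegInSet helper used above is not shown in this snippet. A sketch of its usual shape in the Sparc delay-slot filler (TRI, a pointer to the target's register info, is assumed here): a register counts as "in the set" if it or any register aliasing it is present.

bool Filler::IsRegInSet(SmallSet<unsigned, 32> &RegSet, unsigned Reg) {
  // Check Reg and all of its aliased registers.
  for (MCRegAliasIterator AI(Reg, TRI, /*IncludeSelf=*/true); AI.isValid();
       ++AI)
    if (RegSet.count(*AI))
      return true;
  return false;
}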
Example #4
// This differs from buildSpillLoadStore by only scavenging a VGPR. It does not
// need to handle the case where an SGPR may need to be spilled while spilling.
static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII,
                                      MachineFrameInfo &MFI,
                                      MachineBasicBlock::iterator MI,
                                      int Index,
                                      int64_t Offset) {
  MachineBasicBlock *MBB = MI->getParent();
  const DebugLoc &DL = MI->getDebugLoc();
  bool IsStore = MI->mayStore();

  unsigned Opc = MI->getOpcode();
  int LoadStoreOp = IsStore ?
    getOffsetMUBUFStore(Opc) : getOffsetMUBUFLoad(Opc);
  if (LoadStoreOp == -1)
    return false;

  unsigned Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata)->getReg();

  BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
    .addReg(Reg, getDefRegState(!IsStore))
    .addOperand(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
    .addOperand(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
    .addImm(Offset)
    .addImm(0) // glc
    .addImm(0) // slc
    .addImm(0) // tfe
    .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
  return true;
}
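
The getOffsetMUBUFStore / getOffsetMUBUFLoad helpers referenced above map a register-offset (OFFEN) MUBUF opcode to its immediate-offset (OFFSET) form, returning -1 when no such form exists. A truncated sketch of the store side (the full table covers more access widths):

static int getOffsetMUBUFStore(unsigned Opc) {
  switch (Opc) {
  case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
    return AMDGPU::BUFFER_STORE_DWORD_OFFSET;
  case AMDGPU::BUFFER_STORE_BYTE_OFFEN:
    return AMDGPU::BUFFER_STORE_BYTE_OFFSET;
  case AMDGPU::BUFFER_STORE_SHORT_OFFEN:
    return AMDGPU::BUFFER_STORE_SHORT_OFFSET;
  default:
    // No immediate-offset form for this opcode.
    return -1;
  }
}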
Example #5
bool SIMemoryLegalizer::expandStore(const SIMemOpInfo &MOI,
                                    MachineBasicBlock::iterator &MI) {
  assert(!MI->mayLoad() && MI->mayStore());

  bool Changed = false;

  if (MOI.isAtomic()) {
    if (MOI.getOrdering() == AtomicOrdering::Release ||
        MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
      Changed |= CC->insertWait(MI, MOI.getScope(),
                                MOI.getOrderingAddrSpace(),
                                SIMemOp::LOAD | SIMemOp::STORE,
                                MOI.getIsCrossAddressSpaceOrdering(),
                                Position::BEFORE);

    return Changed;
  }

  // Atomic instructions do not have the nontemporal attribute.
  if (MOI.isNonTemporal()) {
    Changed |= CC->enableNonTemporal(MI);
    return Changed;
  }

  return Changed;
}
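
For context, a simplified sketch of how the legalizer's top-level loop might reach expandStore (shape assumed; MOA is the pass's SIMemOpAccess instance):

bool SIMemoryLegalizer::runOnMachineFunction(MachineFunction &MF) {
  bool Changed = false;
  for (auto &MBB : MF) {
    for (auto MI = MBB.begin(); MI != MBB.end(); ++MI) {
      // Only instructions flagged as possibly atomic are of interest.
      if (!(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic))
        continue;
      if (const auto &MOI = MOA.getStoreInfo(MI))
        Changed |= expandStore(MOI.getValue(), MI);
      // ... loads, fences, and cmpxchg/rmw are dispatched the same way.
    }
  }
  return Changed;
}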
Example #6
bool SIGfx6CacheControl::enableLoadCacheBypass(
    const MachineBasicBlock::iterator &MI,
    SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  assert(MI->mayLoad() && !MI->mayStore());
  bool Changed = false;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    /// TODO: Do not set glc for rmw atomic operations as they
    /// implicitly bypass the L1 cache.

    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      Changed |= enableGLCBit(MI);
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // No cache to bypass.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  /// The scratch address space does not need the global memory caches
  /// to be bypassed as all memory operations by the same thread are
  /// sequentially consistent, and no other thread can access scratch
  /// memory.

  /// Other address spaces do not have a cache.

  return Changed;
}
Example #7
X86CallFrameOptimization::InstClassification
X86CallFrameOptimization::classifyInstruction(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    const X86RegisterInfo &RegInfo, DenseSet<unsigned int> &UsedRegs) {
  if (MI == MBB.end())
    return Exit;

  // The instructions we actually care about are movs onto the stack
  int Opcode = MI->getOpcode();
  if (Opcode == X86::MOV32mi   || Opcode == X86::MOV32mr ||
      Opcode == X86::MOV64mi32 || Opcode == X86::MOV64mr)
    return Convert;

  // Not all calling conventions have only stack MOVs between the stack
  // adjust and the call.

  // We want to tolerate other instructions, to cover more cases.
  // In particular:
  // a) PCrel calls, where we expect an additional COPY of the basereg.
  // b) Passing frame-index addresses.
  // c) Calling conventions that have inreg parameters. These generate
  //    both copies and movs into registers.
  // To avoid creating lots of special cases, allow any instruction
  // that does not write into memory, does not def or use the stack
  // pointer, and does not def any register that was used by a preceding
  // push.
  // (Reading from memory is allowed, even if referenced through a
  // frame index, since these will get adjusted properly in PEI)

  // The reason for the last condition is that the pushes can't replace
  // the movs in place, because the order must be reversed.
  // So if we have a MOV32mr that uses EDX, then an instruction that defs
  // EDX, and then the call, after the transformation the push will use
  // the modified version of EDX, and not the original one.
  // Since we are still in SSA form at this point, we only need to
  // make sure we don't clobber any *physical* registers that were
  // used by an earlier mov that will become a push.

  if (MI->isCall() || MI->mayStore())
    return Exit;

  for (const MachineOperand &MO : MI->operands()) {
    if (!MO.isReg())
      continue;
    unsigned int Reg = MO.getReg();
    if (!RegInfo.isPhysicalRegister(Reg))
      continue;
    if (RegInfo.regsOverlap(Reg, RegInfo.getStackRegister()))
      return Exit;
    if (MO.isDef()) {
      for (unsigned int U : UsedRegs)
        if (RegInfo.regsOverlap(Reg, U))
          return Exit;
    }
  }

  return Skip;
}
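
A simplified sketch of the loop that consumes these classifications (names like FrameSetup and MovVector are illustrative; the real collection logic in the pass does more bookkeeping): iterate from the stack adjustment toward the call, recording Convert instructions, stepping over Skip ones, and stopping at Exit.

DenseSet<unsigned int> UsedRegs;
SmallVector<MachineBasicBlock::iterator, 8> MovVector;
MachineBasicBlock::iterator I = FrameSetup; // Assumed: the stack-adjust instruction.
InstClassification C;
while ((C = classifyInstruction(MBB, I, RegInfo, UsedRegs)) != Exit) {
  if (C == Skip) {
    ++I;
    continue;
  }
  // C == Convert: remember the MOV so it can become a push later, and
  // record the physical registers it read.
  MovVector.push_back(I);
  for (const MachineOperand &MO : I->uses())
    if (MO.isReg() && RegInfo.isPhysicalRegister(MO.getReg()))
      UsedRegs.insert(MO.getReg());
  ++I;
}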
Example #8
bool SIGfx6CacheControl::enableNonTemporal(
    const MachineBasicBlock::iterator &MI) const {
  assert(MI->mayLoad() ^ MI->mayStore());
  bool Changed = false;

  /// TODO: Do not enableGLCBit if rmw atomic.
  Changed |= enableGLCBit(MI);
  Changed |= enableSLCBit(MI);

  return Changed;
}
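
The enableGLCBit / enableSLCBit helpers are not shown in these snippets. A plausible sketch of one of them, assuming the usual AMDGPU named-operand machinery: find the instruction's glc immediate operand and set it if it is present and not already set.

bool enableGLCBit(const MachineBasicBlock::iterator &MI) {
  // Locate the glc immediate operand, if this instruction has one.
  int Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::glc);
  if (Idx == -1)
    return false; // No glc bit on this instruction.
  MachineOperand &Bit = MI->getOperand(Idx);
  if (Bit.getImm() != 0)
    return false; // Already set; nothing changed.
  Bit.setImm(1);
  return true;
}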
Example #9
Optional<SIMemOpInfo> SIMemOpAccess::getAtomicCmpxchgOrRmwInfo(
    const MachineBasicBlock::iterator &MI) const {
  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);

  if (!(MI->mayLoad() && MI->mayStore()))
    return None;

  // Be conservative if there are no memory operands.
  if (MI->getNumMemOperands() == 0)
    return SIMemOpInfo();

  return constructFromMIWithMMO(MI);
}
Example #10
bool Filler::delayHasHazard(MachineBasicBlock::iterator candidate,
                            bool &sawLoad,
                            bool &sawStore,
                            SmallSet<unsigned, 32> &RegDefs,
                            SmallSet<unsigned, 32> &RegUses) {
  if (candidate->isImplicitDef() || candidate->isKill())
    return true;

  // When moving a candidate into the delay slot, a load cannot be moved
  // past a store, and a store cannot be moved past another store or a load.
  if (candidate->mayLoad()) {
    if (sawStore)
      return true;
    sawLoad = true;
  }

  if (candidate->mayStore()) {
    if (sawStore)
      return true;
    sawStore = true;
    if (sawLoad)
      return true;
  }

  assert((!candidate->isCall() && !candidate->isReturn()) &&
         "Cannot put calls or returns in delay slot.");

  for (unsigned i = 0, e = candidate->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = candidate->getOperand(i);
    unsigned Reg;

    if (!MO.isReg() || !(Reg = MO.getReg()))
      continue; // skip

    if (MO.isDef()) {
      // check whether Reg is defined or used before delay slot.
      if (IsRegInSet(RegDefs, Reg) || IsRegInSet(RegUses, Reg))
        return true;
    }
    if (MO.isUse()) {
      // check whether Reg is defined before delay slot.
      if (IsRegInSet(RegDefs, Reg))
        return true;
    }
  }
  return false;
}
Example #11
bool Filler::delayHasHazard(MachineBasicBlock::iterator candidate,
                            bool &sawLoad,
                            bool &sawStore,
                            SmallSet<unsigned, 32> &RegDefs,
                            SmallSet<unsigned, 32> &RegUses)
{
  if (candidate->isImplicitDef() || candidate->isKill())
    return true;

  if (candidate->mayLoad()) {
    sawLoad = true;
    if (sawStore)
      return true;
  }

  if (candidate->mayStore()) {
    if (sawStore)
      return true;
    sawStore = true;
    if (sawLoad)
      return true;
  }

  for (unsigned i = 0, e = candidate->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = candidate->getOperand(i);
    if (!MO.isReg())
      continue; // skip

    unsigned Reg = MO.getReg();

    if (MO.isDef()) {
      // check whether Reg is defined or used before delay slot.
      if (IsRegInSet(RegDefs, Reg) || IsRegInSet(RegUses, Reg))
        return true;
    }
    if (MO.isUse()) {
      // check whether Reg is defined before delay slot.
      if (IsRegInSet(RegDefs, Reg))
        return true;
    }
  }
  return false;
}
Example #12
// These are the common checks that need to be performed to determine if
// 1. the compare instruction can be moved before the jump.
// 2. the feeder to the compare instruction can be moved before the jump.
static bool commonChecksToProhibitNewValueJump(bool afterRA,
                                               MachineBasicBlock::iterator MII) {
  // If store in path, bail out.
  if (MII->mayStore())
    return false;

  // if call in path, bail out.
  if (MII->isCall())
    return false;

  // if NVJ is running prior to RA, do the following checks.
  if (!afterRA) {
    // The following Target Opcode instructions are spurious
    // to new value jump. If they are in the path, bail out.
    // KILL sets kill flag on the opcode. It also sets up a
    // single register, out of pair.
    //    %D0<def> = S2_lsr_r_p %D0<kill>, %R2<kill>
    //    %R0<def> = KILL %R0, %D0<imp-use,kill>
    //    %P0<def> = C2_cmpeqi %R0<kill>, 0
    // PHI can be anything after RA.
    // COPY can rematerialize things in between the feeder, compare and nvj.
    if (MII->getOpcode() == TargetOpcode::KILL ||
        MII->getOpcode() == TargetOpcode::PHI  ||
        MII->getOpcode() == TargetOpcode::COPY)
      return false;

    // The following pseudo Hexagon instructions set "use" and "def"
    // of registers by individual passes in the backend. At this time,
    // we don't know the scope of usage and definitions of these
    // instructions.
    if (MII->getOpcode() == Hexagon::LDriw_pred ||
        MII->getOpcode() == Hexagon::STriw_pred)
      return false;
  }

  return true;
}
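
A hedged sketch of how these common checks are applied (the wrapper below is illustrative, not from the Hexagon pass): every instruction strictly between the compare and the jump must pass, otherwise the compare cannot be pulled up.

static bool canMoveCmpBeforeJump(MachineBasicBlock::iterator CmpMI,
                                 MachineBasicBlock::iterator JmpMI,
                                 bool AfterRA) {
  // Scan the path between the compare and the jump; any failing
  // instruction prohibits forming the new-value jump.
  for (auto MII = std::next(CmpMI); MII != JmpMI; ++MII)
    if (!commonChecksToProhibitNewValueJump(AfterRA, MII))
      return false;
  return true;
}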
Example #13
bool SIMemoryLegalizer::expandAtomicCmpxchgOrRmw(const SIMemOpInfo &MOI,
                                                 MachineBasicBlock::iterator &MI) {
  assert(MI->mayLoad() && MI->mayStore());

  bool Changed = false;

  if (MOI.isAtomic()) {
    if (MOI.getOrdering() == AtomicOrdering::Release ||
        MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
        MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent ||
        MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent)
      Changed |= CC->insertWait(MI, MOI.getScope(),
                                MOI.getOrderingAddrSpace(),
                                SIMemOp::LOAD | SIMemOp::STORE,
                                MOI.getIsCrossAddressSpaceOrdering(),
                                Position::BEFORE);

    if (MOI.getOrdering() == AtomicOrdering::Acquire ||
        MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
        MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent ||
        MOI.getFailureOrdering() == AtomicOrdering::Acquire ||
        MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent) {
      Changed |= CC->insertWait(MI, MOI.getScope(),
                                MOI.getOrderingAddrSpace(),
                                isAtomicRet(*MI) ? SIMemOp::LOAD :
                                                   SIMemOp::STORE,
                                MOI.getIsCrossAddressSpaceOrdering(),
                                Position::AFTER);
      Changed |= CC->insertCacheInvalidate(MI, MOI.getScope(),
                                           MOI.getOrderingAddrSpace(),
                                           Position::AFTER);
    }

    return Changed;
  }

  return Changed;
}
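
SIMemOp::LOAD and SIMemOp::STORE are combined with | above, so SIMemOp behaves as a bitmask enum. A sketch of its likely declaration, with the shape inferred from the usage here:

// Bitmask enum distinguishing which kinds of memory operations a wait must
// cover; LOAD | STORE covers both.
enum class SIMemOp {
  NONE = 0u,
  LOAD = 1u << 0,
  STORE = 1u << 1,
  LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue = */ STORE)
};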
Example #14
void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
                                         unsigned LoadStoreOp,
                                         const MachineOperand *SrcDst,
                                         unsigned ScratchRsrcReg,
                                         unsigned ScratchOffset,
                                         int64_t Offset,
                                         RegScavenger *RS) const {
  unsigned Value = SrcDst->getReg();
  bool IsKill = SrcDst->isKill();
  MachineBasicBlock *MBB = MI->getParent();
  MachineFunction *MF = MI->getParent()->getParent();
  const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();

  DebugLoc DL = MI->getDebugLoc();
  bool IsStore = MI->mayStore();

  bool RanOutOfSGPRs = false;
  bool Scavenged = false;
  unsigned SOffset = ScratchOffset;
  unsigned OriginalImmOffset = Offset;

  unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
  unsigned Size = NumSubRegs * 4;

  if (!isUInt<12>(Offset + Size)) {
    SOffset = AMDGPU::NoRegister;

    // We don't have access to the register scavenger if this function is
    // called during PEI::scavengeFrameVirtualRegs().
    if (RS)
      SOffset = RS->FindUnusedReg(&AMDGPU::SGPR_32RegClass);

    if (SOffset == AMDGPU::NoRegister) {
      // There are no free SGPRs, and we are in the process of spilling VGPRs
      // too. Since we need a VGPR in order to spill SGPRs (this is true on
      // SI/CI, and on VI it is true until we implement spilling using scalar
      // stores), we have no way to free up an SGPR. Our solution here is to
      // add the offset directly to the ScratchOffset register, and then
      // subtract the offset after the spill to return ScratchOffset to its
      // original value.
      RanOutOfSGPRs = true;
      SOffset = ScratchOffset;
    } else {
      Scavenged = true;
    }
    BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), SOffset)
            .addReg(ScratchOffset)
            .addImm(Offset);
    Offset = 0;
  }

  for (unsigned i = 0, e = NumSubRegs; i != e; ++i, Offset += 4) {
    unsigned SubReg = NumSubRegs == 1 ?
      Value : getSubReg(Value, getSubRegFromChannel(i));

    unsigned SOffsetRegState = 0;
    unsigned SrcDstRegState = getDefRegState(!IsStore);
    if (i + 1 == e) {
      SOffsetRegState |= getKillRegState(Scavenged);
      // The last implicit use carries the "Kill" flag.
      SrcDstRegState |= getKillRegState(IsKill);
    }

    BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
      .addReg(SubReg, getDefRegState(!IsStore))
      .addReg(ScratchRsrcReg)
      .addReg(SOffset, SOffsetRegState)
      .addImm(Offset)
      .addImm(0) // glc
      .addImm(0) // slc
      .addImm(0) // tfe
      .addReg(Value, RegState::Implicit | SrcDstRegState)
      .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
  }
  if (RanOutOfSGPRs) {
    // Subtract the offset we added to the ScratchOffset register.
    BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), ScratchOffset)
            .addReg(ScratchOffset)
            .addImm(OriginalImmOffset);
  }
}
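
getNumSubRegsForSpillOp, used above to size the spill, maps each spill pseudo-instruction to the number of 32-bit subregisters it moves. A truncated sketch (the real table enumerates every spill width):

static unsigned getNumSubRegsForSpillOp(unsigned Op) {
  switch (Op) {
  case AMDGPU::SI_SPILL_S512_SAVE:
  case AMDGPU::SI_SPILL_S512_RESTORE:
  case AMDGPU::SI_SPILL_V512_SAVE:
  case AMDGPU::SI_SPILL_V512_RESTORE:
    return 16;
  case AMDGPU::SI_SPILL_S128_SAVE:
  case AMDGPU::SI_SPILL_S128_RESTORE:
  case AMDGPU::SI_SPILL_V128_SAVE:
  case AMDGPU::SI_SPILL_V128_RESTORE:
    return 4;
  case AMDGPU::SI_SPILL_S32_SAVE:
  case AMDGPU::SI_SPILL_S32_RESTORE:
  case AMDGPU::SI_SPILL_V32_SAVE:
  case AMDGPU::SI_SPILL_V32_RESTORE:
    return 1;
  default:
    llvm_unreachable("Invalid spill opcode");
  }
}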