Example No. 1
0
bool HexagonGenExtract::convert(Instruction *In) {
  using namespace PatternMatch;

  Value *BF = nullptr;
  ConstantInt *CSL = nullptr, *CSR = nullptr, *CM = nullptr;
  BasicBlock *BB = In->getParent();
  LLVMContext &Ctx = BB->getContext();
  bool LogicalSR;

  // (and (shl (lshr x, #sr), #sl), #m)
  LogicalSR = true;
  bool Match = match(In, m_And(m_Shl(m_LShr(m_Value(BF), m_ConstantInt(CSR)),
                               m_ConstantInt(CSL)),
                         m_ConstantInt(CM)));

  if (!Match) {
    // (and (shl (ashr x, #sr), #sl), #m)
    LogicalSR = false;
    Match = match(In, m_And(m_Shl(m_AShr(m_Value(BF), m_ConstantInt(CSR)),
                            m_ConstantInt(CSL)),
                      m_ConstantInt(CM)));
  }
  if (!Match) {
    // (and (shl x, #sl), #m)
    LogicalSR = true;
    CSR = ConstantInt::get(Type::getInt32Ty(Ctx), 0);
    Match = match(In, m_And(m_Shl(m_Value(BF), m_ConstantInt(CSL)),
                      m_ConstantInt(CM)));
    if (Match && NoSR0)
      return false;
  }
  if (!Match) {
    // (and (lshr x, #sr), #m)
    LogicalSR = true;
    CSL = ConstantInt::get(Type::getInt32Ty(Ctx), 0);
    Match = match(In, m_And(m_LShr(m_Value(BF), m_ConstantInt(CSR)),
                            m_ConstantInt(CM)));
  }
  if (!Match) {
    // (and (ashr x, #sr), #m)
    LogicalSR = false;
    CSL = ConstantInt::get(Type::getInt32Ty(Ctx), 0);
    Match = match(In, m_And(m_AShr(m_Value(BF), m_ConstantInt(CSR)),
                            m_ConstantInt(CM)));
  }
  if (!Match) {
    CM = nullptr;
    // (shl (lshr x, #sr), #sl)
    LogicalSR = true;
    Match = match(In, m_Shl(m_LShr(m_Value(BF), m_ConstantInt(CSR)),
                            m_ConstantInt(CSL)));
  }
  if (!Match) {
    CM = nullptr;
    // (shl (ashr x, #sr), #sl)
    LogicalSR = false;
    Match = match(In, m_Shl(m_AShr(m_Value(BF), m_ConstantInt(CSR)),
                            m_ConstantInt(CSL)));
  }
  if (!Match)
    return false;

  Type *Ty = BF->getType();
  if (!Ty->isIntegerTy())
    return false;
  unsigned BW = Ty->getPrimitiveSizeInBits();
  if (BW != 32 && BW != 64)
    return false;

  uint32_t SR = CSR->getZExtValue();
  uint32_t SL = CSL->getZExtValue();

  if (!CM) {
    // If there was no 'and', and the shift left did not remove all potential
    // sign bits created by the shift right, then extractu cannot reproduce
    // this value.
    if (!LogicalSR && (SR > SL))
      return false;
    APInt A = APInt(BW, ~0ULL).lshr(SR).shl(SL);
    CM = ConstantInt::get(Ctx, A);
  }
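  // For instance (illustrative numbers, not from the source): with BW = 32,
  // SR = 5 and SL = 3, the synthesized mask is (~0u >> 5) << 3 = 0x3FFFFFF8.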

  // CM is the shifted-left mask. Shift it back right to remove the zero
  // bits in the least-significant positions.
  APInt M = CM->getValue().lshr(SL);
  uint32_t T = M.countTrailingOnes();

  // During the shifts some of the bits will be lost. Calculate how many bits
  // of the original value will remain after the shift right and then left.
  uint32_t U = BW - std::max(SL, SR);
  // The width of the extracted field is the minimum of the original bits
  // that remain after the shifts and the number of contiguous 1s in the mask.
  uint32_t W = std::min(U, T);
  if (W == 0)
    return false;
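  // Worked example (illustrative): BW = 32, SR = 5, SL = 0, CM = 0xFF gives
  // M = 0xFF, T = 8, U = 32 - 5 = 27, and so W = min(27, 8) = 8.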

  // Check if the extracted bits are contained within the mask that it is
  // and-ed with. The extract operation will copy these bits, and so the
  // mask cannot have any holes in it that would clear any of the bits of the
  // extracted field.
  if (!LogicalSR) {
    // If the shift right was arithmetic, it could have included some 1 bits.
    // It is still ok to generate extract, but only if the mask eliminates
    // those bits (i.e. M does not have any bits set beyond U).
    APInt C = APInt::getHighBitsSet(BW, BW-U);
    if (M.intersects(C) || !M.isMask(W))
      return false;
  } else {
    // Check if M starts with a contiguous sequence of W one-bits. Get
    // the low U bits of M (which eliminates the 0 bits shifted in on the
    // left), and check if the result is APInt's "mask":
    if (!M.getLoBits(U).isMask(W))
      return false;
  }

  IRBuilder<> IRB(In);
  Intrinsic::ID IntId = (BW == 32) ? Intrinsic::hexagon_S2_extractu
                                   : Intrinsic::hexagon_S2_extractup;
  Module *Mod = BB->getParent()->getParent();
  Value *ExtF = Intrinsic::getDeclaration(Mod, IntId);
  Value *NewIn = IRB.CreateCall(ExtF, {BF, IRB.getInt32(W), IRB.getInt32(SR)});
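  // For BW == 32 this emits, roughly, IR of the form
  //   %e = call i32 @llvm.hexagon.S2.extractu(i32 %x, i32 W, i32 SR)
  // (shown for illustration; exact value names depend on the input IR).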
  if (SL != 0)
    NewIn = IRB.CreateShl(NewIn, SL, CSL->getName());
  In->replaceAllUsesWith(NewIn);
  return true;
}
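A minimal standalone sketch of the same width arithmetic, assuming a 32-bit value and plain integers instead of APInt (the helper name extractWidth is hypothetical, not part of the pass):

#include <algorithm>
#include <cstdint>

// Illustrative sketch only: mirrors the T/U/W computation above for BW = 32.
// Returns 0 when no extractable field exists.
static uint32_t extractWidth(uint32_t Mask, uint32_t SR, uint32_t SL) {
  uint32_t M = Mask >> SL;              // undo the left shift, as above
  uint32_t T = 0;                       // count the trailing one-bits of M
  while (T < 32 && (M & (1u << T)))
    ++T;
  uint32_t U = 32 - std::max(SL, SR);   // original bits surviving both shifts
  return std::min(U, T);                // width of the extracted field
}

// e.g. extractWidth(0xFF, /*SR=*/5, /*SL=*/0) == 8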
Example No. 2
0
static void foldOperand(MachineOperand &OpToFold, MachineInstr *UseMI,
                        unsigned UseOpIdx,
                        std::vector<FoldCandidate> &FoldList,
                        SmallVectorImpl<MachineInstr *> &CopiesToReplace,
                        const SIInstrInfo *TII, const SIRegisterInfo &TRI,
                        MachineRegisterInfo &MRI) {
  const MachineOperand &UseOp = UseMI->getOperand(UseOpIdx);

  // FIXME: Fold operands with subregs.
  if (UseOp.isReg() && ((UseOp.getSubReg() && OpToFold.isReg()) ||
      UseOp.isImplicit())) {
    return;
  }

  bool FoldingImm = OpToFold.isImm();
  APInt Imm;

  if (FoldingImm) {
    unsigned UseReg = UseOp.getReg();
    const TargetRegisterClass *UseRC
      = TargetRegisterInfo::isVirtualRegister(UseReg) ?
      MRI.getRegClass(UseReg) :
      TRI.getPhysRegClass(UseReg);

    Imm = APInt(64, OpToFold.getImm());

    const MCInstrDesc &FoldDesc = TII->get(OpToFold.getParent()->getOpcode());
    const TargetRegisterClass *FoldRC =
        TRI.getRegClass(FoldDesc.OpInfo[0].RegClass);

    // Split 64-bit constants into 32-bits for folding.
    if (FoldRC->getSize() == 8 && UseOp.getSubReg()) {
      if (UseRC->getSize() != 8)
        return;

      if (UseOp.getSubReg() == AMDGPU::sub0) {
        Imm = Imm.getLoBits(32);
      } else {
        assert(UseOp.getSubReg() == AMDGPU::sub1);
        Imm = Imm.getHiBits(32);
      }
    }
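    // E.g. (illustrative) an immediate 0x0000000200000001 folded through sub0
    // becomes 1, and through sub1 becomes 2.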

    // In order to fold immediates into copies, we need to change the
    // copy to a MOV.
    if (UseMI->getOpcode() == AMDGPU::COPY) {
      unsigned DestReg = UseMI->getOperand(0).getReg();
      const TargetRegisterClass *DestRC
        = TargetRegisterInfo::isVirtualRegister(DestReg) ?
        MRI.getRegClass(DestReg) :
        TRI.getPhysRegClass(DestReg);

      unsigned MovOp = TII->getMovOpcode(DestRC);
      if (MovOp == AMDGPU::COPY)
        return;

      UseMI->setDesc(TII->get(MovOp));
      CopiesToReplace.push_back(UseMI);
    }
  }

  // Special case for REG_SEQUENCE: We can't fold literals into
  // REG_SEQUENCE instructions, so we have to fold them into the
  // uses of REG_SEQUENCE.
  if (UseMI->getOpcode() == AMDGPU::REG_SEQUENCE) {
    unsigned RegSeqDstReg = UseMI->getOperand(0).getReg();
    unsigned RegSeqDstSubReg = UseMI->getOperand(UseOpIdx + 1).getImm();

    for (MachineRegisterInfo::use_iterator
         RSUse = MRI.use_begin(RegSeqDstReg),
         RSE = MRI.use_end(); RSUse != RSE; ++RSUse) {

      MachineInstr *RSUseMI = RSUse->getParent();
      if (RSUse->getSubReg() != RegSeqDstSubReg)
        continue;

      foldOperand(OpToFold, RSUseMI, RSUse.getOperandNo(), FoldList,
                  CopiesToReplace, TII, TRI, MRI);
    }
    return;
  }
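  // E.g. (illustrative, simplified MIR-like form): for
  //   %seq = REG_SEQUENCE %lit, sub0, %hi, sub1
  // the literal is pushed into the users of %seq that read sub0, not into
  // the REG_SEQUENCE itself.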

  const MCInstrDesc &UseDesc = UseMI->getDesc();

  // Don't fold into target independent nodes.  Target independent opcodes
  // don't have defined register classes.
  if (UseDesc.isVariadic() ||
      UseDesc.OpInfo[UseOpIdx].RegClass == -1)
    return;

  if (FoldingImm) {
    MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue());
    tryAddToFoldList(FoldList, UseMI, UseOpIdx, &ImmOp, TII);
    return;
  }

  tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold, TII);

  // FIXME: We could try to change the instruction from 64-bit to 32-bit
  // to enable more folding opportunities.  The shrink operands pass
  // already does this.
  return;
}
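The sub0/sub1 split above can also be mirrored with plain integers; the sketch below is illustrative only (splitImmediate is a hypothetical name), and the selected half comes back zero-extended, matching what getLoBits(32)/getHiBits(32) followed by getSExtValue() produce on a 64-bit APInt:

#include <cstdint>

// Illustrative sketch only: sub0 selects the low 32 bits of the folded
// constant, sub1 the high 32 bits; the selected half is zero-extended.
static int64_t splitImmediate(uint64_t Imm, bool IsSub1) {
  uint32_t Half = IsSub1 ? uint32_t(Imm >> 32) : uint32_t(Imm);
  return int64_t(uint64_t(Half));
}

// e.g. splitImmediate(0x0000000200000001, /*IsSub1=*/false) == 1
//      splitImmediate(0x0000000200000001, /*IsSub1=*/true)  == 2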
Example No. 3
0
bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const SIInstrInfo *TII =
      static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
  const SIRegisterInfo &TRI = TII->getRegisterInfo();

  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
                                                  BI != BE; ++BI) {

    MachineBasicBlock &MBB = *BI;
    MachineBasicBlock::iterator I, Next;
    for (I = MBB.begin(); I != MBB.end(); I = Next) {
      Next = std::next(I);
      MachineInstr &MI = *I;

      if (!isSafeToFold(MI.getOpcode()))
        continue;

      unsigned OpSize = TII->getOpSize(MI, 1);
      MachineOperand &OpToFold = MI.getOperand(1);
      bool FoldingImm = OpToFold.isImm();

      // FIXME: We could also be folding things like FrameIndexes and
      // TargetIndexes.
      if (!FoldingImm && !OpToFold.isReg())
        continue;

      // Folding immediates with more than one use will increase program size.
      // FIXME: This will also reduce register usage, which may be better
      // in some cases.  A better heuristic is needed.
      if (FoldingImm && !TII->isInlineConstant(OpToFold, OpSize) &&
          !MRI.hasOneUse(MI.getOperand(0).getReg()))
        continue;
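      // (Inline constants are the immediates the SI encoding carries for
      // free, roughly small integers and a handful of FP values; other
      // immediates are 32-bit literals that add an extra dword per use.)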

      // FIXME: Fold operands with subregs.
      if (OpToFold.isReg() &&
          (!TargetRegisterInfo::isVirtualRegister(OpToFold.getReg()) ||
           OpToFold.getSubReg()))
        continue;

      std::vector<FoldCandidate> FoldList;
      for (MachineRegisterInfo::use_iterator
           Use = MRI.use_begin(MI.getOperand(0).getReg()), E = MRI.use_end();
           Use != E; ++Use) {

        MachineInstr *UseMI = Use->getParent();
        const MachineOperand &UseOp = UseMI->getOperand(Use.getOperandNo());

        // FIXME: Fold operands with subregs.
        if (UseOp.isReg() && ((UseOp.getSubReg() && OpToFold.isReg()) ||
            UseOp.isImplicit())) {
          continue;
        }

        APInt Imm;

        if (FoldingImm) {
          unsigned UseReg = UseOp.getReg();
          const TargetRegisterClass *UseRC
            = TargetRegisterInfo::isVirtualRegister(UseReg) ?
            MRI.getRegClass(UseReg) :
            TRI.getPhysRegClass(UseReg);

          Imm = APInt(64, OpToFold.getImm());

          // Split 64-bit constants into 32-bits for folding.
          if (UseOp.getSubReg()) {
            if (UseRC->getSize() != 8)
              continue;

            if (UseOp.getSubReg() == AMDGPU::sub0) {
              Imm = Imm.getLoBits(32);
            } else {
              assert(UseOp.getSubReg() == AMDGPU::sub1);
              Imm = Imm.getHiBits(32);
            }
          }

          // In order to fold immediates into copies, we need to change the
          // copy to a MOV.
          if (UseMI->getOpcode() == AMDGPU::COPY) {
            unsigned DestReg = UseMI->getOperand(0).getReg();
            const TargetRegisterClass *DestRC
              = TargetRegisterInfo::isVirtualRegister(DestReg) ?
              MRI.getRegClass(DestReg) :
              TRI.getPhysRegClass(DestReg);

            unsigned MovOp = TII->getMovOpcode(DestRC);
            if (MovOp == AMDGPU::COPY)
              continue;

            UseMI->setDesc(TII->get(MovOp));
          }
        }

        const MCInstrDesc &UseDesc = UseMI->getDesc();

        // Don't fold into target independent nodes.  Target independent opcodes
        // don't have defined register classes.
        if (UseDesc.isVariadic() ||
            UseDesc.OpInfo[Use.getOperandNo()].RegClass == -1)
          continue;

        if (FoldingImm) {
          MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue());
          tryAddToFoldList(FoldList, UseMI, Use.getOperandNo(), &ImmOp, TII);
          continue;
        }

        tryAddToFoldList(FoldList, UseMI, Use.getOperandNo(), &OpToFold, TII);

        // FIXME: We could try to change the instruction from 64-bit to 32-bit
        // to enable more folding opportunities.  The shrink operands pass
        // already does this.
      }

      for (FoldCandidate &Fold : FoldList) {
        if (updateOperand(Fold, TRI)) {
          // Clear kill flags.
          if (!Fold.isImm()) {
            assert(Fold.OpToFold && Fold.OpToFold->isReg());
            Fold.OpToFold->setIsKill(false);
          }
          DEBUG(dbgs() << "Folded source from " << MI << " into OpNo " <<
                Fold.UseOpNo << " of " << *Fold.UseMI << '\n');
        }
      }
    }
  }
  return false;
}
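One detail worth noting in the basic-block loop above: Next is captured before the instruction is processed, so the traversal is not disturbed if folding rewrites or removes the instruction under the iterator. A standalone analogue of that advance-before-mutate pattern on a std::list, purely illustrative:

#include <iterator>
#include <list>

// Illustrative only: erase elements while walking the list. Capturing Next
// before the erase keeps the loop valid, since erasing an element of a
// std::list invalidates only iterators to that element.
void eraseEvens(std::list<int> &L) {
  for (auto I = L.begin(), Next = I; I != L.end(); I = Next) {
    Next = std::next(I);
    if (*I % 2 == 0)
      L.erase(I);
  }
}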