/// getTripCount - Return a loop-invariant LLVM value indicating the /// number of times the loop will be executed. The trip count can /// be either a register or a constant value. If the trip-count /// cannot be determined, this returns null. /// /// We find the trip count from the phi instruction that defines the /// induction variable. We follow the links to the CMP instruction /// to get the trip count. /// /// Based upon getTripCount in LoopInfo. /// CountValue *PPCCTRLoops::getTripCount(MachineLoop *L, SmallVector<MachineInstr *, 2> &OldInsts) const { MachineBasicBlock *LastMBB = L->getExitingBlock(); // Don't generate a CTR loop if the loop has more than one exit. if (LastMBB == 0) return 0; MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator(); if (LastI->getOpcode() != PPC::BCC) return 0; // We need to make sure that this compare is defining the condition // register actually used by the terminating branch. unsigned PredReg = LastI->getOperand(1).getReg(); DEBUG(dbgs() << "Examining loop with first terminator: " << *LastI); unsigned PredCond = LastI->getOperand(0).getImm(); if (PredCond != PPC::PRED_EQ && PredCond != PPC::PRED_NE) return 0; // Check that the loop has a induction variable. SmallVector<MachineInstr *, 4> IVars, IOps; getCanonicalInductionVariable(L, IVars, IOps); for (unsigned i = 0; i < IVars.size(); ++i) { MachineInstr *IOp = IOps[i]; MachineInstr *IV_Inst = IVars[i]; // Canonical loops will end with a 'cmpwi/cmpdi cr, IV, Imm', // if Imm is 0, get the count from the PHI opnd // if Imm is -M, than M is the count // Otherwise, Imm is the count MachineOperand *IV_Opnd; const MachineOperand *InitialValue; if (!L->contains(IV_Inst->getOperand(2).getMBB())) { InitialValue = &IV_Inst->getOperand(1); IV_Opnd = &IV_Inst->getOperand(3); } else { InitialValue = &IV_Inst->getOperand(3); IV_Opnd = &IV_Inst->getOperand(1); } DEBUG(dbgs() << "Considering:\n"); DEBUG(dbgs() << " induction operation: " << *IOp); DEBUG(dbgs() << " induction variable: " << *IV_Inst); DEBUG(dbgs() << " initial value: " << *InitialValue << "\n"); // Look for the cmp instruction to determine if we // can get a useful trip count. The trip count can // be either a register or an immediate. The location // of the value depends upon the type (reg or imm). for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(IV_Opnd->getReg()), RE = MRI->reg_end(); RI != RE; ++RI) { IV_Opnd = &RI.getOperand(); bool SignedCmp, Int64Cmp; MachineInstr *MI = IV_Opnd->getParent(); if (L->contains(MI) && isCompareEqualsImm(MI, SignedCmp, Int64Cmp) && MI->getOperand(0).getReg() == PredReg) { OldInsts.push_back(MI); OldInsts.push_back(IOp); DEBUG(dbgs() << " compare: " << *MI); const MachineOperand &MO = MI->getOperand(2); assert(MO.isImm() && "IV Cmp Operand should be an immediate"); int64_t ImmVal; if (SignedCmp) ImmVal = (short) MO.getImm(); else ImmVal = MO.getImm(); const MachineInstr *IV_DefInstr = MRI->getVRegDef(IV_Opnd->getReg()); assert(L->contains(IV_DefInstr->getParent()) && "IV definition should occurs in loop"); int64_t iv_value = (short) IV_DefInstr->getOperand(2).getImm(); assert(InitialValue->isReg() && "Expecting register for init value"); unsigned InitialValueReg = InitialValue->getReg(); MachineInstr *DefInstr = MRI->getVRegDef(InitialValueReg); // Here we need to look for an immediate load (an li or lis/ori pair). 
if (DefInstr && (DefInstr->getOpcode() == PPC::ORI8 || DefInstr->getOpcode() == PPC::ORI)) { int64_t start = DefInstr->getOperand(2).getImm(); MachineInstr *DefInstr2 = MRI->getVRegDef(DefInstr->getOperand(1).getReg()); if (DefInstr2 && (DefInstr2->getOpcode() == PPC::LIS8 || DefInstr2->getOpcode() == PPC::LIS)) { DEBUG(dbgs() << " initial constant: " << *DefInstr); DEBUG(dbgs() << " initial constant: " << *DefInstr2); start |= int64_t(short(DefInstr2->getOperand(1).getImm())) << 16; int64_t count = ImmVal - start; if ((count % iv_value) != 0) { return 0; } OldInsts.push_back(DefInstr); OldInsts.push_back(DefInstr2); // count/iv_value, the trip count, should be positive here. If it // is negative, that indicates that the counter will wrap. if (Int64Cmp) return new CountValue(count/iv_value); else return new CountValue(uint32_t(count/iv_value)); } } else if (DefInstr && (DefInstr->getOpcode() == PPC::LI8 || DefInstr->getOpcode() == PPC::LI)) { DEBUG(dbgs() << " initial constant: " << *DefInstr); int64_t count = ImmVal - int64_t(short(DefInstr->getOperand(1).getImm())); if ((count % iv_value) != 0) { return 0; } OldInsts.push_back(DefInstr); if (Int64Cmp) return new CountValue(count/iv_value); else return new CountValue(uint32_t(count/iv_value)); } else if (iv_value == 1 || iv_value == -1) { // We can't determine a constant starting value. if (ImmVal == 0) { return new CountValue(InitialValueReg, iv_value > 0); } // FIXME: handle non-zero end value. } // FIXME: handle non-unit increments (we might not want to introduce // division but we can handle some 2^n cases with shifts). } } } return 0; }
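// The arithmetic above reduces to a small, self-contained computation. The
// sketch below is not part of the pass; it is a hedged illustration of how a
// constant trip count falls out of a constant start value, the compare
// immediate, and the induction step, including the divisibility check that
// makes the pass give up on inexact counts.
#include <cstdint>
#include <optional>

// Hypothetical helper, mirroring `count = ImmVal - start` and the
// `(count % iv_value) != 0` rejection above.
static std::optional<int64_t> constantTripCount(int64_t Start, int64_t End,
                                                int64_t Step) {
  if (Step == 0)
    return std::nullopt;          // no progress; not a countable loop
  int64_t Distance = End - Start;
  if (Distance % Step != 0)       // the pass bails out on inexact counts
    return std::nullopt;
  // A non-positive result indicates the counter wraps before reaching End.
  return Distance / Step;
}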
std::pair<MachineLegalizer::LegalizeAction, LLT> MachineLegalizer::getAction(MachineInstr &MI) const { return getAction(MI.getOpcode(), MI.getType()); }
bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(MF.getFunction())) return false; const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); const SIRegisterInfo *TRI = ST.getRegisterInfo(); const SIInstrInfo *TII = ST.getInstrInfo(); // Optimize sequences emitted for control flow lowering. They are originally // emitted as the separate operations because spill code may need to be // inserted for the saved copy of exec. // // x = copy exec // z = s_<op>_b64 x, y // exec = copy z // => // x = s_<op>_saveexec_b64 y // for (MachineBasicBlock &MBB : MF) { MachineBasicBlock::reverse_iterator I = fixTerminators(*TII, MBB); MachineBasicBlock::reverse_iterator E = MBB.rend(); if (I == E) continue; unsigned CopyToExec = isCopyToExec(*I); if (CopyToExec == AMDGPU::NoRegister) continue; // Scan backwards to find the def. auto CopyToExecInst = &*I; auto CopyFromExecInst = findExecCopy(*TII, MBB, I, CopyToExec); if (CopyFromExecInst == E) { auto PrepareExecInst = std::next(I); if (PrepareExecInst == E) continue; // Fold exec = COPY (S_AND_B64 reg, exec) -> exec = S_AND_B64 reg, exec if (CopyToExecInst->getOperand(1).isKill() && isLogicalOpOnExec(*PrepareExecInst) == CopyToExec) { DEBUG(dbgs() << "Fold exec copy: " << *PrepareExecInst); PrepareExecInst->getOperand(0).setReg(AMDGPU::EXEC); PrepareExecInst->getOperand(0).setIsRenamable(false); DEBUG(dbgs() << "into: " << *PrepareExecInst << '\n'); CopyToExecInst->eraseFromParent(); } continue; } if (isLiveOut(MBB, CopyToExec)) { // The copied register is live out and has a second use in another block. DEBUG(dbgs() << "Exec copy source register is live out\n"); continue; } unsigned CopyFromExec = CopyFromExecInst->getOperand(0).getReg(); MachineInstr *SaveExecInst = nullptr; SmallVector<MachineInstr *, 4> OtherUseInsts; for (MachineBasicBlock::iterator J = std::next(CopyFromExecInst->getIterator()), JE = I->getIterator(); J != JE; ++J) { if (SaveExecInst && J->readsRegister(AMDGPU::EXEC, TRI)) { DEBUG(dbgs() << "exec read prevents saveexec: " << *J << '\n'); // Make sure this is inserted after any VALU ops that may have been // scheduled in between. SaveExecInst = nullptr; break; } bool ReadsCopyFromExec = J->readsRegister(CopyFromExec, TRI); if (J->modifiesRegister(CopyToExec, TRI)) { if (SaveExecInst) { DEBUG(dbgs() << "Multiple instructions modify " << printReg(CopyToExec, TRI) << '\n'); SaveExecInst = nullptr; break; } unsigned SaveExecOp = getSaveExecOp(J->getOpcode()); if (SaveExecOp == AMDGPU::INSTRUCTION_LIST_END) break; if (ReadsCopyFromExec) { SaveExecInst = &*J; DEBUG(dbgs() << "Found save exec op: " << *SaveExecInst << '\n'); continue; } else { DEBUG(dbgs() << "Instruction does not read exec copy: " << *J << '\n'); break; } } else if (ReadsCopyFromExec && !SaveExecInst) { // Make sure no other instruction is trying to use this copy, before it // will be rewritten by the saveexec, i.e. hasOneUse. There may have // been another use, such as an inserted spill. 
For example: // // %sgpr0_sgpr1 = COPY %exec // spill %sgpr0_sgpr1 // %sgpr2_sgpr3 = S_AND_B64 %sgpr0_sgpr1 // DEBUG(dbgs() << "Found second use of save inst candidate: " << *J << '\n'); break; } if (SaveExecInst && J->readsRegister(CopyToExec, TRI)) { assert(SaveExecInst != &*J); OtherUseInsts.push_back(&*J); } } if (!SaveExecInst) continue; DEBUG(dbgs() << "Insert save exec op: " << *SaveExecInst << '\n'); MachineOperand &Src0 = SaveExecInst->getOperand(1); MachineOperand &Src1 = SaveExecInst->getOperand(2); MachineOperand *OtherOp = nullptr; if (Src0.isReg() && Src0.getReg() == CopyFromExec) { OtherOp = &Src1; } else if (Src1.isReg() && Src1.getReg() == CopyFromExec) { if (!SaveExecInst->isCommutable()) break; OtherOp = &Src0; } else llvm_unreachable("unexpected"); CopyFromExecInst->eraseFromParent(); auto InsPt = SaveExecInst->getIterator(); const DebugLoc &DL = SaveExecInst->getDebugLoc(); BuildMI(MBB, InsPt, DL, TII->get(getSaveExecOp(SaveExecInst->getOpcode())), CopyFromExec) .addReg(OtherOp->getReg()); SaveExecInst->eraseFromParent(); CopyToExecInst->eraseFromParent(); for (MachineInstr *OtherInst : OtherUseInsts) { OtherInst->substituteRegister(CopyToExec, AMDGPU::EXEC, AMDGPU::NoSubRegister, *TRI, /*ClearIsRenamable=*/true); } } return true; }
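// A hedged sketch of the opcode mapping getSaveExecOp is assumed to perform
// in the pass above: each 64-bit scalar logical opcode is paired with its
// *_SAVEEXEC_B64 form, and anything else is reported as not foldable. This is
// an illustration, not the pass's own implementation.
static unsigned mapToSaveExecOp(unsigned Opc) {
  switch (Opc) {
  case AMDGPU::S_AND_B64:   return AMDGPU::S_AND_SAVEEXEC_B64;
  case AMDGPU::S_OR_B64:    return AMDGPU::S_OR_SAVEEXEC_B64;
  case AMDGPU::S_XOR_B64:   return AMDGPU::S_XOR_SAVEEXEC_B64;
  case AMDGPU::S_ANDN2_B64: return AMDGPU::S_ANDN2_SAVEEXEC_B64;
  case AMDGPU::S_ORN2_B64:  return AMDGPU::S_ORN2_SAVEEXEC_B64;
  case AMDGPU::S_NAND_B64:  return AMDGPU::S_NAND_SAVEEXEC_B64;
  case AMDGPU::S_NOR_B64:   return AMDGPU::S_NOR_SAVEEXEC_B64;
  case AMDGPU::S_XNOR_B64:  return AMDGPU::S_XNOR_SAVEEXEC_B64;
  default:
    return AMDGPU::INSTRUCTION_LIST_END; // not a saveexec candidate
  }
}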
/// Attempt the reassociation transformation to reduce critical path length. /// See the above comments before getMachineCombinerPatterns(). void TargetInstrInfo::reassociateOps( MachineInstr &Root, MachineInstr &Prev, MachineCombinerPattern Pattern, SmallVectorImpl<MachineInstr *> &InsInstrs, SmallVectorImpl<MachineInstr *> &DelInstrs, DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const { MachineFunction *MF = Root.getParent()->getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); const TargetRegisterClass *RC = Root.getRegClassConstraint(0, TII, TRI); // This array encodes the operand index for each parameter because the // operands may be commuted. Each row corresponds to a pattern value, // and each column specifies the index of A, B, X, Y. unsigned OpIdx[4][4] = { { 1, 1, 2, 2 }, { 1, 2, 2, 1 }, { 2, 1, 1, 2 }, { 2, 2, 1, 1 } }; int Row; switch (Pattern) { case MachineCombinerPattern::REASSOC_AX_BY: Row = 0; break; case MachineCombinerPattern::REASSOC_AX_YB: Row = 1; break; case MachineCombinerPattern::REASSOC_XA_BY: Row = 2; break; case MachineCombinerPattern::REASSOC_XA_YB: Row = 3; break; default: llvm_unreachable("unexpected MachineCombinerPattern"); } MachineOperand &OpA = Prev.getOperand(OpIdx[Row][0]); MachineOperand &OpB = Root.getOperand(OpIdx[Row][1]); MachineOperand &OpX = Prev.getOperand(OpIdx[Row][2]); MachineOperand &OpY = Root.getOperand(OpIdx[Row][3]); MachineOperand &OpC = Root.getOperand(0); unsigned RegA = OpA.getReg(); unsigned RegB = OpB.getReg(); unsigned RegX = OpX.getReg(); unsigned RegY = OpY.getReg(); unsigned RegC = OpC.getReg(); if (TargetRegisterInfo::isVirtualRegister(RegA)) MRI.constrainRegClass(RegA, RC); if (TargetRegisterInfo::isVirtualRegister(RegB)) MRI.constrainRegClass(RegB, RC); if (TargetRegisterInfo::isVirtualRegister(RegX)) MRI.constrainRegClass(RegX, RC); if (TargetRegisterInfo::isVirtualRegister(RegY)) MRI.constrainRegClass(RegY, RC); if (TargetRegisterInfo::isVirtualRegister(RegC)) MRI.constrainRegClass(RegC, RC); // Create a new virtual register for the result of (X op Y) instead of // recycling RegB because the MachineCombiner's computation of the critical // path requires a new register definition rather than an existing one. unsigned NewVR = MRI.createVirtualRegister(RC); InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); unsigned Opcode = Root.getOpcode(); bool KillA = OpA.isKill(); bool KillX = OpX.isKill(); bool KillY = OpY.isKill(); // Create new instructions for insertion. MachineInstrBuilder MIB1 = BuildMI(*MF, Prev.getDebugLoc(), TII->get(Opcode), NewVR) .addReg(RegX, getKillRegState(KillX)) .addReg(RegY, getKillRegState(KillY)); MachineInstrBuilder MIB2 = BuildMI(*MF, Root.getDebugLoc(), TII->get(Opcode), RegC) .addReg(RegA, getKillRegState(KillA)) .addReg(NewVR, getKillRegState(true)); setSpecialOperandAttr(Root, Prev, *MIB1, *MIB2); // Record new instructions for insertion and old instructions for deletion. InsInstrs.push_back(MIB1); InsInstrs.push_back(MIB2); DelInstrs.push_back(&Prev); DelInstrs.push_back(&Root); }
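// The effect of the transformation above is easiest to see on a plain
// expression. The standalone snippet below (not LLVM code, just an
// illustration) contrasts the original chain, where both adds depend on A,
// with the reassociated form, where (X + Y) is computed independently of A
// and only the final add sits on A's critical path.
#include <cstdio>

static int chained(int A, int X, int Y) {
  int B = A + X;        // Prev:  B = A op X
  return B + Y;         // Root:  C = B op Y   -- both adds wait on A
}

static int reassociated(int A, int X, int Y) {
  int T = X + Y;        // NewVR = X op Y      -- independent of A
  return A + T;         // C = A op NewVR      -- only this add waits on A
}

int main() {
  std::printf("%d %d\n", chained(1, 2, 3), reassociated(1, 2, 3));
  return 0;
}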
bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "********** Hexagon New Value Jump **********\n" << "********** Function: " << MF.getName() << "\n"); #if 0 // for now disable this, if we move NewValueJump before register // allocation we need this information. LiveVariables &LVs = getAnalysis<LiveVariables>(); #endif QII = static_cast<const HexagonInstrInfo *>(MF.getTarget().getInstrInfo()); QRI = static_cast<const HexagonRegisterInfo *>(MF.getTarget().getRegisterInfo()); MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); if (!QRI->Subtarget.hasV4TOps() || DisableNewValueJumps) { return false; } int nvjCount = DbgNVJCount; int nvjGenerated = 0; // Loop through all the bb's of the function for (MachineFunction::iterator MBBb = MF.begin(), MBBe = MF.end(); MBBb != MBBe; ++MBBb) { MachineBasicBlock* MBB = MBBb; DEBUG(dbgs() << "** dumping bb ** " << MBB->getNumber() << "\n"); DEBUG(MBB->dump()); DEBUG(dbgs() << "\n" << "********** dumping instr bottom up **********\n"); bool foundJump = false; bool foundCompare = false; bool invertPredicate = false; unsigned predReg = 0; // predicate reg of the jump. unsigned cmpReg1 = 0; int cmpOp2 = 0; bool MO1IsKill = false; bool MO2IsKill = false; MachineBasicBlock::iterator jmpPos; MachineBasicBlock::iterator cmpPos; MachineInstr *cmpInstr = NULL, *jmpInstr = NULL; MachineBasicBlock *jmpTarget = NULL; bool afterRA = false; bool isSecondOpReg = false; bool isSecondOpNewified = false; // Traverse the basic block - bottom up for (MachineBasicBlock::iterator MII = MBB->end(), E = MBB->begin(); MII != E;) { MachineInstr *MI = --MII; if (MI->isDebugValue()) { continue; } if ((nvjCount == 0) || (nvjCount > -1 && nvjCount <= nvjGenerated)) break; DEBUG(dbgs() << "Instr: "; MI->dump(); dbgs() << "\n"); if (!foundJump && (MI->getOpcode() == Hexagon::JMP_t || MI->getOpcode() == Hexagon::JMP_f || MI->getOpcode() == Hexagon::JMP_tnew_t || MI->getOpcode() == Hexagon::JMP_tnew_nt || MI->getOpcode() == Hexagon::JMP_fnew_t || MI->getOpcode() == Hexagon::JMP_fnew_nt)) { // This is where you would insert your compare and // instr that feeds compare jmpPos = MII; jmpInstr = MI; predReg = MI->getOperand(0).getReg(); afterRA = TargetRegisterInfo::isPhysicalRegister(predReg); // If ifconverter had not messed up with the kill flags of the // operands, the following check on the kill flag would suffice. // if(!jmpInstr->getOperand(0).isKill()) break; // This predicate register is live out out of BB // this would only work if we can actually use Live // variable analysis on phy regs - but LLVM does not // provide LV analysis on phys regs. //if(LVs.isLiveOut(predReg, *MBB)) break; // Get all the successors of this block - which will always // be 2. Check if the predicate register is live in in those // successor. If yes, we can not delete the predicate - // I am doing this only because LLVM does not provide LiveOut // at the BB level. bool predLive = false; for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(), SIE = MBB->succ_end(); SI != SIE; ++SI) { MachineBasicBlock* succMBB = *SI; if (succMBB->isLiveIn(predReg)) { predLive = true; } } if (predLive) break; jmpTarget = MI->getOperand(1).getMBB(); foundJump = true; if (MI->getOpcode() == Hexagon::JMP_f || MI->getOpcode() == Hexagon::JMP_fnew_t || MI->getOpcode() == Hexagon::JMP_fnew_nt) { invertPredicate = true; } continue; } // No new value jump if there is a barrier. A barrier has to be in its // own packet. A barrier has zero operands. 
We conservatively bail out // here if we see any instruction with zero operands. if (foundJump && MI->getNumOperands() == 0) break; if (foundJump && !foundCompare && MI->getOperand(0).isReg() && MI->getOperand(0).getReg() == predReg) { // Not all compares can be new value compare. Arch Spec: 7.6.1.1 if (QII->isNewValueJumpCandidate(MI)) { assert((MI->getDesc().isCompare()) && "Only compare instruction can be collapsed into New Value Jump"); isSecondOpReg = MI->getOperand(2).isReg(); if (!canCompareBeNewValueJump(QII, QRI, MII, predReg, isSecondOpReg, afterRA, jmpPos, MF)) break; cmpInstr = MI; cmpPos = MII; foundCompare = true; // We need cmpReg1 and cmpOp2(imm or reg) while building // new value jump instruction. cmpReg1 = MI->getOperand(1).getReg(); if (MI->getOperand(1).isKill()) MO1IsKill = true; if (isSecondOpReg) { cmpOp2 = MI->getOperand(2).getReg(); if (MI->getOperand(2).isKill()) MO2IsKill = true; } else cmpOp2 = MI->getOperand(2).getImm(); continue; } } if (foundCompare && foundJump) { // If "common" checks fail, bail out on this BB. if (!commonChecksToProhibitNewValueJump(afterRA, MII)) break; bool foundFeeder = false; MachineBasicBlock::iterator feederPos = MII; if (MI->getOperand(0).isReg() && MI->getOperand(0).isDef() && (MI->getOperand(0).getReg() == cmpReg1 || (isSecondOpReg && MI->getOperand(0).getReg() == (unsigned) cmpOp2))) { unsigned feederReg = MI->getOperand(0).getReg(); // First try to see if we can get the feeder from the first operand // of the compare. If we can not, and if secondOpReg is true // (second operand of the compare is also register), try that one. // TODO: Try to come up with some heuristic to figure out which // feeder would benefit. if (feederReg == cmpReg1) { if (!canBeFeederToNewValueJump(QII, QRI, MII, jmpPos, cmpPos, MF)) { if (!isSecondOpReg) break; else continue; } else foundFeeder = true; } if (!foundFeeder && isSecondOpReg && feederReg == (unsigned) cmpOp2) if (!canBeFeederToNewValueJump(QII, QRI, MII, jmpPos, cmpPos, MF)) break; if (isSecondOpReg) { // In case of CMPLT, or CMPLTU, or EQ with the second register // to newify, swap the operands. if (cmpInstr->getOpcode() == Hexagon::CMPEQrr && feederReg == (unsigned) cmpOp2) { unsigned tmp = cmpReg1; bool tmpIsKill = MO1IsKill; cmpReg1 = cmpOp2; MO1IsKill = MO2IsKill; cmpOp2 = tmp; MO2IsKill = tmpIsKill; } // Now we have swapped the operands, all we need to check is, // if the second operand (after swap) is the feeder. // And if it is, make a note. if (feederReg == (unsigned)cmpOp2) isSecondOpNewified = true; } // Now that we are moving feeder close the jump, // make sure we are respecting the kill values of // the operands of the feeder. 
bool updatedIsKill = false; for (unsigned i = 0; i < MI->getNumOperands(); i++) { MachineOperand &MO = MI->getOperand(i); if (MO.isReg() && MO.isUse()) { unsigned feederReg = MO.getReg(); for (MachineBasicBlock::iterator localII = feederPos, end = jmpPos; localII != end; localII++) { MachineInstr *localMI = localII; for (unsigned j = 0; j < localMI->getNumOperands(); j++) { MachineOperand &localMO = localMI->getOperand(j); if (localMO.isReg() && localMO.isUse() && localMO.isKill() && feederReg == localMO.getReg()) { // We found that there is kill of a use register // Set up a kill flag on the register localMO.setIsKill(false); MO.setIsKill(); updatedIsKill = true; break; } } if (updatedIsKill) break; } } if (updatedIsKill) break; } MBB->splice(jmpPos, MI->getParent(), MI); MBB->splice(jmpPos, MI->getParent(), cmpInstr); DebugLoc dl = MI->getDebugLoc(); MachineInstr *NewMI; assert((QII->isNewValueJumpCandidate(cmpInstr)) && "This compare is not a New Value Jump candidate."); unsigned opc = getNewValueJumpOpcode(cmpInstr, cmpOp2, isSecondOpNewified, jmpTarget, MBPI); if (invertPredicate) opc = QII->getInvertedPredicatedOpcode(opc); if (isSecondOpReg) NewMI = BuildMI(*MBB, jmpPos, dl, QII->get(opc)) .addReg(cmpReg1, getKillRegState(MO1IsKill)) .addReg(cmpOp2, getKillRegState(MO2IsKill)) .addMBB(jmpTarget); else if ((cmpInstr->getOpcode() == Hexagon::CMPEQri || cmpInstr->getOpcode() == Hexagon::CMPGTri) && cmpOp2 == -1 ) // Corresponding new-value compare jump instructions don't have the // operand for -1 immediate value. NewMI = BuildMI(*MBB, jmpPos, dl, QII->get(opc)) .addReg(cmpReg1, getKillRegState(MO1IsKill)) .addMBB(jmpTarget); else NewMI = BuildMI(*MBB, jmpPos, dl, QII->get(opc)) .addReg(cmpReg1, getKillRegState(MO1IsKill)) .addImm(cmpOp2) .addMBB(jmpTarget); assert(NewMI && "New Value Jump Instruction Not created!"); (void)NewMI; if (cmpInstr->getOperand(0).isReg() && cmpInstr->getOperand(0).isKill()) cmpInstr->getOperand(0).setIsKill(false); if (cmpInstr->getOperand(1).isReg() && cmpInstr->getOperand(1).isKill()) cmpInstr->getOperand(1).setIsKill(false); cmpInstr->eraseFromParent(); jmpInstr->eraseFromParent(); ++nvjGenerated; ++NumNVJGenerated; break; } } } } return true; }
bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { MF = &mf; MRI = &mf.getRegInfo(); TRI = MF->getTarget().getRegisterInfo(); ReservedRegisters = TRI->getReservedRegs(mf); unsigned NumRegs = TRI->getNumRegs(); PhysRegDef = new MachineInstr*[NumRegs]; PhysRegUse = new MachineInstr*[NumRegs]; PHIVarInfo = new SmallVector<unsigned, 4>[MF->getNumBlockIDs()]; std::fill(PhysRegDef, PhysRegDef + NumRegs, (MachineInstr*)0); std::fill(PhysRegUse, PhysRegUse + NumRegs, (MachineInstr*)0); /// Get some space for a respectable number of registers. VirtRegInfo.resize(64); analyzePHINodes(mf); // Calculate live variable information in depth first order on the CFG of the // function. This guarantees that we will see the definition of a virtual // register before its uses due to dominance properties of SSA (except for PHI // nodes, which are treated as a special case). MachineBasicBlock *Entry = MF->begin(); SmallPtrSet<MachineBasicBlock*,16> Visited; for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*,16> > DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited); DFI != E; ++DFI) { MachineBasicBlock *MBB = *DFI; // Mark live-in registers as live-in. for (MachineBasicBlock::const_livein_iterator II = MBB->livein_begin(), EE = MBB->livein_end(); II != EE; ++II) { assert(TargetRegisterInfo::isPhysicalRegister(*II) && "Cannot have a live-in virtual register!"); HandlePhysRegDef(*II, 0); } // Loop over all of the instructions, processing them. DistanceMap.clear(); unsigned Dist = 0; for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ++I) { MachineInstr *MI = I; DistanceMap.insert(std::make_pair(MI, Dist++)); // Process all of the operands of the instruction... unsigned NumOperandsToProcess = MI->getNumOperands(); // Unless it is a PHI node. In this case, ONLY process the DEF, not any // of the uses. They will be handled in other basic blocks. if (MI->getOpcode() == TargetInstrInfo::PHI) NumOperandsToProcess = 1; SmallVector<unsigned, 4> UseRegs; SmallVector<unsigned, 4> DefRegs; for (unsigned i = 0; i != NumOperandsToProcess; ++i) { const MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || MO.getReg() == 0) continue; unsigned MOReg = MO.getReg(); if (MO.isUse()) UseRegs.push_back(MOReg); if (MO.isDef()) DefRegs.push_back(MOReg); } // Process all uses. for (unsigned i = 0, e = UseRegs.size(); i != e; ++i) { unsigned MOReg = UseRegs[i]; if (TargetRegisterInfo::isVirtualRegister(MOReg)) HandleVirtRegUse(MOReg, MBB, MI); else if (!ReservedRegisters[MOReg]) HandlePhysRegUse(MOReg, MI); } // Process all defs. for (unsigned i = 0, e = DefRegs.size(); i != e; ++i) { unsigned MOReg = DefRegs[i]; if (TargetRegisterInfo::isVirtualRegister(MOReg)) HandleVirtRegDef(MOReg, MI); else if (!ReservedRegisters[MOReg]) HandlePhysRegDef(MOReg, MI); } } // Handle any virtual assignments from PHI nodes which might be at the // bottom of this basic block. We check all of our successor blocks to see // if they have PHI nodes, and if so, we simulate an assignment at the end // of the current block. if (!PHIVarInfo[MBB->getNumber()].empty()) { SmallVector<unsigned, 4>& VarInfoVec = PHIVarInfo[MBB->getNumber()]; for (SmallVector<unsigned, 4>::iterator I = VarInfoVec.begin(), E = VarInfoVec.end(); I != E; ++I) // Mark it alive only in the block we are representing. 
MarkVirtRegAliveInBlock(getVarInfo(*I),MRI->getVRegDef(*I)->getParent(), MBB); } // Finally, if the last instruction in the block is a return, make sure to // mark it as using all of the live-out values in the function. if (!MBB->empty() && MBB->back().getDesc().isReturn()) { MachineInstr *Ret = &MBB->back(); for (MachineRegisterInfo::liveout_iterator I = MF->getRegInfo().liveout_begin(), E = MF->getRegInfo().liveout_end(); I != E; ++I) { assert(TargetRegisterInfo::isPhysicalRegister(*I) && "Cannot have a live-out virtual register!"); HandlePhysRegUse(*I, Ret); // Add live-out registers as implicit uses. if (!Ret->readsRegister(*I)) Ret->addOperand(MachineOperand::CreateReg(*I, false, true)); } } // Loop over PhysRegDef / PhysRegUse, killing any registers that are // available at the end of the basic block. for (unsigned i = 0; i != NumRegs; ++i) if (PhysRegDef[i] || PhysRegUse[i]) HandlePhysRegDef(i, 0); std::fill(PhysRegDef, PhysRegDef + NumRegs, (MachineInstr*)0); std::fill(PhysRegUse, PhysRegUse + NumRegs, (MachineInstr*)0); } // Convert and transfer the dead / killed information we have gathered into // VirtRegInfo onto MI's. for (unsigned i = 0, e1 = VirtRegInfo.size(); i != e1; ++i) for (unsigned j = 0, e2 = VirtRegInfo[i].Kills.size(); j != e2; ++j) if (VirtRegInfo[i].Kills[j] == MRI->getVRegDef(i + TargetRegisterInfo::FirstVirtualRegister)) VirtRegInfo[i] .Kills[j]->addRegisterDead(i + TargetRegisterInfo::FirstVirtualRegister, TRI); else VirtRegInfo[i] .Kills[j]->addRegisterKilled(i + TargetRegisterInfo::FirstVirtualRegister, TRI); // Check to make sure there are no unreachable blocks in the MC CFG for the // function. If so, it is due to a bug in the instruction selector or some // other part of the code generator if this happens. #ifndef NDEBUG for(MachineFunction::iterator i = MF->begin(), e = MF->end(); i != e; ++i) assert(Visited.count(&*i) != 0 && "unreachable basic block found"); #endif delete[] PhysRegDef; delete[] PhysRegUse; delete[] PHIVarInfo; return false; }
/// findMatchingInsn - Scan the instructions looking for a load/store that can /// be combined with the current instruction into a load/store pair. MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, LdStPairFlags &Flags, unsigned Limit) { MachineBasicBlock::iterator E = I->getParent()->end(); MachineBasicBlock::iterator MBBI = I; MachineInstr *FirstMI = I; ++MBBI; unsigned Opc = FirstMI->getOpcode(); bool MayLoad = FirstMI->mayLoad(); bool IsUnscaled = isUnscaledLdst(Opc); unsigned Reg = FirstMI->getOperand(0).getReg(); unsigned BaseReg = FirstMI->getOperand(1).getReg(); int Offset = FirstMI->getOperand(2).getImm(); // Early exit if the first instruction modifies the base register. // e.g., ldr x0, [x0] // Early exit if the offset if not possible to match. (6 bits of positive // range, plus allow an extra one in case we find a later insn that matches // with Offset-1 if (FirstMI->modifiesRegister(BaseReg, TRI)) return E; int OffsetStride = IsUnscaled && EnableAArch64UnscaledMemOp ? getMemSize(FirstMI) : 1; if (!inBoundsForPair(IsUnscaled, Offset, OffsetStride)) return E; // Track which registers have been modified and used between the first insn // (inclusive) and the second insn. BitVector ModifiedRegs, UsedRegs; ModifiedRegs.resize(TRI->getNumRegs()); UsedRegs.resize(TRI->getNumRegs()); // Remember any instructions that read/write memory between FirstMI and MI. SmallVector<MachineInstr *, 4> MemInsns; for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) { MachineInstr *MI = MBBI; // Skip DBG_VALUE instructions. Otherwise debug info can affect the // optimization by changing how far we scan. if (MI->isDebugValue()) continue; // Now that we know this is a real instruction, count it. ++Count; bool CanMergeOpc = Opc == MI->getOpcode(); Flags.setSExtIdx(-1); if (!CanMergeOpc) { bool IsValidLdStrOpc; unsigned NonSExtOpc = getMatchingNonSExtOpcode(Opc, &IsValidLdStrOpc); if (!IsValidLdStrOpc) continue; // Opc will be the first instruction in the pair. Flags.setSExtIdx(NonSExtOpc == (unsigned)Opc ? 1 : 0); CanMergeOpc = NonSExtOpc == getMatchingNonSExtOpcode(MI->getOpcode()); } if (CanMergeOpc && MI->getOperand(2).isImm()) { // If we've found another instruction with the same opcode, check to see // if the base and offset are compatible with our starting instruction. // These instructions all have scaled immediate operands, so we just // check for +1/-1. Make sure to check the new instruction offset is // actually an immediate and not a symbolic reference destined for // a relocation. // // Pairwise instructions have a 7-bit signed offset field. Single insns // have a 12-bit unsigned offset field. To be a valid combine, the // final offset must be in range. unsigned MIBaseReg = MI->getOperand(1).getReg(); int MIOffset = MI->getOperand(2).getImm(); if (BaseReg == MIBaseReg && ((Offset == MIOffset + OffsetStride) || (Offset + OffsetStride == MIOffset))) { int MinOffset = Offset < MIOffset ? Offset : MIOffset; // If this is a volatile load/store that otherwise matched, stop looking // as something is going on that we don't have enough information to // safely transform. Similarly, stop if we see a hint to avoid pairs. if (MI->hasOrderedMemoryRef() || TII->isLdStPairSuppressed(MI)) return E; // If the resultant immediate offset of merging these instructions // is out of range for a pairwise instruction, bail and keep looking. 
        bool MIIsUnscaled = isUnscaledLdst(MI->getOpcode());
        if (!inBoundsForPair(MIIsUnscaled, MinOffset, OffsetStride)) {
          trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
          if (MI->mayLoadOrStore())
            MemInsns.push_back(MI);
          continue;
        }
        // If the alignment requirements of the paired (scaled) instruction
        // can't express the offset of the unscaled input, bail and keep
        // looking.
        if (IsUnscaled && EnableAArch64UnscaledMemOp &&
            (alignTo(MinOffset, OffsetStride) != MinOffset)) {
          trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
          if (MI->mayLoadOrStore())
            MemInsns.push_back(MI);
          continue;
        }
        // If the destination register of the loads is the same register, bail
        // and keep looking. A load-pair instruction with both destination
        // registers the same is UNPREDICTABLE and will result in an exception.
        if (MayLoad && Reg == MI->getOperand(0).getReg()) {
          trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
          if (MI->mayLoadOrStore())
            MemInsns.push_back(MI);
          continue;
        }
        // If the Rt of the second instruction was not modified or used between
        // the two instructions and none of the instructions between the second
        // and first alias with the second, we can combine the second into the
        // first.
        if (!ModifiedRegs[MI->getOperand(0).getReg()] &&
            !(MI->mayLoad() && UsedRegs[MI->getOperand(0).getReg()]) &&
            !mayAlias(MI, MemInsns, TII)) {
          Flags.setMergeForward(false);
          return MBBI;
        }
        // Likewise, if the Rt of the first instruction is not modified or used
        // between the two instructions and none of the instructions between
        // the first and the second alias with the first, we can combine the
        // first into the second.
        if (!ModifiedRegs[FirstMI->getOperand(0).getReg()] &&
            !(FirstMI->mayLoad() && UsedRegs[FirstMI->getOperand(0).getReg()]) &&
            !mayAlias(FirstMI, MemInsns, TII)) {
          Flags.setMergeForward(true);
          return MBBI;
        }
        // Unable to combine these instructions due to interference in between.
        // Keep looking.
      }
    }

    // The instruction wasn't a matching load or store; stop searching if we
    // encounter a call instruction that might modify memory.
    if (MI->isCall())
      return E;

    // Update modified / used register lists.
    trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);

    // Otherwise, if the base register is modified, we have no match, so
    // return early.
    if (ModifiedRegs[BaseReg])
      return E;

    // Update list of instructions that read/write memory.
    if (MI->mayLoadOrStore())
      MemInsns.push_back(MI);
  }
  return E;
}
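// The in-range tests above come down to one property of the paired forms:
// they take a 7-bit signed, scaled immediate, whereas the single-register
// forms take a 12-bit unsigned one. The helper below is a standalone sketch
// of that check (named here for illustration; it is not the pass's own
// inBoundsForPair), with the unscaled case first normalized by the access
// size.
static bool offsetFitsPairedForm(int Offset, int AccessSize, bool IsUnscaled) {
  if (IsUnscaled) {
    // An unscaled offset must be an exact multiple of the access size to be
    // representable once the paired (scaled) encoding is used.
    if (Offset % AccessSize != 0)
      return false;
    Offset /= AccessSize;
  }
  return Offset >= -64 && Offset <= 63; // 7-bit signed immediate
}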
MipsInstrInfo::BranchType MipsInstrInfo::AnalyzeBranch( MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl<MachineOperand> &Cond, bool AllowModify, SmallVectorImpl<MachineInstr *> &BranchInstrs) const { MachineBasicBlock::reverse_iterator I = MBB.rbegin(), REnd = MBB.rend(); // Skip all the debug instructions. while (I != REnd && I->isDebugValue()) ++I; if (I == REnd || !isUnpredicatedTerminator(*I)) { // This block ends with no branches (it just falls through to its succ). // Leave TBB/FBB null. TBB = FBB = nullptr; return BT_NoBranch; } MachineInstr *LastInst = &*I; unsigned LastOpc = LastInst->getOpcode(); BranchInstrs.push_back(LastInst); // Not an analyzable branch (e.g., indirect jump). if (!getAnalyzableBrOpc(LastOpc)) return LastInst->isIndirectBranch() ? BT_Indirect : BT_None; // Get the second to last instruction in the block. unsigned SecondLastOpc = 0; MachineInstr *SecondLastInst = nullptr; if (++I != REnd) { SecondLastInst = &*I; SecondLastOpc = getAnalyzableBrOpc(SecondLastInst->getOpcode()); // Not an analyzable branch (must be an indirect jump). if (isUnpredicatedTerminator(*SecondLastInst) && !SecondLastOpc) return BT_None; } // If there is only one terminator instruction, process it. if (!SecondLastOpc) { // Unconditional branch. if (LastInst->isUnconditionalBranch()) { TBB = LastInst->getOperand(0).getMBB(); return BT_Uncond; } // Conditional branch AnalyzeCondBr(LastInst, LastOpc, TBB, Cond); return BT_Cond; } // If we reached here, there are two branches. // If there are three terminators, we don't know what sort of block this is. if (++I != REnd && isUnpredicatedTerminator(*I)) return BT_None; BranchInstrs.insert(BranchInstrs.begin(), SecondLastInst); // If second to last instruction is an unconditional branch, // analyze it and remove the last instruction. if (SecondLastInst->isUnconditionalBranch()) { // Return if the last instruction cannot be removed. if (!AllowModify) return BT_None; TBB = SecondLastInst->getOperand(0).getMBB(); LastInst->eraseFromParent(); BranchInstrs.pop_back(); return BT_Uncond; } // Conditional branch followed by an unconditional branch. // The last one must be unconditional. if (!LastInst->isUnconditionalBranch()) return BT_None; AnalyzeCondBr(SecondLastInst, SecondLastOpc, TBB, Cond); FBB = LastInst->getOperand(0).getMBB(); return BT_CondUncond; }
bool MipsInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const { // If the block has no terminators, it just falls into the block after it. MachineBasicBlock::iterator I = MBB.end(); if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) return false; // Get the last instruction in the block. MachineInstr *LastInst = I; // If there is only one terminator instruction, process it. unsigned LastOpc = LastInst->getOpcode(); if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { if (!LastInst->getDesc().isBranch()) return true; // Unconditional branch if (LastOpc == Mips::J) { TBB = LastInst->getOperand(0).getMBB(); return false; } Mips::CondCode BranchCode = GetCondFromBranchOpc(LastInst->getOpcode()); if (BranchCode == Mips::COND_INVALID) return true; // Can't handle indirect branch. // Conditional branch // Block ends with fall-through condbranch. if (LastOpc != Mips::COND_INVALID) { int LastNumOp = LastInst->getNumOperands(); TBB = LastInst->getOperand(LastNumOp-1).getMBB(); Cond.push_back(MachineOperand::CreateImm(BranchCode)); for (int i=0; i<LastNumOp-1; i++) { Cond.push_back(LastInst->getOperand(i)); } return false; } } // Get the instruction before it if it is a terminator. MachineInstr *SecondLastInst = I; // If there are three terminators, we don't know what sort of block this is. if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I)) return true; // If the block ends with Mips::J and a Mips::BNE/Mips::BEQ, handle it. unsigned SecondLastOpc = SecondLastInst->getOpcode(); Mips::CondCode BranchCode = GetCondFromBranchOpc(SecondLastOpc); if (BranchCode != Mips::COND_INVALID && LastOpc == Mips::J) { int SecondNumOp = SecondLastInst->getNumOperands(); TBB = SecondLastInst->getOperand(SecondNumOp-1).getMBB(); Cond.push_back(MachineOperand::CreateImm(BranchCode)); for (int i=0; i<SecondNumOp-1; i++) { Cond.push_back(SecondLastInst->getOperand(i)); } FBB = LastInst->getOperand(0).getMBB(); return false; } // If the block ends with two unconditional branches, handle it. The last // one is not executed, so remove it. if ((SecondLastOpc == Mips::J) && (LastOpc == Mips::J)) { TBB = SecondLastInst->getOperand(0).getMBB(); I = LastInst; if (AllowModify) I->eraseFromParent(); return false; } // Otherwise, can't handle this. return true; }
bool R600ExpandSpecialInstrsPass::ExpandInputPerspective(MachineInstr &MI) {
  const R600RegisterInfo &TRI = TII->getRegisterInfo();
  if (MI.getOpcode() != AMDGPU::input_perspective)
    return false;

  MachineBasicBlock::iterator I = &MI;
  unsigned DstReg = MI.getOperand(0).getReg();
  R600MachineFunctionInfo *MFI = MI.getParent()->getParent()
      ->getInfo<R600MachineFunctionInfo>();
  unsigned IJIndexBase;

  // In the Evergreen ISA doc, section 8.3.2:
  // We need to interpolate XY and ZW in two different instruction groups.
  // An INTERP_* must occupy all 4 slots of an instruction group.
  // Output of INTERP_XY is written in X,Y slots.
  // Output of INTERP_ZW is written in Z,W slots.
  //
  // Thus interpolation requires the following sequence:
  //
  // AnyGPR.x = INTERP_ZW; (Write Masked Out)
  // AnyGPR.y = INTERP_ZW; (Write Masked Out)
  // DstGPR.z = INTERP_ZW;
  // DstGPR.w = INTERP_ZW; (End of first IG)
  // DstGPR.x = INTERP_XY;
  // DstGPR.y = INTERP_XY;
  // AnyGPR.z = INTERP_XY; (Write Masked Out)
  // AnyGPR.w = INTERP_XY; (Write Masked Out) (End of second IG)
  //
  switch (MI.getOperand(1).getImm()) {
  case 0:
    IJIndexBase = MFI->GetIJPerspectiveIndex();
    break;
  case 1:
    IJIndexBase = MFI->GetIJLinearIndex();
    break;
  default:
    llvm_unreachable("Unknown ij index");
  }

  for (unsigned i = 0; i < 8; i++) {
    unsigned IJIndex = AMDGPU::R600_TReg32RegClass.getRegister(
        2 * IJIndexBase + ((i + 1) % 2));
    unsigned ReadReg = AMDGPU::R600_TReg32RegClass.getRegister(
        4 * MI.getOperand(2).getImm());

    unsigned Sel;
    switch (i % 4) {
    case 0: Sel = AMDGPU::sel_x; break;
    case 1: Sel = AMDGPU::sel_y; break;
    case 2: Sel = AMDGPU::sel_z; break;
    case 3: Sel = AMDGPU::sel_w; break;
    default: break;
    }

    unsigned Res = TRI.getSubReg(DstReg, Sel);
    const MCInstrDesc &Opcode = (i < 4) ? TII->get(AMDGPU::INTERP_ZW)
                                        : TII->get(AMDGPU::INTERP_XY);
    MachineInstr *NewMI = BuildMI(*(MI.getParent()), I,
                                  MI.getParent()->findDebugLoc(I),
                                  Opcode, Res)
        .addReg(IJIndex)
        .addReg(ReadReg)
        .addImm(0);
    if (!(i > 1 && i < 6)) {
      TII->addFlag(NewMI, 0, MO_FLAG_MASK);
    }
    if (i % 4 != 3)
      TII->addFlag(NewMI, 0, MO_FLAG_NOT_LAST);
  }

  MI.eraseFromParent();
  return true;
}
/// foldMemoryOperand - Try folding stack slot references in Ops into their /// instructions. /// /// @param Ops Operand indices from analyzeVirtReg(). /// @param LoadMI Load instruction to use instead of stack slot when non-null. /// @return True on success. bool InlineSpiller:: foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops, MachineInstr *LoadMI) { if (Ops.empty()) return false; // Don't attempt folding in bundles. MachineInstr *MI = Ops.front().first; if (Ops.back().first != MI || MI->isBundled()) return false; bool WasCopy = MI->isCopy(); unsigned ImpReg = 0; bool SpillSubRegs = (MI->getOpcode() == TargetOpcode::PATCHPOINT || MI->getOpcode() == TargetOpcode::STACKMAP); // TargetInstrInfo::foldMemoryOperand only expects explicit, non-tied // operands. SmallVector<unsigned, 8> FoldOps; for (unsigned i = 0, e = Ops.size(); i != e; ++i) { unsigned Idx = Ops[i].second; MachineOperand &MO = MI->getOperand(Idx); if (MO.isImplicit()) { ImpReg = MO.getReg(); continue; } // FIXME: Teach targets to deal with subregs. if (!SpillSubRegs && MO.getSubReg()) return false; // We cannot fold a load instruction into a def. if (LoadMI && MO.isDef()) return false; // Tied use operands should not be passed to foldMemoryOperand. if (!MI->isRegTiedToDefOperand(Idx)) FoldOps.push_back(Idx); } MachineInstrSpan MIS(MI); MachineInstr *FoldMI = LoadMI ? TII.foldMemoryOperand(MI, FoldOps, LoadMI) : TII.foldMemoryOperand(MI, FoldOps, StackSlot); if (!FoldMI) return false; // Remove LIS for any dead defs in the original MI not in FoldMI. for (MIBundleOperands MO(MI); MO.isValid(); ++MO) { if (!MO->isReg()) continue; unsigned Reg = MO->getReg(); if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg) || MRI.isReserved(Reg)) { continue; } // Skip non-Defs, including undef uses and internal reads. if (MO->isUse()) continue; MIBundleOperands::PhysRegInfo RI = MIBundleOperands(FoldMI).analyzePhysReg(Reg, &TRI); if (RI.Defines) continue; // FoldMI does not define this physreg. Remove the LI segment. assert(MO->isDead() && "Cannot fold physreg def"); for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units) { if (LiveRange *LR = LIS.getCachedRegUnit(*Units)) { SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot(); if (VNInfo *VNI = LR->getVNInfoAt(Idx)) LR->removeValNo(VNI); } } } LIS.ReplaceMachineInstrInMaps(MI, FoldMI); MI->eraseFromParent(); // Insert any new instructions other than FoldMI into the LIS maps. assert(!MIS.empty() && "Unexpected empty span of instructions!"); for (MachineBasicBlock::iterator MII = MIS.begin(), End = MIS.end(); MII != End; ++MII) if (&*MII != FoldMI) LIS.InsertMachineInstrInMaps(&*MII); // TII.foldMemoryOperand may have left some implicit operands on the // instruction. Strip them. if (ImpReg) for (unsigned i = FoldMI->getNumOperands(); i; --i) { MachineOperand &MO = FoldMI->getOperand(i - 1); if (!MO.isReg() || !MO.isImplicit()) break; if (MO.getReg() == ImpReg) FoldMI->RemoveOperand(i - 1); } DEBUG(dumpMachineInstrRangeWithSlotIndex(MIS.begin(), MIS.end(), LIS, "folded")); if (!WasCopy) ++NumFolded; else if (Ops.front().second == 0) ++NumSpills; else ++NumReloads; return true; }
//===----------------------------------------------------------------------===// // Branch Analysis //===----------------------------------------------------------------------===// bool MBlazeInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const { // If the block has no terminators, it just falls into the block after it. MachineBasicBlock::iterator I = MBB.end(); if (I == MBB.begin()) return false; --I; while (I->isDebugValue()) { if (I == MBB.begin()) return false; --I; } if (!isUnpredicatedTerminator(I)) return false; // Get the last instruction in the block. MachineInstr *LastInst = I; // If there is only one terminator instruction, process it. unsigned LastOpc = LastInst->getOpcode(); if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { if (MBlaze::isUncondBranchOpcode(LastOpc)) { TBB = LastInst->getOperand(0).getMBB(); return false; } if (MBlaze::isCondBranchOpcode(LastOpc)) { // Block ends with fall-through condbranch. TBB = LastInst->getOperand(1).getMBB(); Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode())); Cond.push_back(LastInst->getOperand(0)); return false; } // Otherwise, don't know what this is. return true; } // Get the instruction before it if it's a terminator. MachineInstr *SecondLastInst = I; // If there are three terminators, we don't know what sort of block this is. if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I)) return true; // If the block ends with something like BEQID then BRID, handle it. if (MBlaze::isCondBranchOpcode(SecondLastInst->getOpcode()) && MBlaze::isUncondBranchOpcode(LastInst->getOpcode())) { TBB = SecondLastInst->getOperand(1).getMBB(); Cond.push_back(MachineOperand::CreateImm(SecondLastInst->getOpcode())); Cond.push_back(SecondLastInst->getOperand(0)); FBB = LastInst->getOperand(0).getMBB(); return false; } // If the block ends with two unconditional branches, handle it. // The second one is not executed, so remove it. if (MBlaze::isUncondBranchOpcode(SecondLastInst->getOpcode()) && MBlaze::isUncondBranchOpcode(LastInst->getOpcode())) { TBB = SecondLastInst->getOperand(0).getMBB(); I = LastInst; if (AllowModify) I->eraseFromParent(); return false; } // Otherwise, can't handle this. return true; }
bool OptimizeLEAPass::isLEA(const MachineInstr &MI) { unsigned Opcode = MI.getOpcode(); return Opcode == X86::LEA16r || Opcode == X86::LEA32r || Opcode == X86::LEA64r || Opcode == X86::LEA64_32r; }
bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) { MachineRegisterInfo &MRI = MF.getRegInfo(); const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo *>(MF.getSubtarget().getRegisterInfo()); const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo()); for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE; ++BI) { MachineBasicBlock &MBB = *BI; for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I) { MachineInstr &MI = *I; switch (MI.getOpcode()) { default: continue; case AMDGPU::COPY: { if (isVGPRToSGPRCopy(MI, TRI, MRI)) { DEBUG(dbgs() << "Fixing VGPR -> SGPR copy: " << MI); TII->moveToVALU(MI); } break; } case AMDGPU::PHI: { DEBUG(dbgs() << "Fixing PHI: " << MI); for (unsigned i = 1; i < MI.getNumOperands(); i += 2) { const MachineOperand &Op = MI.getOperand(i); unsigned Reg = Op.getReg(); const TargetRegisterClass *RC = inferRegClassFromDef(TRI, MRI, Reg, Op.getSubReg()); MRI.constrainRegClass(Op.getReg(), RC); } unsigned Reg = MI.getOperand(0).getReg(); const TargetRegisterClass *RC = inferRegClassFromUses(TRI, MRI, Reg, MI.getOperand(0).getSubReg()); if (TRI->getCommonSubClass(RC, &AMDGPU::VGPR_32RegClass)) { MRI.constrainRegClass(Reg, &AMDGPU::VGPR_32RegClass); } if (!TRI->isSGPRClass(MRI.getRegClass(Reg))) break; // If a PHI node defines an SGPR and any of its operands are VGPRs, // then we need to move it to the VALU. // // Also, if a PHI node defines an SGPR and has all SGPR operands // we must move it to the VALU, because the SGPR operands will // all end up being assigned the same register, which means // there is a potential for a conflict if different threads take // different control flow paths. // // For Example: // // sgpr0 = def; // ... // sgpr1 = def; // ... // sgpr2 = PHI sgpr0, sgpr1 // use sgpr2; // // Will Become: // // sgpr2 = def; // ... // sgpr2 = def; // ... // use sgpr2 // // FIXME: This is OK if the branching decision is made based on an // SGPR value. bool SGPRBranch = false; // The one exception to this rule is when one of the operands // is defined by a SI_BREAK, SI_IF_BREAK, or SI_ELSE_BREAK // instruction. In this case, there we know the program will // never enter the second block (the loop) without entering // the first block (where the condition is computed), so there // is no chance for values to be over-written. bool HasBreakDef = false; for (unsigned i = 1; i < MI.getNumOperands(); i+=2) { unsigned Reg = MI.getOperand(i).getReg(); if (TRI->hasVGPRs(MRI.getRegClass(Reg))) { TII->moveToVALU(MI); break; } MachineInstr *DefInstr = MRI.getUniqueVRegDef(Reg); assert(DefInstr); switch(DefInstr->getOpcode()) { case AMDGPU::SI_BREAK: case AMDGPU::SI_IF_BREAK: case AMDGPU::SI_ELSE_BREAK: // If we see a PHI instruction that defines an SGPR, then that PHI // instruction has already been considered and should have // a *_BREAK as an operand. 
case AMDGPU::PHI: HasBreakDef = true; break; } } if (!SGPRBranch && !HasBreakDef) TII->moveToVALU(MI); break; } case AMDGPU::REG_SEQUENCE: { if (TRI->hasVGPRs(TII->getOpRegClass(MI, 0)) || !hasVGPROperands(MI, TRI)) continue; DEBUG(dbgs() << "Fixing REG_SEQUENCE: " << MI); TII->moveToVALU(MI); break; } case AMDGPU::INSERT_SUBREG: { const TargetRegisterClass *DstRC, *Src0RC, *Src1RC; DstRC = MRI.getRegClass(MI.getOperand(0).getReg()); Src0RC = MRI.getRegClass(MI.getOperand(1).getReg()); Src1RC = MRI.getRegClass(MI.getOperand(2).getReg()); if (TRI->isSGPRClass(DstRC) && (TRI->hasVGPRs(Src0RC) || TRI->hasVGPRs(Src1RC))) { DEBUG(dbgs() << " Fixing INSERT_SUBREG: " << MI); TII->moveToVALU(MI); } break; } } } } return true; }
bool AArch64InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
                                     MachineBasicBlock *&TBB,
                                     MachineBasicBlock *&FBB,
                                     SmallVectorImpl<MachineOperand> &Cond,
                                     bool AllowModify) const {
  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.end();
  if (I == MBB.begin())
    return false;
  --I;
  while (I->isDebugValue()) {
    if (I == MBB.begin())
      return false;
    --I;
  }
  if (!isUnpredicatedTerminator(I))
    return false;

  // Get the last instruction in the block.
  MachineInstr *LastInst = I;

  // If there is only one terminator instruction, process it.
  unsigned LastOpc = LastInst->getOpcode();
  if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
    if (LastOpc == AArch64::Bimm) {
      TBB = LastInst->getOperand(0).getMBB();
      return false;
    }
    if (isCondBranch(LastOpc)) {
      classifyCondBranch(LastInst, TBB, Cond);
      return false;
    }
    return true; // Can't handle indirect branch.
  }

  // Get the instruction before it if it is a terminator.
  MachineInstr *SecondLastInst = I;
  unsigned SecondLastOpc = SecondLastInst->getOpcode();

  // If AllowModify is true and the block ends with two or more unconditional
  // branches, delete all but the first unconditional branch.
  if (AllowModify && LastOpc == AArch64::Bimm) {
    while (SecondLastOpc == AArch64::Bimm) {
      LastInst->eraseFromParent();
      LastInst = SecondLastInst;
      LastOpc = LastInst->getOpcode();
      if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
        // Return now; the only remaining terminator is an unconditional
        // branch.
        TBB = LastInst->getOperand(0).getMBB();
        return false;
      } else {
        SecondLastInst = I;
        SecondLastOpc = SecondLastInst->getOpcode();
      }
    }
  }

  // If there are three terminators, we don't know what sort of block this is.
  if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
    return true;

  // If the block ends with a B and a Bcc, handle it.
  if (LastOpc == AArch64::Bimm) {
    if (SecondLastOpc == AArch64::Bcc) {
      TBB = SecondLastInst->getOperand(1).getMBB();
      Cond.push_back(MachineOperand::CreateImm(AArch64::Bcc));
      Cond.push_back(SecondLastInst->getOperand(0));
      FBB = LastInst->getOperand(0).getMBB();
      return false;
    } else if (isCondBranch(SecondLastOpc)) {
      classifyCondBranch(SecondLastInst, TBB, Cond);
      FBB = LastInst->getOperand(0).getMBB();
      return false;
    }
  }

  // If the block ends with two unconditional branches, handle it. The second
  // one is not executed, so remove it.
  if (SecondLastOpc == AArch64::Bimm && LastOpc == AArch64::Bimm) {
    TBB = SecondLastInst->getOperand(0).getMBB();
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return false;
  }

  // Otherwise, can't handle this.
  return true;
}
/// analyzeBranch - Analyze the branching code at the end of MBB, returning
/// true if it cannot be understood (e.g. it's a switch dispatch or isn't
/// implemented for a target). Upon success, this returns false and returns
/// with the following information in various cases:
///
/// 1. If this block ends with no branches (it just falls through to its succ)
///    just return false, leaving TBB/FBB null.
/// 2. If this block ends with only an unconditional branch, it sets TBB to be
///    the destination block.
/// 3. If this block ends with a conditional branch and it falls through to a
///    successor block, it sets TBB to be the branch destination block and
///    provides a list of operands that evaluate the condition. These operands
///    can be passed to other TargetInstrInfo methods to create new branches.
/// 4. If this block ends with a conditional branch followed by an
///    unconditional branch, it returns the 'true' destination in TBB, the
///    'false' destination in FBB, and a list of operands that evaluate the
///    condition. These operands can be passed to other TargetInstrInfo
///    methods to create new branches.
///
/// Note that RemoveBranch and InsertBranch must be implemented to support
/// cases where this method returns success.
///
bool XCoreInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
                                   MachineBasicBlock *&TBB,
                                   MachineBasicBlock *&FBB,
                                   SmallVectorImpl<MachineOperand> &Cond,
                                   bool AllowModify) const {
  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return false;

  if (!isUnpredicatedTerminator(*I))
    return false;

  // Get the last instruction in the block.
  MachineInstr *LastInst = I;

  // If there is only one terminator instruction, process it.
  if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
    if (IsBRU(LastInst->getOpcode())) {
      TBB = LastInst->getOperand(0).getMBB();
      return false;
    }

    XCore::CondCode BranchCode = GetCondFromBranchOpc(LastInst->getOpcode());
    if (BranchCode == XCore::COND_INVALID)
      return true; // Can't handle indirect branch.

    // Conditional branch
    // Block ends with fall-through condbranch.
    TBB = LastInst->getOperand(1).getMBB();
    Cond.push_back(MachineOperand::CreateImm(BranchCode));
    Cond.push_back(LastInst->getOperand(0));
    return false;
  }

  // Get the instruction before it if it's a terminator.
  MachineInstr *SecondLastInst = I;

  // If there are three terminators, we don't know what sort of block this is.
  if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
    return true;

  unsigned SecondLastOpc = SecondLastInst->getOpcode();
  XCore::CondCode BranchCode = GetCondFromBranchOpc(SecondLastOpc);

  // If the block ends with a conditional branch followed by an unconditional
  // branch, handle it.
  if (BranchCode != XCore::COND_INVALID && IsBRU(LastInst->getOpcode())) {
    TBB = SecondLastInst->getOperand(1).getMBB();
    Cond.push_back(MachineOperand::CreateImm(BranchCode));
    Cond.push_back(SecondLastInst->getOperand(0));
    FBB = LastInst->getOperand(0).getMBB();
    return false;
  }

  // If the block ends with two unconditional branches, handle it. The second
  // one is not executed, so remove it.
  if (IsBRU(SecondLastInst->getOpcode()) && IsBRU(LastInst->getOpcode())) {
    TBB = SecondLastInst->getOperand(0).getMBB();
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return false;
  }

  // Likewise if it ends with a branch table followed by an unconditional
  // branch.
  if (IsBR_JT(SecondLastInst->getOpcode()) && IsBRU(LastInst->getOpcode())) {
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return true;
  }

  // Otherwise, can't handle this.
  return true;
}
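// A hedged sketch of how a caller typically consumes the contract documented
// above the XCore implementation; the enum and function below are
// illustrative only, and the hook spelling (AnalyzeBranch vs. analyzeBranch)
// differs between the older and newer ports collected here.
enum class BlockEnding { FallThrough, Unconditional, CondFallThrough,
                         CondPlusUncond, Unknown };

static BlockEnding classifyEnding(const TargetInstrInfo &TII,
                                  MachineBasicBlock &MBB) {
  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
  SmallVector<MachineOperand, 4> Cond;
  if (TII.analyzeBranch(MBB, TBB, FBB, Cond, /*AllowModify=*/false))
    return BlockEnding::Unknown;          // e.g. indirect branch
  if (!TBB)
    return BlockEnding::FallThrough;      // case 1: no branches at all
  if (Cond.empty())
    return BlockEnding::Unconditional;    // case 2: branch to TBB only
  if (!FBB)
    return BlockEnding::CondFallThrough;  // case 3: conditional, falls through
  return BlockEnding::CondPlusUncond;     // case 4: TBB if true, FBB if false
}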
void AArch64InstrInfo::getAddressConstraints(const MachineInstr &MI,
                                             int &AccessScale, int &MinOffset,
                                             int &MaxOffset) const {
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Unknown load/store kind");
  case TargetOpcode::DBG_VALUE:
    AccessScale = 1; MinOffset = INT_MIN; MaxOffset = INT_MAX;
    return;
  case AArch64::LS8_LDR: case AArch64::LS8_STR:
  case AArch64::LSFP8_LDR: case AArch64::LSFP8_STR:
  case AArch64::LDRSBw: case AArch64::LDRSBx:
    AccessScale = 1; MinOffset = 0; MaxOffset = 0xfff;
    return;
  case AArch64::LS16_LDR: case AArch64::LS16_STR:
  case AArch64::LSFP16_LDR: case AArch64::LSFP16_STR:
  case AArch64::LDRSHw: case AArch64::LDRSHx:
    AccessScale = 2; MinOffset = 0; MaxOffset = 0xfff * AccessScale;
    return;
  case AArch64::LS32_LDR: case AArch64::LS32_STR:
  case AArch64::LSFP32_LDR: case AArch64::LSFP32_STR:
  case AArch64::LDRSWx: case AArch64::LDPSWx:
    AccessScale = 4; MinOffset = 0; MaxOffset = 0xfff * AccessScale;
    return;
  case AArch64::LS64_LDR: case AArch64::LS64_STR:
  case AArch64::LSFP64_LDR: case AArch64::LSFP64_STR:
  case AArch64::PRFM:
    AccessScale = 8; MinOffset = 0; MaxOffset = 0xfff * AccessScale;
    return;
  case AArch64::LSFP128_LDR: case AArch64::LSFP128_STR:
    AccessScale = 16; MinOffset = 0; MaxOffset = 0xfff * AccessScale;
    return;
  case AArch64::LSPair32_LDR: case AArch64::LSPair32_STR:
  case AArch64::LSFPPair32_LDR: case AArch64::LSFPPair32_STR:
    AccessScale = 4;
    MinOffset = -0x40 * AccessScale;
    MaxOffset = 0x3f * AccessScale;
    return;
  case AArch64::LSPair64_LDR: case AArch64::LSPair64_STR:
  case AArch64::LSFPPair64_LDR: case AArch64::LSFPPair64_STR:
    AccessScale = 8;
    MinOffset = -0x40 * AccessScale;
    MaxOffset = 0x3f * AccessScale;
    return;
  case AArch64::LSFPPair128_LDR: case AArch64::LSFPPair128_STR:
    AccessScale = 16;
    MinOffset = -0x40 * AccessScale;
    MaxOffset = 0x3f * AccessScale;
    return;
  case AArch64::LD1x2_8B: case AArch64::ST1x2_8B:
    AccessScale = 16; MinOffset = 0; MaxOffset = 0xfff * AccessScale;
    return;
  case AArch64::LD1x3_8B: case AArch64::ST1x3_8B:
    AccessScale = 24; MinOffset = 0; MaxOffset = 0xfff * AccessScale;
    return;
  case AArch64::LD1x4_8B: case AArch64::ST1x4_8B:
  case AArch64::LD1x2_16B: case AArch64::ST1x2_16B:
    AccessScale = 32; MinOffset = 0; MaxOffset = 0xfff * AccessScale;
    return;
  case AArch64::LD1x3_16B: case AArch64::ST1x3_16B:
    AccessScale = 48; MinOffset = 0; MaxOffset = 0xfff * AccessScale;
    return;
  case AArch64::LD1x4_16B: case AArch64::ST1x4_16B:
    AccessScale = 64; MinOffset = 0; MaxOffset = 0xfff * AccessScale;
    return;
  }
}
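// A small usage sketch (an assumed caller, not part of the backend): with the
// constraints produced above, an immediate offset is only encodable for the
// instruction's addressing mode if it is a multiple of the access scale and
// falls inside [MinOffset, MaxOffset].
static bool isOffsetEncodable(const AArch64InstrInfo &TII,
                              const MachineInstr &MI, int Offset) {
  int AccessScale, MinOffset, MaxOffset;
  TII.getAddressConstraints(MI, AccessScale, MinOffset, MaxOffset);
  if (Offset % AccessScale != 0)
    return false;
  return Offset >= MinOffset && Offset <= MaxOffset;
}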
bool AMDGPUInstrInfo::isRegisterStore(const MachineInstr &MI) const { return get(MI.getOpcode()).TSFlags & AMDGPU_FLAG_REGISTER_STORE; }
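// The predicate above is an instance of the common TSFlags idiom: per-opcode,
// target-specific properties are packed into a bit field in the instruction
// description and tested with a mask. A minimal standalone sketch follows;
// the flag names and bit positions here are made up, not the real AMDGPU
// encoding.
#include <cstdint>

namespace toy {
enum : uint64_t {
  FLAG_REGISTER_LOAD  = 1ull << 0,
  FLAG_REGISTER_STORE = 1ull << 1,
};

struct ToyInstrDesc {
  uint64_t TSFlags;
};

inline bool isRegisterStore(const ToyInstrDesc &D) {
  return (D.TSFlags & FLAG_REGISTER_STORE) != 0;
}
} // namespace toy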
bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { QII = static_cast<const HexagonInstrInfo *>(MF.getSubtarget().getInstrInfo()); QRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo(); MRI = &MF.getRegInfo(); DenseMap<unsigned, unsigned> PeepholeMap; DenseMap<unsigned, std::pair<unsigned, unsigned> > PeepholeDoubleRegsMap; if (DisableHexagonPeephole) return false; // Loop over all of the basic blocks. for (MachineFunction::iterator MBBb = MF.begin(), MBBe = MF.end(); MBBb != MBBe; ++MBBb) { MachineBasicBlock *MBB = &*MBBb; PeepholeMap.clear(); PeepholeDoubleRegsMap.clear(); // Traverse the basic block. for (MachineBasicBlock::iterator MII = MBB->begin(); MII != MBB->end(); ++MII) { MachineInstr *MI = MII; // Look for sign extends: // %vreg170<def> = SXTW %vreg166 if (!DisableOptSZExt && MI->getOpcode() == Hexagon::A2_sxtw) { assert (MI->getNumOperands() == 2); MachineOperand &Dst = MI->getOperand(0); MachineOperand &Src = MI->getOperand(1); unsigned DstReg = Dst.getReg(); unsigned SrcReg = Src.getReg(); // Just handle virtual registers. if (TargetRegisterInfo::isVirtualRegister(DstReg) && TargetRegisterInfo::isVirtualRegister(SrcReg)) { // Map the following: // %vreg170<def> = SXTW %vreg166 // PeepholeMap[170] = vreg166 PeepholeMap[DstReg] = SrcReg; } } // Look for %vreg170<def> = COMBINE_ir_V4 (0, %vreg169) // %vreg170:DoublRegs, %vreg169:IntRegs if (!DisableOptExtTo64 && MI->getOpcode () == Hexagon::A4_combineir) { assert (MI->getNumOperands() == 3); MachineOperand &Dst = MI->getOperand(0); MachineOperand &Src1 = MI->getOperand(1); MachineOperand &Src2 = MI->getOperand(2); if (Src1.getImm() != 0) continue; unsigned DstReg = Dst.getReg(); unsigned SrcReg = Src2.getReg(); PeepholeMap[DstReg] = SrcReg; } // Look for this sequence below // %vregDoubleReg1 = LSRd_ri %vregDoubleReg0, 32 // %vregIntReg = COPY %vregDoubleReg1:subreg_loreg. // and convert into // %vregIntReg = COPY %vregDoubleReg0:subreg_hireg. if (MI->getOpcode() == Hexagon::S2_lsr_i_p) { assert(MI->getNumOperands() == 3); MachineOperand &Dst = MI->getOperand(0); MachineOperand &Src1 = MI->getOperand(1); MachineOperand &Src2 = MI->getOperand(2); if (Src2.getImm() != 32) continue; unsigned DstReg = Dst.getReg(); unsigned SrcReg = Src1.getReg(); PeepholeDoubleRegsMap[DstReg] = std::make_pair(*&SrcReg, 1/*Hexagon::subreg_hireg*/); } // Look for P=NOT(P). if (!DisablePNotP && (MI->getOpcode() == Hexagon::C2_not)) { assert (MI->getNumOperands() == 2); MachineOperand &Dst = MI->getOperand(0); MachineOperand &Src = MI->getOperand(1); unsigned DstReg = Dst.getReg(); unsigned SrcReg = Src.getReg(); // Just handle virtual registers. if (TargetRegisterInfo::isVirtualRegister(DstReg) && TargetRegisterInfo::isVirtualRegister(SrcReg)) { // Map the following: // %vreg170<def> = NOT_xx %vreg166 // PeepholeMap[170] = vreg166 PeepholeMap[DstReg] = SrcReg; } } // Look for copy: // %vreg176<def> = COPY %vreg170:subreg_loreg if (!DisableOptSZExt && MI->isCopy()) { assert (MI->getNumOperands() == 2); MachineOperand &Dst = MI->getOperand(0); MachineOperand &Src = MI->getOperand(1); // Make sure we are copying the lower 32 bits. if (Src.getSubReg() != Hexagon::subreg_loreg) continue; unsigned DstReg = Dst.getReg(); unsigned SrcReg = Src.getReg(); if (TargetRegisterInfo::isVirtualRegister(DstReg) && TargetRegisterInfo::isVirtualRegister(SrcReg)) { // Try to find in the map. if (unsigned PeepholeSrc = PeepholeMap.lookup(SrcReg)) { // Change the 1st operand. 
MI->RemoveOperand(1); MI->addOperand(MachineOperand::CreateReg(PeepholeSrc, false)); } else { DenseMap<unsigned, std::pair<unsigned, unsigned> >::iterator DI = PeepholeDoubleRegsMap.find(SrcReg); if (DI != PeepholeDoubleRegsMap.end()) { std::pair<unsigned,unsigned> PeepholeSrc = DI->second; MI->RemoveOperand(1); MI->addOperand(MachineOperand::CreateReg(PeepholeSrc.first, false /*isDef*/, false /*isImp*/, false /*isKill*/, false /*isDead*/, false /*isUndef*/, false /*isEarlyClobber*/, PeepholeSrc.second)); } } } } // Look for Predicated instructions. if (!DisablePNotP) { bool Done = false; if (QII->isPredicated(MI)) { MachineOperand &Op0 = MI->getOperand(0); unsigned Reg0 = Op0.getReg(); const TargetRegisterClass *RC0 = MRI->getRegClass(Reg0); if (RC0->getID() == Hexagon::PredRegsRegClassID) { // Handle instructions that have a predicate register in op0 // (most cases of predicable instructions). if (TargetRegisterInfo::isVirtualRegister(Reg0)) { // Try to find in the map. if (unsigned PeepholeSrc = PeepholeMap.lookup(Reg0)) { // Change the 1st operand and flip the opcode. MI->getOperand(0).setReg(PeepholeSrc); int NewOp = QII->getInvertedPredicatedOpcode(MI->getOpcode()); MI->setDesc(QII->get(NewOp)); Done = true; } } } } if (!Done) { // Handle special instructions. unsigned Op = MI->getOpcode(); unsigned NewOp = 0; unsigned PR = 1, S1 = 2, S2 = 3; // Operand indices. switch (Op) { case Hexagon::C2_mux: case Hexagon::C2_muxii: NewOp = Op; break; case Hexagon::C2_muxri: NewOp = Hexagon::C2_muxir; break; case Hexagon::C2_muxir: NewOp = Hexagon::C2_muxri; break; } if (NewOp) { unsigned PSrc = MI->getOperand(PR).getReg(); if (unsigned POrig = PeepholeMap.lookup(PSrc)) { MI->getOperand(PR).setReg(POrig); MI->setDesc(QII->get(NewOp)); // Swap operands S1 and S2. MachineOperand Op1 = MI->getOperand(S1); MachineOperand Op2 = MI->getOperand(S2); ChangeOpInto(MI->getOperand(S1), Op2); ChangeOpInto(MI->getOperand(S2), Op1); } } // if (NewOp) } // if (!Done) } // if (!DisablePNotP) } // Instruction } // Basic Block return true; }
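// The pass above is essentially value tracking with a map: defs such as
// "d = sxtw(s)" are recorded, and later copies of the low sub-register of d
// are rewritten to read s directly. A self-contained toy model (not Hexagon
// or LLVM code) of that bookkeeping:
#include <unordered_map>
#include <vector>

struct ToyInstr {
  enum Kind { SExt, CopyLo, Other } K;
  unsigned Dst;
  unsigned Src;
};

static void runToyPeephole(std::vector<ToyInstr> &Block) {
  // Maps an extension result to its 32-bit source register.
  std::unordered_map<unsigned, unsigned> SExtSrc;
  for (ToyInstr &I : Block) {
    if (I.K == ToyInstr::SExt) {
      SExtSrc[I.Dst] = I.Src;            // remember d = sxtw(s)
    } else if (I.K == ToyInstr::CopyLo) {
      auto It = SExtSrc.find(I.Src);
      if (It != SExtSrc.end())
        I.Src = It->second;              // x = copy(lo32(d)) -> x = copy(s)
    }
  }
}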
bool AMDGPUInstrInfo::isRegisterLoad(const MachineInstr &MI) const { return get(MI.getOpcode()).TSFlags & AMDGPU_FLAG_REGISTER_LOAD; }
bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) { bool Modified = false; // Two tranformations to do here: // 1) Find loads and stores that can be merged into a single load or store // pair instruction. // e.g., // ldr x0, [x2] // ldr x1, [x2, #8] // ; becomes // ldp x0, x1, [x2] // 2) Find base register updates that can be merged into the load or store // as a base-reg writeback. // e.g., // ldr x0, [x2] // add x2, x2, #4 // ; becomes // ldr x0, [x2], #4 for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); MBBI != E;) { MachineInstr *MI = MBBI; switch (MI->getOpcode()) { default: // Just move on to the next instruction. ++MBBI; break; case AArch64::STRSui: case AArch64::STRDui: case AArch64::STRQui: case AArch64::STRXui: case AArch64::STRWui: case AArch64::LDRSui: case AArch64::LDRDui: case AArch64::LDRQui: case AArch64::LDRXui: case AArch64::LDRWui: case AArch64::LDRSWui: // do the unscaled versions as well case AArch64::STURSi: case AArch64::STURDi: case AArch64::STURQi: case AArch64::STURWi: case AArch64::STURXi: case AArch64::LDURSi: case AArch64::LDURDi: case AArch64::LDURQi: case AArch64::LDURWi: case AArch64::LDURXi: case AArch64::LDURSWi: { // If this is a volatile load/store, don't mess with it. if (MI->hasOrderedMemoryRef()) { ++MBBI; break; } // Make sure this is a reg+imm (as opposed to an address reloc). if (!MI->getOperand(2).isImm()) { ++MBBI; break; } // Check if this load/store has a hint to avoid pair formation. // MachineMemOperands hints are set by the AArch64StorePairSuppress pass. if (TII->isLdStPairSuppressed(MI)) { ++MBBI; break; } // Look ahead up to ScanLimit instructions for a pairable instruction. LdStPairFlags Flags; MachineBasicBlock::iterator Paired = findMatchingInsn(MBBI, Flags, ScanLimit); if (Paired != E) { // Merge the loads into a pair. Keeping the iterator straight is a // pain, so we let the merge routine tell us what the next instruction // is after it's done mucking about. MBBI = mergePairedInsns(MBBI, Paired, Flags); Modified = true; ++NumPairCreated; if (isUnscaledLdst(MI->getOpcode())) ++NumUnscaledPairCreated; break; } ++MBBI; break; } // FIXME: Do the other instructions. } } for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); MBBI != E;) { MachineInstr *MI = MBBI; // Do update merging. It's simpler to keep this separate from the above // switch, though not strictly necessary. unsigned Opc = MI->getOpcode(); switch (Opc) { default: // Just move on to the next instruction. ++MBBI; break; case AArch64::STRSui: case AArch64::STRDui: case AArch64::STRQui: case AArch64::STRXui: case AArch64::STRWui: case AArch64::LDRSui: case AArch64::LDRDui: case AArch64::LDRQui: case AArch64::LDRXui: case AArch64::LDRWui: // do the unscaled versions as well case AArch64::STURSi: case AArch64::STURDi: case AArch64::STURQi: case AArch64::STURWi: case AArch64::STURXi: case AArch64::LDURSi: case AArch64::LDURDi: case AArch64::LDURQi: case AArch64::LDURWi: case AArch64::LDURXi: { // Make sure this is a reg+imm (as opposed to an address reloc). if (!MI->getOperand(2).isImm()) { ++MBBI; break; } // Look ahead up to ScanLimit instructions for a mergable instruction. MachineBasicBlock::iterator Update = findMatchingUpdateInsnForward(MBBI, ScanLimit, 0); if (Update != E) { // Merge the update into the ld/st. MBBI = mergePostIdxUpdateInsn(MBBI, Update); Modified = true; ++NumPostFolded; break; } // Don't know how to handle pre/post-index versions, so move to the next // instruction. 
if (isUnscaledLdst(Opc)) { ++MBBI; break; } // Look back to try to find a pre-index instruction. For example, // add x0, x0, #8 // ldr x1, [x0] // merged into: // ldr x1, [x0, #8]! Update = findMatchingUpdateInsnBackward(MBBI, ScanLimit); if (Update != E) { // Merge the update into the ld/st. MBBI = mergePreIdxUpdateInsn(MBBI, Update); Modified = true; ++NumPreFolded; break; } // Look forward to try to find a post-index instruction. For example, // ldr x1, [x0, #64] // add x0, x0, #64 // merged into: // ldr x1, [x0, #64]! // The immediate in the load/store is scaled by the size of the register // being loaded. The immediate in the add we're looking for, // however, is not, so adjust here. int Value = MI->getOperand(2).getImm() * TII->getRegClass(MI->getDesc(), 0, TRI, *(MBB.getParent())) ->getSize(); Update = findMatchingUpdateInsnForward(MBBI, ScanLimit, Value); if (Update != E) { // Merge the update into the ld/st. MBBI = mergePreIdxUpdateInsn(MBBI, Update); Modified = true; ++NumPreFolded; break; } // Nothing found. Just move to the next instruction. ++MBBI; break; } // FIXME: Do the other instructions. } } return Modified; }
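// A condensed, standalone sketch of the two arithmetic checks behind the
// transformations above (illustrative only; the real pass also checks the
// base register, register dependencies, volatility, and alignment). The
// scaled-immediate forms count register-sized units, so pairing requires the
// second offset to be exactly one unit past the first, and folding a base
// update requires converting the scaled immediate back to bytes first.
#include <cstdint>

static bool areScaledOffsetsPairable(int64_t ScaledImm1, int64_t ScaledImm2) {
  return ScaledImm2 == ScaledImm1 + 1 || ScaledImm1 == ScaledImm2 + 1;
}

static int64_t scaledImmToBytes(int64_t ScaledImm, int64_t RegSizeInBytes) {
  return ScaledImm * RegSizeInBytes;  // what the add/sub being folded must match
}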
static void LowerTlsAddr(MCStreamer &OutStreamer, X86MCInstLower &MCInstLowering, const MachineInstr &MI) { bool is64Bits = MI.getOpcode() == X86::TLS_addr64 || MI.getOpcode() == X86::TLS_base_addr64; bool needsPadding = MI.getOpcode() == X86::TLS_addr64; MCContext &context = OutStreamer.getContext(); if (needsPadding) { MCInst prefix; prefix.setOpcode(X86::DATA16_PREFIX); OutStreamer.EmitInstruction(prefix); } MCSymbolRefExpr::VariantKind SRVK; switch (MI.getOpcode()) { case X86::TLS_addr32: case X86::TLS_addr64: SRVK = MCSymbolRefExpr::VK_TLSGD; break; case X86::TLS_base_addr32: SRVK = MCSymbolRefExpr::VK_TLSLDM; break; case X86::TLS_base_addr64: SRVK = MCSymbolRefExpr::VK_TLSLD; break; default: llvm_unreachable("unexpected opcode"); } MCSymbol *sym = MCInstLowering.GetSymbolFromOperand(MI.getOperand(3)); const MCSymbolRefExpr *symRef = MCSymbolRefExpr::Create(sym, SRVK, context); MCInst LEA; if (is64Bits) { LEA.setOpcode(X86::LEA64r); LEA.addOperand(MCOperand::CreateReg(X86::RDI)); // dest LEA.addOperand(MCOperand::CreateReg(X86::RIP)); // base LEA.addOperand(MCOperand::CreateImm(1)); // scale LEA.addOperand(MCOperand::CreateReg(0)); // index LEA.addOperand(MCOperand::CreateExpr(symRef)); // disp LEA.addOperand(MCOperand::CreateReg(0)); // seg } else if (SRVK == MCSymbolRefExpr::VK_TLSLDM) { LEA.setOpcode(X86::LEA32r); LEA.addOperand(MCOperand::CreateReg(X86::EAX)); // dest LEA.addOperand(MCOperand::CreateReg(X86::EBX)); // base LEA.addOperand(MCOperand::CreateImm(1)); // scale LEA.addOperand(MCOperand::CreateReg(0)); // index LEA.addOperand(MCOperand::CreateExpr(symRef)); // disp LEA.addOperand(MCOperand::CreateReg(0)); // seg } else { LEA.setOpcode(X86::LEA32r); LEA.addOperand(MCOperand::CreateReg(X86::EAX)); // dest LEA.addOperand(MCOperand::CreateReg(0)); // base LEA.addOperand(MCOperand::CreateImm(1)); // scale LEA.addOperand(MCOperand::CreateReg(X86::EBX)); // index LEA.addOperand(MCOperand::CreateExpr(symRef)); // disp LEA.addOperand(MCOperand::CreateReg(0)); // seg } OutStreamer.EmitInstruction(LEA); if (needsPadding) { MCInst prefix; prefix.setOpcode(X86::DATA16_PREFIX); OutStreamer.EmitInstruction(prefix); prefix.setOpcode(X86::DATA16_PREFIX); OutStreamer.EmitInstruction(prefix); prefix.setOpcode(X86::REX64_PREFIX); OutStreamer.EmitInstruction(prefix); } MCInst call; if (is64Bits) call.setOpcode(X86::CALL64pcrel32); else call.setOpcode(X86::CALLpcrel32); StringRef name = is64Bits ? "__tls_get_addr" : "___tls_get_addr"; MCSymbol *tlsGetAddr = context.GetOrCreateSymbol(name); const MCSymbolRefExpr *tlsRef = MCSymbolRefExpr::Create(tlsGetAddr, MCSymbolRefExpr::VK_PLT, context); call.addOperand(MCOperand::CreateExpr(tlsRef)); OutStreamer.EmitInstruction(call); }
static bool canCompareBeNewValueJump(const HexagonInstrInfo *QII, const TargetRegisterInfo *TRI, MachineBasicBlock::iterator II, unsigned pReg, bool secondReg, bool optLocation, MachineBasicBlock::iterator end, MachineFunction &MF) { MachineInstr *MI = II; // If the second operand of the compare is an imm, make sure it's in the // range specified by the arch. if (!secondReg) { int64_t v = MI->getOperand(2).getImm(); if (!(isUInt<5>(v) || ((MI->getOpcode() == Hexagon::CMPEQri || MI->getOpcode() == Hexagon::CMPGTri) && (v == -1)))) return false; } unsigned cmpReg1, cmpOp2 = 0; // cmpOp2 assignment silences compiler warning. cmpReg1 = MI->getOperand(1).getReg(); if (secondReg) { cmpOp2 = MI->getOperand(2).getReg(); // Make sure that that second register is not from COPY // At machine code level, we don't need this, but if we decide // to move new value jump prior to RA, we would be needing this. MachineRegisterInfo &MRI = MF.getRegInfo(); if (secondReg && !TargetRegisterInfo::isPhysicalRegister(cmpOp2)) { MachineInstr *def = MRI.getVRegDef(cmpOp2); if (def->getOpcode() == TargetOpcode::COPY) return false; } } // Walk the instructions after the compare (predicate def) to the jump, // and satisfy the following conditions. ++II ; for (MachineBasicBlock::iterator localII = II; localII != end; ++localII) { // Check 1. // If "common" checks fail, bail out. if (!commonChecksToProhibitNewValueJump(optLocation, localII)) return false; // Check 2. // If there is a def or use of predicate (result of compare), bail out. if (localII->modifiesRegister(pReg, TRI) || localII->readsRegister(pReg, TRI)) return false; // Check 3. // If there is a def of any of the use of the compare (operands of compare), // bail out. // Eg. // p0 = cmp.eq(r2, r0) // r2 = r4 // if (p0.new) jump:t .LBB28_3 if (localII->modifiesRegister(cmpReg1, TRI) || (secondReg && localII->modifiesRegister(cmpOp2, TRI))) return false; } return true; }
void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, int &SPAdj) { assert(Fn.getSubtarget().getRegisterInfo() && "getRegisterInfo() must be implemented!"); const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo(); const TargetRegisterInfo &TRI = *Fn.getSubtarget().getRegisterInfo(); const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering(); unsigned FrameSetupOpcode = TII.getCallFrameSetupOpcode(); unsigned FrameDestroyOpcode = TII.getCallFrameDestroyOpcode(); if (RS && !FrameIndexVirtualScavenging) RS->enterBasicBlock(BB); bool InsideCallSequence = false; for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) { if (I->getOpcode() == FrameSetupOpcode || I->getOpcode() == FrameDestroyOpcode) { InsideCallSequence = (I->getOpcode() == FrameSetupOpcode); SPAdj += TII.getSPAdjust(I); MachineBasicBlock::iterator PrevI = BB->end(); if (I != BB->begin()) PrevI = std::prev(I); TFI->eliminateCallFramePseudoInstr(Fn, *BB, I); // Visit the instructions created by eliminateCallFramePseudoInstr(). if (PrevI == BB->end()) I = BB->begin(); // The replaced instr was the first in the block. else I = std::next(PrevI); continue; } MachineInstr *MI = I; bool DoIncr = true; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { if (!MI->getOperand(i).isFI()) continue; // Frame indices in debug values are encoded in a target independent // way with simply the frame index and offset rather than any // target-specific addressing mode. if (MI->isDebugValue()) { assert(i == 0 && "Frame indices can only appear as the first " "operand of a DBG_VALUE machine instruction"); unsigned Reg; MachineOperand &Offset = MI->getOperand(1); Offset.setImm(Offset.getImm() + TFI->getFrameIndexReference( Fn, MI->getOperand(0).getIndex(), Reg)); MI->getOperand(0).ChangeToRegister(Reg, false /*isDef*/); continue; } // TODO: This code should be commoned with the code for // PATCHPOINT. There's no good reason for the difference in // implementation other than historical accident. The only // remaining difference is the unconditional use of the stack // pointer as the base register. if (MI->getOpcode() == TargetOpcode::STATEPOINT) { assert((!MI->isDebugValue() || i == 0) && "Frame indicies can only appear as the first operand of a " "DBG_VALUE machine instruction"); unsigned Reg; MachineOperand &Offset = MI->getOperand(i + 1); const unsigned refOffset = TFI->getFrameIndexReferenceFromSP(Fn, MI->getOperand(i).getIndex(), Reg); Offset.setImm(Offset.getImm() + refOffset); MI->getOperand(i).ChangeToRegister(Reg, false /*isDef*/); continue; } // Some instructions (e.g. inline asm instructions) can have // multiple frame indices and/or cause eliminateFrameIndex // to insert more than one instruction. We need the register // scavenger to go through all of these instructions so that // it can update its register information. We keep the // iterator at the point before insertion so that we can // revisit them in full. bool AtBeginning = (I == BB->begin()); if (!AtBeginning) --I; // If this instruction has a FrameIndex operand, we need to // use that target machine register info object to eliminate // it. TRI.eliminateFrameIndex(MI, SPAdj, i, FrameIndexVirtualScavenging ? nullptr : RS); // Reset the iterator if we were at the beginning of the BB. if (AtBeginning) { I = BB->begin(); DoIncr = false; } MI = nullptr; break; } // If we are looking at a call sequence, we need to keep track of // the SP adjustment made by each instruction in the sequence. 
// This includes both the frame setup/destroy pseudos (handled above), // as well as other instructions that have side effects w.r.t the SP. // Note that this must come after eliminateFrameIndex, because // if I itself referred to a frame index, we shouldn't count its own // adjustment. if (MI && InsideCallSequence) SPAdj += TII.getSPAdjust(MI); if (DoIncr && I != BB->end()) ++I; // Update register states. if (RS && !FrameIndexVirtualScavenging && MI) RS->forward(MI); } }
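// A toy, self-contained model (not the real PEI code) of the SP-adjustment
// bookkeeping above: while inside a call sequence, every instruction's own
// adjustment is accumulated so that frame-index offsets computed later remain
// correct.
#include <vector>

struct ToyMI {
  enum Kind { FrameSetup, FrameDestroy, Normal } K;
  int SPAdjust;                       // bytes this instruction adjusts SP by
};

static int accumulateSPAdjust(const std::vector<ToyMI> &Block) {
  int SPAdj = 0;
  bool InsideCallSequence = false;
  for (const ToyMI &MI : Block) {
    if (MI.K == ToyMI::FrameSetup || MI.K == ToyMI::FrameDestroy) {
      InsideCallSequence = (MI.K == ToyMI::FrameSetup);
      SPAdj += MI.SPAdjust;           // the pseudos themselves adjust SP
    } else if (InsideCallSequence) {
      SPAdj += MI.SPAdjust;           // e.g. pushes of outgoing arguments
    }
  }
  return SPAdj;
}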
int AlphaCodeEmitter::getMachineOpValue(MachineInstr &MI, MachineOperand &MO) { int rv = 0; // Return value; defaults to 0 for unhandled cases // or things that get fixed up later by the JIT. if (MO.isRegister()) { rv = getAlphaRegNumber(MO.getReg()); } else if (MO.isImmediate()) { rv = MO.getImm(); } else if (MO.isGlobalAddress() || MO.isExternalSymbol() || MO.isConstantPoolIndex()) { DOUT << MO << " is a relocated op for " << MI << "\n"; unsigned Reloc = 0; int Offset = 0; bool useGOT = false; switch (MI.getOpcode()) { case Alpha::BSR: Reloc = Alpha::reloc_bsr; break; case Alpha::LDLr: case Alpha::LDQr: case Alpha::LDBUr: case Alpha::LDWUr: case Alpha::LDSr: case Alpha::LDTr: case Alpha::LDAr: case Alpha::STQr: case Alpha::STLr: case Alpha::STWr: case Alpha::STBr: case Alpha::STSr: case Alpha::STTr: Reloc = Alpha::reloc_gprellow; break; case Alpha::LDAHr: Reloc = Alpha::reloc_gprelhigh; break; case Alpha::LDQl: Reloc = Alpha::reloc_literal; useGOT = true; break; case Alpha::LDAg: case Alpha::LDAHg: Reloc = Alpha::reloc_gpdist; Offset = MI.getOperand(3).getImm(); break; default: assert(0 && "unknown relocatable instruction"); abort(); } if (MO.isGlobalAddress()) MCE.addRelocation(MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc, MO.getGlobal(), Offset, isa<Function>(MO.getGlobal()), useGOT)); else if (MO.isExternalSymbol()) MCE.addRelocation(MachineRelocation::getExtSym(MCE.getCurrentPCOffset(), Reloc, MO.getSymbolName(), Offset, true)); else MCE.addRelocation(MachineRelocation::getConstPool(MCE.getCurrentPCOffset(), Reloc, MO.getIndex(), Offset)); } else if (MO.isMachineBasicBlock()) { MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(), Alpha::reloc_bsr, MO.getMBB())); }else { cerr << "ERROR: Unknown type of MachineOperand: " << MO << "\n"; abort(); } return rv; }
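// The gprelhigh/gprellow pair selected above corresponds to the usual Alpha
// ldah/lda split of a GP-relative displacement. A standalone sketch of that
// arithmetic (illustrative; the actual fixup is applied by the relocation
// handler): because lda sign-extends its 16-bit half, the high half is
// rounded so the pair reassembles exactly.
#include <cassert>
#include <cstdint>

static void splitGPRel(int64_t Disp, int64_t &High, int64_t &Low) {
  High = (Disp + 0x8000) >> 16;       // compensate for the sign-extended low half
  Low = (int16_t)Disp;
  assert((High << 16) + Low == Disp && "split must reassemble exactly");
}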
bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { bool Modified = false; // Yes, CPSR could be livein. bool LiveCPSR = MBB.isLiveIn(ARM::CPSR); MachineInstr *CPSRDef = 0; MachineInstr *BundleMI = 0; // If this BB loops back to itself, conservatively avoid narrowing the // first instruction that does partial flag update. bool IsSelfLoop = MBB.isSuccessor(&MBB); MachineBasicBlock::instr_iterator MII = MBB.instr_begin(), E = MBB.instr_end(); MachineBasicBlock::instr_iterator NextMII; for (; MII != E; MII = NextMII) { NextMII = llvm::next(MII); MachineInstr *MI = &*MII; if (MI->isBundle()) { BundleMI = MI; continue; } LiveCPSR = UpdateCPSRUse(*MI, LiveCPSR); unsigned Opcode = MI->getOpcode(); DenseMap<unsigned, unsigned>::iterator OPI = ReduceOpcodeMap.find(Opcode); if (OPI != ReduceOpcodeMap.end()) { const ReduceEntry &Entry = ReduceTable[OPI->second]; // Ignore "special" cases for now. if (Entry.Special) { if (ReduceSpecial(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) { Modified = true; MachineBasicBlock::instr_iterator I = prior(NextMII); MI = &*I; } goto ProcessNext; } // Try to transform to a 16-bit two-address instruction. if (Entry.NarrowOpc2 && ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) { Modified = true; MachineBasicBlock::instr_iterator I = prior(NextMII); MI = &*I; goto ProcessNext; } // Try to transform to a 16-bit non-two-address instruction. if (Entry.NarrowOpc1 && ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) { Modified = true; MachineBasicBlock::instr_iterator I = prior(NextMII); MI = &*I; } } ProcessNext: if (NextMII != E && MI->isInsideBundle() && !NextMII->isInsideBundle()) { // FIXME: Since post-ra scheduler operates on bundles, the CPSR kill // marker is only on the BUNDLE instruction. Process the BUNDLE // instruction as we finish with the bundled instruction to work around // the inconsistency. if (BundleMI->killsRegister(ARM::CPSR)) LiveCPSR = false; MachineOperand *MO = BundleMI->findRegisterDefOperand(ARM::CPSR); if (MO && !MO->isDead()) LiveCPSR = true; } bool DefCPSR = false; LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR, DefCPSR); if (MI->isCall()) { // Calls don't really set CPSR. CPSRDef = 0; IsSelfLoop = false; } else if (DefCPSR) { // This is the last CPSR defining instruction. CPSRDef = MI; IsSelfLoop = false; } } return Modified; }
/// optimizeExtInstr - If instruction is a copy-like instruction, i.e. it reads /// a single register and writes a single register and it does not modify the /// source, and if the source value is preserved as a sub-register of the /// result, then replace all reachable uses of the source with the subreg of the /// result. /// /// Do not generate an EXTRACT that is used only in a debug use, as this changes /// the code. Since this code does not currently share EXTRACTs, just ignore all /// debug uses. bool PeepholeOptimizer:: optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, SmallPtrSet<MachineInstr*, 8> &LocalMIs) { unsigned SrcReg, DstReg, SubIdx; if (!TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx)) return false; if (TargetRegisterInfo::isPhysicalRegister(DstReg) || TargetRegisterInfo::isPhysicalRegister(SrcReg)) return false; if (MRI->hasOneNonDBGUse(SrcReg)) // No other uses. return false; // Ensure DstReg can get a register class that actually supports // sub-registers. Don't change the class until we commit. const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg); DstRC = TM->getRegisterInfo()->getSubClassWithSubReg(DstRC, SubIdx); if (!DstRC) return false; // The ext instr may be operating on a sub-register of SrcReg as well. // PPC::EXTSW is a 32 -> 64-bit sign extension, but it reads a 64-bit // register. // If UseSrcSubIdx is Set, SubIdx also applies to SrcReg, and only uses of // SrcReg:SubIdx should be replaced. bool UseSrcSubIdx = TM->getRegisterInfo()-> getSubClassWithSubReg(MRI->getRegClass(SrcReg), SubIdx) != nullptr; // The source has other uses. See if we can replace the other uses with use of // the result of the extension. SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs; for (MachineInstr &UI : MRI->use_nodbg_instructions(DstReg)) ReachedBBs.insert(UI.getParent()); // Uses that are in the same BB of uses of the result of the instruction. SmallVector<MachineOperand*, 8> Uses; // Uses that the result of the instruction can reach. SmallVector<MachineOperand*, 8> ExtendedUses; bool ExtendLife = true; for (MachineOperand &UseMO : MRI->use_nodbg_operands(SrcReg)) { MachineInstr *UseMI = UseMO.getParent(); if (UseMI == MI) continue; if (UseMI->isPHI()) { ExtendLife = false; continue; } // Only accept uses of SrcReg:SubIdx. if (UseSrcSubIdx && UseMO.getSubReg() != SubIdx) continue; // It's an error to translate this: // // %reg1025 = <sext> %reg1024 // ... // %reg1026 = SUBREG_TO_REG 0, %reg1024, 4 // // into this: // // %reg1025 = <sext> %reg1024 // ... // %reg1027 = COPY %reg1025:4 // %reg1026 = SUBREG_TO_REG 0, %reg1027, 4 // // The problem here is that SUBREG_TO_REG is there to assert that an // implicit zext occurs. It doesn't insert a zext instruction. If we allow // the COPY here, it will give us the value after the <sext>, not the // original value of %reg1024 before <sext>. if (UseMI->getOpcode() == TargetOpcode::SUBREG_TO_REG) continue; MachineBasicBlock *UseMBB = UseMI->getParent(); if (UseMBB == MBB) { // Local uses that come after the extension. if (!LocalMIs.count(UseMI)) Uses.push_back(&UseMO); } else if (ReachedBBs.count(UseMBB)) { // Non-local uses where the result of the extension is used. Always // replace these unless it's a PHI. Uses.push_back(&UseMO); } else if (Aggressive && DT->dominates(MBB, UseMBB)) { // We may want to extend the live range of the extension result in order // to replace these uses. ExtendedUses.push_back(&UseMO); } else { // Both will be live out of the def MBB anyway. 
Don't extend live range of // the extension result. ExtendLife = false; break; } } if (ExtendLife && !ExtendedUses.empty()) // Extend the liveness of the extension result. std::copy(ExtendedUses.begin(), ExtendedUses.end(), std::back_inserter(Uses)); // Now replace all uses. bool Changed = false; if (!Uses.empty()) { SmallPtrSet<MachineBasicBlock*, 4> PHIBBs; // Look for PHI uses of the extended result, we don't want to extend the // liveness of a PHI input. It breaks all kinds of assumptions down // stream. A PHI use is expected to be the kill of its source values. for (MachineInstr &UI : MRI->use_nodbg_instructions(DstReg)) if (UI.isPHI()) PHIBBs.insert(UI.getParent()); const TargetRegisterClass *RC = MRI->getRegClass(SrcReg); for (unsigned i = 0, e = Uses.size(); i != e; ++i) { MachineOperand *UseMO = Uses[i]; MachineInstr *UseMI = UseMO->getParent(); MachineBasicBlock *UseMBB = UseMI->getParent(); if (PHIBBs.count(UseMBB)) continue; // About to add uses of DstReg, clear DstReg's kill flags. if (!Changed) { MRI->clearKillFlags(DstReg); MRI->constrainRegClass(DstReg, DstRC); } unsigned NewVR = MRI->createVirtualRegister(RC); MachineInstr *Copy = BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(), TII->get(TargetOpcode::COPY), NewVR) .addReg(DstReg, 0, SubIdx); // SubIdx applies to both SrcReg and DstReg when UseSrcSubIdx is set. if (UseSrcSubIdx) { Copy->getOperand(0).setSubReg(SubIdx); Copy->getOperand(0).setIsUndef(); } UseMO->setReg(NewVR); ++NumReuse; Changed = true; } } return Changed; }
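// The rewrite above is sound because the low half of a sign- (or zero-)
// extension is bit-identical to its source, so a use of the 32-bit source may
// instead read the low sub-register of the 64-bit result. A tiny standalone
// demonstration:
#include <cassert>
#include <cstdint>

static void lowHalfOfExtensionEqualsSource(int32_t Src) {
  int64_t SExt = (int64_t)Src;                  // what the <sext> produces
  uint32_t LowHalf = (uint32_t)(uint64_t)SExt;  // reading the low sub-register
  assert(LowHalf == (uint32_t)Src && "low half must equal the original source");
}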
static bool canCompareBeNewValueJump(const HexagonInstrInfo *QII, const TargetRegisterInfo *TRI, MachineBasicBlock::iterator II, unsigned pReg, bool secondReg, bool optLocation, MachineBasicBlock::iterator end, MachineFunction &MF) { MachineInstr &MI = *II; // If the second operand of the compare is an imm, make sure it's in the // range specified by the arch. if (!secondReg) { int64_t v = MI.getOperand(2).getImm(); bool Valid = false; switch (MI.getOpcode()) { case Hexagon::C2_cmpeqi: case Hexagon::C4_cmpneqi: case Hexagon::C2_cmpgti: case Hexagon::C4_cmpltei: Valid = (isUInt<5>(v) || v == -1); break; case Hexagon::C2_cmpgtui: case Hexagon::C4_cmplteui: Valid = isUInt<5>(v); break; case Hexagon::S2_tstbit_i: case Hexagon::S4_ntstbit_i: Valid = (v == 0); break; } if (!Valid) return false; } unsigned cmpReg1, cmpOp2 = 0; // cmpOp2 assignment silences compiler warning. cmpReg1 = MI.getOperand(1).getReg(); if (secondReg) { cmpOp2 = MI.getOperand(2).getReg(); // If the same register appears as both operands, we cannot generate a new // value compare. Only one operand may use the .new suffix. if (cmpReg1 == cmpOp2) return false; // Make sure that that second register is not from COPY // At machine code level, we don't need this, but if we decide // to move new value jump prior to RA, we would be needing this. MachineRegisterInfo &MRI = MF.getRegInfo(); if (secondReg && !TargetRegisterInfo::isPhysicalRegister(cmpOp2)) { MachineInstr *def = MRI.getVRegDef(cmpOp2); if (def->getOpcode() == TargetOpcode::COPY) return false; } } // Walk the instructions after the compare (predicate def) to the jump, // and satisfy the following conditions. ++II ; for (MachineBasicBlock::iterator localII = II; localII != end; ++localII) { if (localII->isDebugValue()) continue; // Check 1. // If "common" checks fail, bail out. if (!commonChecksToProhibitNewValueJump(optLocation, localII)) return false; // Check 2. // If there is a def or use of predicate (result of compare), bail out. if (localII->modifiesRegister(pReg, TRI) || localII->readsRegister(pReg, TRI)) return false; // Check 3. // If there is a def of any of the use of the compare (operands of compare), // bail out. // Eg. // p0 = cmp.eq(r2, r0) // r2 = r4 // if (p0.new) jump:t .LBB28_3 if (localII->modifiesRegister(cmpReg1, TRI) || (secondReg && localII->modifiesRegister(cmpOp2, TRI))) return false; } return true; }
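// A standalone sketch (not the Hexagon API) of the immediate restriction
// enforced above: the new-value compare-and-jump encodings only carry a U5
// immediate, with -1 additionally representable for the signed eq/gt family
// and only bit index 0 accepted for the tstbit forms.
#include <cstdint>

static bool isToyUInt5(int64_t V) { return V >= 0 && V <= 31; }

enum class ToyCmpKind { SignedEqOrGt, UnsignedGt, TstBit };

static bool isNewValueJumpImmValid(ToyCmpKind K, int64_t V) {
  switch (K) {
  case ToyCmpKind::SignedEqOrGt:
    return isToyUInt5(V) || V == -1;
  case ToyCmpKind::UnsignedGt:
    return isToyUInt5(V);
  case ToyCmpKind::TstBit:
    return V == 0;
  }
  return false;
}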
void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, int &SPAdj) { const TargetMachine &TM = Fn.getTarget(); assert(TM.getRegisterInfo() && "TM::getRegisterInfo() must be implemented!"); const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo(); const TargetRegisterInfo &TRI = *TM.getRegisterInfo(); const TargetFrameLowering *TFI = TM.getFrameLowering(); bool StackGrowsDown = TFI->getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown; int FrameSetupOpcode = TII.getCallFrameSetupOpcode(); int FrameDestroyOpcode = TII.getCallFrameDestroyOpcode(); if (RS && !FrameIndexVirtualScavenging) RS->enterBasicBlock(BB); for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) { if (I->getOpcode() == FrameSetupOpcode || I->getOpcode() == FrameDestroyOpcode) { // Remember how much SP has been adjusted to create the call // frame. int Size = I->getOperand(0).getImm(); if ((!StackGrowsDown && I->getOpcode() == FrameSetupOpcode) || (StackGrowsDown && I->getOpcode() == FrameDestroyOpcode)) Size = -Size; SPAdj += Size; MachineBasicBlock::iterator PrevI = BB->end(); if (I != BB->begin()) PrevI = prior(I); TFI->eliminateCallFramePseudoInstr(Fn, *BB, I); // Visit the instructions created by eliminateCallFramePseudoInstr(). if (PrevI == BB->end()) I = BB->begin(); // The replaced instr was the first in the block. else I = llvm::next(PrevI); continue; } MachineInstr *MI = I; bool DoIncr = true; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { if (!MI->getOperand(i).isFI()) continue; // Frame indicies in debug values are encoded in a target independent // way with simply the frame index and offset rather than any // target-specific addressing mode. if (MI->isDebugValue() || MI->getOpcode() == TargetOpcode::STATEPOINT || MI->getOpcode() == TargetOpcode::STACKMAP || MI->getOpcode() == TargetOpcode::PATCHPOINT) { assert((!MI->isDebugValue() || i == 0) && "Frame indicies can only appear as the first operand of a " "DBG_VALUE machine instruction"); unsigned Reg; MachineOperand &Offset = MI->getOperand(i + 1); //errs() << "offset: " << Offset.getImm() << "\n"; const unsigned refOffset = (MI->getOpcode() == TargetOpcode::STATEPOINT) ? // GC/STATEPOINT specific TFI->getFrameIndexReferenceForGC(Fn, MI->getOperand(i).getIndex(), Reg) : // General case TFI->getFrameIndexReference(Fn, MI->getOperand(i).getIndex(), Reg); Offset.setImm(Offset.getImm() + refOffset); MI->getOperand(i).ChangeToRegister(Reg, false /*isDef*/); continue; } // Some instructions (e.g. inline asm instructions) can have // multiple frame indices and/or cause eliminateFrameIndex // to insert more than one instruction. We need the register // scavenger to go through all of these instructions so that // it can update its register information. We keep the // iterator at the point before insertion so that we can // revisit them in full. bool AtBeginning = (I == BB->begin()); if (!AtBeginning) --I; // If this instruction has a FrameIndex operand, we need to // use that target machine register info object to eliminate // it. TRI.eliminateFrameIndex(MI, SPAdj, i, FrameIndexVirtualScavenging ? NULL : RS); // Reset the iterator if we were at the beginning of the BB. if (AtBeginning) { I = BB->begin(); DoIncr = false; } MI = 0; break; } if (DoIncr && I != BB->end()) ++I; // Update register states. if (RS && !FrameIndexVirtualScavenging && MI) RS->forward(MI); } }
void PatmosPostRASchedStrategy::postprocessDAG(ScheduleDAGPostRA *dag) { DAG = dag; SUnit *CFL = NULL; // Find the inline asm statement, if any. Note that asm is a barrier, // therefore there is at most one CFL or inline asm. SUnit *Asm = NULL; // Push up loads to ensure load delay slot across BBs // TODO For some reasons, loads do not always have exit edges, and a latency // of 1; find out why. Happens e.g. in coremark with 16k methods setup. for (std::vector<SUnit>::reverse_iterator it = DAG->SUnits.rbegin(), ie = DAG->SUnits.rend(); it != ie; it++) { MachineInstr *MI = it->getInstr(); if (!MI) continue; if (MI->mayLoad()) { SDep Dep(&*it, SDep::Artificial); Dep.setLatency(computeExitLatency(*it)); DAG->ExitSU.addPred(Dep); } } // Find the branch/call/ret instruction if available for (std::vector<SUnit>::reverse_iterator it = DAG->SUnits.rbegin(), ie = DAG->SUnits.rend(); it != ie; it++) { MachineInstr *MI = it->getInstr(); if (!MI) continue; if (isPatmosCFL(MI->getOpcode(), MI->getDesc().TSFlags)) { CFL = &*it; break; } if (MI->isInlineAsm()) { Asm = &*it; break; } } const PatmosSubtarget *PST = PTM.getSubtargetImpl(); unsigned DelaySlot = CFL ? PST->getDelaySlotCycles(CFL->getInstr()) : 0; if (CFL) { // RET and CALL have implicit deps on the return values and call // arguments. Remove all those edges to schedule them into the delay slot // if the registers are not actually used by CALL and RET if (CFL->getInstr()->isReturn() || CFL->getInstr()->isCall()) removeImplicitCFLDeps(*CFL); // Add an artificial dep from CFL to exit for the delay slot SDep DelayDep(CFL, SDep::Artificial); DelayDep.setLatency(DelaySlot + 1); DAG->ExitSU.addPred(DelayDep); CFL->isScheduleLow = true; if (PTM.getSubtargetImpl()->getCFLType() != PatmosSubtarget::CFL_DELAYED) { // Push up single instructions that can be scheduled in the same // cycle as the branch unsigned LowCount = 0; SUnit *LowSU = 0; for (std::vector<SUnit>::reverse_iterator it = DAG->SUnits.rbegin(), ie = DAG->SUnits.rend(); it != ie; it++) { if (&*it == CFL) continue; MachineInstr *MI = it->getInstr(); if (!MI) continue; if (it->getHeight() <= DelaySlot) { LowCount++; if (PII.canIssueInSlot(MI, LowCount)) { LowSU = &*it; } } } if (LowSU && LowCount == 1) { SDep Dep(LowSU, SDep::Artificial); Dep.setLatency(DelaySlot + 1); DAG->ExitSU.addPred(Dep); } } if (PTM.getSubtargetImpl()->getCFLType() == PatmosSubtarget::CFL_NON_DELAYED) { // Add dependencies from all other instructions to exit for (std::vector<SUnit>::reverse_iterator it = DAG->SUnits.rbegin(), ie = DAG->SUnits.rend(); it != ie; it++) { if (&*it == CFL) continue; MachineInstr *MI = it->getInstr(); if (!MI) continue; SDep Dep(&*it, SDep::Artificial); Dep.setLatency(DelaySlot + 1); DAG->ExitSU.addPred(Dep); } } } // Add an exit delay between loads and inline asm, in case asm is empty if (Asm) { std::vector<SUnit*> PredLoads; for (SUnit::pred_iterator it = Asm->Preds.begin(), ie = Asm->Preds.end(); it != ie; it++) { if (!it->getSUnit()) continue; MachineInstr *MI = it->getSUnit()->getInstr(); // Check for loads if (!MI || !MI->mayLoad()) continue; PredLoads.push_back(it->getSUnit()); } for (std::vector<SUnit*>::iterator it = PredLoads.begin(), ie = PredLoads.end(); it != ie; it++) { // Add a delay between loads and inline-asm, even if the operand is not // used. 
SDep Dep(*it, SDep::Artificial); Dep.setLatency(computeExitLatency(**it)); Asm->addPred(Dep); } } // Remove barriers between loads/stores with different memory types removeTypedMemBarriers(); // Remove any dependency between instructions with mutually exclusive // predicates removeExclusivePredDeps(); // TODO SWS and LWS do not have ST as implicit def edges // TODO CALL has chain edges to all SWS/.. instructions, remove // TODO remove edges from MUL to other MULs to overlap MUL and MFS for // pipelined muls. }