bool NEONMoveFixPass::InsertMoves(MachineBasicBlock &MBB) {
  RegMap Defs;
  bool Modified = false;

  // Walk over MBB tracking the def points of the registers.
  MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end();
  MachineBasicBlock::iterator NextMII;
  for (; MII != E; MII = NextMII) {
    NextMII = llvm::next(MII);
    MachineInstr *MI = &*MII;

    if (MI->getOpcode() == ARM::VMOVD &&
        !TII->isPredicated(MI)) {
      unsigned SrcReg = MI->getOperand(1).getReg();
      // If we do not find an instruction defining the reg, this means the
      // register should be live-in for this BB. It's always better to use
      // NEON reg-reg moves.
      unsigned Domain = ARMII::DomainNEON;
      RegMap::iterator DefMI = Defs.find(SrcReg);
      if (DefMI != Defs.end()) {
        Domain = DefMI->second->getDesc().TSFlags & ARMII::DomainMask;
        // Instructions in general domain are subreg accesses.
        // Map them to NEON reg-reg moves.
        if (Domain == ARMII::DomainGeneral)
          Domain = ARMII::DomainNEON;
      }

      if (inNEONDomain(Domain, isA8)) {
        // Convert VMOVD to VORRd
        unsigned DestReg = MI->getOperand(0).getReg();

        DEBUG({errs() << "vmov convert: "; MI->dump();});
        // It's safe to ignore imp-defs / imp-uses here, since:
        //  - We're running late, no intelligent codegen passes should be run
        //    afterwards
        //  - The imp-defs / imp-uses are superregs only, we don't care about
        //    them.
        AddDefaultPred(BuildMI(MBB, *MI, MI->getDebugLoc(),
                               TII->get(ARM::VORRd), DestReg)
                         .addReg(SrcReg).addReg(SrcReg));
        MBB.erase(MI);
        MachineBasicBlock::iterator I = prior(NextMII);
        MI = &*I;

        DEBUG({errs() << " into: "; MI->dump();});

        Modified = true;
        ++NumVMovs;
      } else {
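// Illustrative sketch (not part of the pass itself): the rewrite above turns
// a domain-crossing VFP move into an equivalent NEON logical move, roughly:
//
//   %D1 = VMOVD %D0, pred:14, pred:%noreg       ; VFP-domain move
//     ==>
//   %D1 = VORRd %D0, %D0, pred:14, pred:%noreg  ; NEON OR of a register with itself
//
// Operand details (predicate encoding, implicit operands) are schematic.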
// Compute base address using Addr and return the final register.
unsigned SILoadStoreOptimizer::computeBase(MachineInstr &MI,
                                           const MemAddress &Addr) {
  MachineBasicBlock *MBB = MI.getParent();
  MachineBasicBlock::iterator MBBI = MI.getIterator();
  DebugLoc DL = MI.getDebugLoc();

  assert((TRI->getRegSizeInBits(Addr.Base.LoReg, *MRI) == 32 ||
          Addr.Base.LoSubReg) &&
         "Expected 32-bit Base-Register-Low!!");

  assert((TRI->getRegSizeInBits(Addr.Base.HiReg, *MRI) == 32 ||
          Addr.Base.HiSubReg) &&
         "Expected 32-bit Base-Register-Hi!!");

  LLVM_DEBUG(dbgs() << " Re-Computed Anchor-Base:\n");
  MachineOperand OffsetLo =
      createRegOrImm(static_cast<int32_t>(Addr.Offset), MI);
  MachineOperand OffsetHi =
      createRegOrImm(static_cast<int32_t>(Addr.Offset >> 32), MI);
  unsigned CarryReg =
      MRI->createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
  unsigned DeadCarryReg =
      MRI->createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);

  unsigned DestSub0 = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  unsigned DestSub1 = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  MachineInstr *LoHalf =
      BuildMI(*MBB, MBBI, DL, TII->get(AMDGPU::V_ADD_I32_e64), DestSub0)
          .addReg(CarryReg, RegState::Define)
          .addReg(Addr.Base.LoReg, 0, Addr.Base.LoSubReg)
          .add(OffsetLo);
  (void)LoHalf;
  LLVM_DEBUG(dbgs() << " "; LoHalf->dump(););
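// Illustrative sketch (assumed continuation, not shown above): the 64-bit base
// is recomputed as a pair of 32-bit adds, with the high half consuming the
// carry produced by the low half, roughly:
//
//   %DestSub0 = V_ADD_I32_e64  %CarryReg,     %Base.Lo, OffsetLo
//   %DestSub1 = V_ADDC_U32_e64 %DeadCarryReg, %Base.Hi, OffsetHi, %CarryReg
//   %FullDest = REG_SEQUENCE %DestSub0, sub0, %DestSub1, sub1
//
// The exact opcode and operand order of the high half are an assumption based
// on the registers created above; only the low half appears in this snippet.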
bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
  SmallSetVector<MachineInstr*, 8> MaybeDeadCopies;  // Candidates for deletion
  DenseMap<unsigned, MachineInstr*> AvailCopyMap;    // Def -> available copies map
  DenseMap<unsigned, MachineInstr*> CopyMap;         // Def -> copies map
  SourceMap SrcMap;                                  // Src -> Def map

  DEBUG(dbgs() << "MCP: CopyPropagateBlock " << MBB.getName() << "\n");

  bool Changed = false;
  for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ) {
    MachineInstr *MI = &*I;
    ++I;

    if (MI->isCopy()) {
      unsigned Def = MI->getOperand(0).getReg();
      unsigned Src = MI->getOperand(1).getReg();

      if (TargetRegisterInfo::isVirtualRegister(Def) ||
          TargetRegisterInfo::isVirtualRegister(Src))
        report_fatal_error("MachineCopyPropagation should be run after"
                           " register allocation!");

      DenseMap<unsigned, MachineInstr*>::iterator CI = AvailCopyMap.find(Src);
      if (CI != AvailCopyMap.end()) {
        MachineInstr *CopyMI = CI->second;
        if (!MRI->isReserved(Def) &&
            (!MRI->isReserved(Src) || NoInterveningSideEffect(CopyMI, MI)) &&
            isNopCopy(CopyMI, Def, Src, TRI)) {
          // The two copies cancel out and the source of the first copy
          // hasn't been overridden, eliminate the second one. e.g.
          //  %ECX<def> = COPY %EAX<kill>
          //  ... nothing clobbered EAX.
          //  %EAX<def> = COPY %ECX
          // =>
          //  %ECX<def> = COPY %EAX
          //
          // Also avoid eliminating a copy from reserved registers unless the
          // definition is proven not clobbered. e.g.
          //  %RSP<def> = COPY %RAX
          //  CALL
          //  %RAX<def> = COPY %RSP
          DEBUG(dbgs() << "MCP: copy is a NOP, removing: "; MI->dump());

          // Clear any kills of Def between CopyMI and MI. This extends the
          // live range.
          for (MachineBasicBlock::iterator I = CopyMI, E = MI; I != E; ++I)
            I->clearRegisterKills(Def, TRI);

          removeCopy(MI);
          Changed = true;
          ++NumDeletes;
          continue;
        }
      }
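// Illustrative note on the kill-flag clearing above (schematic MIR):
//
//   %ECX<def> = COPY %EAX<kill>   ; the kill flag on %EAX must be dropped,
//   ...                           ; because erasing the second copy below
//   %EAX<def> = COPY %ECX         ; extends %EAX's live range to this point
//
// Leaving a stale kill flag would let later passes reuse %EAX too early.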
MachineOperand SILoadStoreOptimizer::createRegOrImm(int32_t Val,
                                                    MachineInstr &MI) {
  APInt V(32, Val, true);
  if (TII->isInlineConstant(V))
    return MachineOperand::CreateImm(Val);

  unsigned Reg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  MachineInstr *Mov =
      BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(),
              TII->get(AMDGPU::S_MOV_B32), Reg)
          .addImm(Val);
  (void)Mov;
  LLVM_DEBUG(dbgs() << " "; Mov->dump());
  return MachineOperand::CreateReg(Reg, false);
}
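// Usage sketch (hypothetical values): createRegOrImm(8, MI) would return an
// immediate operand, since 8 is an inline constant on AMDGPU, whereas
// createRegOrImm(0x12345678, MI) would emit
//   %Reg = S_MOV_B32 0x12345678
// in front of MI and return %Reg as a register operand.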
bool isBundlableWithCurrentPMI(MachineInstr &MI,
                               const DenseMap<unsigned, unsigned> &PV,
                               std::vector<R600InstrInfo::BankSwizzle> &BS,
                               bool &isTransSlot) {
  isTransSlot = TII->isTransOnly(MI);
  assert (!isTransSlot || VLIW5);

  // Is the dst reg sequence legal ?
  if (!isTransSlot && !CurrentPacketMIs.empty()) {
    if (getSlot(MI) <= getSlot(*CurrentPacketMIs.back())) {
      if (ConsideredInstUsesAlreadyWrittenVectorElement &&
          !TII->isVectorOnly(MI) && VLIW5) {
        isTransSlot = true;
        LLVM_DEBUG({
          dbgs() << "Considering as Trans Inst :";
          MI.dump();
        });
      } else
        return false;
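// Background sketch (assumed from the R600 VLIW design, not stated in this
// snippet): on VLIW5 targets a packet has four vector slots (X, Y, Z, W) plus
// one trans slot. An instruction whose destination slot would collide with a
// slot already used in the current packet can still be bundled if it can be
// moved to the trans slot, which is what the branch above attempts before
// rejecting the instruction.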
bool runOnMachineFunction(MachineFunction &MF) override {
  ST = &MF.getSubtarget<R600Subtarget>();
  MaxFetchInst = ST->getTexVTXClauseSize();
  TII = ST->getInstrInfo();
  TRI = ST->getRegisterInfo();

  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();

  CFStack CFStack(ST, MF.getFunction().getCallingConv());
  for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME;
       ++MB) {
    MachineBasicBlock &MBB = *MB;
    unsigned CfCount = 0;
    std::vector<std::pair<unsigned, std::set<MachineInstr *>>> LoopStack;
    std::vector<MachineInstr *> IfThenElseStack;
    if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_VS) {
      BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
              getHWInstrDesc(CF_CALL_FS));
      CfCount++;
    }
    std::vector<ClauseFile> FetchClauses, AluClauses;
    std::vector<MachineInstr *> LastAlu(1);
    std::vector<MachineInstr *> ToPopAfter;

    for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
         I != E;) {
      if (TII->usesTextureCache(*I) || TII->usesVertexCache(*I)) {
        LLVM_DEBUG(dbgs() << CfCount << ":"; I->dump(););
        FetchClauses.push_back(MakeFetchClause(MBB, I));
        CfCount++;
        LastAlu.back() = nullptr;
        continue;
      }

      MachineBasicBlock::iterator MI = I;
      if (MI->getOpcode() != R600::ENDIF)
        LastAlu.back() = nullptr;
      if (MI->getOpcode() == R600::CF_ALU)
        LastAlu.back() = &*MI;
      I++;
      bool RequiresWorkAround =
          CFStack.requiresWorkAroundForInst(MI->getOpcode());
      switch (MI->getOpcode()) {
      case R600::CF_ALU_PUSH_BEFORE:
        if (RequiresWorkAround) {
          LLVM_DEBUG(dbgs()
                     << "Applying bug work-around for ALU_PUSH_BEFORE\n");
          BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(R600::CF_PUSH_EG))
              .addImm(CfCount + 1)
              .addImm(1);
          MI->setDesc(TII->get(R600::CF_ALU));
          CfCount++;
          CFStack.pushBranch(R600::CF_PUSH_EG);
        } else
          CFStack.pushBranch(R600::CF_ALU_PUSH_BEFORE);
        LLVM_FALLTHROUGH;
      case R600::CF_ALU:
        I = MI;
        AluClauses.push_back(MakeALUClause(MBB, I));
        LLVM_DEBUG(dbgs() << CfCount << ":"; MI->dump(););
        CfCount++;
        break;
      case R600::WHILELOOP: {
        CFStack.pushLoop();
        MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
                                    getHWInstrDesc(CF_WHILE_LOOP))
                                .addImm(1);
        std::pair<unsigned, std::set<MachineInstr *>> Pair(CfCount,
            std::set<MachineInstr *>());
        Pair.second.insert(MIb);
        LoopStack.push_back(std::move(Pair));
        MI->eraseFromParent();
        CfCount++;
        break;
      }
      case R600::ENDLOOP: {
        CFStack.popLoop();
        std::pair<unsigned, std::set<MachineInstr *>> Pair =
            std::move(LoopStack.back());
        LoopStack.pop_back();
        CounterPropagateAddr(Pair.second, CfCount);
        BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END_LOOP))
            .addImm(Pair.first + 1);
        MI->eraseFromParent();
        CfCount++;
        break;
      }
      case R600::IF_PREDICATE_SET: {
        LastAlu.push_back(nullptr);
        MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
                                    getHWInstrDesc(CF_JUMP))
                                .addImm(0)
                                .addImm(0);
        IfThenElseStack.push_back(MIb);
        LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
        MI->eraseFromParent();
        CfCount++;
        break;
      }
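// Illustrative sketch of the ALU_PUSH_BEFORE work-around above (schematic
// control-flow stream, addresses are made up):
//
//   before:  3: CF_ALU_PUSH_BEFORE ...
//   after:   3: CF_PUSH_EG 5, 1     ; explicit push inserted
//            4: CF_ALU ...          ; original entry demoted to a plain ALU clause
//
// Splitting the push out keeps the stack handling correct on parts affected by
// the hardware bug, at the cost of one extra control-flow instruction.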
bool SIFixSGPRLiveRanges::runOnMachineFunction(MachineFunction &MF) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
  const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  bool MadeChange = false;

  MachinePostDominatorTree *PDT = &getAnalysis<MachinePostDominatorTree>();
  std::vector<std::pair<unsigned, LiveRange *>> SGPRLiveRanges;

  LiveIntervals *LIS = &getAnalysis<LiveIntervals>();
  LiveVariables *LV = getAnalysisIfAvailable<LiveVariables>();
  MachineBasicBlock *Entry = MF.begin();

  // Use a depth first order so that in SSA, we encounter all defs before
  // uses. Once the defs of the block have been found, attempt to insert
  // SGPR_USE instructions in successor blocks if required.
  for (MachineBasicBlock *MBB : depth_first(Entry)) {
    for (const MachineInstr &MI : *MBB) {
      for (const MachineOperand &MO : MI.defs()) {
        if (MO.isImplicit())
          continue;
        unsigned Def = MO.getReg();
        if (TargetRegisterInfo::isVirtualRegister(Def)) {
          if (TRI->isSGPRClass(MRI.getRegClass(Def))) {
            // Only consider defs that are live outs. We don't care about def /
            // use within the same block.
            LiveRange &LR = LIS->getInterval(Def);
            if (LIS->isLiveOutOfMBB(LR, MBB))
              SGPRLiveRanges.push_back(std::make_pair(Def, &LR));
          }
        } else if (TRI->isSGPRClass(TRI->getPhysRegClass(Def))) {
          SGPRLiveRanges.push_back(std::make_pair(Def, &LIS->getRegUnit(Def)));
        }
      }
    }

    if (MBB->succ_size() < 2)
      continue;

    // We have structured control flow, so the number of successors should be
    // two.
    assert(MBB->succ_size() == 2);
    MachineBasicBlock *SuccA = *MBB->succ_begin();
    MachineBasicBlock *SuccB = *(++MBB->succ_begin());
    MachineBasicBlock *NCD = PDT->findNearestCommonDominator(SuccA, SuccB);

    if (!NCD)
      continue;

    MachineBasicBlock::iterator NCDTerm = NCD->getFirstTerminator();

    if (NCDTerm != NCD->end() && NCDTerm->getOpcode() == AMDGPU::SI_ELSE) {
      assert(NCD->succ_size() == 2);
      // We want to make sure we insert the Use after the ENDIF, not after
      // the ELSE.
      NCD = PDT->findNearestCommonDominator(*NCD->succ_begin(),
                                            *(++NCD->succ_begin()));
    }

    for (std::pair<unsigned, LiveRange*> RegLR : SGPRLiveRanges) {
      unsigned Reg = RegLR.first;
      LiveRange *LR = RegLR.second;

      // FIXME: We could be smarter here. If the register is Live-In to one
      // block, but the other doesn't have any SGPR defs, then there won't be a
      // conflict. Also, if the branch condition is uniform then there will be
      // no conflict.
      bool LiveInToA = LIS->isLiveInToMBB(*LR, SuccA);
      bool LiveInToB = LIS->isLiveInToMBB(*LR, SuccB);

      if (!LiveInToA && !LiveInToB) {
        DEBUG(dbgs() << PrintReg(Reg, TRI, 0)
                     << " is live into neither successor\n");
        continue;
      }

      if (LiveInToA && LiveInToB) {
        DEBUG(dbgs() << PrintReg(Reg, TRI, 0)
                     << " is live into both successors\n");
        continue;
      }

      // This interval is live in to one successor, but not the other, so
      // we need to update its range so it is live in to both.
      DEBUG(dbgs() << "Possible SGPR conflict detected for "
                   << PrintReg(Reg, TRI, 0)
                   << " in " << *LR
                   << " BB#" << SuccA->getNumber()
                   << ", BB#" << SuccB->getNumber()
                   << " with NCD = BB#" << NCD->getNumber() << '\n');

      assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
             "Not expecting to extend live range of physreg");

      // FIXME: Need to figure out how to update LiveRange here so this pass
      // will be able to preserve LiveInterval analysis.
      MachineInstr *NCDSGPRUse =
          BuildMI(*NCD, NCD->getFirstNonPHI(), DebugLoc(),
                  TII->get(AMDGPU::SGPR_USE))
              .addReg(Reg, RegState::Implicit);

      MadeChange = true;

      SlotIndex SI = LIS->InsertMachineInstrInMaps(NCDSGPRUse);
      LIS->extendToIndices(*LR, SI.getRegSlot());

      if (LV) {
        // TODO: This won't work post-SSA
        LV->HandleVirtRegUse(Reg, NCD, NCDSGPRUse);
      }

      DEBUG(NCDSGPRUse->dump());
    }
  }

  return MadeChange;
}
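// Illustrative sketch (schematic CFG): for a diamond where an SGPR value is
// live into only one side,
//
//            bb.0 (def %sgpr)
//            /              \
//   bb.1 (uses %sgpr)    bb.2 (no use)
//            \              /
//            bb.3  <- NCD: SGPR_USE implicit %sgpr inserted here
//
// the dummy use at the nearest common post-dominator forces %sgpr to be treated
// as live on both paths, so the register cannot be reused on the other path
// while the divergent branch is still in flight.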
/*!
  \note This code was kiped from PPC. There may be more branch analysis for
  CellSPU than what's currently done here.
 */
bool
SPUInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
                            MachineBasicBlock *&FBB,
                            SmallVectorImpl<MachineOperand> &Cond,
                            bool AllowModify) const {
  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.end();
  if (I == MBB.begin())
    return false;
  --I;
  while (I->isDebugValue()) {
    if (I == MBB.begin())
      return false;
    --I;
  }
  if (!isUnpredicatedTerminator(I))
    return false;

  // Get the last instruction in the block.
  MachineInstr *LastInst = I;

  // If there is only one terminator instruction, process it.
  if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
    if (isUncondBranch(LastInst)) {
      // Check for jump tables
      if (!LastInst->getOperand(0).isMBB())
        return true;
      TBB = LastInst->getOperand(0).getMBB();
      return false;
    } else if (isCondBranch(LastInst)) {
      // Block ends with fall-through condbranch.
      TBB = LastInst->getOperand(1).getMBB();
      DEBUG(errs() << "Pushing LastInst: ");
      DEBUG(LastInst->dump());
      Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
      Cond.push_back(LastInst->getOperand(0));
      return false;
    }
    // Otherwise, don't know what this is.
    return true;
  }

  // Get the instruction before it if it's a terminator.
  MachineInstr *SecondLastInst = I;

  // If there are three terminators, we don't know what sort of block this is.
  if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
    return true;

  // If the block ends with a conditional and unconditional branch, handle it.
  if (isCondBranch(SecondLastInst) && isUncondBranch(LastInst)) {
    TBB = SecondLastInst->getOperand(1).getMBB();
    DEBUG(errs() << "Pushing SecondLastInst: ");
    DEBUG(SecondLastInst->dump());
    Cond.push_back(MachineOperand::CreateImm(SecondLastInst->getOpcode()));
    Cond.push_back(SecondLastInst->getOperand(0));
    FBB = LastInst->getOperand(0).getMBB();
    return false;
  }

  // If the block ends with two unconditional branches, handle it. The second
  // one is not executed, so remove it.
  if (isUncondBranch(SecondLastInst) && isUncondBranch(LastInst)) {
    TBB = SecondLastInst->getOperand(0).getMBB();
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return false;
  }

  // Otherwise, can't handle this.
  return true;
}
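// Illustrative summary of the terminator shapes handled above (schematic):
//
//   br_uncond L1               -> TBB = L1, no condition
//   br_cond  p, L1             -> TBB = L1, Cond = {opcode, p}, fall-through
//   br_cond  p, L1; br_uncond L2 -> TBB = L1, FBB = L2, Cond = {opcode, p}
//   br_uncond L1; br_uncond L2   -> TBB = L1, second branch deleted if allowed
//
// Anything else (three terminators, jump-table targets) makes the function
// return true, i.e. "cannot analyze".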
bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) {

  DEBUG(dbgs() << "********** Hexagon New Value Jump **********\n"
               << "********** Function: "
               << MF.getName() << "\n");

#if 0
  // for now disable this, if we move NewValueJump before register
  // allocation we need this information.
  LiveVariables &LVs = getAnalysis<LiveVariables>();
#endif

  QII = static_cast<const HexagonInstrInfo *>(MF.getTarget().getInstrInfo());
  QRI = static_cast<const HexagonRegisterInfo *>(
      MF.getTarget().getRegisterInfo());

  if (!QRI->Subtarget.hasV4TOps() ||
      DisableNewValueJumps) {
    return false;
  }

  int nvjCount = DbgNVJCount;
  int nvjGenerated = 0;

  // Loop through all the bb's of the function
  for (MachineFunction::iterator MBBb = MF.begin(), MBBe = MF.end();
       MBBb != MBBe; ++MBBb) {
    MachineBasicBlock* MBB = MBBb;

    DEBUG(dbgs() << "** dumping bb ** " << MBB->getNumber() << "\n");
    DEBUG(MBB->dump());
    DEBUG(dbgs() << "\n" << "********** dumping instr bottom up **********\n");

    bool foundJump    = false;
    bool foundCompare = false;
    bool invertPredicate = false;
    unsigned predReg = 0; // predicate reg of the jump.
    unsigned cmpReg1 = 0;
    int cmpOp2 = 0;
    bool MO1IsKill = false;
    bool MO2IsKill = false;
    MachineBasicBlock::iterator jmpPos;
    MachineBasicBlock::iterator cmpPos;
    MachineInstr *cmpInstr = NULL, *jmpInstr = NULL;
    MachineBasicBlock *jmpTarget = NULL;
    bool afterRA = false;
    bool isSecondOpReg = false;
    bool isSecondOpNewified = false;

    // Traverse the basic block - bottom up
    for (MachineBasicBlock::iterator MII = MBB->end(), E = MBB->begin();
         MII != E;) {
      MachineInstr *MI = --MII;
      if (MI->isDebugValue()) {
        continue;
      }

      if ((nvjCount == 0) || (nvjCount > -1 && nvjCount <= nvjGenerated))
        break;

      DEBUG(dbgs() << "Instr: "; MI->dump(); dbgs() << "\n");

      if (!foundJump &&
          (MI->getOpcode() == Hexagon::JMP_c ||
           MI->getOpcode() == Hexagon::JMP_cNot ||
           MI->getOpcode() == Hexagon::JMP_cdnPt ||
           MI->getOpcode() == Hexagon::JMP_cdnPnt ||
           MI->getOpcode() == Hexagon::JMP_cdnNotPt ||
           MI->getOpcode() == Hexagon::JMP_cdnNotPnt)) {
        // This is where you would insert your compare and
        // instr that feeds compare
        jmpPos = MII;
        jmpInstr = MI;
        predReg = MI->getOperand(0).getReg();
        afterRA = TargetRegisterInfo::isPhysicalRegister(predReg);

        // If ifconverter had not messed up with the kill flags of the
        // operands, the following check on the kill flag would suffice.
        // if(!jmpInstr->getOperand(0).isKill()) break;

        // This predicate register is live out of the BB. Checking that would
        // only work if we could use live variable analysis on phys regs, but
        // LLVM does not provide LV analysis on phys regs.
        //if(LVs.isLiveOut(predReg, *MBB)) break;

        // Get all the successors of this block - which will always
        // be 2. Check if the predicate register is live-in to those
        // successors. If yes, we cannot delete the predicate. We do this
        // only because LLVM does not provide LiveOut at the BB level.
        bool predLive = false;
        for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(),
             SIE = MBB->succ_end(); SI != SIE; ++SI) {
          MachineBasicBlock* succMBB = *SI;
          if (succMBB->isLiveIn(predReg)) {
            predLive = true;
          }
        }
        if (predLive)
          break;

        jmpTarget = MI->getOperand(1).getMBB();
        foundJump = true;
        if (MI->getOpcode() == Hexagon::JMP_cNot ||
            MI->getOpcode() == Hexagon::JMP_cdnNotPt ||
            MI->getOpcode() == Hexagon::JMP_cdnNotPnt) {
          invertPredicate = true;
        }
        continue;
      }

      // No new value jump if there is a barrier. A barrier has to be in its
      // own packet. A barrier has zero operands. We conservatively bail out
      // here if we see any instruction with zero operands.
      if (foundJump && MI->getNumOperands() == 0)
        break;

      if (foundJump && !foundCompare && MI->getOperand(0).isReg() &&
          MI->getOperand(0).getReg() == predReg) {

        // Not all compares can be new value compares. Arch Spec: 7.6.1.1
        if (QII->isNewValueJumpCandidate(MI)) {

          assert((MI->getDesc().isCompare()) &&
                 "Only compare instruction can be collapsed into New Value Jump");
          isSecondOpReg = MI->getOperand(2).isReg();

          if (!canCompareBeNewValueJump(QII, QRI, MII, predReg, isSecondOpReg,
                                        afterRA, jmpPos, MF))
            break;

          cmpInstr = MI;
          cmpPos = MII;
          foundCompare = true;

          // We need cmpReg1 and cmpOp2 (imm or reg) while building
          // the new value jump instruction.
          cmpReg1 = MI->getOperand(1).getReg();
          if (MI->getOperand(1).isKill())
            MO1IsKill = true;

          if (isSecondOpReg) {
            cmpOp2 = MI->getOperand(2).getReg();
            if (MI->getOperand(2).isKill())
              MO2IsKill = true;
          } else
            cmpOp2 = MI->getOperand(2).getImm();
          continue;
        }
      }

      if (foundCompare && foundJump) {

        // If "common" checks fail, bail out on this BB.
        if (!commonChecksToProhibitNewValueJump(afterRA, MII))
          break;

        bool foundFeeder = false;
        MachineBasicBlock::iterator feederPos = MII;
        if (MI->getOperand(0).isReg() &&
            MI->getOperand(0).isDef() &&
            (MI->getOperand(0).getReg() == cmpReg1 ||
             (isSecondOpReg &&
              MI->getOperand(0).getReg() == (unsigned) cmpOp2))) {

          unsigned feederReg = MI->getOperand(0).getReg();

          // First try to see if we can get the feeder from the first operand
          // of the compare. If we can not, and if secondOpReg is true
          // (second operand of the compare is also a register), try that one.
          // TODO: Try to come up with some heuristic to figure out which
          // feeder would benefit.
          if (feederReg == cmpReg1) {
            if (!canBeFeederToNewValueJump(QII, QRI, MII, jmpPos, cmpPos, MF)) {
              if (!isSecondOpReg)
                break;
              else
                continue;
            } else
              foundFeeder = true;
          }

          if (!foundFeeder &&
              isSecondOpReg &&
              feederReg == (unsigned) cmpOp2)
            if (!canBeFeederToNewValueJump(QII, QRI, MII, jmpPos, cmpPos, MF))
              break;

          if (isSecondOpReg) {
            // In case of CMPLT, or CMPLTU, or EQ with the second register
            // to newify, swap the operands.
            if (cmpInstr->getOpcode() == Hexagon::CMPLTrr ||
                cmpInstr->getOpcode() == Hexagon::CMPLTUrr ||
                (cmpInstr->getOpcode() == Hexagon::CMPEQrr &&
                 feederReg == (unsigned) cmpOp2)) {
              unsigned tmp = cmpReg1;
              bool tmpIsKill = MO1IsKill;
              cmpReg1 = cmpOp2;
              MO1IsKill = MO2IsKill;
              cmpOp2 = tmp;
              MO2IsKill = tmpIsKill;
            }

            // Now that we have swapped the operands, all we need to check is
            // whether the second operand (after the swap) is the feeder, and
            // if it is, make a note of it.
            if (feederReg == (unsigned)cmpOp2)
              isSecondOpNewified = true;
          }

          // Now that we are moving the feeder closer to the jump, make sure
          // we are respecting the kill values of the operands of the feeder.
          bool updatedIsKill = false;
          for (unsigned i = 0; i < MI->getNumOperands(); i++) {
            MachineOperand &MO = MI->getOperand(i);
            if (MO.isReg() && MO.isUse()) {
              unsigned feederReg = MO.getReg();
              for (MachineBasicBlock::iterator localII = feederPos,
                   end = jmpPos; localII != end; localII++) {
                MachineInstr *localMI = localII;
                for (unsigned j = 0; j < localMI->getNumOperands(); j++) {
                  MachineOperand &localMO = localMI->getOperand(j);
                  if (localMO.isReg() && localMO.isUse() &&
                      localMO.isKill() && feederReg == localMO.getReg()) {
                    // We found a kill of one of the feeder's use registers
                    // between its old position and the jump. Clear that kill
                    // and mark the feeder's operand as the kill instead.
                    localMO.setIsKill(false);
                    MO.setIsKill();
                    updatedIsKill = true;
                    break;
                  }
                }
                if (updatedIsKill) break;
              }
            }
            if (updatedIsKill) break;
          }

          MBB->splice(jmpPos, MI->getParent(), MI);
          MBB->splice(jmpPos, MI->getParent(), cmpInstr);
          DebugLoc dl = MI->getDebugLoc();
          MachineInstr *NewMI;

          assert((QII->isNewValueJumpCandidate(cmpInstr)) &&
                 "This compare is not a New Value Jump candidate.");
          unsigned opc = getNewValueJumpOpcode(cmpInstr, cmpOp2,
                                               isSecondOpNewified);
          if (invertPredicate)
            opc = QII->getInvertedPredicatedOpcode(opc);

          // Manage the conversions from CMPGEUri to either CMPEQrr
          // or CMPGTUri properly. See Arch spec for CMPGEUri instructions.
          // This has to be after the getNewValueJumpOpcode function call, as
          // the second operand of the compare could be modified by this logic.
          if (cmpInstr->getOpcode() == Hexagon::CMPGEUri) {
            if (cmpOp2 == 0) {
              cmpOp2 = cmpReg1;
              MO2IsKill = MO1IsKill;
              isSecondOpReg = true;
            } else
              --cmpOp2;
          }

          // Manage the conversions from CMPGEri to CMPGTUri properly.
          // See Arch spec for CMPGEri instructions.
          if (cmpInstr->getOpcode() == Hexagon::CMPGEri)
            --cmpOp2;

          if (isSecondOpReg) {
            NewMI = BuildMI(*MBB, jmpPos, dl, QII->get(opc))
                        .addReg(cmpReg1, getKillRegState(MO1IsKill))
                        .addReg(cmpOp2, getKillRegState(MO2IsKill))
                        .addMBB(jmpTarget);
          } else {
            NewMI = BuildMI(*MBB, jmpPos, dl, QII->get(opc))
                        .addReg(cmpReg1, getKillRegState(MO1IsKill))
                        .addImm(cmpOp2)
                        .addMBB(jmpTarget);
          }

          assert(NewMI && "New Value Jump Instruction Not created!");
          if (cmpInstr->getOperand(0).isReg() &&
              cmpInstr->getOperand(0).isKill())
            cmpInstr->getOperand(0).setIsKill(false);
          if (cmpInstr->getOperand(1).isReg() &&
              cmpInstr->getOperand(1).isKill())
            cmpInstr->getOperand(1).setIsKill(false);
          cmpInstr->eraseFromParent();
          jmpInstr->eraseFromParent();
          ++nvjGenerated;
          ++NumNVJGenerated;
          break;
        }
      }
    }
  }

  return true;
}
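// Illustrative sketch of the fusion performed above (schematic Hexagon
// assembly, register names made up):
//
//   before:   r3 = ...                  ; feeder
//             p0 = cmp.eq(r3, r5)
//             if (p0) jump .LBB0_2
//
//   after:    r3 = ...                  ; feeder moved next to the jump
//             if (cmp.eq(r3.new, r5)) jump:t .LBB0_2
//
// The compare and jump now issue in one packet, with the feeder's result
// forwarded as a "new value"; the predicate register is no longer needed.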
virtual bool runOnMachineFunction(MachineFunction &MF) {
  TII = static_cast<const R600InstrInfo *>(MF.getTarget().getInstrInfo());
  TRI = static_cast<const R600RegisterInfo *>(MF.getTarget().getRegisterInfo());
  unsigned MaxStack = 0;
  unsigned CurrentStack = 0;
  unsigned CurrentLoopDepth = 0;
  bool HasPush = false;
  for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME;
       ++MB) {
    MachineBasicBlock &MBB = *MB;
    unsigned CfCount = 0;
    std::vector<std::pair<unsigned, std::set<MachineInstr *> > > LoopStack;
    std::vector<MachineInstr *> IfThenElseStack;
    R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
    if (MFI->ShaderType == 1) {
      BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
              getHWInstrDesc(CF_CALL_FS));
      CfCount++;
      MaxStack = 1;
    }
    std::vector<ClauseFile> FetchClauses, AluClauses;
    std::vector<MachineInstr *> LastAlu(1);
    std::vector<MachineInstr *> ToPopAfter;

    for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
         I != E;) {
      if (TII->usesTextureCache(I) || TII->usesVertexCache(I)) {
        DEBUG(dbgs() << CfCount << ":"; I->dump(););
        FetchClauses.push_back(MakeFetchClause(MBB, I));
        CfCount++;
        continue;
      }

      MachineBasicBlock::iterator MI = I;
      if (MI->getOpcode() != AMDGPU::ENDIF)
        LastAlu.back() = 0;
      if (MI->getOpcode() == AMDGPU::CF_ALU)
        LastAlu.back() = MI;
      I++;
      switch (MI->getOpcode()) {
      case AMDGPU::CF_ALU_PUSH_BEFORE:
        CurrentStack++;
        MaxStack = std::max(MaxStack, CurrentStack);
        HasPush = true;
        if (ST.hasCaymanISA() && CurrentLoopDepth > 1) {
          BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::CF_PUSH_CM))
              .addImm(CfCount + 1)
              .addImm(1);
          MI->setDesc(TII->get(AMDGPU::CF_ALU));
          CfCount++;
        }
        // Intentional fall-through to the CF_ALU case.
      case AMDGPU::CF_ALU:
        I = MI;
        AluClauses.push_back(MakeALUClause(MBB, I));
        DEBUG(dbgs() << CfCount << ":"; MI->dump(););
        CfCount++;
        break;
      case AMDGPU::WHILELOOP: {
        CurrentStack += 4;
        CurrentLoopDepth++;
        MaxStack = std::max(MaxStack, CurrentStack);
        MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
                                    getHWInstrDesc(CF_WHILE_LOOP))
                                .addImm(1);
        std::pair<unsigned, std::set<MachineInstr *> > Pair(CfCount,
            std::set<MachineInstr *>());
        Pair.second.insert(MIb);
        LoopStack.push_back(Pair);
        MI->eraseFromParent();
        CfCount++;
        break;
      }
      case AMDGPU::ENDLOOP: {
        CurrentStack -= 4;
        CurrentLoopDepth--;
        std::pair<unsigned, std::set<MachineInstr *> > Pair =
            LoopStack.back();
        LoopStack.pop_back();
        CounterPropagateAddr(Pair.second, CfCount);
        BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END_LOOP))
            .addImm(Pair.first + 1);
        MI->eraseFromParent();
        CfCount++;
        break;
      }
      case AMDGPU::IF_PREDICATE_SET: {
        LastAlu.push_back(0);
        MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
                                    getHWInstrDesc(CF_JUMP))
                                .addImm(0)
                                .addImm(0);
        IfThenElseStack.push_back(MIb);
        DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
        MI->eraseFromParent();
        CfCount++;
        break;
      }
void VPreRegAllocSched::buildMemDepEdges(VSchedGraph &G, ArrayRef<VSUnit*> SUs) {
  // The schedule unit and the corresponding memory operand.
  typedef std::vector<std::pair<MachineMemOperand*, VSUnit*> > MemOpMapTy;
  MemOpMapTy VisitedOps;
  Loop *IRL = LI->getLoopFor(G.getEntryBB()->getBasicBlock());

  typedef ArrayRef<VSUnit*>::iterator it;
  for (it I = SUs.begin(), E = SUs.end(); I != E; ++I) {
    VSUnit *DstU = *I;
    MachineInstr *DstMI = DstU->getRepresentativePtr();
    // Skip operations that neither access memory nor are calls.
    if (!mayAccessMemory(DstMI->getDesc())) continue;

    bool isDstWrite = VInstrInfo::mayStore(DstMI);

    // Dirty Hack: Is the const_cast safe?
    MachineMemOperand *DstMO = 0;
    // TODO: Also try to get the address information for call instructions.
    if (!DstMI->memoperands_empty() && !DstMI->hasVolatileMemoryRef()) {
      assert(DstMI->hasOneMemOperand() && "Can not handle multiple mem ops!");
      assert(!DstMI->hasVolatileMemoryRef() && "Can not handle volatile op!");

      // FIXME: DstMO may be null in a VOpCmdSeq
      if ((DstMO = /*ASSIGNMENT*/ *DstMI->memoperands_begin())){
        assert(!isa<PseudoSourceValue>(DstMO->getValue()) &&
               "Unexpected frame stuffs!");
      }
    }

    typedef MemOpMapTy::iterator visited_it;
    for (visited_it I = VisitedOps.begin(), E = VisitedOps.end(); I != E; ++I) {
      MachineMemOperand *SrcMO = I->first;
      VSUnit *SrcU = I->second;

      MachineInstr *SrcMI = SrcU->getRepresentativePtr();

      bool MayBothActive = !VInstrInfo::isPredicateMutex(SrcMI, DstMI);
      if (!MayBothActive) ++MutexPredNoAlias;

      // Handle unanalyzable memory access.
      if (DstMO == 0 || SrcMO == 0) {
        // Build the Src -> Dst dependence.
        unsigned Latency = G.getStepsToFinish(SrcMI);
        //if (MayBothActive || SrcMO != DstMO)
        DstU->addDep<true>(SrcU, VDEdge::CreateMemDep(Latency, 0));

        // Build the Dst -> Src (next iteration) dependence; this dependence
        // occurs even if SrcMI and DstMI are mutually exclusive.
        if (G.enablePipeLine()) {
          Latency = G.getStepsToFinish(SrcMI);
          SrcU->addDep<true>(DstU, VDEdge::CreateMemDep(Latency, 1));
        }
        // Go on to handle the next visited SUnit.
        continue;
      }

      bool isSrcWrite = VInstrInfo::mayStore(SrcMI);

      // Ignore RAR dependence.
      if (!isDstWrite && !isSrcWrite) continue;

      if (!isMachineMemOperandAlias(SrcMO, DstMO, AA, SE))
        continue;

      if (G.enablePipeLine()) {
        assert(IRL && "Can not handle machine loop without IR loop!");
        DEBUG(SrcMI->dump(); dbgs() << "vs\n"; DstMI->dump(); dbgs() << '\n');

        // Dst does not depend on Src if they are mutually exclusive.
        if (MayBothActive) {
          // Compute the iteration distance.
          int DepDst = analyzeLoopDep(SrcMO, DstMO, *IRL, true);

          if (DepDst >= 0) {
            unsigned Latency = G.getStepsToFinish(SrcMI);
            DstU->addDep<true>(SrcU, VDEdge::CreateMemDep(Latency, DepDst));
          }
        }

        // We still need to compute whether Src depends on Dst, even if Dst
        // does not depend on Src, because the dependence depends on execution
        // order when SrcMI and DstMI are mutually exclusive.
        int DepDst = analyzeLoopDep(DstMO, SrcMO, *IRL, false);

        if (DepDst >= 0) {
          unsigned Latency = G.getStepsToFinish(SrcMI);
          SrcU->addDep<true>(DstU, VDEdge::CreateMemDep(Latency, DepDst));
        }
      } else if (MayBothActive) {
        unsigned Latency = G.getStepsToFinish(SrcMI);
        DstU->addDep<true>(SrcU, VDEdge::CreateMemDep(Latency, 0));
      }
    }
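// Illustrative note (schematic): for two memory operations A (visited earlier)
// and B (visited later) in the loop body, the code above can add edges in both
// directions:
//
//   B depends on A, distance 0   ; B must follow A within the same iteration
//   A depends on B, distance 1   ; A of the next iteration must follow B
//
// The distance-1 edge is only added when software pipelining is enabled, since
// it only constrains how far successive iterations may overlap.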