unsigned
SPUInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator I = MBB.end();
  removeHBR(MBB);
  if (I == MBB.begin())
    return 0;
  --I;
  while (I->isDebugValue()) {
    if (I == MBB.begin())
      return 0;
    --I;
  }
  if (!isCondBranch(I) && !isUncondBranch(I))
    return 0;

  // Remove the first branch.
  DEBUG(errs() << "Removing branch: ");
  DEBUG(I->dump());
  I->eraseFromParent();
  I = MBB.end();
  if (I == MBB.begin())
    return 1;

  --I;
  if (!(isCondBranch(I) || isUncondBranch(I)))
    return 1;

  // Remove the second branch.
  DEBUG(errs() << "Removing second branch: ");
  DEBUG(I->dump());
  I->eraseFromParent();
  return 2;
}
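// A minimal sketch of the caller-side contract, not part of the SPU backend:
// generic codegen (e.g. the branch folder) pairs RemoveBranch with
// AnalyzeBranch, and the return value is the number of terminators actually
// erased (0, 1, or 2). "simplifyTerminators" is an illustrative name only.
static void simplifyTerminators(MachineBasicBlock &MBB,
                                const SPUInstrInfo &TII) {
  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
  SmallVector<MachineOperand, 4> Cond;
  // AnalyzeBranch returns false on success; only analyzable terminator
  // sequences may be stripped and re-emitted.
  if (!TII.AnalyzeBranch(MBB, TBB, FBB, Cond, /*AllowModify=*/false)) {
    unsigned Removed = TII.RemoveBranch(MBB);
    assert(Removed <= 2 && "RemoveBranch erases at most two branches");
    (void)Removed;
    // ... re-insert a simplified sequence with InsertBranch(...) ...
  }
}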
bool runOnMachineFunction(MachineFunction &MF) override {
  ST = &MF.getSubtarget<R600Subtarget>();
  MaxFetchInst = ST->getTexVTXClauseSize();
  TII = ST->getInstrInfo();
  TRI = ST->getRegisterInfo();

  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();

  CFStack CFStack(ST, MF.getFunction().getCallingConv());
  for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME;
       ++MB) {
    MachineBasicBlock &MBB = *MB;
    unsigned CfCount = 0;
    std::vector<std::pair<unsigned, std::set<MachineInstr *>>> LoopStack;
    std::vector<MachineInstr *> IfThenElseStack;
    if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_VS) {
      BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
              getHWInstrDesc(CF_CALL_FS));
      CfCount++;
    }
    std::vector<ClauseFile> FetchClauses, AluClauses;
    std::vector<MachineInstr *> LastAlu(1);
    std::vector<MachineInstr *> ToPopAfter;

    for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
         I != E;) {
      if (TII->usesTextureCache(*I) || TII->usesVertexCache(*I)) {
        LLVM_DEBUG(dbgs() << CfCount << ":"; I->dump(););
        FetchClauses.push_back(MakeFetchClause(MBB, I));
        CfCount++;
        LastAlu.back() = nullptr;
        continue;
      }

      MachineBasicBlock::iterator MI = I;
      if (MI->getOpcode() != R600::ENDIF)
        LastAlu.back() = nullptr;
      if (MI->getOpcode() == R600::CF_ALU)
        LastAlu.back() = &*MI;
      I++;
      bool RequiresWorkAround =
          CFStack.requiresWorkAroundForInst(MI->getOpcode());
      switch (MI->getOpcode()) {
      case R600::CF_ALU_PUSH_BEFORE:
        if (RequiresWorkAround) {
          LLVM_DEBUG(dbgs()
                     << "Applying bug work-around for ALU_PUSH_BEFORE\n");
          BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(R600::CF_PUSH_EG))
              .addImm(CfCount + 1)
              .addImm(1);
          MI->setDesc(TII->get(R600::CF_ALU));
          CfCount++;
          CFStack.pushBranch(R600::CF_PUSH_EG);
        } else
          CFStack.pushBranch(R600::CF_ALU_PUSH_BEFORE);
        LLVM_FALLTHROUGH;
      case R600::CF_ALU:
        I = MI;
        AluClauses.push_back(MakeALUClause(MBB, I));
        LLVM_DEBUG(dbgs() << CfCount << ":"; MI->dump(););
        CfCount++;
        break;
      case R600::WHILELOOP: {
        CFStack.pushLoop();
        MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
                                    getHWInstrDesc(CF_WHILE_LOOP))
                                .addImm(1);
        std::pair<unsigned, std::set<MachineInstr *>> Pair(
            CfCount, std::set<MachineInstr *>());
        Pair.second.insert(MIb);
        LoopStack.push_back(std::move(Pair));
        MI->eraseFromParent();
        CfCount++;
        break;
      }
      case R600::ENDLOOP: {
        CFStack.popLoop();
        std::pair<unsigned, std::set<MachineInstr *>> Pair =
            std::move(LoopStack.back());
        LoopStack.pop_back();
        CounterPropagateAddr(Pair.second, CfCount);
        BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END_LOOP))
            .addImm(Pair.first + 1);
        MI->eraseFromParent();
        CfCount++;
        break;
      }
      case R600::IF_PREDICATE_SET: {
        LastAlu.push_back(nullptr);
        MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
                                    getHWInstrDesc(CF_JUMP))
                                .addImm(0)
                                .addImm(0);
        IfThenElseStack.push_back(MIb);
        LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
        MI->eraseFromParent();
        CfCount++;
        break;
      }
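// CounterPropagateAddr is invoked above (in the ENDLOOP case) but not shown
// in this excerpt. A minimal sketch of what it needs to do, assuming the
// jump-address immediate is operand 0 of each recorded loop instruction:
static void CounterPropagateAddr(const std::set<MachineInstr *> &MIs,
                                 unsigned Addr) {
  // Each WHILELOOP placeholder was emitted before its matching ENDLOOP's CF
  // index was known; patch the final address into every instruction recorded
  // for this loop nest level.
  for (MachineInstr *MI : MIs)
    MI->getOperand(0).setImm(Addr + MI->getOperand(0).getImm());
}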
void X86CmovConverterPass::convertCmovInstsToBranches(
    SmallVectorImpl<MachineInstr *> &Group) const {
  assert(!Group.empty() && "No CMOV instructions to convert");
  ++NumOfOptimizedCmovGroups;

  // If the CMOV group is not packed, e.g., there are debug instructions
  // between first CMOV and last CMOV, then pack the group and make the CMOV
  // instructions consecutive by moving the debug instructions to after the
  // last CMOV.
  packCmovGroup(Group.front(), Group.back());

  // To convert a CMOVcc instruction, we actually have to insert the diamond
  // control-flow pattern. The incoming instruction knows the destination vreg
  // to set, the condition code register to branch on, the true/false values
  // to select between, and a branch opcode to use.

  // Before
  // -----
  // MBB:
  //   cond = cmp ...
  //   v1 = CMOVge t1, f1, cond
  //   v2 = CMOVlt t2, f2, cond
  //   v3 = CMOVge v1, f3, cond
  //
  // After
  // -----
  // MBB:
  //   cond = cmp ...
  //   jge %SinkMBB
  //
  // FalseMBB:
  //   jmp %SinkMBB
  //
  // SinkMBB:
  //   %v1 = phi[%f1, %FalseMBB], [%t1, %MBB]
  //   %v2 = phi[%t2, %FalseMBB], [%f2, %MBB] ; For CMOV with OppCC switch
  //                                          ; true-value with false-value
  //   %v3 = phi[%f3, %FalseMBB], [%t1, %MBB] ; Phi instruction cannot use
  //                                          ; previous Phi instruction result

  MachineInstr &MI = *Group.front();
  MachineInstr *LastCMOV = Group.back();
  DebugLoc DL = MI.getDebugLoc();

  X86::CondCode CC = X86::CondCode(X86::getCondFromCMovOpc(MI.getOpcode()));
  X86::CondCode OppCC = X86::GetOppositeBranchCondition(CC);

  // Potentially swap the condition codes so that any memory operand to a CMOV
  // is in the *false* position instead of the *true* position. We can invert
  // any non-memory operand CMOV instructions to cope with this and we ensure
  // memory operand CMOVs are only included with a single condition code.
  if (llvm::any_of(Group, [&](MachineInstr *I) {
        return I->mayLoad() && X86::getCondFromCMovOpc(I->getOpcode()) == CC;
      }))
    std::swap(CC, OppCC);

  MachineBasicBlock *MBB = MI.getParent();
  MachineFunction::iterator It = ++MBB->getIterator();
  MachineFunction *F = MBB->getParent();
  const BasicBlock *BB = MBB->getBasicBlock();

  MachineBasicBlock *FalseMBB = F->CreateMachineBasicBlock(BB);
  MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(BB);
  F->insert(It, FalseMBB);
  F->insert(It, SinkMBB);

  // If the EFLAGS register isn't dead in the terminator, then claim that it's
  // live into the sink and copy blocks.
  if (checkEFLAGSLive(LastCMOV)) {
    FalseMBB->addLiveIn(X86::EFLAGS);
    SinkMBB->addLiveIn(X86::EFLAGS);
  }

  // Transfer the remainder of BB and its successor edges to SinkMBB.
  SinkMBB->splice(SinkMBB->begin(), MBB,
                  std::next(MachineBasicBlock::iterator(LastCMOV)),
                  MBB->end());
  SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);

  // Add the false and sink blocks as its successors.
  MBB->addSuccessor(FalseMBB);
  MBB->addSuccessor(SinkMBB);

  // Create the conditional branch instruction.
  BuildMI(MBB, DL, TII->get(X86::GetCondBranchFromCond(CC))).addMBB(SinkMBB);

  // Add the sink block to the false block successors.
  FalseMBB->addSuccessor(SinkMBB);

  MachineInstrBuilder MIB;
  MachineBasicBlock::iterator MIItBegin = MachineBasicBlock::iterator(MI);
  MachineBasicBlock::iterator MIItEnd =
      std::next(MachineBasicBlock::iterator(LastCMOV));
  MachineBasicBlock::iterator FalseInsertionPoint = FalseMBB->begin();
  MachineBasicBlock::iterator SinkInsertionPoint = SinkMBB->begin();

  // First we need to insert an explicit load on the false path for any memory
  // operand. We also need to potentially do register rewriting here, but it
  // is simpler as the memory operands are always on the false path so we can
  // simply take that input, whatever it is.
  DenseMap<unsigned, unsigned> FalseBBRegRewriteTable;
  for (MachineBasicBlock::iterator MIIt = MIItBegin; MIIt != MIItEnd;) {
    auto &MI = *MIIt++;
    // Skip any CMOVs in this group which don't load from memory.
    if (!MI.mayLoad()) {
      // Remember the false-side register input.
      unsigned FalseReg =
          MI.getOperand(X86::getCondFromCMovOpc(MI.getOpcode()) == CC ? 1 : 2)
              .getReg();
      // Walk back through any intermediate cmovs referenced.
      while (true) {
        auto FRIt = FalseBBRegRewriteTable.find(FalseReg);
        if (FRIt == FalseBBRegRewriteTable.end())
          break;
        FalseReg = FRIt->second;
      }
      FalseBBRegRewriteTable[MI.getOperand(0).getReg()] = FalseReg;
      continue;
    }

    // The condition must be the *opposite* of the one we've decided to branch
    // on as the branch will go *around* the load and the load should happen
    // when the CMOV condition is false.
    assert(X86::getCondFromCMovOpc(MI.getOpcode()) == OppCC &&
           "Can only handle memory-operand cmov instructions with a condition "
           "opposite to the selected branch direction.");

    // The goal is to rewrite the cmov from:
    //
    //   MBB:
    //     %A = CMOVcc %B (tied), (mem)
    //
    // to
    //
    //   MBB:
    //     %A = CMOVcc %B (tied), %C
    //   FalseMBB:
    //     %C = MOV (mem)
    //
    // Which will allow the next loop to rewrite the CMOV in terms of a PHI:
    //
    //   MBB:
    //     JMP!cc SinkMBB
    //   FalseMBB:
    //     %C = MOV (mem)
    //   SinkMBB:
    //     %A = PHI [ %C, FalseMBB ], [ %B, MBB]

    // Get a fresh register to use as the destination of the MOV.
    const TargetRegisterClass *RC = MRI->getRegClass(MI.getOperand(0).getReg());
    unsigned TmpReg = MRI->createVirtualRegister(RC);

    SmallVector<MachineInstr *, 4> NewMIs;
    bool Unfolded = TII->unfoldMemoryOperand(*MBB->getParent(), MI, TmpReg,
                                             /*UnfoldLoad*/ true,
                                             /*UnfoldStore*/ false, NewMIs);
    (void)Unfolded;
    assert(Unfolded && "Should never fail to unfold a loading cmov!");

    // Move the new CMOV to just before the old one and reset any impacted
    // iterator.
    auto *NewCMOV = NewMIs.pop_back_val();
    assert(X86::getCondFromCMovOpc(NewCMOV->getOpcode()) == OppCC &&
           "Last new instruction isn't the expected CMOV!");
    LLVM_DEBUG(dbgs() << "\tRewritten cmov: "; NewCMOV->dump());
    MBB->insert(MachineBasicBlock::iterator(MI), NewCMOV);
    if (&*MIItBegin == &MI)
      MIItBegin = MachineBasicBlock::iterator(NewCMOV);

    // Sink whatever instructions were needed to produce the unfolded operand
    // into the false block.
    for (auto *NewMI : NewMIs) {
      LLVM_DEBUG(dbgs() << "\tRewritten load instr: "; NewMI->dump());
      FalseMBB->insert(FalseInsertionPoint, NewMI);
      // Re-map any operands that are from other cmovs to the inputs for this
      // block.
      for (auto &MOp : NewMI->uses()) {
        if (!MOp.isReg())
          continue;
        auto It = FalseBBRegRewriteTable.find(MOp.getReg());
        if (It == FalseBBRegRewriteTable.end())
          continue;

        MOp.setReg(It->second);
        // This might have been a kill when it referenced the cmov result, but
        // it won't necessarily be once rewritten.
        // FIXME: We could potentially improve this by tracking whether the
        // operand to the cmov was also a kill, and then skipping the PHI node
        // construction below.
        MOp.setIsKill(false);
      }
    }
    MBB->erase(MachineBasicBlock::iterator(MI),
               std::next(MachineBasicBlock::iterator(MI)));

    // Add this PHI to the rewrite table.
    FalseBBRegRewriteTable[NewCMOV->getOperand(0).getReg()] = TmpReg;
  }

  // As we are creating the PHIs, we have to be careful if there is more than
  // one. Later CMOVs may reference the results of earlier CMOVs, but later
  // PHIs have to reference the individual true/false inputs from earlier
  // PHIs. That also means that PHI construction must work forward from
  // earlier to later, and that the code must maintain a mapping from earlier
  // PHI's destination registers, and the registers that went into the PHI.
  DenseMap<unsigned, std::pair<unsigned, unsigned>> RegRewriteTable;

  for (MachineBasicBlock::iterator MIIt = MIItBegin; MIIt != MIItEnd; ++MIIt) {
    unsigned DestReg = MIIt->getOperand(0).getReg();
    unsigned Op1Reg = MIIt->getOperand(1).getReg();
    unsigned Op2Reg = MIIt->getOperand(2).getReg();

    // If this CMOV we are processing is the opposite condition from the jump
    // we generated, then we have to swap the operands for the PHI that is
    // going to be generated.
    if (X86::getCondFromCMovOpc(MIIt->getOpcode()) == OppCC)
      std::swap(Op1Reg, Op2Reg);

    auto Op1Itr = RegRewriteTable.find(Op1Reg);
    if (Op1Itr != RegRewriteTable.end())
      Op1Reg = Op1Itr->second.first;

    auto Op2Itr = RegRewriteTable.find(Op2Reg);
    if (Op2Itr != RegRewriteTable.end())
      Op2Reg = Op2Itr->second.second;

    // SinkMBB:
    //   %Result = phi [ %FalseValue, FalseMBB ], [ %TrueValue, MBB ]
    //   ...
    MIB = BuildMI(*SinkMBB, SinkInsertionPoint, DL, TII->get(X86::PHI), DestReg)
              .addReg(Op1Reg)
              .addMBB(FalseMBB)
              .addReg(Op2Reg)
              .addMBB(MBB);
    (void)MIB;
    LLVM_DEBUG(dbgs() << "\tFrom: "; MIIt->dump());
    LLVM_DEBUG(dbgs() << "\tTo: "; MIB->dump());

    // Add this PHI to the rewrite table.
    RegRewriteTable[DestReg] = std::make_pair(Op1Reg, Op2Reg);
  }

  // Now remove the CMOV(s).
  MBB->erase(MIItBegin, MIItEnd);
}
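// A self-contained illustration of the rewrite-table rule the loop above
// enforces: a later PHI may not consume an earlier PHI's result directly, so
// each destination register is mapped to the pair of inputs that produced it.
// Plain unsigned values stand in for virtual registers here; this is a
// behavioral sketch, not LLVM API code.
#include <map>
#include <utility>

using RegPair = std::pair<unsigned, unsigned>; // (false-side, true-side)

unsigned falseInputFor(const std::map<unsigned, RegPair> &Table, unsigned Reg) {
  auto It = Table.find(Reg);
  // If Reg was defined by an earlier PHI, substitute the value it carries on
  // the false edge; otherwise the register is a genuine external input.
  return It == Table.end() ? Reg : It->second.first;
}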
virtual bool runOnMachineFunction(MachineFunction &MF) {
  TII = static_cast<const R600InstrInfo *>(MF.getTarget().getInstrInfo());
  TRI = static_cast<const R600RegisterInfo *>(MF.getTarget().getRegisterInfo());
  unsigned MaxStack = 0;
  unsigned CurrentStack = 0;
  unsigned CurrentLoopDepth = 0;
  bool HasPush = false;
  for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME;
       ++MB) {
    MachineBasicBlock &MBB = *MB;
    unsigned CfCount = 0;
    std::vector<std::pair<unsigned, std::set<MachineInstr *> > > LoopStack;
    std::vector<MachineInstr *> IfThenElseStack;
    R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
    if (MFI->ShaderType == 1) {
      BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
              getHWInstrDesc(CF_CALL_FS));
      CfCount++;
      MaxStack = 1;
    }
    std::vector<ClauseFile> FetchClauses, AluClauses;
    std::vector<MachineInstr *> LastAlu(1);
    std::vector<MachineInstr *> ToPopAfter;

    for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
         I != E;) {
      if (TII->usesTextureCache(I) || TII->usesVertexCache(I)) {
        DEBUG(dbgs() << CfCount << ":"; I->dump(););
        FetchClauses.push_back(MakeFetchClause(MBB, I));
        CfCount++;
        continue;
      }

      MachineBasicBlock::iterator MI = I;
      if (MI->getOpcode() != AMDGPU::ENDIF)
        LastAlu.back() = 0;
      if (MI->getOpcode() == AMDGPU::CF_ALU)
        LastAlu.back() = MI;
      I++;
      switch (MI->getOpcode()) {
      case AMDGPU::CF_ALU_PUSH_BEFORE:
        CurrentStack++;
        MaxStack = std::max(MaxStack, CurrentStack);
        HasPush = true;
        if (ST.hasCaymanISA() && CurrentLoopDepth > 1) {
          BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::CF_PUSH_CM))
              .addImm(CfCount + 1)
              .addImm(1);
          MI->setDesc(TII->get(AMDGPU::CF_ALU));
          CfCount++;
        }
        // Intentional fallthrough: the (possibly rewritten) instruction is
        // still emitted as an ALU clause.
      case AMDGPU::CF_ALU:
        I = MI;
        AluClauses.push_back(MakeALUClause(MBB, I));
        DEBUG(dbgs() << CfCount << ":"; MI->dump(););
        CfCount++;
        break;
      case AMDGPU::WHILELOOP: {
        CurrentStack += 4;
        CurrentLoopDepth++;
        MaxStack = std::max(MaxStack, CurrentStack);
        MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
                                    getHWInstrDesc(CF_WHILE_LOOP))
                                .addImm(1);
        std::pair<unsigned, std::set<MachineInstr *> > Pair(
            CfCount, std::set<MachineInstr *>());
        Pair.second.insert(MIb);
        LoopStack.push_back(Pair);
        MI->eraseFromParent();
        CfCount++;
        break;
      }
      case AMDGPU::ENDLOOP: {
        CurrentStack -= 4;
        CurrentLoopDepth--;
        std::pair<unsigned, std::set<MachineInstr *> > Pair = LoopStack.back();
        LoopStack.pop_back();
        CounterPropagateAddr(Pair.second, CfCount);
        BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END_LOOP))
            .addImm(Pair.first + 1);
        MI->eraseFromParent();
        CfCount++;
        break;
      }
      case AMDGPU::IF_PREDICATE_SET: {
        LastAlu.push_back(0);
        MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
                                    getHWInstrDesc(CF_JUMP))
                                .addImm(0)
                                .addImm(0);
        IfThenElseStack.push_back(MIb);
        DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
        MI->eraseFromParent();
        CfCount++;
        break;
      }
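// The inline bookkeeping above (CurrentStack/MaxStack) is what the newer
// revision earlier in this section factors into a CFStack helper. A
// self-contained sketch of that accounting, where the per-loop cost of 4
// entries mirrors the constant used above and is an assumption outside this
// context:
#include <algorithm>

struct StackDepthTracker {
  unsigned Current = 0; // entries currently live
  unsigned Max = 0;     // high-water mark ultimately reported to the driver
  void push(unsigned Slots) {
    Current += Slots;
    Max = std::max(Max, Current); // record the deepest point reached
  }
  void pop(unsigned Slots) { Current -= Slots; }
};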
bool MipsExpandPseudo::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
  bool Changed = false;
  MachineBasicBlock::iterator I0, I1;
  for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end();) {
    const MCInstrDesc &MCid = I->getDesc();
    switch (MCid.getOpcode()) {
    default:
      ++I;
      continue;
    case Mips::SETGP2:
      // Convert "setgp2 $globalreg, $t9" to "addu $globalreg, $v0, $t9"
      BuildMI(MBB, I, I->getDebugLoc(), TII->get(Mips::ADDu),
              I->getOperand(0).getReg())
          .addReg(Mips::V0).addReg(I->getOperand(1).getReg());
      break;
    /*
    // trick for SH
    case Mips::ULW:
      I0 = I++;
      I1 = I++;
      BuildMI(MBB, I0, I0->getDebugLoc(), TII->get(Mips::SB))
          .addOperand(I1->getOperand(1))
          .addOperand(I0->getOperand(1)).addOperand(I0->getOperand(2));
      BuildMI(MBB, I0, I0->getDebugLoc(), TII->get(Mips::SRL))
          .addOperand(I0->getOperand(0))
          .addOperand(I1->getOperand(1)).addImm(8);
      BuildMI(MBB, I0, I0->getDebugLoc(), TII->get(Mips::SB))
          .addOperand(I1->getOperand(1))
          .addOperand(I0->getOperand(1))
          .addImm(I0->getOperand(2).getImm() + 1);
      MBB.erase(I0);
      MBB.erase(I1);
      MBB.erase(I++);
      continue;
    // trick for LH
    case Mips::ULH:
      I->dump();
      BuildMI(MBB, I, I->getDebugLoc(), TII->get(Mips::LB))
          .addOperand(I->getOperand(0))
          .addOperand(I->getOperand(1))
          .addImm(I->getOperand(2).getImm() + 1);
      break;
    // trick for LHu
    case Mips::ULHu:
      I->dump();
      BuildMI(MBB, I, I->getDebugLoc(), TII->get(Mips::LBu))
          .addOperand(I->getOperand(0))
          .addOperand(I->getOperand(1))
          .addImm(I->getOperand(2).getImm() + 1);
      break;
    */
    case Mips::SYNC:
      I->dump();
      BuildMI(MBB, I, I->getDebugLoc(), TII->get(Mips::NOP));
      break;
    case Mips::ADD:
      I->dump();
      BuildMI(MBB, I, I->getDebugLoc(), TII->get(Mips::ADDu))
          .addOperand(I->getOperand(0))
          .addOperand(I->getOperand(1)).addOperand(I->getOperand(2));
      break;
    case Mips::ADDi:
      I->dump();
      BuildMI(MBB, I, I->getDebugLoc(), TII->get(Mips::ADDiu))
          .addOperand(I->getOperand(0))
          .addOperand(I->getOperand(1)).addOperand(I->getOperand(2));
      break;
    case Mips::SUB:
      I->dump();
      BuildMI(MBB, I, I->getDebugLoc(), TII->get(Mips::SUBu))
          .addOperand(I->getOperand(0))
          .addOperand(I->getOperand(1)).addOperand(I->getOperand(2));
      break;
    case Mips::B:
      I->dump();
      BuildMI(MBB, I, I->getDebugLoc(), TII->get(Mips::BGEZ))
          .addReg(Mips::ZERO).addOperand(I->getOperand(0));
      break;
    case Mips::LWL:
    case Mips::LWR:
    case Mips::SWL:
    case Mips::SWR:
    case Mips::LL:
    case Mips::LL_P8:
    case Mips::SC:
    case Mips::SC_P8:
    case Mips::MOVZ_I_I:
    case Mips::MOVN_I_I:
    case Mips::ROTR:
    case Mips::ROTRV:
    case Mips::EXT:
    case Mips::INS:
    case Mips::RDHWR:
    case Mips::MUL:
    case Mips::MULTu:
    case Mips::MULT:
    case Mips::SDIV:
    case Mips::UDIV:
    case Mips::ULW:
    case Mips::USW:
    /*
    case Mips::MTHI:
    case Mips::MTLO:
    case Mips::MFHI:
    case Mips::MFLO:
    */
    /*
    case Mips::BGEZAL:
    case Mips::BLTZAL:
    */
      I->dump();
      ++I;
      continue;
    case Mips::BuildPairF64:
      ExpandBuildPairF64(MBB, I);
      break;
    case Mips::ExtractElementF64:
      ExpandExtractElementF64(MBB, I);
      break;
    }

    // delete original instr
    MBB.erase(I++);
    Changed = true;
  }

  return Changed;
}
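// A minimal sketch of the pass driver this excerpt omits: the function-level
// entry point simply folds the per-block results together. The structure is
// typical for expansion passes of this era; treat everything beyond the call
// to runOnMachineBasicBlock as an assumption.
bool MipsExpandPseudo::runOnMachineFunction(MachineFunction &F) {
  bool Changed = false;
  // Expand pseudos block by block; a change anywhere marks the whole
  // function as modified.
  for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I)
    Changed |= runOnMachineBasicBlock(*I);
  return Changed;
}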