bool Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI, const ReduceEntry &Entry, bool LiveCPSR, bool IsSelfLoop) { if (ReduceLimit != -1 && ((int)NumNarrows >= ReduceLimit)) return false; if (!MinimizeSize && !OptimizeSize && Entry.AvoidMovs && STI->avoidMOVsShifterOperand()) // Don't issue movs with shifter operand for some CPUs unless we // are optimizing / minimizing for size. return false; unsigned Limit = ~0U; if (Entry.Imm1Limit) Limit = (1 << Entry.Imm1Limit) - 1; const MCInstrDesc &MCID = MI->getDesc(); for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) { if (MCID.OpInfo[i].isPredicate()) continue; const MachineOperand &MO = MI->getOperand(i); if (MO.isReg()) { unsigned Reg = MO.getReg(); if (!Reg || Reg == ARM::CPSR) continue; if (Entry.LowRegs1 && !isARMLowRegister(Reg)) return false; } else if (MO.isImm() && !MCID.OpInfo[i].isPredicate()) { if (((unsigned)MO.getImm()) > Limit) return false; } } // Check if it's possible / necessary to transfer the predicate. const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc1); unsigned PredReg = 0; ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); bool SkipPred = false; if (Pred != ARMCC::AL) { if (!NewMCID.isPredicable()) // Can't transfer predicate, fail. return false; } else { SkipPred = !NewMCID.isPredicable(); } bool HasCC = false; bool CCDead = false; if (MCID.hasOptionalDef()) { unsigned NumOps = MCID.getNumOperands(); HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR); if (HasCC && MI->getOperand(NumOps-1).isDead()) CCDead = true; } if (!VerifyPredAndCC(MI, Entry, false, Pred, LiveCPSR, HasCC, CCDead)) return false; // Avoid adding a false dependency on partial flag update by some 16-bit // instructions which has the 's' bit set. if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC && canAddPseudoFlagDep(MI, IsSelfLoop)) return false; // Add the 16-bit instruction. DebugLoc dl = MI->getDebugLoc(); MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID); MIB.addOperand(MI->getOperand(0)); if (NewMCID.hasOptionalDef()) { if (HasCC) AddDefaultT1CC(MIB, CCDead); else AddNoT1CC(MIB); } // Transfer the rest of operands. unsigned NumOps = MCID.getNumOperands(); for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) { if (i < NumOps && MCID.OpInfo[i].isOptionalDef()) continue; if ((MCID.getOpcode() == ARM::t2RSBSri || MCID.getOpcode() == ARM::t2RSBri || MCID.getOpcode() == ARM::t2SXTB || MCID.getOpcode() == ARM::t2SXTH || MCID.getOpcode() == ARM::t2UXTB || MCID.getOpcode() == ARM::t2UXTH) && i == 2) // Skip the zero immediate operand, it's now implicit. continue; bool isPred = (i < NumOps && MCID.OpInfo[i].isPredicate()); if (SkipPred && isPred) continue; const MachineOperand &MO = MI->getOperand(i); if (MO.isReg() && MO.isImplicit() && MO.getReg() == ARM::CPSR) // Skip implicit def of CPSR. Either it's modeled as an optional // def now or it's already an implicit def on the new instruction. continue; MIB.addOperand(MO); } if (!MCID.isPredicable() && NewMCID.isPredicable()) AddDefaultPred(MIB); // Transfer MI flags. MIB.setMIFlags(MI->getFlags()); DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB); MBB.erase_instr(MI); ++NumNarrows; return true; }
bool Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, const ReduceEntry &Entry, bool LiveCPSR, bool IsSelfLoop) { if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr)) return false; if (!MinimizeSize && !OptimizeSize && Entry.AvoidMovs && STI->avoidMOVsShifterOperand()) // Don't issue movs with shifter operand for some CPUs unless we // are optimizing / minimizing for size. return false; unsigned Reg0 = MI->getOperand(0).getReg(); unsigned Reg1 = MI->getOperand(1).getReg(); // t2MUL is "special". The tied source operand is second, not first. if (MI->getOpcode() == ARM::t2MUL) { unsigned Reg2 = MI->getOperand(2).getReg(); // Early exit if the regs aren't all low regs. if (!isARMLowRegister(Reg0) || !isARMLowRegister(Reg1) || !isARMLowRegister(Reg2)) return false; if (Reg0 != Reg2) { // If the other operand also isn't the same as the destination, we // can't reduce. if (Reg1 != Reg0) return false; // Try to commute the operands to make it a 2-address instruction. MachineInstr *CommutedMI = TII->commuteInstruction(MI); if (!CommutedMI) return false; } } else if (Reg0 != Reg1) { // Try to commute the operands to make it a 2-address instruction. unsigned CommOpIdx1, CommOpIdx2; if (!TII->findCommutedOpIndices(MI, CommOpIdx1, CommOpIdx2) || CommOpIdx1 != 1 || MI->getOperand(CommOpIdx2).getReg() != Reg0) return false; MachineInstr *CommutedMI = TII->commuteInstruction(MI); if (!CommutedMI) return false; } if (Entry.LowRegs2 && !isARMLowRegister(Reg0)) return false; if (Entry.Imm2Limit) { unsigned Imm = MI->getOperand(2).getImm(); unsigned Limit = (1 << Entry.Imm2Limit) - 1; if (Imm > Limit) return false; } else { unsigned Reg2 = MI->getOperand(2).getReg(); if (Entry.LowRegs2 && !isARMLowRegister(Reg2)) return false; } // Check if it's possible / necessary to transfer the predicate. const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc2); unsigned PredReg = 0; ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); bool SkipPred = false; if (Pred != ARMCC::AL) { if (!NewMCID.isPredicable()) // Can't transfer predicate, fail. return false; } else { SkipPred = !NewMCID.isPredicable(); } bool HasCC = false; bool CCDead = false; const MCInstrDesc &MCID = MI->getDesc(); if (MCID.hasOptionalDef()) { unsigned NumOps = MCID.getNumOperands(); HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR); if (HasCC && MI->getOperand(NumOps-1).isDead()) CCDead = true; } if (!VerifyPredAndCC(MI, Entry, true, Pred, LiveCPSR, HasCC, CCDead)) return false; // Avoid adding a false dependency on partial flag update by some 16-bit // instructions which has the 's' bit set. if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC && canAddPseudoFlagDep(MI, IsSelfLoop)) return false; // Add the 16-bit instruction. DebugLoc dl = MI->getDebugLoc(); MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID); MIB.addOperand(MI->getOperand(0)); if (NewMCID.hasOptionalDef()) { if (HasCC) AddDefaultT1CC(MIB, CCDead); else AddNoT1CC(MIB); } // Transfer the rest of operands. unsigned NumOps = MCID.getNumOperands(); for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) { if (i < NumOps && MCID.OpInfo[i].isOptionalDef()) continue; if (SkipPred && MCID.OpInfo[i].isPredicate()) continue; MIB.addOperand(MI->getOperand(i)); } // Transfer MI flags. MIB.setMIFlags(MI->getFlags()); DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB); MBB.erase_instr(MI); ++Num2Addrs; return true; }
bool Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, const ReduceEntry &Entry) { if (ReduceLimitLdSt != -1 && ((int)NumLdSts >= ReduceLimitLdSt)) return false; unsigned Scale = 1; bool HasImmOffset = false; bool HasShift = false; bool HasOffReg = true; bool isLdStMul = false; unsigned Opc = Entry.NarrowOpc1; unsigned OpNum = 3; // First 'rest' of operands. uint8_t ImmLimit = Entry.Imm1Limit; switch (Entry.WideOpc) { default: llvm_unreachable("Unexpected Thumb2 load / store opcode!"); case ARM::t2LDRi12: case ARM::t2STRi12: if (MI->getOperand(1).getReg() == ARM::SP) { Opc = Entry.NarrowOpc2; ImmLimit = Entry.Imm2Limit; HasOffReg = false; } Scale = 4; HasImmOffset = true; HasOffReg = false; break; case ARM::t2LDRBi12: case ARM::t2STRBi12: HasImmOffset = true; HasOffReg = false; break; case ARM::t2LDRHi12: case ARM::t2STRHi12: Scale = 2; HasImmOffset = true; HasOffReg = false; break; case ARM::t2LDRs: case ARM::t2LDRBs: case ARM::t2LDRHs: case ARM::t2LDRSBs: case ARM::t2LDRSHs: case ARM::t2STRs: case ARM::t2STRBs: case ARM::t2STRHs: HasShift = true; OpNum = 4; break; case ARM::t2LDMIA: case ARM::t2LDMDB: { unsigned BaseReg = MI->getOperand(0).getReg(); if (!isARMLowRegister(BaseReg) || Entry.WideOpc != ARM::t2LDMIA) return false; // For the non-writeback version (this one), the base register must be // one of the registers being loaded. bool isOK = false; for (unsigned i = 4; i < MI->getNumOperands(); ++i) { if (MI->getOperand(i).getReg() == BaseReg) { isOK = true; break; } } if (!isOK) return false; OpNum = 0; isLdStMul = true; break; } case ARM::t2LDMIA_RET: { unsigned BaseReg = MI->getOperand(1).getReg(); if (BaseReg != ARM::SP) return false; Opc = Entry.NarrowOpc2; // tPOP_RET OpNum = 2; isLdStMul = true; break; } case ARM::t2LDMIA_UPD: case ARM::t2LDMDB_UPD: case ARM::t2STMIA_UPD: case ARM::t2STMDB_UPD: { OpNum = 0; unsigned BaseReg = MI->getOperand(1).getReg(); if (BaseReg == ARM::SP && (Entry.WideOpc == ARM::t2LDMIA_UPD || Entry.WideOpc == ARM::t2STMDB_UPD)) { Opc = Entry.NarrowOpc2; // tPOP or tPUSH OpNum = 2; } else if (!isARMLowRegister(BaseReg) || (Entry.WideOpc != ARM::t2LDMIA_UPD && Entry.WideOpc != ARM::t2STMIA_UPD)) { return false; } isLdStMul = true; break; } } unsigned OffsetReg = 0; bool OffsetKill = false; if (HasShift) { OffsetReg = MI->getOperand(2).getReg(); OffsetKill = MI->getOperand(2).isKill(); if (MI->getOperand(3).getImm()) // Thumb1 addressing mode doesn't support shift. return false; } unsigned OffsetImm = 0; if (HasImmOffset) { OffsetImm = MI->getOperand(2).getImm(); unsigned MaxOffset = ((1 << ImmLimit) - 1) * Scale; if ((OffsetImm & (Scale - 1)) || OffsetImm > MaxOffset) // Make sure the immediate field fits. return false; } // Add the 16-bit load / store instruction. DebugLoc dl = MI->getDebugLoc(); MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, TII->get(Opc)); if (!isLdStMul) { MIB.addOperand(MI->getOperand(0)); MIB.addOperand(MI->getOperand(1)); if (HasImmOffset) MIB.addImm(OffsetImm / Scale); assert((!HasShift || OffsetReg) && "Invalid so_reg load / store address!"); if (HasOffReg) MIB.addReg(OffsetReg, getKillRegState(OffsetKill)); } // Transfer the rest of operands. for (unsigned e = MI->getNumOperands(); OpNum != e; ++OpNum) MIB.addOperand(MI->getOperand(OpNum)); // Transfer memoperands. MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); // Transfer MI flags. MIB.setMIFlags(MI->getFlags()); DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB); MBB.erase_instr(MI); ++NumLdSts; return true; }
bool Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, const ReduceEntry &Entry, bool LiveCPSR, bool IsSelfLoop) { unsigned Opc = MI->getOpcode(); if (Opc == ARM::t2ADDri) { // If the source register is SP, try to reduce to tADDrSPi, otherwise // it's a normal reduce. if (MI->getOperand(1).getReg() != ARM::SP) { if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop)) return true; return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop); } // Try to reduce to tADDrSPi. unsigned Imm = MI->getOperand(2).getImm(); // The immediate must be in range, the destination register must be a low // reg, the predicate must be "always" and the condition flags must not // be being set. if (Imm & 3 || Imm > 1020) return false; if (!isARMLowRegister(MI->getOperand(0).getReg())) return false; if (MI->getOperand(3).getImm() != ARMCC::AL) return false; const MCInstrDesc &MCID = MI->getDesc(); if (MCID.hasOptionalDef() && MI->getOperand(MCID.getNumOperands()-1).getReg() == ARM::CPSR) return false; MachineInstrBuilder MIB = BuildMI(MBB, MI, MI->getDebugLoc(), TII->get(ARM::tADDrSPi)) .addOperand(MI->getOperand(0)) .addOperand(MI->getOperand(1)) .addImm(Imm / 4); // The tADDrSPi has an implied scale by four. AddDefaultPred(MIB); // Transfer MI flags. MIB.setMIFlags(MI->getFlags()); DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " <<*MIB); MBB.erase_instr(MI); ++NumNarrows; return true; } if (Entry.LowRegs1 && !VerifyLowRegs(MI)) return false; if (MI->mayLoad() || MI->mayStore()) return ReduceLoadStore(MBB, MI, Entry); switch (Opc) { default: break; case ARM::t2ADDSri: case ARM::t2ADDSrr: { unsigned PredReg = 0; if (getInstrPredicate(MI, PredReg) == ARMCC::AL) { switch (Opc) { default: break; case ARM::t2ADDSri: { if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop)) return true; // fallthrough } case ARM::t2ADDSrr: return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop); } } break; } case ARM::t2RSBri: case ARM::t2RSBSri: case ARM::t2SXTB: case ARM::t2SXTH: case ARM::t2UXTB: case ARM::t2UXTH: if (MI->getOperand(2).getImm() == 0) return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop); break; case ARM::t2MOVi16: // Can convert only 'pure' immediate operands, not immediates obtained as // globals' addresses. if (MI->getOperand(1).isImm()) return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop); break; case ARM::t2CMPrr: { // Try to reduce to the lo-reg only version first. Why there are two // versions of the instruction is a mystery. // It would be nice to just have two entries in the master table that // are prioritized, but the table assumes a unique entry for each // source insn opcode. So for now, we hack a local entry record to use. static const ReduceEntry NarrowEntry = { ARM::t2CMPrr,ARM::tCMPr, 0, 0, 0, 1, 1,2, 0, 0,1,0 }; if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, IsSelfLoop)) return true; return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop); } } return false; }
bool Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, const ReduceEntry &Entry) { if (ReduceLimitLdSt != -1 && ((int)NumLdSts >= ReduceLimitLdSt)) return false; unsigned Scale = 1; bool HasImmOffset = false; bool HasShift = false; bool HasOffReg = true; bool isLdStMul = false; unsigned Opc = Entry.NarrowOpc1; unsigned OpNum = 3; // First 'rest' of operands. uint8_t ImmLimit = Entry.Imm1Limit; switch (Entry.WideOpc) { default: llvm_unreachable("Unexpected Thumb2 load / store opcode!"); case ARM::t2LDRi12: case ARM::t2STRi12: if (MI->getOperand(1).getReg() == ARM::SP) { Opc = Entry.NarrowOpc2; ImmLimit = Entry.Imm2Limit; } Scale = 4; HasImmOffset = true; HasOffReg = false; break; case ARM::t2LDRBi12: case ARM::t2STRBi12: HasImmOffset = true; HasOffReg = false; break; case ARM::t2LDRHi12: case ARM::t2STRHi12: Scale = 2; HasImmOffset = true; HasOffReg = false; break; case ARM::t2LDRs: case ARM::t2LDRBs: case ARM::t2LDRHs: case ARM::t2LDRSBs: case ARM::t2LDRSHs: case ARM::t2STRs: case ARM::t2STRBs: case ARM::t2STRHs: HasShift = true; OpNum = 4; break; case ARM::t2LDR_POST: case ARM::t2STR_POST: { if (!MBB.getParent()->getFunction().optForMinSize()) return false; if (!MI->hasOneMemOperand() || (*MI->memoperands_begin())->getAlignment() < 4) return false; // We're creating a completely different type of load/store - LDM from LDR. // For this reason we can't reuse the logic at the end of this function; we // have to implement the MI building here. bool IsStore = Entry.WideOpc == ARM::t2STR_POST; unsigned Rt = MI->getOperand(IsStore ? 1 : 0).getReg(); unsigned Rn = MI->getOperand(IsStore ? 0 : 1).getReg(); unsigned Offset = MI->getOperand(3).getImm(); unsigned PredImm = MI->getOperand(4).getImm(); unsigned PredReg = MI->getOperand(5).getReg(); assert(isARMLowRegister(Rt)); assert(isARMLowRegister(Rn)); if (Offset != 4) return false; // Add the 16-bit load / store instruction. DebugLoc dl = MI->getDebugLoc(); auto MIB = BuildMI(MBB, MI, dl, TII->get(Entry.NarrowOpc1)) .addReg(Rn, RegState::Define) .addReg(Rn) .addImm(PredImm) .addReg(PredReg) .addReg(Rt, IsStore ? 0 : RegState::Define); // Transfer memoperands. MIB.setMemRefs(MI->memoperands()); // Transfer MI flags. MIB.setMIFlags(MI->getFlags()); // Kill the old instruction. MI->eraseFromBundle(); ++NumLdSts; return true; } case ARM::t2LDMIA: { unsigned BaseReg = MI->getOperand(0).getReg(); assert(isARMLowRegister(BaseReg)); // For the non-writeback version (this one), the base register must be // one of the registers being loaded. bool isOK = false; for (unsigned i = 3; i < MI->getNumOperands(); ++i) { if (MI->getOperand(i).getReg() == BaseReg) { isOK = true; break; } } if (!isOK) return false; OpNum = 0; isLdStMul = true; break; } case ARM::t2STMIA: // If the base register is killed, we don't care what its value is after the // instruction, so we can use an updating STMIA. if (!MI->getOperand(0).isKill()) return false; break; case ARM::t2LDMIA_RET: { unsigned BaseReg = MI->getOperand(1).getReg(); if (BaseReg != ARM::SP) return false; Opc = Entry.NarrowOpc2; // tPOP_RET OpNum = 2; isLdStMul = true; break; } case ARM::t2LDMIA_UPD: case ARM::t2STMIA_UPD: case ARM::t2STMDB_UPD: { OpNum = 0; unsigned BaseReg = MI->getOperand(1).getReg(); if (BaseReg == ARM::SP && (Entry.WideOpc == ARM::t2LDMIA_UPD || Entry.WideOpc == ARM::t2STMDB_UPD)) { Opc = Entry.NarrowOpc2; // tPOP or tPUSH OpNum = 2; } else if (!isARMLowRegister(BaseReg) || (Entry.WideOpc != ARM::t2LDMIA_UPD && Entry.WideOpc != ARM::t2STMIA_UPD)) { return false; } isLdStMul = true; break; } } unsigned OffsetReg = 0; bool OffsetKill = false; bool OffsetInternal = false; if (HasShift) { OffsetReg = MI->getOperand(2).getReg(); OffsetKill = MI->getOperand(2).isKill(); OffsetInternal = MI->getOperand(2).isInternalRead(); if (MI->getOperand(3).getImm()) // Thumb1 addressing mode doesn't support shift. return false; } unsigned OffsetImm = 0; if (HasImmOffset) { OffsetImm = MI->getOperand(2).getImm(); unsigned MaxOffset = ((1 << ImmLimit) - 1) * Scale; if ((OffsetImm & (Scale - 1)) || OffsetImm > MaxOffset) // Make sure the immediate field fits. return false; } // Add the 16-bit load / store instruction. DebugLoc dl = MI->getDebugLoc(); MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, TII->get(Opc)); // tSTMIA_UPD takes a defining register operand. We've already checked that // the register is killed, so mark it as dead here. if (Entry.WideOpc == ARM::t2STMIA) MIB.addReg(MI->getOperand(0).getReg(), RegState::Define | RegState::Dead); if (!isLdStMul) { MIB.add(MI->getOperand(0)); MIB.add(MI->getOperand(1)); if (HasImmOffset) MIB.addImm(OffsetImm / Scale); assert((!HasShift || OffsetReg) && "Invalid so_reg load / store address!"); if (HasOffReg) MIB.addReg(OffsetReg, getKillRegState(OffsetKill) | getInternalReadRegState(OffsetInternal)); } // Transfer the rest of operands. for (unsigned e = MI->getNumOperands(); OpNum != e; ++OpNum) MIB.add(MI->getOperand(OpNum)); // Transfer memoperands. MIB.setMemRefs(MI->memoperands()); // Transfer MI flags. MIB.setMIFlags(MI->getFlags()); LLVM_DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB); MBB.erase_instr(MI); ++NumLdSts; return true; }