bool X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) { // Check that this particular call sequence is amenable to the // transformation. const X86RegisterInfo &RegInfo = *static_cast<const X86RegisterInfo *>( MF.getSubtarget().getRegisterInfo()); unsigned StackPtr = RegInfo.getStackRegister(); int FrameDestroyOpcode = TII->getCallFrameDestroyOpcode(); // We expect to enter this at the beginning of a call sequence assert(I->getOpcode() == TII->getCallFrameSetupOpcode()); MachineBasicBlock::iterator FrameSetup = I++; // For globals in PIC mode, we can have some LEAs here. // Ignore them, they don't bother us. // TODO: Extend this to something that covers more cases. while (I->getOpcode() == X86::LEA32r) ++I; // We expect a copy instruction here. // TODO: The copy instruction is a lowering artifact. // We should also support a copy-less version, where the stack // pointer is used directly. if (!I->isCopy() || !I->getOperand(0).isReg()) return false; MachineBasicBlock::iterator SPCopy = I++; StackPtr = SPCopy->getOperand(0).getReg(); // Scan the call setup sequence for the pattern we're looking for. // We only handle a simple case - a sequence of MOV32mi or MOV32mr // instructions, that push a sequence of 32-bit values onto the stack, with // no gaps between them. SmallVector<MachineInstr*, 4> MovVector(4, nullptr); unsigned int MaxAdjust = FrameSetup->getOperand(0).getImm() / 4; if (MaxAdjust > 4) MovVector.resize(MaxAdjust, nullptr); do { int Opcode = I->getOpcode(); if (Opcode != X86::MOV32mi && Opcode != X86::MOV32mr) break; // We only want movs of the form: // movl imm/r32, k(%esp) // If we run into something else, bail. // Note that AddrBaseReg may, counter to its name, not be a register, // but rather a frame index. // TODO: Support the fi case. This should probably work now that we // have the infrastructure to track the stack pointer within a call // sequence. if (!I->getOperand(X86::AddrBaseReg).isReg() || (I->getOperand(X86::AddrBaseReg).getReg() != StackPtr) || !I->getOperand(X86::AddrScaleAmt).isImm() || (I->getOperand(X86::AddrScaleAmt).getImm() != 1) || (I->getOperand(X86::AddrIndexReg).getReg() != X86::NoRegister) || (I->getOperand(X86::AddrSegmentReg).getReg() != X86::NoRegister) || !I->getOperand(X86::AddrDisp).isImm()) return false; int64_t StackDisp = I->getOperand(X86::AddrDisp).getImm(); assert(StackDisp >= 0 && "Negative stack displacement when passing parameters"); // We really don't want to consider the unaligned case. if (StackDisp % 4) return false; StackDisp /= 4; assert((size_t)StackDisp < MovVector.size() && "Function call has more parameters than the stack is adjusted for."); // If the same stack slot is being filled twice, something's fishy. if (MovVector[StackDisp] != nullptr) return false; MovVector[StackDisp] = I; ++I; } while (I != MBB.end()); // We now expect the end of the sequence - a call and a stack adjust. if (I == MBB.end()) return false; // For PCrel calls, we expect an additional COPY of the basereg. // If we find one, skip it. if (I->isCopy()) { if (I->getOperand(1).getReg() == MF.getInfo<X86MachineFunctionInfo>()->getGlobalBaseReg()) ++I; else return false; } if (!I->isCall()) return false; MachineBasicBlock::iterator Call = I; if ((++I)->getOpcode() != FrameDestroyOpcode) return false; // Now, go through the vector, and see that we don't have any gaps, // but only a series of 32-bit MOVs. int64_t ExpectedDist = 0; auto MMI = MovVector.begin(), MME = MovVector.end(); for (; MMI != MME; ++MMI, ExpectedDist += 4) if (*MMI == nullptr) break; // If the call had no parameters, do nothing if (!ExpectedDist) return false; // We are either at the last parameter, or a gap. // Make sure it's not a gap for (; MMI != MME; ++MMI) if (*MMI != nullptr) return false; // Ok, we can in fact do the transformation for this call. // Do not remove the FrameSetup instruction, but adjust the parameters. // PEI will end up finalizing the handling of this. FrameSetup->getOperand(1).setImm(ExpectedDist); DebugLoc DL = I->getDebugLoc(); // Now, iterate through the vector in reverse order, and replace the movs // with pushes. MOVmi/MOVmr doesn't have any defs, so no need to // replace uses. for (int Idx = (ExpectedDist / 4) - 1; Idx >= 0; --Idx) { MachineBasicBlock::iterator MOV = *MovVector[Idx]; MachineOperand PushOp = MOV->getOperand(X86::AddrNumOperands); if (MOV->getOpcode() == X86::MOV32mi) { unsigned PushOpcode = X86::PUSHi32; // If the operand is a small (8-bit) immediate, we can use a // PUSH instruction with a shorter encoding. // Note that isImm() may fail even though this is a MOVmi, because // the operand can also be a symbol. if (PushOp.isImm()) { int64_t Val = PushOp.getImm(); if (isInt<8>(Val)) PushOpcode = X86::PUSH32i8; } BuildMI(MBB, Call, DL, TII->get(PushOpcode)).addOperand(PushOp); } else { unsigned int Reg = PushOp.getReg(); // If PUSHrmm is not slow on this target, try to fold the source of the // push into the instruction. const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>(); bool SlowPUSHrmm = ST.isAtom() || ST.isSLM(); // Check that this is legal to fold. Right now, we're extremely // conservative about that. MachineInstr *DefMov = nullptr; if (!SlowPUSHrmm && (DefMov = canFoldIntoRegPush(FrameSetup, Reg))) { MachineInstr *Push = BuildMI(MBB, Call, DL, TII->get(X86::PUSH32rmm)); unsigned NumOps = DefMov->getDesc().getNumOperands(); for (unsigned i = NumOps - X86::AddrNumOperands; i != NumOps; ++i) Push->addOperand(DefMov->getOperand(i)); DefMov->eraseFromParent(); } else { BuildMI(MBB, Call, DL, TII->get(X86::PUSH32r)).addReg(Reg).getInstr(); } } MBB.erase(MOV); } // The stack-pointer copy is no longer used in the call sequences. // There should not be any other users, but we can't commit to that, so: if (MRI->use_empty(SPCopy->getOperand(0).getReg())) SPCopy->eraseFromParent(); // Once we've done this, we need to make sure PEI doesn't assume a reserved // frame. X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); FuncInfo->setHasPushSequences(true); return true; }
void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I, CallContext &Context) { // Check that this particular call sequence is amenable to the // transformation. const X86RegisterInfo &RegInfo = *static_cast<const X86RegisterInfo *>( STI->getRegisterInfo()); unsigned FrameDestroyOpcode = TII->getCallFrameDestroyOpcode(); // We expect to enter this at the beginning of a call sequence assert(I->getOpcode() == TII->getCallFrameSetupOpcode()); MachineBasicBlock::iterator FrameSetup = I++; Context.FrameSetup = FrameSetup; // How much do we adjust the stack? This puts an upper bound on // the number of parameters actually passed on it. unsigned int MaxAdjust = FrameSetup->getOperand(0).getImm() / 4; // A zero adjustment means no stack parameters if (!MaxAdjust) { Context.NoStackParams = true; return; } // For globals in PIC mode, we can have some LEAs here. // Ignore them, they don't bother us. // TODO: Extend this to something that covers more cases. while (I->getOpcode() == X86::LEA32r) ++I; // We expect a copy instruction here. // TODO: The copy instruction is a lowering artifact. // We should also support a copy-less version, where the stack // pointer is used directly. if (!I->isCopy() || !I->getOperand(0).isReg()) return; Context.SPCopy = I++; unsigned StackPtr = Context.SPCopy->getOperand(0).getReg(); // Scan the call setup sequence for the pattern we're looking for. // We only handle a simple case - a sequence of MOV32mi or MOV32mr // instructions, that push a sequence of 32-bit values onto the stack, with // no gaps between them. if (MaxAdjust > 4) Context.MovVector.resize(MaxAdjust, nullptr); InstClassification Classification; DenseSet<unsigned int> UsedRegs; while ((Classification = classifyInstruction(MBB, I, RegInfo, UsedRegs)) != Exit) { if (Classification == Skip) { ++I; continue; } // We know the instruction is a MOV32mi/MOV32mr. // We only want movs of the form: // movl imm/r32, k(%esp) // If we run into something else, bail. // Note that AddrBaseReg may, counter to its name, not be a register, // but rather a frame index. // TODO: Support the fi case. This should probably work now that we // have the infrastructure to track the stack pointer within a call // sequence. if (!I->getOperand(X86::AddrBaseReg).isReg() || (I->getOperand(X86::AddrBaseReg).getReg() != StackPtr) || !I->getOperand(X86::AddrScaleAmt).isImm() || (I->getOperand(X86::AddrScaleAmt).getImm() != 1) || (I->getOperand(X86::AddrIndexReg).getReg() != X86::NoRegister) || (I->getOperand(X86::AddrSegmentReg).getReg() != X86::NoRegister) || !I->getOperand(X86::AddrDisp).isImm()) return; int64_t StackDisp = I->getOperand(X86::AddrDisp).getImm(); assert(StackDisp >= 0 && "Negative stack displacement when passing parameters"); // We really don't want to consider the unaligned case. if (StackDisp % 4) return; StackDisp /= 4; assert((size_t)StackDisp < Context.MovVector.size() && "Function call has more parameters than the stack is adjusted for."); // If the same stack slot is being filled twice, something's fishy. if (Context.MovVector[StackDisp] != nullptr) return; Context.MovVector[StackDisp] = I; for (const MachineOperand &MO : I->uses()) { if (!MO.isReg()) continue; unsigned int Reg = MO.getReg(); if (RegInfo.isPhysicalRegister(Reg)) UsedRegs.insert(Reg); } ++I; } // We now expect the end of the sequence. If we stopped early, // or reached the end of the block without finding a call, bail. if (I == MBB.end() || !I->isCall()) return; Context.Call = I; if ((++I)->getOpcode() != FrameDestroyOpcode) return; // Now, go through the vector, and see that we don't have any gaps, // but only a series of 32-bit MOVs. auto MMI = Context.MovVector.begin(), MME = Context.MovVector.end(); for (; MMI != MME; ++MMI, Context.ExpectedDist += 4) if (*MMI == nullptr) break; // If the call had no parameters, do nothing if (MMI == Context.MovVector.begin()) return; // We are either at the last parameter, or a gap. // Make sure it's not a gap for (; MMI != MME; ++MMI) if (*MMI != nullptr) return; Context.UsePush = true; }
bool GCMachineCodeFixup::runOnMachineFunction(MachineFunction &MF) { // Quick exit for functions that do not use GC. if (!MF.getFunction()->hasGC()) return false; const TargetMachine &TM = MF.getTarget(); const TargetInstrInfo *TII = TM.getInstrInfo(); GCModuleInfo &GMI = getAnalysis<GCModuleInfo>(); GCFunctionInfo &GCFI = GMI.getFunctionInfo(*MF.getFunction()); for (MachineFunction::iterator MBBI = MF.begin(), MBBE = MF.end(); MBBI != MBBE; ++MBBI) { for (MachineBasicBlock::iterator MII = MBBI->begin(), MIE = MBBI->end(); MII != MIE;) { if (!MII->isGCRegRoot() || !MII->getOperand(0).isReg()) { ++MII; continue; } // Trace the register back to its location at the site of the call (either // a physical reg or a frame index). bool TracingReg = true; unsigned TracedReg = MII->getOperand(0).getReg(); int FrameIndex; MachineBasicBlock::iterator PrevII = MII; for (--PrevII;; --PrevII) { if (PrevII->isGCRegRoot() && PrevII->getOperand(0).isReg()) break; if (PrevII->isCall()) break; int FI; // Trace back through register reloads. unsigned Reg = TM.getInstrInfo()->isLoadFromStackSlotPostFE(&*PrevII, FI); if (Reg) { // This is a reload. If we're tracing this register, start tracing the // frame index instead. if (TracingReg && TracedReg == Reg) { TracingReg = false; FrameIndex = FI; } continue; } // Trace back through spills. if (TM.getInstrInfo()->isStoreToStackSlotPostFE(&*PrevII, FI)) continue; // Trace back through register-to-register copies. if (PrevII->isCopy()) { if (TracingReg && TracedReg == PrevII->getOperand(0).getReg()) TracedReg = PrevII->getOperand(1).getReg(); continue; } // Trace back through non-register GC_REG_ROOT instructions. if (PrevII->isGCRegRoot() && !PrevII->getOperand(0).isReg()) continue; DEBUG(dbgs() << "Bad instruction: " << *PrevII); llvm_unreachable("GC_REG_ROOT found in an unexpected location!"); } // Now we've reached either a call or another GC_REG_ROOT instruction. // Move the GC_REG_ROOT instruction we're considering to the right place, // and rewrite it if necessary. // // Also, tell the GCFunctionInfo about the frame index, since this is // our only chance -- the frame indices will be deleted by the time // GCMachineCodeAnalysis runs. ++PrevII; unsigned RootIndex = MII->getOperand(1).getImm(); MachineInstr *NewMI; if (TracingReg) { MachineInstrBuilder MIB = BuildMI(MF, MII->getDebugLoc(), TII->get(TargetOpcode::GC_REG_ROOT)); MIB.addReg(TracedReg).addImm(RootIndex); NewMI = MIB; } else { NewMI = TII->emitFrameIndexGCRegRoot(MF, FrameIndex, RootIndex, MII->getDebugLoc()); GCFI.spillRegRoot(RootIndex, FrameIndex); } MBBI->insert(PrevII, NewMI); MachineBasicBlock::iterator NextII = MII; ++NextII; MII->eraseFromParent(); MII = NextII; } } return true; }