SlowPathCallContext::SlowPathCallContext(
    RegisterSet usedRegisters, CCallHelpers& jit, unsigned numArgs, GPRReg returnRegister)
    : m_jit(jit)
    , m_numArgs(numArgs)
    , m_returnRegister(returnRegister)
{
    // We don't care that you're using callee-save, stack, or hardware registers.
    usedRegisters.exclude(RegisterSet::stackRegisters());
    usedRegisters.exclude(RegisterSet::reservedHardwareRegisters());
    usedRegisters.exclude(RegisterSet::calleeSaveRegisters());

    // The return register doesn't need to be saved.
    if (m_returnRegister != InvalidGPRReg)
        usedRegisters.clear(m_returnRegister);

    size_t stackBytesNeededForReturnAddress = wordSize;

    m_offsetToSavingArea =
        (std::max(m_numArgs, NUMBER_OF_ARGUMENT_REGISTERS) - NUMBER_OF_ARGUMENT_REGISTERS) * wordSize;

    for (unsigned i = std::min(NUMBER_OF_ARGUMENT_REGISTERS, numArgs); i--;)
        m_argumentRegisters.set(GPRInfo::toArgumentRegister(i));
    m_callingConventionRegisters.merge(m_argumentRegisters);
    if (returnRegister != InvalidGPRReg)
        m_callingConventionRegisters.set(GPRInfo::returnValueGPR);
    m_callingConventionRegisters.filter(usedRegisters);

    unsigned numberOfCallingConventionRegisters =
        m_callingConventionRegisters.numberOfSetRegisters();

    size_t offsetToThunkSavingArea =
        m_offsetToSavingArea +
        numberOfCallingConventionRegisters * wordSize;

    m_stackBytesNeeded =
        offsetToThunkSavingArea +
        stackBytesNeededForReturnAddress +
        (usedRegisters.numberOfSetRegisters() - numberOfCallingConventionRegisters) * wordSize;

    m_stackBytesNeeded = (m_stackBytesNeeded + stackAlignmentBytes() - 1) & ~(stackAlignmentBytes() - 1);

    m_jit.subPtr(CCallHelpers::TrustedImm32(m_stackBytesNeeded), CCallHelpers::stackPointerRegister);

    m_thunkSaveSet = usedRegisters;

    // This relies on all calling convention registers also being temp registers.
    unsigned stackIndex = 0;
    for (unsigned i = GPRInfo::numberOfRegisters; i--;) {
        GPRReg reg = GPRInfo::toRegister(i);
        if (!m_callingConventionRegisters.get(reg))
            continue;
        m_jit.storePtr(reg, CCallHelpers::Address(CCallHelpers::stackPointerRegister, m_offsetToSavingArea + (stackIndex++) * wordSize));
        m_thunkSaveSet.clear(reg);
    }

    m_offset = offsetToThunkSavingArea;
}
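// The expression `(m_stackBytesNeeded + stackAlignmentBytes() - 1) & ~(stackAlignmentBytes() - 1)`
// above is the usual power-of-two round-up. A minimal, standalone sketch of that identity
// (illustration only, not part of the WebKit sources):
#include <cassert>
#include <cstddef>

static size_t roundUpToAlignment(size_t bytes, size_t alignment)
{
    // Only valid when alignment is a power of two: adding (alignment - 1) and then clearing
    // the low bits rounds bytes up to the next multiple of alignment.
    assert((alignment & (alignment - 1)) == 0);
    return (bytes + alignment - 1) & ~(alignment - 1);
}

static void roundUpToAlignmentExamples()
{
    assert(roundUpToAlignment(0, 16) == 0);
    assert(roundUpToAlignment(1, 16) == 16);
    assert(roundUpToAlignment(16, 16) == 16);
    assert(roundUpToAlignment(17, 16) == 32);
}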
void AssemblyHelpers::callExceptionFuzz()
{
    if (!Options::enableExceptionFuzz())
        return;

    ASSERT(stackAlignmentBytes() >= sizeof(void*) * 2);
    subPtr(TrustedImm32(stackAlignmentBytes()), stackPointerRegister);
    poke(GPRInfo::returnValueGPR, 0);
    poke(GPRInfo::returnValueGPR2, 1);
    move(TrustedImmPtr(bitwise_cast<void*>(operationExceptionFuzz)), GPRInfo::nonPreservedNonReturnGPR);
    call(GPRInfo::nonPreservedNonReturnGPR);
    peek(GPRInfo::returnValueGPR, 0);
    peek(GPRInfo::returnValueGPR2, 1);
    addPtr(TrustedImm32(stackAlignmentBytes()), stackPointerRegister);
}
void JIT::compileSetupVarargsFrame(Instruction* instruction, CallLinkInfo* info)
{
    int thisValue = instruction[3].u.operand;
    int arguments = instruction[4].u.operand;
    int firstFreeRegister = instruction[5].u.operand;
    int firstVarArgOffset = instruction[6].u.operand;

    emitLoad(arguments, regT1, regT0);
    callOperation(operationSizeFrameForVarargs, regT1, regT0, -firstFreeRegister, firstVarArgOffset);
    move(TrustedImm32(-firstFreeRegister), regT1);
    emitSetVarargsFrame(*this, returnValueGPR, false, regT1, regT1);
    addPtr(TrustedImm32(-(sizeof(CallerFrameAndPC) + WTF::roundUpToMultipleOf(stackAlignmentBytes(), 6 * sizeof(void*)))), regT1, stackPointerRegister);
    emitLoad(arguments, regT2, regT4);
    callOperation(operationSetupVarargsFrame, regT1, regT2, regT4, firstVarArgOffset, regT0);
    move(returnValueGPR, regT1);

    // Profile the argument count.
    load32(Address(regT1, JSStack::ArgumentCount * static_cast<int>(sizeof(Register)) + PayloadOffset), regT2);
    load8(info->addressOfMaxNumArguments(), regT0);
    Jump notBiggest = branch32(Above, regT0, regT2);
    Jump notSaturated = branch32(BelowOrEqual, regT2, TrustedImm32(255));
    move(TrustedImm32(255), regT2);
    notSaturated.link(this);
    store8(regT2, info->addressOfMaxNumArguments());
    notBiggest.link(this);

    // Initialize 'this'.
    emitLoad(thisValue, regT2, regT0);
    store32(regT0, Address(regT1, PayloadOffset + (CallFrame::thisArgumentOffset() * static_cast<int>(sizeof(Register)))));
    store32(regT2, Address(regT1, TagOffset + (CallFrame::thisArgumentOffset() * static_cast<int>(sizeof(Register)))));

    addPtr(TrustedImm32(sizeof(CallerFrameAndPC)), regT1, stackPointerRegister);
}
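// A hypothetical plain-C++ rendering (not from the sources) of the "profile the argument count"
// sequence emitted above: keep a running maximum of the argument count in an 8-bit slot,
// saturating at 255 exactly as the notBiggest/notSaturated branches do.
#include <algorithm>
#include <cstdint>

static void updateMaxNumArguments(uint8_t& maxNumArguments, uint32_t argumentCount)
{
    if (maxNumArguments >= argumentCount)
        return; // "notBiggest": the recorded maximum already covers this call.
    // "notSaturated" falls through only when argumentCount <= 255; otherwise clamp to 255
    // so the value still fits in the 8-bit profiling slot.
    maxNumArguments = static_cast<uint8_t>(std::min<uint32_t>(argumentCount, 255));
}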
void ScratchRegisterAllocator::restoreRegistersFromStackForCall(MacroAssembler& jit, const RegisterSet& usedRegisters, const RegisterSet& ignore, unsigned numberOfStackBytesUsedForRegisterPreservation, unsigned extraBytesAtTopOfStack)
{
    RELEASE_ASSERT(extraBytesAtTopOfStack % sizeof(void*) == 0);
    if (!usedRegisters.numberOfSetRegisters()) {
        RELEASE_ASSERT(numberOfStackBytesUsedForRegisterPreservation == 0);
        return;
    }

    unsigned count = 0;
    for (GPRReg reg = MacroAssembler::firstRegister(); reg <= MacroAssembler::lastRegister(); reg = MacroAssembler::nextRegister(reg)) {
        if (usedRegisters.get(reg)) {
            if (!ignore.get(reg))
                jit.loadPtr(MacroAssembler::Address(MacroAssembler::stackPointerRegister, extraBytesAtTopOfStack + (sizeof(EncodedJSValue) * count)), reg);
            count++;
        }
    }
    for (FPRReg reg = MacroAssembler::firstFPRegister(); reg <= MacroAssembler::lastFPRegister(); reg = MacroAssembler::nextFPRegister(reg)) {
        if (usedRegisters.get(reg)) {
            if (!ignore.get(reg))
                jit.loadDouble(MacroAssembler::Address(MacroAssembler::stackPointerRegister, extraBytesAtTopOfStack + (sizeof(EncodedJSValue) * count)), reg);
            count++;
        }
    }

    unsigned stackOffset = (usedRegisters.numberOfSetRegisters()) * sizeof(EncodedJSValue);
    stackOffset += extraBytesAtTopOfStack;
    stackOffset = WTF::roundUpToMultipleOf(stackAlignmentBytes(), stackOffset);

    RELEASE_ASSERT(count == usedRegisters.numberOfSetRegisters());
    RELEASE_ASSERT(stackOffset == numberOfStackBytesUsedForRegisterPreservation);

    jit.addPtr(
        MacroAssembler::TrustedImm32(stackOffset),
        MacroAssembler::stackPointerRegister);
}
unsigned ScratchRegisterAllocator::preserveRegistersToStackForCall(MacroAssembler& jit, const RegisterSet& usedRegisters, unsigned extraBytesAtTopOfStack)
{
    RELEASE_ASSERT(extraBytesAtTopOfStack % sizeof(void*) == 0);
    if (!usedRegisters.numberOfSetRegisters())
        return 0;

    unsigned stackOffset = (usedRegisters.numberOfSetRegisters()) * sizeof(EncodedJSValue);
    stackOffset += extraBytesAtTopOfStack;
    stackOffset = WTF::roundUpToMultipleOf(stackAlignmentBytes(), stackOffset);
    jit.subPtr(
        MacroAssembler::TrustedImm32(stackOffset),
        MacroAssembler::stackPointerRegister);

    unsigned count = 0;
    for (GPRReg reg = MacroAssembler::firstRegister(); reg <= MacroAssembler::lastRegister(); reg = MacroAssembler::nextRegister(reg)) {
        if (usedRegisters.get(reg)) {
            jit.storePtr(reg, MacroAssembler::Address(MacroAssembler::stackPointerRegister, extraBytesAtTopOfStack + (count * sizeof(EncodedJSValue))));
            count++;
        }
    }
    for (FPRReg reg = MacroAssembler::firstFPRegister(); reg <= MacroAssembler::lastFPRegister(); reg = MacroAssembler::nextFPRegister(reg)) {
        if (usedRegisters.get(reg)) {
            jit.storeDouble(reg, MacroAssembler::Address(MacroAssembler::stackPointerRegister, extraBytesAtTopOfStack + (count * sizeof(EncodedJSValue))));
            count++;
        }
    }

    RELEASE_ASSERT(count == usedRegisters.numberOfSetRegisters());

    return stackOffset;
}
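// A hypothetical usage sketch (not from the sources) showing how the two helpers above are
// intended to pair up around a call: the byte count returned by preserveRegistersToStackForCall()
// is passed back into restoreRegistersFromStackForCall() so its consistency assertions hold.
// The function name and the empty "ignore" set are illustrative assumptions.
static void emitCallPreservingRegisters(MacroAssembler& jit, const RegisterSet& usedRegisters)
{
    const unsigned extraBytesAtTopOfStack = 0;
    unsigned bytesPreserved = ScratchRegisterAllocator::preserveRegistersToStackForCall(
        jit, usedRegisters, extraBytesAtTopOfStack);

    // ... emit the call here; every register in usedRegisters is saved on the stack ...

    RegisterSet ignore; // Registers whose saved values are not reloaded, e.g. a result register.
    ScratchRegisterAllocator::restoreRegistersFromStackForCall(
        jit, usedRegisters, ignore, bytesPreserved, extraBytesAtTopOfStack);
}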
MacroAssemblerCodeRef baselineSetterReturnThunkGenerator(VM* vm)
{
    JSInterfaceJIT jit(vm);

    unsigned numberOfParameters = 0;
    numberOfParameters++; // The 'this' argument.
    numberOfParameters++; // The value to set.
    numberOfParameters++; // The true return PC.

    unsigned numberOfRegsForCall =
        JSStack::CallFrameHeaderSize + numberOfParameters;

    unsigned numberOfBytesForCall =
        numberOfRegsForCall * sizeof(Register) - sizeof(CallerFrameAndPC);

    unsigned alignedNumberOfBytesForCall =
        WTF::roundUpToMultipleOf(stackAlignmentBytes(), numberOfBytesForCall);

    // The real return address is stored above the arguments. We passed two arguments, so
    // the argument at index 2 is the return address.
    jit.loadPtr(
        AssemblyHelpers::Address(
            AssemblyHelpers::stackPointerRegister,
            (virtualRegisterForArgument(2).offset() - JSStack::CallerFrameAndPCSize) * sizeof(Register)),
        GPRInfo::regT2);

    jit.addPtr(
        AssemblyHelpers::TrustedImm32(alignedNumberOfBytesForCall),
        AssemblyHelpers::stackPointerRegister);

    jit.jump(GPRInfo::regT2);

    LinkBuffer patchBuffer(*vm, jit, GLOBAL_THUNK_ID);
    return FINALIZE_CODE(patchBuffer, ("baseline setter return thunk"));
}
void JIT::compileSetupVarargsFrame(Instruction* instruction, CallLinkInfo* info)
{
    int thisValue = instruction[3].u.operand;
    int arguments = instruction[4].u.operand;
    int firstFreeRegister = instruction[5].u.operand;
    int firstVarArgOffset = instruction[6].u.operand;

    JumpList slowCase;
    JumpList end;
    bool canOptimize = m_codeBlock->usesArguments()
        && arguments == m_codeBlock->argumentsRegister().offset()
        && !m_codeBlock->symbolTable()->slowArguments();

    if (canOptimize) {
        emitGetVirtualRegister(arguments, regT0);
        slowCase.append(branch64(NotEqual, regT0, TrustedImm64(JSValue::encode(JSValue()))));

        move(TrustedImm32(-firstFreeRegister), regT1);
        emitSetupVarargsFrameFastCase(*this, regT1, regT0, regT1, regT2, firstVarArgOffset, slowCase);
        end.append(jump());
        slowCase.link(this);
    }

    emitGetVirtualRegister(arguments, regT1);
    callOperation(operationSizeFrameForVarargs, regT1, -firstFreeRegister, firstVarArgOffset);
    move(TrustedImm32(-firstFreeRegister), regT1);
    emitSetVarargsFrame(*this, returnValueGPR, false, regT1, regT1);
    addPtr(TrustedImm32(-(sizeof(CallerFrameAndPC) + WTF::roundUpToMultipleOf(stackAlignmentBytes(), 5 * sizeof(void*)))), regT1, stackPointerRegister);
    emitGetVirtualRegister(arguments, regT2);
    callOperation(operationSetupVarargsFrame, regT1, regT2, firstVarArgOffset, regT0);
    move(returnValueGPR, regT1);

    if (canOptimize)
        end.link(this);

    // Profile the argument count.
    load32(Address(regT1, JSStack::ArgumentCount * static_cast<int>(sizeof(Register)) + PayloadOffset), regT2);
    load8(&info->maxNumArguments, regT0);
    Jump notBiggest = branch32(Above, regT0, regT2);
    Jump notSaturated = branch32(BelowOrEqual, regT2, TrustedImm32(255));
    move(TrustedImm32(255), regT2);
    notSaturated.link(this);
    store8(regT2, &info->maxNumArguments);
    notBiggest.link(this);

    // Initialize 'this'.
    emitGetVirtualRegister(thisValue, regT0);
    store64(regT0, Address(regT1, CallFrame::thisArgumentOffset() * static_cast<int>(sizeof(Register))));

    addPtr(TrustedImm32(sizeof(CallerFrameAndPC)), regT1, stackPointerRegister);
}
Vector<Arg> computeCCallingConvention(Code& code, CCallValue* value)
{
    Vector<Arg> result;
    result.append(Tmp(CCallSpecial::scratchRegister));
    unsigned gpArgumentCount = 0;
    unsigned fpArgumentCount = 0;
    unsigned stackOffset = 0;
    for (unsigned i = 1; i < value->numChildren(); ++i) {
        result.append(
            marshallCCallArgument(gpArgumentCount, fpArgumentCount, stackOffset, value->child(i)));
    }
    code.requestCallArgAreaSizeInBytes(WTF::roundUpToMultipleOf(stackAlignmentBytes(), stackOffset));
    return result;
}
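// A simplified, hypothetical sketch of the per-argument marshalling that computeCCallingConvention()
// delegates to marshallCCallArgument(): hand out GP and FP argument registers until they run out,
// then fall back to 8-byte slots in the outgoing call-argument area (stackOffset). The struct,
// function, and register counts below (System V x86-64 style: 6 GP, 8 FP) are illustrative
// assumptions, not the Air implementation.
struct MarshalledCCallArg {
    bool onStack;
    unsigned registerIndex; // Valid when !onStack.
    unsigned stackOffset;   // Valid when onStack.
};

static MarshalledCCallArg marshallOneCCallArgument(
    bool isFloatingPoint, unsigned& gpArgumentCount, unsigned& fpArgumentCount, unsigned& stackOffset)
{
    const unsigned numGPArgumentRegisters = 6;
    const unsigned numFPArgumentRegisters = 8;
    unsigned& count = isFloatingPoint ? fpArgumentCount : gpArgumentCount;
    unsigned limit = isFloatingPoint ? numFPArgumentRegisters : numGPArgumentRegisters;
    if (count < limit)
        return { false, count++, 0 };
    // Out of argument registers: this argument lives in the call-arg area at the current offset.
    MarshalledCCallArg result = { true, 0, stackOffset };
    stackOffset += 8;
    return result;
}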
template<typename Op>
std::enable_if_t<
    Op::opcodeID == op_call_varargs || Op::opcodeID == op_construct_varargs
    || Op::opcodeID == op_tail_call_varargs || Op::opcodeID == op_tail_call_forward_arguments
, void> JIT::compileSetupFrame(const Op& bytecode, CallLinkInfo* info)
{
    OpcodeID opcodeID = Op::opcodeID;
    int thisValue = bytecode.m_thisValue.offset();
    int arguments = bytecode.m_arguments.offset();
    int firstFreeRegister = bytecode.m_firstFree.offset();
    int firstVarArgOffset = bytecode.m_firstVarArg;

    emitLoad(arguments, regT1, regT0);
    Z_JITOperation_EJZZ sizeOperation;
    if (Op::opcodeID == op_tail_call_forward_arguments)
        sizeOperation = operationSizeFrameForForwardArguments;
    else
        sizeOperation = operationSizeFrameForVarargs;
    callOperation(sizeOperation, JSValueRegs(regT1, regT0), -firstFreeRegister, firstVarArgOffset);
    move(TrustedImm32(-firstFreeRegister), regT1);
    emitSetVarargsFrame(*this, returnValueGPR, false, regT1, regT1);
    addPtr(TrustedImm32(-(sizeof(CallerFrameAndPC) + WTF::roundUpToMultipleOf(stackAlignmentBytes(), 6 * sizeof(void*)))), regT1, stackPointerRegister);
    emitLoad(arguments, regT2, regT4);
    F_JITOperation_EFJZZ setupOperation;
    if (opcodeID == op_tail_call_forward_arguments)
        setupOperation = operationSetupForwardArgumentsFrame;
    else
        setupOperation = operationSetupVarargsFrame;
    callOperation(setupOperation, regT1, JSValueRegs(regT2, regT4), firstVarArgOffset, regT0);
    move(returnValueGPR, regT1);

    // Profile the argument count.
    load32(Address(regT1, CallFrameSlot::argumentCount * static_cast<int>(sizeof(Register)) + PayloadOffset), regT2);
    load32(info->addressOfMaxNumArguments(), regT0);
    Jump notBiggest = branch32(Above, regT0, regT2);
    store32(regT2, info->addressOfMaxNumArguments());
    notBiggest.link(this);

    // Initialize 'this'.
    emitLoad(thisValue, regT2, regT0);
    store32(regT0, Address(regT1, PayloadOffset + (CallFrame::thisArgumentOffset() * static_cast<int>(sizeof(Register)))));
    store32(regT2, Address(regT1, TagOffset + (CallFrame::thisArgumentOffset() * static_cast<int>(sizeof(Register)))));

    addPtr(TrustedImm32(sizeof(CallerFrameAndPC)), regT1, stackPointerRegister);
}
void JIT::compileSetupVarargsFrame(OpcodeID opcode, Instruction* instruction, CallLinkInfo* info)
{
    int thisValue = instruction[3].u.operand;
    int arguments = instruction[4].u.operand;
    int firstFreeRegister = instruction[5].u.operand;
    int firstVarArgOffset = instruction[6].u.operand;

    emitGetVirtualRegister(arguments, regT1);
    Z_JITOperation_EJZZ sizeOperation;
    if (opcode == op_tail_call_forward_arguments)
        sizeOperation = operationSizeFrameForForwardArguments;
    else
        sizeOperation = operationSizeFrameForVarargs;
    callOperation(sizeOperation, regT1, -firstFreeRegister, firstVarArgOffset);
    move(TrustedImm32(-firstFreeRegister), regT1);
    emitSetVarargsFrame(*this, returnValueGPR, false, regT1, regT1);
    addPtr(TrustedImm32(-(sizeof(CallerFrameAndPC) + WTF::roundUpToMultipleOf(stackAlignmentBytes(), 5 * sizeof(void*)))), regT1, stackPointerRegister);
    emitGetVirtualRegister(arguments, regT2);
    F_JITOperation_EFJZZ setupOperation;
    if (opcode == op_tail_call_forward_arguments)
        setupOperation = operationSetupForwardArgumentsFrame;
    else
        setupOperation = operationSetupVarargsFrame;
    callOperation(setupOperation, regT1, regT2, firstVarArgOffset, regT0);
    move(returnValueGPR, regT1);

    // Profile the argument count.
    load32(Address(regT1, CallFrameSlot::argumentCount * static_cast<int>(sizeof(Register)) + PayloadOffset), regT2);
    load32(info->addressOfMaxNumArguments(), regT0);
    Jump notBiggest = branch32(Above, regT0, regT2);
    store32(regT2, info->addressOfMaxNumArguments());
    notBiggest.link(this);

    // Initialize 'this'.
    emitGetVirtualRegister(thisValue, regT0);
    store64(regT0, Address(regT1, CallFrame::thisArgumentOffset() * static_cast<int>(sizeof(Register))));

    addPtr(TrustedImm32(sizeof(CallerFrameAndPC)), regT1, stackPointerRegister);
}
void allocateStack(Code& code)
{
    PhaseScope phaseScope(code, "allocateStack");

    // Perform an escape analysis over stack slots. An escaping stack slot is one that is locked or
    // is explicitly escaped in the code.
    IndexSet<StackSlot> escapingStackSlots;
    for (StackSlot* slot : code.stackSlots()) {
        if (slot->isLocked())
            escapingStackSlots.add(slot);
    }
    for (BasicBlock* block : code) {
        for (Inst& inst : *block) {
            inst.forEachArg(
                [&] (Arg& arg, Arg::Role role, Arg::Type, Arg::Width) {
                    if (role == Arg::UseAddr && arg.isStack())
                        escapingStackSlots.add(arg.stackSlot());
                });
        }
    }

    // Allocate all of the escaped slots in order. This is kind of a crazy algorithm to allow for
    // the possibility of stack slots being assigned frame offsets before we even get here.
    ASSERT(!code.frameSize());
    Vector<StackSlot*> assignedEscapedStackSlots;
    Vector<StackSlot*> escapedStackSlotsWorklist;
    for (StackSlot* slot : code.stackSlots()) {
        if (escapingStackSlots.contains(slot)) {
            if (slot->offsetFromFP())
                assignedEscapedStackSlots.append(slot);
            else
                escapedStackSlotsWorklist.append(slot);
        } else {
            // It would be super strange to have an unlocked stack slot that has an offset already.
            ASSERT(!slot->offsetFromFP());
        }
    }
    // This is a fairly expensive loop, but it's OK because we'll usually only have a handful of
    // escaped stack slots.
    while (!escapedStackSlotsWorklist.isEmpty()) {
        StackSlot* slot = escapedStackSlotsWorklist.takeLast();
        assign(slot, assignedEscapedStackSlots);
        assignedEscapedStackSlots.append(slot);
    }

    // Now we handle the anonymous slots.
    StackSlotLiveness liveness(code);
    IndexMap<StackSlot, HashSet<StackSlot*>> interference(code.stackSlots().size());
    Vector<StackSlot*> slots;

    for (BasicBlock* block : code) {
        StackSlotLiveness::LocalCalc localCalc(liveness, block);

        auto interfere = [&] (Inst& inst) {
            if (verbose)
                dataLog("Interfering: ", WTF::pointerListDump(localCalc.live()), "\n");

            inst.forEachArg(
                [&] (Arg& arg, Arg::Role role, Arg::Type, Arg::Width) {
                    if (!Arg::isDef(role))
                        return;
                    if (!arg.isStack())
                        return;
                    StackSlot* slot = arg.stackSlot();
                    if (slot->kind() != StackSlotKind::Anonymous)
                        return;

                    for (StackSlot* otherSlot : localCalc.live()) {
                        interference[slot].add(otherSlot);
                        interference[otherSlot].add(slot);
                    }
                });
        };

        for (unsigned instIndex = block->size(); instIndex--;) {
            if (verbose)
                dataLog("Analyzing: ", block->at(instIndex), "\n");
            Inst& inst = block->at(instIndex);
            interfere(inst);
            localCalc.execute(instIndex);
        }
        Inst nop;
        interfere(nop);
    }

    if (verbose) {
        for (StackSlot* slot : code.stackSlots())
            dataLog("Interference of ", pointerDump(slot), ": ", pointerListDump(interference[slot]), "\n");
    }

    // Now we assign stack locations. At its heart this algorithm is just first-fit. For each
    // StackSlot we just want to find the offsetFromFP that is closest to zero while ensuring no
    // overlap with other StackSlots that this overlaps with.
    Vector<StackSlot*> otherSlots = assignedEscapedStackSlots;
    for (StackSlot* slot : code.stackSlots()) {
        if (slot->offsetFromFP()) {
            // Already assigned an offset.
            continue;
        }

        HashSet<StackSlot*>& interferingSlots = interference[slot];
        otherSlots.resize(assignedEscapedStackSlots.size());
        otherSlots.resize(assignedEscapedStackSlots.size() + interferingSlots.size());
        unsigned nextIndex = assignedEscapedStackSlots.size();
        for (StackSlot* otherSlot : interferingSlots)
            otherSlots[nextIndex++] = otherSlot;

        assign(slot, otherSlots);
    }

    // Figure out how much stack we're using for stack slots.
    unsigned frameSizeForStackSlots = 0;
    for (StackSlot* slot : code.stackSlots()) {
        frameSizeForStackSlots = std::max(
            frameSizeForStackSlots,
            static_cast<unsigned>(-slot->offsetFromFP()));
    }

    frameSizeForStackSlots = WTF::roundUpToMultipleOf(stackAlignmentBytes(), frameSizeForStackSlots);

    // Now we need to deduce how much argument area we need.
    for (BasicBlock* block : code) {
        for (Inst& inst : *block) {
            for (Arg& arg : inst.args) {
                if (arg.isCallArg()) {
                    // For now, we assume that we use 8 bytes of the call arg. But that's not
                    // such an awesome assumption.
                    // FIXME: https://bugs.webkit.org/show_bug.cgi?id=150454
                    ASSERT(arg.offset() >= 0);
                    code.requestCallArgAreaSize(arg.offset() + 8);
                }
            }
        }
    }

    code.setFrameSize(frameSizeForStackSlots + code.callArgAreaSize());

    // Finally, transform the code to use Addr's instead of StackSlot's. This is a lossless
    // transformation since we can search the StackSlots array to figure out which StackSlot any
    // offset-from-FP refers to.
    for (BasicBlock* block : code) {
        for (Inst& inst : *block) {
            for (Arg& arg : inst.args) {
                switch (arg.kind()) {
                case Arg::Stack:
                    arg = Arg::addr(
                        Tmp(GPRInfo::callFrameRegister),
                        arg.offset() + arg.stackSlot()->offsetFromFP());
                    break;
                case Arg::CallArg:
                    arg = Arg::addr(
                        Tmp(GPRInfo::callFrameRegister),
                        arg.offset() - code.frameSize());
                    break;
                default:
                    break;
                }
            }
        }
    }
}
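// A standalone, hypothetical sketch (not the Air code) of the first-fit placement that assign()
// performs above: for each slot, take the candidate offsetFromFP closest to zero, growing downward
// from the frame pointer, that does not overlap any already-placed slot it interferes with.
// ToySlot, overlaps(), and the 8-byte step are illustrative simplifications.
#include <cstdint>
#include <vector>

struct ToySlot {
    unsigned byteSize;
    intptr_t offsetFromFP = 0; // 0 means "not yet assigned"; assigned offsets are negative.
};

static bool overlaps(intptr_t offset, unsigned byteSize, const ToySlot& other)
{
    return offset < other.offsetFromFP + static_cast<intptr_t>(other.byteSize)
        && other.offsetFromFP < offset + static_cast<intptr_t>(byteSize);
}

static void assignFirstFit(ToySlot& slot, const std::vector<const ToySlot*>& conflictingSlots)
{
    // Walk candidate offsets downward until the slot's range [offset, offset + byteSize)
    // clears every conflicting slot that already has a placement.
    for (intptr_t offset = -static_cast<intptr_t>(slot.byteSize); ; offset -= 8) {
        bool fits = true;
        for (const ToySlot* other : conflictingSlots) {
            if (other->offsetFromFP && overlaps(offset, slot.byteSize, *other)) {
                fits = false;
                break;
            }
        }
        if (fits) {
            slot.offsetFromFP = offset;
            return;
        }
    }
}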
void allocateStack(Code& code)
{
    PhaseScope phaseScope(code, "allocateStack");

    // Perform an escape analysis over stack slots. An escaping stack slot is one that is locked or
    // is explicitly escaped in the code.
    IndexSet<StackSlot> escapingStackSlots;
    for (StackSlot* slot : code.stackSlots()) {
        if (slot->isLocked())
            escapingStackSlots.add(slot);
    }
    for (BasicBlock* block : code) {
        for (Inst& inst : *block) {
            inst.forEachArg(
                [&] (Arg& arg, Arg::Role role, Arg::Type, Arg::Width) {
                    if (role == Arg::UseAddr && arg.isStack())
                        escapingStackSlots.add(arg.stackSlot());
                });
        }
    }

    // Allocate all of the escaped slots in order. This is kind of a crazy algorithm to allow for
    // the possibility of stack slots being assigned frame offsets before we even get here.
    ASSERT(!code.frameSize());
    Vector<StackSlot*> assignedEscapedStackSlots;
    Vector<StackSlot*> escapedStackSlotsWorklist;
    for (StackSlot* slot : code.stackSlots()) {
        if (escapingStackSlots.contains(slot)) {
            if (slot->offsetFromFP())
                assignedEscapedStackSlots.append(slot);
            else
                escapedStackSlotsWorklist.append(slot);
        } else {
            // It would be super strange to have an unlocked stack slot that has an offset already.
            ASSERT(!slot->offsetFromFP());
        }
    }
    // This is a fairly expensive loop, but it's OK because we'll usually only have a handful of
    // escaped stack slots.
    while (!escapedStackSlotsWorklist.isEmpty()) {
        StackSlot* slot = escapedStackSlotsWorklist.takeLast();
        assign(slot, assignedEscapedStackSlots);
        assignedEscapedStackSlots.append(slot);
    }

    // Now we handle the anonymous slots.
    StackSlotLiveness liveness(code);
    IndexMap<StackSlot, HashSet<StackSlot*>> interference(code.stackSlots().size());
    Vector<StackSlot*> slots;

    for (BasicBlock* block : code) {
        StackSlotLiveness::LocalCalc localCalc(liveness, block);

        auto interfere = [&] (unsigned instIndex) {
            if (verbose)
                dataLog("Interfering: ", WTF::pointerListDump(localCalc.live()), "\n");

            Inst::forEachDef<Arg>(
                block->get(instIndex), block->get(instIndex + 1),
                [&] (Arg& arg, Arg::Role, Arg::Type, Arg::Width) {
                    if (!arg.isStack())
                        return;
                    StackSlot* slot = arg.stackSlot();
                    if (slot->kind() != StackSlotKind::Anonymous)
                        return;

                    for (StackSlot* otherSlot : localCalc.live()) {
                        interference[slot].add(otherSlot);
                        interference[otherSlot].add(slot);
                    }
                });
        };

        for (unsigned instIndex = block->size(); instIndex--;) {
            if (verbose)
                dataLog("Analyzing: ", block->at(instIndex), "\n");

            // Kill dead stores. For simplicity we say that a store is killable if it has only late
            // defs and those late defs are to things that are dead right now. We only do that
            // because that's the only kind of dead stack store we will see here.
            Inst& inst = block->at(instIndex);
            if (!inst.hasNonArgEffects()) {
                bool ok = true;
                inst.forEachArg(
                    [&] (Arg& arg, Arg::Role role, Arg::Type, Arg::Width) {
                        if (Arg::isEarlyDef(role)) {
                            ok = false;
                            return;
                        }
                        if (!Arg::isLateDef(role))
                            return;
                        if (!arg.isStack()) {
                            ok = false;
                            return;
                        }
                        StackSlot* slot = arg.stackSlot();
                        if (slot->kind() != StackSlotKind::Anonymous) {
                            ok = false;
                            return;
                        }

                        if (localCalc.isLive(slot)) {
                            ok = false;
                            return;
                        }
                    });
                if (ok)
                    inst = Inst();
            }

            interfere(instIndex);
            localCalc.execute(instIndex);
        }
        interfere(-1);

        block->insts().removeAllMatching(
            [&] (const Inst& inst) -> bool {
                return !inst;
            });
    }

    if (verbose) {
        for (StackSlot* slot : code.stackSlots())
            dataLog("Interference of ", pointerDump(slot), ": ", pointerListDump(interference[slot]), "\n");
    }

    // Now we assign stack locations. At its heart this algorithm is just first-fit. For each
    // StackSlot we just want to find the offsetFromFP that is closest to zero while ensuring no
    // overlap with other StackSlots that this overlaps with.
    Vector<StackSlot*> otherSlots = assignedEscapedStackSlots;
    for (StackSlot* slot : code.stackSlots()) {
        if (slot->offsetFromFP()) {
            // Already assigned an offset.
            continue;
        }

        HashSet<StackSlot*>& interferingSlots = interference[slot];
        otherSlots.resize(assignedEscapedStackSlots.size());
        otherSlots.resize(assignedEscapedStackSlots.size() + interferingSlots.size());
        unsigned nextIndex = assignedEscapedStackSlots.size();
        for (StackSlot* otherSlot : interferingSlots)
            otherSlots[nextIndex++] = otherSlot;

        assign(slot, otherSlots);
    }

    // Figure out how much stack we're using for stack slots.
    unsigned frameSizeForStackSlots = 0;
    for (StackSlot* slot : code.stackSlots()) {
        frameSizeForStackSlots = std::max(
            frameSizeForStackSlots,
            static_cast<unsigned>(-slot->offsetFromFP()));
    }

    frameSizeForStackSlots = WTF::roundUpToMultipleOf(stackAlignmentBytes(), frameSizeForStackSlots);

    // Now we need to deduce how much argument area we need.
    for (BasicBlock* block : code) {
        for (Inst& inst : *block) {
            for (Arg& arg : inst.args) {
                if (arg.isCallArg()) {
                    // For now, we assume that we use 8 bytes of the call arg. But that's not
                    // such an awesome assumption.
                    // FIXME: https://bugs.webkit.org/show_bug.cgi?id=150454
                    ASSERT(arg.offset() >= 0);
                    code.requestCallArgAreaSize(arg.offset() + 8);
                }
            }
        }
    }

    code.setFrameSize(frameSizeForStackSlots + code.callArgAreaSize());

    // Finally, transform the code to use Addr's instead of StackSlot's. This is a lossless
    // transformation since we can search the StackSlots array to figure out which StackSlot any
    // offset-from-FP refers to.
    // FIXME: This may produce addresses that aren't valid if we end up with a ginormous stack frame.
    // We would have to scavenge for temporaries if this happened. Fortunately, this case will be
    // extremely rare so we can do crazy things when it arises.
    // https://bugs.webkit.org/show_bug.cgi?id=152530
    InsertionSet insertionSet(code);
    for (BasicBlock* block : code) {
        for (unsigned instIndex = 0; instIndex < block->size(); ++instIndex) {
            Inst& inst = block->at(instIndex);
            inst.forEachArg(
                [&] (Arg& arg, Arg::Role role, Arg::Type, Arg::Width width) {
                    auto stackAddr = [&] (int32_t offset) -> Arg {
                        return Arg::stackAddr(offset, code.frameSize(), width);
                    };

                    switch (arg.kind()) {
                    case Arg::Stack: {
                        StackSlot* slot = arg.stackSlot();
                        if (Arg::isZDef(role)
                            && slot->kind() == StackSlotKind::Anonymous
                            && slot->byteSize() > Arg::bytes(width)) {
                            // Currently we only handle this simple case because it's the only one
                            // that arises: ZDef's are only 32-bit right now. So, when we hit these
                            // assertions it means that we need to implement those other kinds of
                            // zero fills.
                            RELEASE_ASSERT(slot->byteSize() == 8);
                            RELEASE_ASSERT(width == Arg::Width32);

                            RELEASE_ASSERT(isValidForm(StoreZero32, Arg::Stack));
                            insertionSet.insert(
                                instIndex + 1, StoreZero32, inst.origin,
                                stackAddr(arg.offset() + 4 + slot->offsetFromFP()));
                        }
                        arg = stackAddr(arg.offset() + slot->offsetFromFP());
                        break;
                    }
                    case Arg::CallArg:
                        arg = stackAddr(arg.offset() - code.frameSize());
                        break;
                    default:
                        break;
                    }
                });
        }
        insertionSet.execute(block);
    }
}
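// A minimal, hypothetical sketch (not Air code) of the dead-store elimination folded into the
// backward liveness walk above: scanning instructions in reverse, a store whose slot is not live
// at that point can be dropped, while loads make a slot live and stores kill it again.
// ToyStackInst and killDeadStores() are illustrative names only.
#include <algorithm>
#include <set>
#include <vector>

struct ToyStackInst {
    enum Kind { Load, Store } kind;
    int slot; // Index of the anonymous stack slot this instruction touches.
    bool dead = false;
};

static void killDeadStores(std::vector<ToyStackInst>& insts)
{
    std::set<int> live; // Slots whose current value may still be read by a later instruction.
    for (auto it = insts.rbegin(); it != insts.rend(); ++it) {
        if (it->kind == ToyStackInst::Store) {
            if (!live.count(it->slot))
                it->dead = true; // Nothing later reads this value, so the store is dead.
            else
                live.erase(it->slot); // The store fully defines the slot; earlier values are dead here.
        } else
            live.insert(it->slot); // A load keeps the slot live for everything above it.
    }
    insts.erase(
        std::remove_if(insts.begin(), insts.end(), [] (const ToyStackInst& inst) { return inst.dead; }),
        insts.end());
}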