void breakCriticalEdges(Procedure& proc)
{
    BlockInsertionSet insertionSet(proc);

    for (BasicBlock* block : proc) {
        // Non-void terminals that are the moral equivalent of jumps trigger critical edge breaking
        // because of fixSSA's demoteValues.
        if (block->numSuccessors() <= 1 && block->last()->type() == Void)
            continue;

        for (BasicBlock*& successor : block->successorBlocks()) {
            if (successor->numPredecessors() <= 1)
                continue;

            BasicBlock* pad = insertionSet.insertBefore(successor, successor->frequency());
            pad->appendNew<Value>(proc, Jump, successor->at(0)->origin());
            pad->setSuccessors(FrequentedBlock(successor));
            pad->addPredecessor(block);
            successor->replacePredecessor(block, pad);
            successor = pad;
        }
    }

    if (insertionSet.execute())
        proc.invalidateCFG();
}
bool run()
{
    DFG_ASSERT(m_graph, nullptr, m_graph.m_form == LoadStore);

    InsertionSet insertionSet(m_graph);
    for (BasicBlock* block : m_graph.blocksInNaturalOrder()) {
        treatRegularBlock(block, insertionSet);
        insertionSet.execute(block);
    }

    treatRootBlock(m_graph.block(0), insertionSet);
    insertionSet.execute(m_graph.block(0));

    return true;
}
bool run()
{
    DFG_ASSERT(m_graph, nullptr, m_graph.m_form == LoadStore);

    if (!m_graph.m_hasExceptionHandlers)
        return false;

    InsertionSet insertionSet(m_graph);
    if (m_graph.m_hasExceptionHandlers) {
        for (BasicBlock* block : m_graph.blocksInNaturalOrder()) {
            handleBlockForTryCatch(block, insertionSet);
            insertionSet.execute(block);
        }
    }

    return true;
}
bool run()
{
    InsertionSet insertionSet(m_graph);
    for (BlockIndex blockIndex = m_graph.numBlocks(); blockIndex--;) {
        BasicBlock* block = m_graph.block(blockIndex);
        if (!block)
            continue;

        for (unsigned nodeIndex = 0; nodeIndex < block->size(); ++nodeIndex) {
            Node* node = block->at(nodeIndex);
            if (!node->hasResult())
                continue;

            insertionSet.insertNode(
                nodeIndex + 1, SpecNone, Phantom, node->origin, Edge(node));
        }
        insertionSet.execute(block);
    }

    return true;
}
void breakCriticalEdges(Code& code)
{
    BlockInsertionSet insertionSet(code);

    for (BasicBlock* block : code) {
        if (block->numSuccessors() <= 1)
            continue;

        for (BasicBlock*& successor : block->successorBlocks()) {
            if (successor->numPredecessors() <= 1)
                continue;

            BasicBlock* pad = insertionSet.insertBefore(successor, successor->frequency());
            pad->append(Jump, successor->at(0).origin);
            pad->setSuccessors(successor);
            pad->addPredecessor(block);
            successor->replacePredecessor(block, pad);
            successor = pad;
        }
    }

    insertionSet.execute();
}
void lowerStackArgs(Code& code)
{
    PhaseScope phaseScope(code, "lowerStackArgs");

    // Now we need to deduce how much argument area we need.
    for (BasicBlock* block : code) {
        for (Inst& inst : *block) {
            for (Arg& arg : inst.args) {
                if (arg.isCallArg()) {
                    // For now, we assume that we use 8 bytes of the call arg. But that's not
                    // such an awesome assumption.
                    // FIXME: https://bugs.webkit.org/show_bug.cgi?id=150454
                    ASSERT(arg.offset() >= 0);
                    code.requestCallArgAreaSizeInBytes(arg.offset() + 8);
                }
            }
        }
    }

    code.setFrameSize(code.frameSize() + code.callArgAreaSizeInBytes());

    // Finally, transform the code to use Addr's instead of StackSlot's. This is a lossless
    // transformation since we can search the StackSlots array to figure out which StackSlot any
    // offset-from-FP refers to.

    InsertionSet insertionSet(code);
    for (BasicBlock* block : code) {
        // FIXME: We can keep track of the last large offset which was materialized in this block, and reuse the
        // register if it hasn't been clobbered, instead of regenerating imm+add+addr every time.
        // https://bugs.webkit.org/show_bug.cgi?id=171387
        for (unsigned instIndex = 0; instIndex < block->size(); ++instIndex) {
            Inst& inst = block->at(instIndex);
            inst.forEachArg(
                [&] (Arg& arg, Arg::Role role, Bank, Width width) {
                    auto stackAddr = [&] (Value::OffsetType offsetFromFP) -> Arg {
                        int32_t offsetFromSP = offsetFromFP + code.frameSize();

                        if (inst.admitsExtendedOffsetAddr(arg)) {
                            // Stackmaps and patchpoints expect addr inputs relative to SP or FP only. We might as well
                            // not even bother generating an addr with valid form for these opcodes since extended offset
                            // addr is always valid.
                            return Arg::extendedOffsetAddr(offsetFromFP);
                        }

                        Arg result = Arg::addr(Air::Tmp(GPRInfo::callFrameRegister), offsetFromFP);
                        if (result.isValidForm(width))
                            return result;

                        result = Arg::addr(Air::Tmp(MacroAssembler::stackPointerRegister), offsetFromSP);
                        if (result.isValidForm(width))
                            return result;
#if CPU(ARM64)
                        ASSERT(pinnedExtendedOffsetAddrRegister());
                        Air::Tmp tmp = Air::Tmp(*pinnedExtendedOffsetAddrRegister());

                        Arg largeOffset = Arg::isValidImmForm(offsetFromSP) ? Arg::imm(offsetFromSP) : Arg::bigImm(offsetFromSP);
                        insertionSet.insert(instIndex, Move, inst.origin, largeOffset, tmp);
                        insertionSet.insert(instIndex, Add64, inst.origin, Air::Tmp(MacroAssembler::stackPointerRegister), tmp);
                        result = Arg::addr(tmp, 0);
                        return result;
#elif CPU(X86_64)
                        // Can't happen on x86: immediates are always big enough for frame size.
                        RELEASE_ASSERT_NOT_REACHED();
#else
#error Unhandled architecture.
#endif
                    };

                    switch (arg.kind()) {
                    case Arg::Stack: {
                        StackSlot* slot = arg.stackSlot();
                        if (Arg::isZDef(role)
                            && slot->kind() == StackSlotKind::Spill
                            && slot->byteSize() > bytes(width)) {
                            // Currently we only handle this simple case because it's the only one
                            // that arises: ZDef's are only 32-bit right now. So, when we hit these
                            // assertions it means that we need to implement those other kinds of
                            // zero fills.
                            RELEASE_ASSERT(slot->byteSize() == 8);
                            RELEASE_ASSERT(width == Width32);

                            RELEASE_ASSERT(isValidForm(StoreZero32, Arg::Stack));
                            insertionSet.insert(
                                instIndex + 1, StoreZero32, inst.origin,
                                stackAddr(arg.offset() + 4 + slot->offsetFromFP()));
                        }
                        arg = stackAddr(arg.offset() + slot->offsetFromFP());
                        break;
                    }
                    case Arg::CallArg:
                        arg = stackAddr(arg.offset() - code.frameSize());
                        break;
                    default:
                        break;
                    }
                });
        }
        insertionSet.execute(block);
    }
}
void spillEverything(Code& code) { PhaseScope phaseScope(code, "spillEverything"); // We want to know the set of registers used at every point in every basic block. IndexMap<BasicBlock, Vector<RegisterSet>> usedRegisters(code.size()); Liveness<Tmp> liveness(code); for (BasicBlock* block : code) { Liveness<Tmp>::LocalCalc localCalc(liveness, block); usedRegisters[block].resize(block->size() + 1); auto setUsedRegisters = [&] (unsigned index, Inst& inst) { RegisterSet& registerSet = usedRegisters[block][index]; for (Tmp tmp : localCalc.live()) { if (tmp.isReg()) registerSet.set(tmp.reg()); } // Gotta account for dead assignments to registers. These may happen because the input // code is suboptimal. inst.forEachArg( [&] (Arg& arg, Arg::Role role, Arg::Type) { if (Arg::isDef(role) && arg.isReg()) registerSet.set(arg.reg()); }); }; for (unsigned instIndex = block->size(); instIndex--;) { Inst& inst = block->at(instIndex); setUsedRegisters(instIndex + 1, inst); localCalc.execute(inst); } Inst nop; setUsedRegisters(0, nop); } // Allocate a stack slot for each tmp. Vector<StackSlot*> allStackSlots[Arg::numTypes]; for (unsigned typeIndex = 0; typeIndex < Arg::numTypes; ++typeIndex) { Vector<StackSlot*>& stackSlots = allStackSlots[typeIndex]; Arg::Type type = static_cast<Arg::Type>(typeIndex); stackSlots.resize(code.numTmps(type)); for (unsigned tmpIndex = code.numTmps(type); tmpIndex--;) stackSlots[tmpIndex] = code.addStackSlot(8, StackSlotKind::Anonymous); } InsertionSet insertionSet(code); for (BasicBlock* block : code) { for (unsigned instIndex = 0; instIndex < block->size(); ++instIndex) { RegisterSet& setBefore = usedRegisters[block][instIndex]; RegisterSet& setAfter = usedRegisters[block][instIndex + 1]; Inst& inst = block->at(instIndex); inst.forEachTmp( [&] (Tmp& tmp, Arg::Role role, Arg::Type type) { if (tmp.isReg()) return; StackSlot* stackSlot = allStackSlots[type][tmp.tmpIndex()]; Arg arg = Arg::stack(stackSlot); // Need to figure out a register to use. How we do that depends on the role. Reg chosenReg; switch (role) { case Arg::Use: for (Reg reg : regsInPriorityOrder(type)) { if (!setBefore.get(reg)) { setBefore.set(reg); chosenReg = reg; break; } } break; case Arg::Def: for (Reg reg : regsInPriorityOrder(type)) { if (!setAfter.get(reg)) { setAfter.set(reg); chosenReg = reg; break; } } break; case Arg::UseDef: for (Reg reg : regsInPriorityOrder(type)) { if (!setBefore.get(reg) && !setAfter.get(reg)) { setAfter.set(reg); setBefore.set(reg); chosenReg = reg; break; } } break; } RELEASE_ASSERT(chosenReg); tmp = Tmp(chosenReg); Opcode move = type == Arg::GP ? Move : MoveDouble; if (Arg::isUse(role)) { insertionSet.insert( instIndex, move, inst.origin, arg, tmp); } if (Arg::isDef(role)) { insertionSet.insert( instIndex + 1, move, inst.origin, tmp, arg); } }); } insertionSet.execute(block); } }
bool run()
{
    ASSERT(m_graph.m_form == ThreadedCPS);

    for (unsigned i = m_graph.m_variableAccessData.size(); i--;) {
        VariableAccessData* variable = &m_graph.m_variableAccessData[i];
        if (!variable->isRoot())
            continue;
        variable->clearVotes();
    }

    // Identify the set of variables that are always subject to the same structure
    // checks. For now, only consider monomorphic structure checks (one structure).

    for (BlockIndex blockIndex = 0; blockIndex < m_graph.m_blocks.size(); ++blockIndex) {
        BasicBlock* block = m_graph.m_blocks[blockIndex].get();
        if (!block)
            continue;
        for (unsigned indexInBlock = 0; indexInBlock < block->size(); ++indexInBlock) {
            Node* node = block->at(indexInBlock);
            switch (node->op()) {
            case CheckStructure:
            case StructureTransitionWatchpoint: {
                Node* child = node->child1().node();
                if (child->op() != GetLocal)
                    break;
                VariableAccessData* variable = child->variableAccessData();
                variable->vote(VoteStructureCheck);
                if (!shouldConsiderForHoisting(variable))
                    break;
                noticeStructureCheck(variable, node->structureSet());
                break;
            }

            case ForwardCheckStructure:
            case ForwardStructureTransitionWatchpoint:
                // We currently rely on the fact that we're the only ones who would
                // insert this node.
                RELEASE_ASSERT_NOT_REACHED();
                break;

            case GetByOffset:
            case PutByOffset:
            case PutStructure:
            case AllocatePropertyStorage:
            case ReallocatePropertyStorage:
            case GetButterfly:
            case GetByVal:
            case PutByVal:
            case PutByValAlias:
            case GetArrayLength:
            case CheckArray:
            case GetIndexedPropertyStorage:
            case Phantom:
                // Don't count these uses.
                break;

            case ArrayifyToStructure:
            case Arrayify:
                if (node->arrayMode().conversion() == Array::RageConvert) {
                    // Rage conversion changes structures. We should avoid trying to do
                    // any kind of hoisting when rage conversion is in play.
                    Node* child = node->child1().node();
                    if (child->op() != GetLocal)
                        break;
                    VariableAccessData* variable = child->variableAccessData();
                    variable->vote(VoteOther);
                    if (!shouldConsiderForHoisting(variable))
                        break;
                    noticeStructureCheck(variable, 0);
                }
                break;

            case SetLocal: {
                // Find all uses of the source of the SetLocal. If any of them are a
                // kind of CheckStructure, then we should notice them to ensure that
                // we're not hoisting a check that would contravene checks that are
                // already being performed.
                VariableAccessData* variable = node->variableAccessData();
                if (!shouldConsiderForHoisting(variable))
                    break;
                Node* source = node->child1().node();
                for (unsigned subIndexInBlock = 0; subIndexInBlock < block->size(); ++subIndexInBlock) {
                    Node* subNode = block->at(subIndexInBlock);
                    switch (subNode->op()) {
                    case CheckStructure: {
                        if (subNode->child1() != source)
                            break;
                        noticeStructureCheck(variable, subNode->structureSet());
                        break;
                    }
                    case StructureTransitionWatchpoint: {
                        if (subNode->child1() != source)
                            break;
                        noticeStructureCheck(variable, subNode->structure());
                        break;
                    }
                    default:
                        break;
                    }
                }

                m_graph.voteChildren(node, VoteOther);
                break;
            }

            case GarbageValue:
                break;

            default:
                m_graph.voteChildren(node, VoteOther);
                break;
            }
        }
    }

    // Disable structure hoisting on variables that appear to mostly be used in
    // contexts where it doesn't make sense.

    for (unsigned i = m_graph.m_variableAccessData.size(); i--;) {
        VariableAccessData* variable = &m_graph.m_variableAccessData[i];
        if (!variable->isRoot())
            continue;
        if (variable->voteRatio() >= Options::structureCheckVoteRatioForHoisting())
            continue;
        HashMap<VariableAccessData*, CheckData>::iterator iter = m_map.find(variable);
        if (iter == m_map.end())
            continue;
#if DFG_ENABLE(DEBUG_PROPAGATION_VERBOSE)
        dataLog(
            "Zeroing the structure to hoist for ", VariableAccessDataDump(m_graph, variable),
            " because the ratio is ", variable->voteRatio(), ".\n");
#endif
        iter->value.m_structure = 0;
    }

    // Disable structure check hoisting for variables that cross the OSR entry that
    // we're currently taking, and where the value currently does not have the
    // structure we want.

    for (BlockIndex blockIndex = 0; blockIndex < m_graph.m_blocks.size(); ++blockIndex) {
        BasicBlock* block = m_graph.m_blocks[blockIndex].get();
        if (!block)
            continue;
        ASSERT(block->isReachable);
        if (!block->isOSRTarget)
            continue;
        if (block->bytecodeBegin != m_graph.m_osrEntryBytecodeIndex)
            continue;
        for (size_t i = 0; i < m_graph.m_mustHandleValues.size(); ++i) {
            int operand = m_graph.m_mustHandleValues.operandForIndex(i);
            Node* node = block->variablesAtHead.operand(operand);
            if (!node)
                continue;
            VariableAccessData* variable = node->variableAccessData();
            HashMap<VariableAccessData*, CheckData>::iterator iter = m_map.find(variable);
            if (iter == m_map.end())
                continue;
            if (!iter->value.m_structure)
                continue;
            JSValue value = m_graph.m_mustHandleValues[i];
            if (!value || !value.isCell()) {
#if DFG_ENABLE(DEBUG_PROPAGATION_VERBOSE)
                dataLog(
                    "Zeroing the structure to hoist for ", VariableAccessDataDump(m_graph, variable),
                    " because the OSR entry value is not a cell: ", value, ".\n");
#endif
                iter->value.m_structure = 0;
                continue;
            }
            if (value.asCell()->structure() != iter->value.m_structure) {
#if DFG_ENABLE(DEBUG_PROPAGATION_VERBOSE)
                dataLog(
                    "Zeroing the structure to hoist for ", VariableAccessDataDump(m_graph, variable),
                    " because the OSR entry value has structure ",
                    RawPointer(value.asCell()->structure()), " and we wanted ",
                    RawPointer(iter->value.m_structure), ".\n");
#endif
                iter->value.m_structure = 0;
                continue;
            }
        }
    }

    bool changed = false;

#if DFG_ENABLE(DEBUG_PROPAGATION_VERBOSE)
    for (HashMap<VariableAccessData*, CheckData>::iterator it = m_map.begin();
        it != m_map.end(); ++it) {
        if (!it->value.m_structure) {
            dataLog(
                "Not hoisting checks for ", VariableAccessDataDump(m_graph, it->key),
                " because of heuristics.\n");
            continue;
        }
        dataLog("Hoisting checks for ", VariableAccessDataDump(m_graph, it->key), "\n");
    }
#endif // DFG_ENABLE(DEBUG_PROPAGATION_VERBOSE)

    // Place CheckStructure's at SetLocal sites.

    InsertionSet insertionSet(m_graph);
    for (BlockIndex blockIndex = 0; blockIndex < m_graph.m_blocks.size(); ++blockIndex) {
        BasicBlock* block = m_graph.m_blocks[blockIndex].get();
        if (!block)
            continue;
        for (unsigned indexInBlock = 0; indexInBlock < block->size(); ++indexInBlock) {
            Node* node = block->at(indexInBlock);
            // Be careful not to use 'node' after appending to the graph. In those switch
            // cases where we need to append, we first carefully extract everything we need
            // from the node, before doing any appending.
            switch (node->op()) {
            case SetArgument: {
                ASSERT(!blockIndex);
                // Insert a GetLocal and a CheckStructure immediately following this
                // SetArgument, if the variable was a candidate for structure hoisting.
                // If the basic block previously only had the SetArgument as its
                // variable-at-tail, then replace it with this GetLocal.
                VariableAccessData* variable = node->variableAccessData();
                HashMap<VariableAccessData*, CheckData>::iterator iter = m_map.find(variable);
                if (iter == m_map.end())
                    break;
                if (!iter->value.m_structure)
                    break;

                CodeOrigin codeOrigin = node->codeOrigin;

                Node* getLocal = insertionSet.insertNode(
                    indexInBlock + 1, variable->prediction(), GetLocal, codeOrigin,
                    OpInfo(variable), Edge(node));
                insertionSet.insertNode(
                    indexInBlock + 1, SpecNone, CheckStructure, codeOrigin,
                    OpInfo(m_graph.addStructureSet(iter->value.m_structure)),
                    Edge(getLocal, CellUse));

                if (block->variablesAtTail.operand(variable->local()) == node)
                    block->variablesAtTail.operand(variable->local()) = getLocal;

                m_graph.substituteGetLocal(*block, indexInBlock, variable, getLocal);

                changed = true;
                break;
            }

            case SetLocal: {
                VariableAccessData* variable = node->variableAccessData();
                HashMap<VariableAccessData*, CheckData>::iterator iter = m_map.find(variable);
                if (iter == m_map.end())
                    break;
                if (!iter->value.m_structure)
                    break;

                // First insert a dead SetLocal to tell OSR that the child's value should
                // be dropped into this bytecode variable if the CheckStructure decides
                // to exit.

                CodeOrigin codeOrigin = node->codeOrigin;
                Edge child1 = node->child1();

                insertionSet.insertNode(
                    indexInBlock, SpecNone, SetLocal, codeOrigin, OpInfo(variable), child1);

                // Use a ForwardCheckStructure to indicate that we should exit to the
                // next bytecode instruction rather than reexecuting the current one.
                insertionSet.insertNode(
                    indexInBlock, SpecNone, ForwardCheckStructure, codeOrigin,
                    OpInfo(m_graph.addStructureSet(iter->value.m_structure)),
                    Edge(child1.node(), CellUse));
                changed = true;
                break;
            }

            default:
                break;
            }
        }
        insertionSet.execute(block);
    }

    return changed;
}
void handleCalleeSaves(Code& code)
{
    PhaseScope phaseScope(code, "handleCalleeSaves");

    RegisterSet usedCalleeSaves;

    for (BasicBlock* block : code) {
        for (Inst& inst : *block) {
            inst.forEachTmpFast(
                [&] (Tmp& tmp) {
                    // At first we just record all used regs.
                    usedCalleeSaves.set(tmp.reg());
                });

            if (inst.hasSpecial())
                usedCalleeSaves.merge(inst.extraClobberedRegs());
        }
    }

    // Now we filter to really get the callee saves.
    usedCalleeSaves.filter(RegisterSet::calleeSaveRegisters());
    usedCalleeSaves.exclude(RegisterSet::stackRegisters()); // We don't need to save FP here.

    if (!usedCalleeSaves.numberOfSetRegisters())
        return;

    code.calleeSaveRegisters() = RegisterAtOffsetList(usedCalleeSaves);

    size_t byteSize = 0;
    for (const RegisterAtOffset& entry : code.calleeSaveRegisters())
        byteSize = std::max(static_cast<size_t>(-entry.offset()), byteSize);

    StackSlot* savesArea = code.addStackSlot(byteSize, StackSlotKind::Locked);
    // This is a bit weird since we could have already pinned a different stack slot to this
    // area. Also, our runtime does not require us to pin the saves area. Maybe we shouldn't pin it?
    savesArea->setOffsetFromFP(-byteSize);

    auto argFor = [&] (const RegisterAtOffset& entry) -> Arg {
        return Arg::stack(savesArea, entry.offset() + byteSize);
    };

    InsertionSet insertionSet(code);

    // First insert saving code in the prologue.
    for (const RegisterAtOffset& entry : code.calleeSaveRegisters()) {
        insertionSet.insert(
            0, entry.reg().isGPR() ? Move : MoveDouble, code[0]->at(0).origin,
            Tmp(entry.reg()), argFor(entry));
    }
    insertionSet.execute(code[0]);

    // Now insert restore code at epilogues.
    for (BasicBlock* block : code) {
        Inst& last = block->last();
        if (!isReturn(last.opcode))
            continue;

        for (const RegisterAtOffset& entry : code.calleeSaveRegisters()) {
            insertionSet.insert(
                block->size() - 1, entry.reg().isGPR() ? Move : MoveDouble, last.origin,
                argFor(entry), Tmp(entry.reg()));
        }
        insertionSet.execute(block);
    }
}
void allocateStack(Code& code)
{
    PhaseScope phaseScope(code, "allocateStack");

    // Perform an escape analysis over stack slots. An escaping stack slot is one that is locked or
    // is explicitly escaped in the code.
    IndexSet<StackSlot> escapingStackSlots;
    for (StackSlot* slot : code.stackSlots()) {
        if (slot->isLocked())
            escapingStackSlots.add(slot);
    }
    for (BasicBlock* block : code) {
        for (Inst& inst : *block) {
            inst.forEachArg(
                [&] (Arg& arg, Arg::Role role, Arg::Type, Arg::Width) {
                    if (role == Arg::UseAddr && arg.isStack())
                        escapingStackSlots.add(arg.stackSlot());
                });
        }
    }

    // Allocate all of the escaped slots in order. This is kind of a crazy algorithm to allow for
    // the possibility of stack slots being assigned frame offsets before we even get here.
    ASSERT(!code.frameSize());
    Vector<StackSlot*> assignedEscapedStackSlots;
    Vector<StackSlot*> escapedStackSlotsWorklist;
    for (StackSlot* slot : code.stackSlots()) {
        if (escapingStackSlots.contains(slot)) {
            if (slot->offsetFromFP())
                assignedEscapedStackSlots.append(slot);
            else
                escapedStackSlotsWorklist.append(slot);
        } else {
            // It would be super strange to have an unlocked stack slot that has an offset already.
            ASSERT(!slot->offsetFromFP());
        }
    }
    // This is a fairly expensive loop, but it's OK because we'll usually only have a handful of
    // escaped stack slots.
    while (!escapedStackSlotsWorklist.isEmpty()) {
        StackSlot* slot = escapedStackSlotsWorklist.takeLast();
        assign(slot, assignedEscapedStackSlots);
        assignedEscapedStackSlots.append(slot);
    }

    // Now we handle the anonymous slots.
    StackSlotLiveness liveness(code);
    IndexMap<StackSlot, HashSet<StackSlot*>> interference(code.stackSlots().size());
    Vector<StackSlot*> slots;

    for (BasicBlock* block : code) {
        StackSlotLiveness::LocalCalc localCalc(liveness, block);

        auto interfere = [&] (unsigned instIndex) {
            if (verbose)
                dataLog("Interfering: ", WTF::pointerListDump(localCalc.live()), "\n");

            Inst::forEachDef<Arg>(
                block->get(instIndex), block->get(instIndex + 1),
                [&] (Arg& arg, Arg::Role, Arg::Type, Arg::Width) {
                    if (!arg.isStack())
                        return;
                    StackSlot* slot = arg.stackSlot();
                    if (slot->kind() != StackSlotKind::Anonymous)
                        return;

                    for (StackSlot* otherSlot : localCalc.live()) {
                        interference[slot].add(otherSlot);
                        interference[otherSlot].add(slot);
                    }
                });
        };

        for (unsigned instIndex = block->size(); instIndex--;) {
            if (verbose)
                dataLog("Analyzing: ", block->at(instIndex), "\n");

            // Kill dead stores. For simplicity we say that a store is killable if it has only late
            // defs and those late defs are to things that are dead right now. We only do that
            // because that's the only kind of dead stack store we will see here.
            Inst& inst = block->at(instIndex);
            if (!inst.hasNonArgEffects()) {
                bool ok = true;
                inst.forEachArg(
                    [&] (Arg& arg, Arg::Role role, Arg::Type, Arg::Width) {
                        if (Arg::isEarlyDef(role)) {
                            ok = false;
                            return;
                        }
                        if (!Arg::isLateDef(role))
                            return;
                        if (!arg.isStack()) {
                            ok = false;
                            return;
                        }
                        StackSlot* slot = arg.stackSlot();
                        if (slot->kind() != StackSlotKind::Anonymous) {
                            ok = false;
                            return;
                        }

                        if (localCalc.isLive(slot)) {
                            ok = false;
                            return;
                        }
                    });
                if (ok)
                    inst = Inst();
            }

            interfere(instIndex);
            localCalc.execute(instIndex);
        }
        interfere(-1);

        block->insts().removeAllMatching(
            [&] (const Inst& inst) -> bool {
                return !inst;
            });
    }

    if (verbose) {
        for (StackSlot* slot : code.stackSlots())
            dataLog("Interference of ", pointerDump(slot), ": ", pointerListDump(interference[slot]), "\n");
    }

    // Now we assign stack locations. At its heart this algorithm is just first-fit. For each
    // StackSlot we just want to find the offsetFromFP that is closest to zero while ensuring no
    // overlap with the other StackSlots that it interferes with.
    Vector<StackSlot*> otherSlots = assignedEscapedStackSlots;
    for (StackSlot* slot : code.stackSlots()) {
        if (slot->offsetFromFP()) {
            // Already assigned an offset.
            continue;
        }

        HashSet<StackSlot*>& interferingSlots = interference[slot];
        otherSlots.resize(assignedEscapedStackSlots.size());
        otherSlots.resize(assignedEscapedStackSlots.size() + interferingSlots.size());
        unsigned nextIndex = assignedEscapedStackSlots.size();
        for (StackSlot* otherSlot : interferingSlots)
            otherSlots[nextIndex++] = otherSlot;

        assign(slot, otherSlots);
    }

    // Figure out how much stack we're using for stack slots.
    unsigned frameSizeForStackSlots = 0;
    for (StackSlot* slot : code.stackSlots()) {
        frameSizeForStackSlots = std::max(
            frameSizeForStackSlots,
            static_cast<unsigned>(-slot->offsetFromFP()));
    }

    frameSizeForStackSlots = WTF::roundUpToMultipleOf(stackAlignmentBytes(), frameSizeForStackSlots);

    // Now we need to deduce how much argument area we need.
    for (BasicBlock* block : code) {
        for (Inst& inst : *block) {
            for (Arg& arg : inst.args) {
                if (arg.isCallArg()) {
                    // For now, we assume that we use 8 bytes of the call arg. But that's not
                    // such an awesome assumption.
                    // FIXME: https://bugs.webkit.org/show_bug.cgi?id=150454
                    ASSERT(arg.offset() >= 0);
                    code.requestCallArgAreaSize(arg.offset() + 8);
                }
            }
        }
    }

    code.setFrameSize(frameSizeForStackSlots + code.callArgAreaSize());

    // Finally, transform the code to use Addr's instead of StackSlot's. This is a lossless
    // transformation since we can search the StackSlots array to figure out which StackSlot any
    // offset-from-FP refers to.

    // FIXME: This may produce addresses that aren't valid if we end up with a ginormous stack frame.
    // We would have to scavenge for temporaries if this happened. Fortunately, this case will be
    // extremely rare so we can do crazy things when it arises.
    // https://bugs.webkit.org/show_bug.cgi?id=152530

    InsertionSet insertionSet(code);
    for (BasicBlock* block : code) {
        for (unsigned instIndex = 0; instIndex < block->size(); ++instIndex) {
            Inst& inst = block->at(instIndex);
            inst.forEachArg(
                [&] (Arg& arg, Arg::Role role, Arg::Type, Arg::Width width) {
                    auto stackAddr = [&] (int32_t offset) -> Arg {
                        return Arg::stackAddr(offset, code.frameSize(), width);
                    };

                    switch (arg.kind()) {
                    case Arg::Stack: {
                        StackSlot* slot = arg.stackSlot();
                        if (Arg::isZDef(role)
                            && slot->kind() == StackSlotKind::Anonymous
                            && slot->byteSize() > Arg::bytes(width)) {
                            // Currently we only handle this simple case because it's the only one
                            // that arises: ZDef's are only 32-bit right now. So, when we hit these
                            // assertions it means that we need to implement those other kinds of
                            // zero fills.
                            RELEASE_ASSERT(slot->byteSize() == 8);
                            RELEASE_ASSERT(width == Arg::Width32);

                            RELEASE_ASSERT(isValidForm(StoreZero32, Arg::Stack));
                            insertionSet.insert(
                                instIndex + 1, StoreZero32, inst.origin,
                                stackAddr(arg.offset() + 4 + slot->offsetFromFP()));
                        }
                        arg = stackAddr(arg.offset() + slot->offsetFromFP());
                        break;
                    }
                    case Arg::CallArg:
                        arg = stackAddr(arg.offset() - code.frameSize());
                        break;
                    default:
                        break;
                    }
                });
        }
        insertionSet.execute(block);
    }
}
bool run()
{
    RELEASE_ASSERT(m_graph.m_plan.mode == FTLForOSREntryMode);
    RELEASE_ASSERT(m_graph.m_form == ThreadedCPS);

    unsigned bytecodeIndex = m_graph.m_plan.osrEntryBytecodeIndex;
    RELEASE_ASSERT(bytecodeIndex);
    RELEASE_ASSERT(bytecodeIndex != UINT_MAX);

    // Needed by createPreHeader().
    m_graph.ensureDominators();

    CodeBlock* baseline = m_graph.m_profiledBlock;

    BasicBlock* target = 0;
    for (unsigned blockIndex = m_graph.numBlocks(); blockIndex--;) {
        BasicBlock* block = m_graph.block(blockIndex);
        if (!block)
            continue;
        unsigned nodeIndex = 0;
        Node* firstNode = block->at(0);
        while (firstNode->isSemanticallySkippable())
            firstNode = block->at(++nodeIndex);
        if (firstNode->op() == LoopHint
            && firstNode->origin.semantic == CodeOrigin(bytecodeIndex)) {
            target = block;
            break;
        }
    }

    if (!target) {
        // This is a terrible outcome. It shouldn't often happen but it might
        // happen and so we should defend against it. If it happens, then this
        // compilation is a failure.
        return false;
    }

    BlockInsertionSet insertionSet(m_graph);

    // We say that the execution count of the entry block is 1, because we know for sure
    // that this must be the case. Under our definition of executionCount, "1" means "once
    // per invocation". We could have said NaN here, since that would ask any clients of
    // executionCount to use best judgement - but that seems unnecessary since we know for
    // sure what the executionCount should be in this case.
    BasicBlock* newRoot = insertionSet.insert(0, 1);

    // We'd really like to use an unset origin, but ThreadedCPS won't allow that.
    NodeOrigin origin = NodeOrigin(CodeOrigin(0), CodeOrigin(0), false);

    Vector<Node*> locals(baseline->m_numCalleeLocals);
    for (int local = 0; local < baseline->m_numCalleeLocals; ++local) {
        Node* previousHead = target->variablesAtHead.local(local);
        if (!previousHead)
            continue;
        VariableAccessData* variable = previousHead->variableAccessData();
        locals[local] = newRoot->appendNode(
            m_graph, variable->prediction(), ExtractOSREntryLocal, origin,
            OpInfo(variable->local().offset()));

        newRoot->appendNode(
            m_graph, SpecNone, MovHint, origin, OpInfo(variable->local().offset()),
            Edge(locals[local]));
    }

    // Now use the origin of the target, since it's not OK to exit, and we will probably hoist
    // type checks to here.
    origin = target->at(0)->origin;

    for (int argument = 0; argument < baseline->numParameters(); ++argument) {
        Node* oldNode = target->variablesAtHead.argument(argument);
        if (!oldNode) {
            // Just for sanity, always have a SetArgument even if it's not needed.
            oldNode = m_graph.m_arguments[argument];
        }
        Node* node = newRoot->appendNode(
            m_graph, SpecNone, SetArgument, origin,
            OpInfo(oldNode->variableAccessData()));
        m_graph.m_arguments[argument] = node;
    }

    for (int local = 0; local < baseline->m_numCalleeLocals; ++local) {
        Node* previousHead = target->variablesAtHead.local(local);
        if (!previousHead)
            continue;
        VariableAccessData* variable = previousHead->variableAccessData();
        Node* node = locals[local];
        newRoot->appendNode(
            m_graph, SpecNone, SetLocal, origin, OpInfo(variable), Edge(node));
    }

    newRoot->appendNode(
        m_graph, SpecNone, Jump, origin,
        OpInfo(createPreHeader(m_graph, insertionSet, target)));

    insertionSet.execute();
    m_graph.resetReachability();
    m_graph.killUnreachableBlocks();
    return true;
}
void demoteValues(Procedure& proc, const IndexSet<Value>& values)
{
    HashMap<Value*, StackSlotValue*> map;
    HashMap<Value*, StackSlotValue*> phiMap;

    // Create stack slots.
    InsertionSet insertionSet(proc);
    for (Value* value : values.values(proc.values())) {
        StackSlotValue* stack = insertionSet.insert<StackSlotValue>(
            0, value->origin(), sizeofType(value->type()), StackSlotKind::Anonymous);
        map.add(value, stack);

        if (value->opcode() == Phi) {
            StackSlotValue* phiStack = insertionSet.insert<StackSlotValue>(
                0, value->origin(), sizeofType(value->type()), StackSlotKind::Anonymous);
            phiMap.add(value, phiStack);
        }
    }
    insertionSet.execute(proc[0]);

    if (verbose) {
        dataLog("Demoting values as follows:\n");
        dataLog(" map = ");
        CommaPrinter comma;
        for (auto& entry : map)
            dataLog(comma, *entry.key, "=>", *entry.value);
        dataLog("\n");
        dataLog(" phiMap = ");
        comma = CommaPrinter();
        for (auto& entry : phiMap)
            dataLog(comma, *entry.key, "=>", *entry.value);
        dataLog("\n");
    }

    // Change accesses to the values to accesses to the stack slots.
    for (BasicBlock* block : proc) {
        for (unsigned valueIndex = 0; valueIndex < block->size(); ++valueIndex) {
            Value* value = block->at(valueIndex);

            if (value->opcode() == Phi) {
                if (StackSlotValue* stack = phiMap.get(value)) {
                    value->replaceWithIdentity(
                        insertionSet.insert<MemoryValue>(
                            valueIndex, Load, value->type(), value->origin(), stack));
                }
            } else {
                for (Value*& child : value->children()) {
                    if (StackSlotValue* stack = map.get(child)) {
                        child = insertionSet.insert<MemoryValue>(
                            valueIndex, Load, child->type(), value->origin(), stack);
                    }
                }

                if (UpsilonValue* upsilon = value->as<UpsilonValue>()) {
                    if (StackSlotValue* stack = phiMap.get(upsilon->phi())) {
                        insertionSet.insert<MemoryValue>(
                            valueIndex, Store, upsilon->origin(), upsilon->child(0), stack);
                        value->replaceWithNop();
                    }
                }
            }

            if (StackSlotValue* stack = map.get(value)) {
                insertionSet.insert<MemoryValue>(
                    valueIndex + 1, Store, value->origin(), value, stack);
            }
        }
        insertionSet.execute(block);
    }
}
bool fixSSA(Procedure& proc)
{
    PhaseScope phaseScope(proc, "fixSSA");

    // Collect the stack "variables". If there aren't any, then we don't have anything to do.
    // That's a fairly common case.
    HashMap<StackSlotValue*, Type> stackVariable;
    for (Value* value : proc.values()) {
        if (StackSlotValue* stack = value->as<StackSlotValue>()) {
            if (stack->kind() == StackSlotKind::Anonymous)
                stackVariable.add(stack, Void);
        }
    }

    if (stackVariable.isEmpty())
        return false;

    // Make sure that we know how to optimize all of these. We only know how to handle Load and
    // Store on anonymous variables.
    for (Value* value : proc.values()) {
        auto reject = [&] (Value* value) {
            if (StackSlotValue* stack = value->as<StackSlotValue>())
                stackVariable.remove(stack);
        };

        auto handleAccess = [&] (Value* access, Type type) {
            StackSlotValue* stack = access->lastChild()->as<StackSlotValue>();
            if (!stack)
                return;

            if (value->as<MemoryValue>()->offset()) {
                stackVariable.remove(stack);
                return;
            }

            auto result = stackVariable.find(stack);
            if (result == stackVariable.end())
                return;
            if (result->value == Void) {
                result->value = type;
                return;
            }
            if (result->value == type)
                return;
            stackVariable.remove(result);
        };

        switch (value->opcode()) {
        case Load:
            // We're OK with loads from stack variables at an offset of zero.
            handleAccess(value, value->type());
            break;
        case Store:
            // We're OK with stores to stack variables, but not storing stack variables.
            reject(value->child(0));
            handleAccess(value, value->child(0)->type());
            break;
        default:
            for (Value* child : value->children())
                reject(child);
            break;
        }
    }

    Vector<StackSlotValue*> deadValues;
    for (auto& entry : stackVariable) {
        if (entry.value == Void)
            deadValues.append(entry.key);
    }

    for (StackSlotValue* deadValue : deadValues) {
        deadValue->replaceWithNop();
        stackVariable.remove(deadValue);
    }

    if (stackVariable.isEmpty())
        return false;

    // We know that we have variables to optimize, so do that now.
    breakCriticalEdges(proc);

    SSACalculator ssa(proc);

    // Create a SSACalculator::Variable for every stack variable.
    Vector<StackSlotValue*> variableToStack;
    HashMap<StackSlotValue*, SSACalculator::Variable*> stackToVariable;
    for (auto& entry : stackVariable) {
        StackSlotValue* stack = entry.key;
        SSACalculator::Variable* variable = ssa.newVariable();
        RELEASE_ASSERT(variable->index() == variableToStack.size());
        variableToStack.append(stack);
        stackToVariable.add(stack, variable);
    }

    // Create Defs for all of the stores to the stack variable.
    for (BasicBlock* block : proc) {
        for (Value* value : *block) {
            if (value->opcode() != Store)
                continue;

            StackSlotValue* stack = value->child(1)->as<StackSlotValue>();
            if (!stack)
                continue;

            if (SSACalculator::Variable* variable = stackToVariable.get(stack))
                ssa.newDef(variable, block, value->child(0));
        }
    }

    // Decide where Phis are to be inserted. This creates them but does not insert them.
    ssa.computePhis(
        [&] (SSACalculator::Variable* variable, BasicBlock* block) -> Value* {
            StackSlotValue* stack = variableToStack[variable->index()];
            Value* phi = proc.add<Value>(Phi, stackVariable.get(stack), stack->origin());
            if (verbose) {
                dataLog(
                    "Adding Phi for ", pointerDump(stack), " at ", *block, ": ",
                    deepDump(proc, phi), "\n");
            }
            return phi;
        });

    // Now perform the conversion.
    InsertionSet insertionSet(proc);
    HashMap<StackSlotValue*, Value*> mapping;
    for (BasicBlock* block : proc.blocksInPreOrder()) {
        mapping.clear();

        for (auto& entry : stackToVariable) {
            StackSlotValue* stack = entry.key;
            SSACalculator::Variable* variable = entry.value;

            SSACalculator::Def* def = ssa.reachingDefAtHead(block, variable);
            if (def)
                mapping.set(stack, def->value());
        }

        for (SSACalculator::Def* phiDef : ssa.phisForBlock(block)) {
            StackSlotValue* stack = variableToStack[phiDef->variable()->index()];

            insertionSet.insertValue(0, phiDef->value());
            mapping.set(stack, phiDef->value());
        }

        for (unsigned valueIndex = 0; valueIndex < block->size(); ++valueIndex) {
            Value* value = block->at(valueIndex);
            value->performSubstitution();

            switch (value->opcode()) {
            case Load: {
                if (StackSlotValue* stack = value->child(0)->as<StackSlotValue>()) {
                    if (Value* replacement = mapping.get(stack))
                        value->replaceWithIdentity(replacement);
                }
                break;
            }

            case Store: {
                if (StackSlotValue* stack = value->child(1)->as<StackSlotValue>()) {
                    if (stackToVariable.contains(stack)) {
                        mapping.set(stack, value->child(0));
                        value->replaceWithNop();
                    }
                }
                break;
            }

            default:
                break;
            }
        }

        unsigned upsilonInsertionPoint = block->size() - 1;
        Origin upsilonOrigin = block->last()->origin();
        for (BasicBlock* successorBlock : block->successorBlocks()) {
            for (SSACalculator::Def* phiDef : ssa.phisForBlock(successorBlock)) {
                Value* phi = phiDef->value();
                SSACalculator::Variable* variable = phiDef->variable();
                StackSlotValue* stack = variableToStack[variable->index()];
                Value* mappedValue = mapping.get(stack);
                if (verbose) {
                    dataLog(
                        "Mapped value for ", *stack, " with successor Phi ", *phi,
                        " at end of ", *block, ": ", pointerDump(mappedValue), "\n");
                }

                if (!mappedValue)
                    mappedValue = insertionSet.insertBottom(upsilonInsertionPoint, phi);

                insertionSet.insert<UpsilonValue>(
                    upsilonInsertionPoint, upsilonOrigin, mappedValue, phi);
            }
        }

        insertionSet.execute(block);
    }

    // Finally, kill the stack slots.
    for (StackSlotValue* stack : variableToStack)
        stack->replaceWithNop();

    if (verbose) {
        dataLog("B3 after SSA conversion:\n");
        dataLog(proc);
    }

    return true;
}
bool run()
{
    RELEASE_ASSERT(m_graph.m_plan.mode == DFGMode);

    if (!Options::useFTLJIT())
        return false;

    if (m_graph.m_profiledBlock->m_didFailFTLCompilation)
        return false;

    if (!Options::bytecodeRangeToFTLCompile().isInRange(m_graph.m_profiledBlock->instructionCount()))
        return false;

#if ENABLE(FTL_JIT)
    FTL::CapabilityLevel level = FTL::canCompile(m_graph);
    if (level == FTL::CannotCompile)
        return false;

    if (!Options::useOSREntryToFTL())
        level = FTL::CanCompile;

    m_graph.ensureNaturalLoops();
    NaturalLoops& naturalLoops = *m_graph.m_naturalLoops;
    HashMap<const NaturalLoop*, unsigned> naturalLoopToLoopHint = buildNaturalLoopToLoopHintMap(naturalLoops);

    HashMap<unsigned, LoopHintDescriptor> tierUpHierarchy;

    InsertionSet insertionSet(m_graph);
    for (BlockIndex blockIndex = m_graph.numBlocks(); blockIndex--;) {
        BasicBlock* block = m_graph.block(blockIndex);
        if (!block)
            continue;

        for (unsigned nodeIndex = 0; nodeIndex < block->size(); ++nodeIndex) {
            Node* node = block->at(nodeIndex);
            if (node->op() != LoopHint)
                continue;

            NodeOrigin origin = node->origin;
            bool canOSREnter = canOSREnterAtLoopHint(level, block, nodeIndex);

            NodeType tierUpType = CheckTierUpAndOSREnter;
            if (!canOSREnter)
                tierUpType = CheckTierUpInLoop;
            insertionSet.insertNode(nodeIndex + 1, SpecNone, tierUpType, origin);

            unsigned bytecodeIndex = origin.semantic.bytecodeIndex;
            if (canOSREnter)
                m_graph.m_plan.tierUpAndOSREnterBytecodes.append(bytecodeIndex);

            if (const NaturalLoop* loop = naturalLoops.innerMostLoopOf(block)) {
                LoopHintDescriptor descriptor;
                descriptor.canOSREnter = canOSREnter;

                const NaturalLoop* outerLoop = loop;
                while ((outerLoop = naturalLoops.innerMostOuterLoop(*outerLoop))) {
                    auto it = naturalLoopToLoopHint.find(outerLoop);
                    if (it != naturalLoopToLoopHint.end())
                        descriptor.osrEntryCandidates.append(it->value);
                }
                if (!descriptor.osrEntryCandidates.isEmpty())
                    tierUpHierarchy.add(bytecodeIndex, WTFMove(descriptor));
            }
            break;
        }

        NodeAndIndex terminal = block->findTerminal();
        if (terminal.node->isFunctionTerminal()) {
            insertionSet.insertNode(
                terminal.index, SpecNone, CheckTierUpAtReturn, terminal.node->origin);
        }

        insertionSet.execute(block);
    }

    // Add all the candidates that can be OSR Entered.
    for (auto entry : tierUpHierarchy) {
        Vector<unsigned> tierUpCandidates;
        for (unsigned bytecodeIndex : entry.value.osrEntryCandidates) {
            auto descriptorIt = tierUpHierarchy.find(bytecodeIndex);
            if (descriptorIt != tierUpHierarchy.end()
                && descriptorIt->value.canOSREnter)
                tierUpCandidates.append(bytecodeIndex);
        }

        if (!tierUpCandidates.isEmpty())
            m_graph.m_plan.tierUpInLoopHierarchy.add(entry.key, WTFMove(tierUpCandidates));
    }
    m_graph.m_plan.willTryToTierUp = true;
    return true;
#else // ENABLE(FTL_JIT)
    RELEASE_ASSERT_NOT_REACHED();
    return false;
#endif // ENABLE(FTL_JIT)
}
void spillEverything(Code& code) { PhaseScope phaseScope(code, "spillEverything"); // We want to know the set of registers used at every point in every basic block. IndexMap<BasicBlock, Vector<RegisterSet>> usedRegisters(code.size()); GPLiveness gpLiveness(code); FPLiveness fpLiveness(code); for (BasicBlock* block : code) { GPLiveness::LocalCalc gpLocalCalc(gpLiveness, block); FPLiveness::LocalCalc fpLocalCalc(fpLiveness, block); usedRegisters[block].resize(block->size() + 1); auto setUsedRegisters = [&] (unsigned index) { RegisterSet& registerSet = usedRegisters[block][index]; for (Tmp tmp : gpLocalCalc.live()) { if (tmp.isReg()) registerSet.set(tmp.reg()); } for (Tmp tmp : fpLocalCalc.live()) { if (tmp.isReg()) registerSet.set(tmp.reg()); } // Gotta account for dead assignments to registers. These may happen because the input // code is suboptimal. Inst::forEachDefWithExtraClobberedRegs<Tmp>( block->get(index - 1), block->get(index), [&] (const Tmp& tmp, Arg::Role, Arg::Type, Arg::Width) { if (tmp.isReg()) registerSet.set(tmp.reg()); }); }; for (unsigned instIndex = block->size(); instIndex--;) { setUsedRegisters(instIndex + 1); gpLocalCalc.execute(instIndex); fpLocalCalc.execute(instIndex); } setUsedRegisters(0); } // Allocate a stack slot for each tmp. Vector<StackSlot*> allStackSlots[Arg::numTypes]; for (unsigned typeIndex = 0; typeIndex < Arg::numTypes; ++typeIndex) { Vector<StackSlot*>& stackSlots = allStackSlots[typeIndex]; Arg::Type type = static_cast<Arg::Type>(typeIndex); stackSlots.resize(code.numTmps(type)); for (unsigned tmpIndex = code.numTmps(type); tmpIndex--;) stackSlots[tmpIndex] = code.addStackSlot(8, StackSlotKind::Anonymous); } InsertionSet insertionSet(code); for (BasicBlock* block : code) { for (unsigned instIndex = 0; instIndex < block->size(); ++instIndex) { RegisterSet& setBefore = usedRegisters[block][instIndex]; RegisterSet& setAfter = usedRegisters[block][instIndex + 1]; Inst& inst = block->at(instIndex); // First try to spill directly. for (unsigned i = 0; i < inst.args.size(); ++i) { Arg& arg = inst.args[i]; if (arg.isTmp()) { if (arg.isReg()) continue; if (inst.admitsStack(i)) { StackSlot* stackSlot = allStackSlots[arg.type()][arg.tmpIndex()]; arg = Arg::stack(stackSlot); continue; } } } // Now fall back on spilling using separate Move's to load/store the tmp. inst.forEachTmp( [&] (Tmp& tmp, Arg::Role role, Arg::Type type, Arg::Width) { if (tmp.isReg()) return; StackSlot* stackSlot = allStackSlots[type][tmp.tmpIndex()]; Arg arg = Arg::stack(stackSlot); // Need to figure out a register to use. How we do that depends on the role. Reg chosenReg; switch (role) { case Arg::Use: case Arg::ColdUse: for (Reg reg : regsInPriorityOrder(type)) { if (!setBefore.get(reg)) { setBefore.set(reg); chosenReg = reg; break; } } break; case Arg::Def: case Arg::ZDef: for (Reg reg : regsInPriorityOrder(type)) { if (!setAfter.get(reg)) { setAfter.set(reg); chosenReg = reg; break; } } break; case Arg::UseDef: case Arg::UseZDef: case Arg::LateUse: case Arg::LateColdUse: case Arg::Scratch: case Arg::EarlyDef: for (Reg reg : regsInPriorityOrder(type)) { if (!setBefore.get(reg) && !setAfter.get(reg)) { setAfter.set(reg); setBefore.set(reg); chosenReg = reg; break; } } break; case Arg::UseAddr: // We will never UseAddr a Tmp, that doesn't make sense. RELEASE_ASSERT_NOT_REACHED(); break; } RELEASE_ASSERT(chosenReg); tmp = Tmp(chosenReg); Opcode move = type == Arg::GP ? 
Move : MoveDouble; if (Arg::isAnyUse(role) && role != Arg::Scratch) insertionSet.insert(instIndex, move, inst.origin, arg, tmp); if (Arg::isAnyDef(role)) insertionSet.insert(instIndex + 1, move, inst.origin, tmp, arg); }); } insertionSet.execute(block); } }
bool run()
{
    ASSERT(m_graph.m_form == SSA);

    m_graph.clearFlagsOnAllNodes(NodeNeedsPhantom | NodeNeedsHardPhantom | NodeRelevantToOSR);

    for (BlockIndex blockIndex = m_graph.numBlocks(); blockIndex--;) {
        BasicBlock* block = m_graph.block(blockIndex);
        if (!block)
            continue;
        for (unsigned i = block->size(); i--;) {
            Node* node = block->at(i);
            if (node->op() == MovHint)
                node->child1()->mergeFlags(NodeRelevantToOSR);
        }
    }

    for (BlockIndex blockIndex = m_graph.numBlocks(); blockIndex--;) {
        BasicBlock* block = m_graph.block(blockIndex);
        if (!block)
            continue;

        unsigned sourceIndex = 0;
        unsigned targetIndex = 0;
        while (sourceIndex < block->size()) {
            Node* node = block->at(sourceIndex++);
            if (node->op() == HardPhantom || node->op() == Phantom || node->op() == Check) {
                for (unsigned i = 0; i < AdjacencyList::Size; ++i) {
                    Edge edge = node->children.child(i);
                    if (!edge)
                        break;
                    if (node->op() == HardPhantom)
                        edge->mergeFlags(NodeNeedsHardPhantom);
                    if ((edge->flags() & NodeRelevantToOSR) && node->op() == Phantom) {
                        // A Phantom on a node that is RelevantToOSR means that we need to keep
                        // a Phantom on this node instead of just having a Check.
                        edge->mergeFlags(NodeNeedsPhantom);
                    }
                    if (edge.willHaveCheck())
                        continue; // Keep the type check.
                    node->children.removeEdge(i--);
                }
                if (node->children.isEmpty()) {
                    m_graph.m_allocator.free(node);
                    continue;
                }
                node->convertToCheck();
            }
            block->at(targetIndex++) = node;
        }
        block->resize(targetIndex);
    }

    InsertionSet insertionSet(m_graph);
    for (BlockIndex blockIndex = m_graph.numBlocks(); blockIndex--;) {
        BasicBlock* block = m_graph.block(blockIndex);
        if (!block)
            continue;

        for (unsigned nodeIndex = 0; nodeIndex < block->size(); ++nodeIndex) {
            Node* node = block->at(nodeIndex);
            if (node->flags() & NodeNeedsHardPhantom) {
                insertionSet.insertNode(
                    nodeIndex + 1, SpecNone, HardPhantom, node->origin, node->defaultEdge());
            } else if (node->flags() & NodeNeedsPhantom) {
                insertionSet.insertNode(
                    nodeIndex + 1, SpecNone, Phantom, node->origin, node->defaultEdge());
            }
        }
        insertionSet.execute(block);
    }

    return true;
}
void fixPartialRegisterStalls(Code& code)
{
    if (!isX86())
        return;

    PhaseScope phaseScope(code, "fixPartialRegisterStalls");

    Vector<BasicBlock*> candidates;

    for (BasicBlock* block : code) {
        for (const Inst& inst : *block) {
            if (hasPartialXmmRegUpdate(inst)) {
                candidates.append(block);
                break;
            }
        }
    }

    // Fortunately, partial register stalls are rare. Return early if no block
    // cares about them.
    if (candidates.isEmpty())
        return;

    // For each block, this provides the distance to the last instruction setting each register
    // on block *entry*.
    IndexMap<BasicBlock, FPDefDistance> lastDefDistance(code.size());

    // Blocks with dirty distance at head.
    IndexSet<BasicBlock> dirty;

    // First, we compute the local distance for each block and push it to the successors.
    for (BasicBlock* block : code) {
        FPDefDistance localDistance;

        unsigned distanceToBlockEnd = block->size();
        for (Inst& inst : *block)
            updateDistances(inst, localDistance, distanceToBlockEnd);

        for (BasicBlock* successor : block->successorBlocks()) {
            if (lastDefDistance[successor].updateFromPrecessor(localDistance))
                dirty.add(successor);
        }
    }

    // Now we propagate the minimums across blocks.
    bool changed;
    do {
        changed = false;

        for (BasicBlock* block : code) {
            if (!dirty.remove(block))
                continue;

            // Little shortcut: if the block is big enough, propagating it won't add any information.
            if (block->size() >= minimumSafeDistance)
                continue;

            unsigned blockSize = block->size();
            FPDefDistance& blockDistance = lastDefDistance[block];
            for (BasicBlock* successor : block->successorBlocks()) {
                if (lastDefDistance[successor].updateFromPrecessor(blockDistance, blockSize)) {
                    dirty.add(successor);
                    changed = true;
                }
            }
        }
    } while (changed);

    // Finally, update each block as needed.
    InsertionSet insertionSet(code);
    for (BasicBlock* block : candidates) {
        unsigned distanceToBlockEnd = block->size();
        FPDefDistance& localDistance = lastDefDistance[block];

        for (unsigned i = 0; i < block->size(); ++i) {
            Inst& inst = block->at(i);

            if (hasPartialXmmRegUpdate(inst)) {
                RegisterSet defs;
                RegisterSet uses;
                inst.forEachTmp([&] (Tmp& tmp, Arg::Role role, Arg::Type) {
                    if (tmp.isFPR()) {
                        if (Arg::isDef(role))
                            defs.set(tmp.fpr());
                        if (Arg::isAnyUse(role))
                            uses.set(tmp.fpr());
                    }
                });

                // We only care about values we define but not use. Otherwise we have to wait
                // for the value to be resolved anyway.
                defs.exclude(uses);

                defs.forEach([&] (Reg reg) {
                    if (localDistance.distance[MacroAssembler::fpRegisterIndex(reg.fpr())] < minimumSafeDistance)
                        insertionSet.insert(i, MoveZeroToDouble, inst.origin, Tmp(reg));
                });
            }

            updateDistances(inst, localDistance, distanceToBlockEnd);
        }
        insertionSet.execute(block);
    }
}
bool run()
{
    RELEASE_ASSERT(m_graph.m_plan.mode == DFGMode);

    if (!Options::useFTLJIT())
        return false;

    if (m_graph.m_profiledBlock->m_didFailFTLCompilation)
        return false;

    if (!Options::bytecodeRangeToFTLCompile().isInRange(m_graph.m_profiledBlock->instructionCount()))
        return false;

#if ENABLE(FTL_JIT)
    FTL::CapabilityLevel level = FTL::canCompile(m_graph);
    if (level == FTL::CannotCompile)
        return false;

    if (!Options::useOSREntryToFTL())
        level = FTL::CanCompile;

    // First we find all the loops that contain a LoopHint for which we cannot OSR enter.
    // We use that information to decide if we need CheckTierUpAndOSREnter or CheckTierUpWithNestedTriggerAndOSREnter.
    m_graph.ensureNaturalLoops();
    NaturalLoops& naturalLoops = *m_graph.m_naturalLoops;

    HashSet<const NaturalLoop*> loopsContainingLoopHintWithoutOSREnter = findLoopsContainingLoopHintWithoutOSREnter(naturalLoops, level);

    bool canTierUpAndOSREnter = false;

    InsertionSet insertionSet(m_graph);
    for (BlockIndex blockIndex = m_graph.numBlocks(); blockIndex--;) {
        BasicBlock* block = m_graph.block(blockIndex);
        if (!block)
            continue;

        for (unsigned nodeIndex = 0; nodeIndex < block->size(); ++nodeIndex) {
            Node* node = block->at(nodeIndex);
            if (node->op() != LoopHint)
                continue;

            NodeOrigin origin = node->origin;
            if (canOSREnterAtLoopHint(level, block, nodeIndex)) {
                canTierUpAndOSREnter = true;
                const NaturalLoop* loop = naturalLoops.innerMostLoopOf(block);
                if (loop && loopsContainingLoopHintWithoutOSREnter.contains(loop))
                    insertionSet.insertNode(nodeIndex + 1, SpecNone, CheckTierUpWithNestedTriggerAndOSREnter, origin);
                else
                    insertionSet.insertNode(nodeIndex + 1, SpecNone, CheckTierUpAndOSREnter, origin);
            } else
                insertionSet.insertNode(nodeIndex + 1, SpecNone, CheckTierUpInLoop, origin);
            break;
        }

        NodeAndIndex terminal = block->findTerminal();
        if (terminal.node->isFunctionTerminal()) {
            insertionSet.insertNode(
                terminal.index, SpecNone, CheckTierUpAtReturn, terminal.node->origin);
        }

        insertionSet.execute(block);
    }

    m_graph.m_plan.canTierUpAndOSREnter = canTierUpAndOSREnter;
    m_graph.m_plan.willTryToTierUp = true;
    return true;
#else // ENABLE(FTL_JIT)
    RELEASE_ASSERT_NOT_REACHED();
    return false;
#endif // ENABLE(FTL_JIT)
}
bool run()
{
    RELEASE_ASSERT(m_graph.m_plan.mode == DFGMode);

    if (!Options::useFTLJIT())
        return false;

    if (m_graph.m_profiledBlock->m_didFailFTLCompilation)
        return false;

#if ENABLE(FTL_JIT)
    FTL::CapabilityLevel level = FTL::canCompile(m_graph);
    if (level == FTL::CannotCompile)
        return false;

    if (!Options::enableOSREntryToFTL())
        level = FTL::CanCompile;

    InsertionSet insertionSet(m_graph);
    for (BlockIndex blockIndex = m_graph.numBlocks(); blockIndex--;) {
        BasicBlock* block = m_graph.block(blockIndex);
        if (!block)
            continue;

        for (unsigned nodeIndex = 0; nodeIndex < block->size(); ++nodeIndex) {
            Node* node = block->at(nodeIndex);
            if (node->op() != LoopHint)
                continue;

            // We only put OSR checks for the first LoopHint in the block. Note that
            // more than one LoopHint could happen in cases where we did a lot of CFG
            // simplification in the bytecode parser, but it should be very rare.

            NodeOrigin origin = node->origin;
            if (level != FTL::CanCompileAndOSREnter || origin.semantic.inlineCallFrame) {
                insertionSet.insertNode(
                    nodeIndex + 1, SpecNone, CheckTierUpInLoop, origin);
                break;
            }

            bool isAtTop = true;
            for (unsigned subNodeIndex = nodeIndex; subNodeIndex--;) {
                if (!block->at(subNodeIndex)->isSemanticallySkippable()) {
                    isAtTop = false;
                    break;
                }
            }

            if (!isAtTop) {
                insertionSet.insertNode(
                    nodeIndex + 1, SpecNone, CheckTierUpInLoop, origin);
                break;
            }

            insertionSet.insertNode(
                nodeIndex + 1, SpecNone, CheckTierUpAndOSREnter, origin);
            break;
        }

        NodeAndIndex terminal = block->findTerminal();
        if (terminal.node->op() == Return) {
            insertionSet.insertNode(
                terminal.index, SpecNone, CheckTierUpAtReturn, terminal.node->origin);
        }

        insertionSet.execute(block);
    }

    m_graph.m_plan.willTryToTierUp = true;
    return true;
#else // ENABLE(FTL_JIT)
    RELEASE_ASSERT_NOT_REACHED();
    return false;
#endif // ENABLE(FTL_JIT)
}
bool run()
{
    RELEASE_ASSERT(m_graph.m_plan.mode == FTLForOSREntryMode);
    RELEASE_ASSERT(m_graph.m_form == ThreadedCPS);

    unsigned bytecodeIndex = m_graph.m_plan.osrEntryBytecodeIndex;
    RELEASE_ASSERT(bytecodeIndex);
    RELEASE_ASSERT(bytecodeIndex != UINT_MAX);

    // Needed by createPreHeader().
    m_graph.m_dominators.computeIfNecessary(m_graph);

    CodeBlock* baseline = m_graph.m_profiledBlock;

    BasicBlock* target = 0;
    for (unsigned blockIndex = m_graph.numBlocks(); blockIndex--;) {
        BasicBlock* block = m_graph.block(blockIndex);
        if (!block)
            continue;
        unsigned nodeIndex = 0;
        Node* firstNode = block->at(0);
        while (firstNode->isSemanticallySkippable())
            firstNode = block->at(++nodeIndex);
        if (firstNode->op() == LoopHint
            && firstNode->origin.semantic == CodeOrigin(bytecodeIndex)) {
            target = block;
            break;
        }
    }

    if (!target) {
        // This is a terrible outcome. It shouldn't often happen but it might
        // happen and so we should defend against it. If it happens, then this
        // compilation is a failure.
        return false;
    }

    BlockInsertionSet insertionSet(m_graph);

    BasicBlock* newRoot = insertionSet.insert(0, QNaN);
    NodeOrigin origin = target->at(0)->origin;

    Vector<Node*> locals(baseline->m_numCalleeRegisters);
    for (int local = 0; local < baseline->m_numCalleeRegisters; ++local) {
        Node* previousHead = target->variablesAtHead.local(local);
        if (!previousHead)
            continue;
        VariableAccessData* variable = previousHead->variableAccessData();
        locals[local] = newRoot->appendNode(
            m_graph, variable->prediction(), ExtractOSREntryLocal, origin,
            OpInfo(variable->local().offset()));

        newRoot->appendNode(
            m_graph, SpecNone, MovHint, origin, OpInfo(variable->local().offset()),
            Edge(locals[local]));
    }

    for (int argument = 0; argument < baseline->numParameters(); ++argument) {
        Node* oldNode = target->variablesAtHead.argument(argument);
        if (!oldNode) {
            // Just for sanity, always have a SetArgument even if it's not needed.
            oldNode = m_graph.m_arguments[argument];
        }
        Node* node = newRoot->appendNode(
            m_graph, SpecNone, SetArgument, origin,
            OpInfo(oldNode->variableAccessData()));
        m_graph.m_arguments[argument] = node;
    }

    for (int local = 0; local < baseline->m_numCalleeRegisters; ++local) {
        Node* previousHead = target->variablesAtHead.local(local);
        if (!previousHead)
            continue;
        VariableAccessData* variable = previousHead->variableAccessData();
        Node* node = locals[local];
        newRoot->appendNode(
            m_graph, SpecNone, SetLocal, origin, OpInfo(variable), Edge(node));
    }

    newRoot->appendNode(
        m_graph, SpecNone, Jump, origin,
        OpInfo(createPreHeader(m_graph, insertionSet, target)));

    insertionSet.execute();
    m_graph.resetReachability();
    m_graph.killUnreachableBlocks();
    return true;
}