PhysReg forceAlloc(const SSATmp& tmp) {
  if (tmp.type() <= TBottom) return InvalidReg;

  auto inst = tmp.inst();
  auto opc = inst->op();

  auto const forceStkPtrs = [&] {
    switch (arch()) {
    case Arch::X64:   return false;
    case Arch::ARM:   return true;
    case Arch::PPC64: not_implemented(); break;
    }
    not_reached();
  }();

  if (forceStkPtrs && tmp.isA(TStkPtr)) {
    assert_flog(
      opc == DefSP || opc == Mov,
      "unexpected StkPtr dest from {}",
      opcodeName(opc)
    );
    return rvmsp();
  }

  // LdContActRec and LdAFWHActRec, which load a generator's AR, are the only
  // cases where we have a pointer to an AR that is not in rvmfp().
  if (opc != LdContActRec && opc != LdAFWHActRec && tmp.isA(TFramePtr)) {
    return rvmfp();
  }

  return InvalidReg;
}
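// A minimal sketch of how an allocator pass might consult forceAlloc() before
// falling back to general assignment. The caller shape and the helpers
// assignReg/assignFromFreeList are assumptions for illustration only, not the
// actual HHVM register-allocator API; the point is just that forceAlloc()
// returns a pinned ABI register for StkPtr/FramePtr dests and InvalidReg
// otherwise.
void assignDst(const SSATmp& dst) {
  auto const forced = forceAlloc(dst);
  if (forced != InvalidReg) {
    assignReg(dst, forced);        // pinned to rvmsp()/rvmfp()
  } else {
    assignFromFreeList(dst);       // normal allocation path
  }
}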
SSATmp* IRBuilder::preOptimizeCheckType(IRInstruction* inst) {
  SSATmp* src = inst->src(0);
  auto const oldType = src->type();
  auto const newType = inst->typeParam();

  if (oldType.isBoxed() && newType.isBoxed() &&
      (oldType.not(newType) || newType < oldType)) {
    /* This CheckType serves to update the inner type hint for a boxed value,
     * which requires no runtime work. This depends on the type being boxed,
     * and constraining it with DataTypeCountness will do it. */
    constrainValue(src, DataTypeCountness);
    return gen(AssertType, newType, src);
  }

  if (oldType.not(newType)) {
    /* This check will always fail. It's probably due to an incorrect
     * prediction. Generate a Jmp, and return src because
     * following instructions may depend on the output of CheckType
     * (they'll be DCEd later). Note that we can't use convertToJmp
     * because the return value isn't nullptr, so the original
     * instruction won't be inserted into the stream. */
    gen(Jmp, inst->taken());
    return src;
  }

  if (newType >= oldType) {
    /* The type of the src is the same or more refined than type, so the guard
     * is unnecessary. */
    return src;
  }

  return nullptr;
}
/*
 * Stores a ref (boxed value) to a local. Also handles unsetting a local.
 */
void TraceBuilder::genBindLoc(uint32_t id,
                              SSATmp* newValue,
                              bool doRefCount /* = true */) {
  Type trackedType = getLocalType(id);
  SSATmp* prevValue = nullptr;
  if (trackedType == Type::None) {
    if (doRefCount) {
      prevValue = gen(LdLoc, Type::Gen, LocalId(id), m_fpValue);
    }
  } else {
    prevValue = getLocalValue(id);
    assert(prevValue == nullptr || prevValue->type() == trackedType);
    if (prevValue == newValue) {
      // Silent store: the local already contains the value being stored.
      // newValue still needs to be decref'ed.
      if (!trackedType.notCounted() && doRefCount) {
        gen(DecRef, prevValue);
      }
      return;
    }
    if (trackedType.maybeCounted() && !prevValue && doRefCount) {
      prevValue = gen(LdLoc, trackedType, LocalId(id), m_fpValue);
    }
  }

  bool genStoreType = true;
  if ((trackedType.isBoxed() && newValue->type().isBoxed()) ||
      (trackedType == newValue->type() && !trackedType.isString())) {
    // No need to store the type with the local value.
    genStoreType = false;
  }
  gen(genStoreType ? StLoc : StLocNT, LocalId(id), m_fpValue, newValue);

  if (trackedType.maybeCounted() && doRefCount) {
    gen(DecRef, prevValue);
  }
}
SSATmp* TraceBuilder::genLdLocAsCell(uint32_t id, Trace* exitTrace) {
  SSATmp* tmp = genLdLoc(id);
  Type type = tmp->type();
  assert(type.isBoxed() || type.notBoxed());
  if (!type.isBoxed()) {
    return tmp;
  }
  // Unbox tmp into a cell via a LdRef
  return gen(LdRef, type.innerType(), exitTrace, tmp);
}
/*
 * Store a cell value to a local that might be boxed.
 */
SSATmp* TraceBuilder::genStLoc(uint32_t id,
                               SSATmp* newValue,
                               bool doRefCount,
                               bool genStoreType,
                               Trace* exit) {
  assert(!newValue->type().isBoxed());
  /*
   * If prior value of local is a cell, then re-use genBindLoc.
   * Otherwise, if prior value of local is a ref:
   *
   *   prevLocValue = LdLoc<T>{id} fp
   *   prevValue = LdRef [prevLocValue]
   *   newRef = StRef [prevLocValue], newValue
   *   DecRef prevValue
   *   -- track local value in newRef
   */
  Type trackedType = getLocalType(id);
  assert(trackedType != Type::None);  // tracelet guards guarantee a type
  if (trackedType.notBoxed()) {
    SSATmp* retVal = doRefCount ? gen(IncRef, newValue) : newValue;
    genBindLoc(id, newValue, doRefCount);
    return retVal;
  }
  assert(trackedType.isBoxed());
  SSATmp* prevRef = getLocalValue(id);
  assert(prevRef == nullptr || prevRef->type() == trackedType);
  // prevRef is a ref
  if (prevRef == nullptr) {
    // prevRef = ldLoc
    prevRef = gen(LdLoc, trackedType, LocalId(id), m_fpValue);
  }
  SSATmp* prevValue = nullptr;
  if (doRefCount) {
    assert(exit);
    Type innerType = trackedType.innerType();
    prevValue = gen(LdRef, innerType, exit, prevRef);
  }
  // stref [prevRef] = t1
  Opcode opc = genStoreType ? StRef : StRefNT;
  gen(opc, prevRef, newValue);

  SSATmp* retVal = newValue;
  if (doRefCount) {
    retVal = gen(IncRef, newValue);
    gen(DecRef, prevValue);
  }
  return retVal;
}
SSATmp* TraceBuilder::genBoxLoc(uint32_t id) {
  SSATmp* prevValue = genLdLoc(id);
  Type prevType = prevValue->type();
  // Don't box if the local's value is already boxed.
  if (prevType.isBoxed()) {
    return prevValue;
  }
  assert(prevType.notBoxed());
  // The Box helper requires us to incref the value it's boxing, but in
  // this case we don't need to incref prevValue because we are simply
  // transferring its refcount from the local to the box.
  if (prevValue->isA(Type::Uninit)) {
    // No box can ever contain Uninit, so promote it to InitNull here.
    prevValue = genDefInitNull();
  }
  SSATmp* newValue = gen(Box, prevValue);
  gen(StLoc, LocalId(id), m_fpValue, newValue);
  return newValue;
}
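// A sketch of the HHIR this path produces for a local whose current value is
// unboxed. The spellings below are approximate, derived directly from the
// gen() calls above, not exact IR-printer output:
//
//   t1 = LdLoc<T> loc, fp      (via genLdLoc)
//   t2 = Box t1                (t1 is first promoted to InitNull if Uninit)
//        StLoc<loc> fp, t2
//
// The box takes over the reference the local held on t1, which is why no
// IncRef is emitted here.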
void CodeGenerator::cgIncRef(IRInstruction* inst) {
  SSATmp* src = inst->src(0);
  auto loc = srcLoc(0);
  Type type = src->type();
  if (type.notCounted()) return;

  auto increfMaybeStatic = [&](Vout& v) {
    auto base = loc.reg(0);
    auto rCount = v.makeReg();
    v << loadl{base[FAST_REFCOUNT_OFFSET], rCount};
    if (!type.needsStaticBitCheck()) {
      auto count1 = v.makeReg();
      v << addli{1, rCount, count1, v.makeReg()};
      v << storel{count1, base[FAST_REFCOUNT_OFFSET]};
    } else {
      auto const sf = v.makeReg();
      v << cmpli{0, rCount, sf};
      static_assert(UncountedValue < 0 && StaticValue < 0, "");
      ifThen(v, CC_GE, sf, [&](Vout& v) {
        auto count1 = v.makeReg();
        v << addli{1, rCount, count1, v.makeReg()};
        v << storel{count1, base[FAST_REFCOUNT_OFFSET]};
      });
    }
  };

  auto& v = vmain();
  if (type.isKnownDataType()) {
    assert(IS_REFCOUNTED_TYPE(type.toDataType()));
    increfMaybeStatic(v);
  } else {
    auto const sf = v.makeReg();
    v << cmpli{KindOfRefCountThreshold, loc.reg(1), sf};
    ifThen(v, CC_G, sf, [&](Vout& v) { increfMaybeStatic(v); });
  }
}
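// A C-level sketch of the logic the vasm above emits, assuming a signed
// 32-bit refcount stored at FAST_REFCOUNT_OFFSET and negative counts for
// static/uncounted values (as the static_assert implies). This is only an
// illustration of the semantics, not code that exists in HHVM.
void increfSketch(char* base, bool needsStaticBitCheck) {
  int32_t* count = reinterpret_cast<int32_t*>(base + FAST_REFCOUNT_OFFSET);
  if (!needsStaticBitCheck) {
    ++*count;                 // known non-static: unconditional increment
  } else if (*count >= 0) {
    ++*count;                 // skip the increment for static/uncounted values
  }
}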
/*
 * reoptimize() runs a trace through a second pass of TraceBuilder
 * optimizations, like this:
 *
 *   reset state.
 *   move all blocks to a temporary list.
 *   compute immediate dominators.
 *   for each block in trace order:
 *     if we have a snapshot state for this block:
 *       clear cse entries that don't dominate this block.
 *       use snapshot state.
 *     move all instructions to a temporary list.
 *     for each instruction:
 *       optimizeWork - do CSE and simplify again
 *       if not simplified:
 *         append existing instruction and update state.
 *       else:
 *         if the instruction has a result, insert a mov from the
 *         simplified tmp to the original tmp and discard the instruction.
 *     if the last conditional branch was turned into a jump, remove the
 *     fall-through edge to the next block.
 */
void TraceBuilder::reoptimize() {
  FTRACE(5, "ReOptimize:vvvvvvvvvvvvvvvvvvvv\n");
  SCOPE_EXIT { FTRACE(5, "ReOptimize:^^^^^^^^^^^^^^^^^^^^\n"); };
  assert(m_curTrace->isMain());
  assert(m_savedTraces.empty());

  m_state.setEnableCse(RuntimeOption::EvalHHIRCse);
  m_enableSimplification = RuntimeOption::EvalHHIRSimplification;
  if (!m_state.enableCse() && !m_enableSimplification) return;
  always_assert(!m_inReoptimize);
  m_inReoptimize = true;

  BlockList sortedBlocks = rpoSortCfg(m_unit);
  auto const idoms = findDominators(m_unit, sortedBlocks);
  m_state.clear();

  auto& traceBlocks = m_curTrace->blocks();
  BlockList blocks(traceBlocks.begin(), traceBlocks.end());
  traceBlocks.clear();

  for (auto* block : blocks) {
    assert(block->trace() == m_curTrace);
    FTRACE(5, "Block: {}\n", block->id());
    assert(m_curTrace->isMain());
    m_state.startBlock(block);
    m_curTrace->push_back(block);

    auto instructions = std::move(block->instrs());
    assert(block->empty());
    while (!instructions.empty()) {
      auto* inst = &instructions.front();
      instructions.pop_front();

      m_state.setMarker(inst->marker());
      // merging state looks at the current marker, and optimizeWork
      // below may create new instructions. Use the marker from this
      // instruction.
      assert(inst->marker().valid());
      setMarker(inst->marker());

      auto const tmp = optimizeWork(inst, idoms); // Can generate new instrs!
      if (!tmp) {
        // Could not optimize; keep the old instruction
        appendInstruction(inst, block);
        m_state.update(inst);
        continue;
      }
      SSATmp* dst = inst->dst();
      if (dst->type() != Type::None && dst != tmp) {
        // The result of optimization has a different destination than the
        // inst. Generate a mov(tmp->dst) to get result into dst. If we get
        // here then assume the last instruction in the block isn't a guard.
        // If it was, we would have to insert the mov on the fall-through
        // edge.
        assert(block->empty() || !block->back().isBlockEnd());
        IRInstruction* mov = m_unit.mov(dst, tmp, inst->marker());
        appendInstruction(mov, block);
        m_state.update(mov);
      }
      // Not re-adding inst; remove the inst->taken edge
      if (inst->taken()) inst->setTaken(nullptr);
    }

    if (block->empty()) {
      // If all the instructions in the block were optimized away, remove it
      // from the trace.
      auto it = traceBlocks.end();
      --it;
      assert(*it == block);
      m_curTrace->unlink(it);
    } else {
      if (block->back().isTerminal()) {
        // Could have converted a conditional branch to Jmp; clear next.
        block->setNext(nullptr);
      }
      m_state.finishBlock(block);
    }
  }
}
void LinearScan::allocRegToInstruction(InstructionList::iterator it) {
  IRInstruction* inst = &*it;
  dumpIR<IRInstruction, kExtraLevel>(inst, "allocating to instruction");

  // Reload all source operands if necessary.
  // Mark registers as unpinned.
  for (int regNo = 0; regNo < kNumRegs; ++regNo) {
    m_regs[regNo].m_pinned = false;
  }
  smart::vector<bool> needsReloading(inst->numSrcs(), true);
  for (uint32_t i = 0; i < inst->numSrcs(); ++i) {
    SSATmp* tmp = inst->src(i);
    int32_t slotId = m_spillSlots[tmp];
    if (slotId == -1) {
      needsReloading[i] = false;
    } else if ((tmp = m_slots[slotId].latestReload)) {
      needsReloading[i] = false;
      inst->setSrc(i, tmp);
    }
    if (!needsReloading[i]) {
      for (int locIndex = 0, n = m_allocInfo[tmp].numAllocatedRegs();
           locIndex < n;
           ++locIndex) {
        m_regs[int(m_allocInfo[tmp].reg(locIndex))].m_pinned = true;
      }
    }
  }
  for (uint32_t i = 0; i < inst->numSrcs(); ++i) {
    if (needsReloading[i]) {
      SSATmp* tmp = inst->src(i);
      int32_t slotId = m_spillSlots[tmp];
      // <tmp> is spilled, and not reloaded.
      // Therefore, we need to reload the value into a new SSATmp.

      // Insert the Reload instruction.
      SSATmp* spillTmp = m_slots[slotId].spillTmp;
      IRInstruction* reload = m_unit.gen(Reload, inst->marker(), spillTmp);
      inst->block()->insert(it, reload);

      // Create <reloadTmp> which inherits <tmp>'s slot ID and
      // <spillTmp>'s last use ID.
      // Replace <tmp> with <reloadTmp> in <inst>.
      SSATmp* reloadTmp = reload->dst();
      m_uses[reloadTmp].lastUse = m_uses[spillTmp].lastUse;
      m_spillSlots[reloadTmp] = slotId;
      inst->setSrc(i, reloadTmp);

      // reloadTmp and tmp share the same type. Since it was spilled, it
      // must be using its entire needed-count of registers.
      assert(reloadTmp->type() == tmp->type());
      for (int locIndex = 0; locIndex < tmp->numNeededRegs();) {
        locIndex += allocRegToTmp(reloadTmp, locIndex);
      }

      // Remember this reload tmp in case we can reuse it in later blocks.
      m_slots[slotId].latestReload = reloadTmp;
      dumpIR<IRInstruction, kExtraLevel>(reload, "created reload");
    }
  }

  freeRegsAtId(m_linear[inst]);
  // Update next native.
  if (nextNative() == inst) {
    assert(!m_natives.empty());
    m_natives.pop_front();
    computePreColoringHint();
  }

  Range<SSATmp*> dsts = inst->dsts();
  if (dsts.empty()) return;

  Opcode opc = inst->op();
  if (opc == DefMIStateBase) {
    assert(dsts[0].isA(Type::PtrToCell));
    assignRegToTmp(&m_regs[int(rsp)], &dsts[0], 0);
    return;
  }

  for (SSATmp& dst : dsts) {
    for (int numAllocated = 0, n = dst.numNeededRegs(); numAllocated < n; ) {
      // LdRaw, loading a generator's embedded AR, is the only time we have a
      // pointer to an AR that is not in rVmFp.
      const bool abnormalFramePtr =
        (opc == LdRaw &&
         inst->src(1)->getValInt() == RawMemSlot::ContARPtr);

      // Note that the point of StashGeneratorSP is to save a StkPtr
      // somewhere other than rVmSp.  (TODO(#2288359): make rbx not
      // special.)
      const bool abnormalStkPtr = opc == StashGeneratorSP;

      if (!abnormalStkPtr && dst.isA(Type::StkPtr)) {
        assert(opc == DefSP ||
               opc == ReDefSP ||
               opc == ReDefGeneratorSP ||
               opc == PassSP ||
               opc == DefInlineSP ||
               opc == Call ||
               opc == CallArray ||
               opc == SpillStack ||
               opc == SpillFrame ||
               opc == CufIterSpillFrame ||
               opc == ExceptionBarrier ||
               opc == RetAdjustStack ||
               opc == InterpOne ||
               opc == InterpOneCF ||
               opc == GenericRetDecRefs ||
               opc == CheckStk ||
               opc == GuardStk ||
               opc == AssertStk ||
               opc == CastStk ||
               opc == CoerceStk ||
               opc == SideExitGuardStk ||
               MInstrEffects::supported(opc));
        assignRegToTmp(&m_regs[int(rVmSp)], &dst, 0);
        numAllocated++;
        continue;
      }
      if (!abnormalFramePtr && dst.isA(Type::FramePtr)) {
        assignRegToTmp(&m_regs[int(rVmFp)], &dst, 0);
        numAllocated++;
        continue;
      }

      // Generally speaking, StkPtrs are pretty special due to
      // tracelet ABI registers. Keep track here of the allowed uses
      // that don't use the above allocation.
      assert(!dst.isA(Type::FramePtr) || abnormalFramePtr);
      assert(!dst.isA(Type::StkPtr) || abnormalStkPtr);

      if (!RuntimeOption::EvalHHIRDeadCodeElim || m_uses[dst].lastUse != 0) {
        numAllocated += allocRegToTmp(&dst, numAllocated);
      } else {
        numAllocated++;
      }
    }
  }
  if (!RuntimeOption::EvalHHIRDeadCodeElim) {
    // If any outputs were unused, free their regs now.
    freeRegsAtId(m_linear[inst]);
  }
}
/*
 * reoptimize() runs a trace through a second pass of TraceBuilder
 * optimizations, like this:
 *
 *   reset state.
 *   move all blocks to a temporary list.
 *   compute immediate dominators.
 *   for each block in trace order:
 *     if we have a snapshot state for this block:
 *       clear cse entries that don't dominate this block.
 *       use snapshot state.
 *     move all instructions to a temporary list.
 *     for each instruction:
 *       optimizeWork - do CSE and simplify again
 *       if not simplified:
 *         append existing instruction and update state.
 *       else:
 *         if the instruction has a result, insert a mov from the
 *         simplified tmp to the original tmp and discard the instruction.
 *     if the last conditional branch was turned into a jump, remove the
 *     fall-through edge to the next block.
 */
void TraceBuilder::reoptimize() {
  FTRACE(5, "ReOptimize:vvvvvvvvvvvvvvvvvvvv\n");
  SCOPE_EXIT { FTRACE(5, "ReOptimize:^^^^^^^^^^^^^^^^^^^^\n"); };
  assert(m_curTrace == m_mainTrace.get());
  assert(m_savedTraces.empty());
  assert(m_inlineSavedStates.empty());

  m_enableCse = RuntimeOption::EvalHHIRCse;
  m_enableSimplification = RuntimeOption::EvalHHIRSimplification;
  if (!m_enableCse && !m_enableSimplification) return;
  if (m_mainTrace->blocks().size() >
      RuntimeOption::EvalHHIRSimplificationMaxBlocks) {
    // TODO CSEHash::filter is very slow for large block sizes
    // t2135219 should address that
    return;
  }

  BlockList sortedBlocks = rpoSortCfg(m_mainTrace.get(), m_irFactory);
  auto const idoms = findDominators(sortedBlocks);
  clearTrackedState();

  auto blocks = std::move(m_mainTrace->blocks());
  assert(m_mainTrace->blocks().empty());
  while (!blocks.empty()) {
    Block* block = blocks.front();
    blocks.pop_front();
    assert(block->trace() == m_mainTrace.get());
    FTRACE(5, "Block: {}\n", block->id());
    m_mainTrace->push_back(block);
    if (m_snapshots[block]) {
      useState(block);
    }

    auto instructions = std::move(block->instrs());
    assert(block->empty());
    while (!instructions.empty()) {
      auto* inst = &instructions.front();
      instructions.pop_front();

      // last attempt to elide ActRecs, if we still need the InlineFPAnchor
      // it will be added back to the trace when we re-add instructions that
      // rely on it
      if (inst->op() == InlineFPAnchor) {
        continue;
      }

      // merging state looks at the current marker, and optimizeWork
      // below may create new instructions. Use the marker from this
      // instruction.
      assert(inst->marker().valid());
      setMarker(inst->marker());

      auto const tmp = optimizeWork(inst, idoms); // Can generate new instrs!
      if (!tmp) {
        // Could not optimize; keep the old instruction
        appendInstruction(inst, block);
        updateTrackedState(inst);
        continue;
      }
      SSATmp* dst = inst->dst();
      if (dst->type() != Type::None && dst != tmp) {
        // The result of optimization has a different destination than the
        // inst. Generate a mov(tmp->dst) to get result into dst. If we get
        // here then assume the last instruction in the block isn't a guard.
        // If it was, we would have to insert the mov on the fall-through
        // edge.
        assert(block->empty() || !block->back()->isBlockEnd());
        IRInstruction* mov = m_irFactory.mov(dst, tmp, inst->marker());
        appendInstruction(mov, block);
        updateTrackedState(mov);
      }
      // Not re-adding inst; remove the inst->taken edge
      if (inst->taken()) inst->setTaken(nullptr);
    }

    if (block->back()->isTerminal()) {
      // Could have converted a conditional branch to Jmp; clear next.
      block->setNext(nullptr);
    } else {
      // if the last instruction was a branch, we already saved state
      // for the target in updateTrackedState(). Now save state for
      // the fall-through path.
      saveState(block->next());
    }
  }
}
void CodeGenerator::cgGuardRefs(IRInstruction* inst) {
  assert(inst->numSrcs() == 5);

  SSATmp* funcPtrTmp = inst->src(0);
  SSATmp* nParamsTmp = inst->src(1);
  SSATmp* firstBitNumTmp = inst->src(2);
  SSATmp* mask64Tmp = inst->src(3);
  SSATmp* vals64Tmp = inst->src(4);

  // Get values in place
  assert(funcPtrTmp->type() == Type::Func);
  auto funcPtrReg = x2a(curOpd(funcPtrTmp).reg());
  assert(funcPtrReg.IsValid());

  assert(nParamsTmp->type() == Type::Int);
  auto nParamsReg = x2a(curOpd(nParamsTmp).reg());
  assert(nParamsReg.IsValid() || nParamsTmp->isConst());

  assert(firstBitNumTmp->isConst() && firstBitNumTmp->type() == Type::Int);
  uint32_t firstBitNum = (uint32_t)(firstBitNumTmp->getValInt());

  assert(mask64Tmp->type() == Type::Int);
  assert(mask64Tmp->isConst());
  auto mask64Reg = x2a(curOpd(mask64Tmp).reg());
  assert(mask64Reg.IsValid() || mask64Tmp->inst()->op() != LdConst);
  uint64_t mask64 = mask64Tmp->getValInt();
  assert(mask64);

  assert(vals64Tmp->type() == Type::Int);
  assert(vals64Tmp->isConst());
  auto vals64Reg = x2a(curOpd(vals64Tmp).reg());
  assert(vals64Reg.IsValid() || vals64Tmp->inst()->op() != LdConst);
  uint64_t vals64 = vals64Tmp->getValInt();
  assert((vals64 & mask64) == vals64);

  auto const destSK = SrcKey(curFunc(), m_unit.bcOff());
  auto const destSR = m_tx64->getSrcRec(destSK);

  auto thenBody = [&] {
    auto bitsOff = sizeof(uint64_t) * (firstBitNum / 64);
    auto cond = CC_NE;
    auto bitsPtrReg = rAsm;

    if (firstBitNum == 0) {
      bitsOff = Func::refBitValOff();
      bitsPtrReg = funcPtrReg;
    } else {
      m_as.  Ldr  (bitsPtrReg, funcPtrReg[Func::sharedOff()]);
      bitsOff -= sizeof(uint64_t);
    }

    // Don't need the bits pointer after this point
    auto bitsReg = rAsm;
    // Load the bits
    m_as.  Ldr  (bitsReg, bitsPtrReg[bitsOff]);

    // Mask the bits. There are restrictions on what can be encoded as an
    // immediate in ARM's logical instructions, and if they're not met, we'll
    // have to use a register.
    if (vixl::Assembler::IsImmLogical(mask64, vixl::kXRegSize)) {
      m_as.  And  (bitsReg, bitsReg, mask64);
    } else {
      if (mask64Reg.IsValid()) {
        m_as.And  (bitsReg, bitsReg, mask64Reg);
      } else {
        m_as.Mov  (rAsm2, mask64);
        m_as.And  (bitsReg, bitsReg, rAsm2);
      }
    }

    // Now do the compare. There are also restrictions on immediates in
    // arithmetic instructions (of which Cmp is one; it's just a subtract that
    // sets flags), so same deal as with the mask immediate above.
    if (vixl::Assembler::IsImmArithmetic(vals64)) {
      m_as.  Cmp  (bitsReg, vals64);
    } else {
      if (vals64Reg.IsValid()) {
        m_as.Cmp  (bitsReg, vals64Reg);
      } else {
        m_as.Mov  (rAsm2, vals64);
        m_as.Cmp  (bitsReg, rAsm2);
      }
    }
    destSR->emitFallbackJump(m_mainCode, cond);
  };

  if (firstBitNum == 0) {
    assert(!nParamsReg.IsValid());
    // This is the first 64 bits. No need to check nParams.
    thenBody();
  } else {
    assert(nParamsReg.IsValid());
    // Check number of args...
    m_as.  Cmp  (nParamsReg, firstBitNum);

    if (vals64 != 0 && vals64 != mask64) {
      // If we're beyond nParams, then either all params are refs, or all
      // params are non-refs, so if vals64 isn't 0 and isn't mask64, there's
      // no possibility of a match.
      destSR->emitFallbackJump(m_mainCode, CC_LE);
      thenBody();
    } else {
      ifThenElse(m_as, vixl::gt, thenBody, /* else */ [&] {
        // If not special builtin...
        m_as.  Ldr  (rAsm, funcPtrReg[Func::attrsOff()]);
        m_as.  Tst  (rAsm, AttrVariadicByRef);
        destSR->emitFallbackJump(m_mainCode, vals64 ? CC_Z : CC_NZ);
      });
    }
  }
}
/*
 * reoptimize() runs a trace through a second pass of TraceBuilder
 * optimizations, like this:
 *
 *   reset state.
 *   move all blocks to a temporary list.
 *   compute immediate dominators.
 *   for each block in trace order:
 *     if we have a snapshot state for this block:
 *       clear cse entries that don't dominate this block.
 *       use snapshot state.
 *     move all instructions to a temporary list.
 *     for each instruction:
 *       optimizeWork - do CSE and simplify again
 *       if not simplified:
 *         append existing instruction and update state.
 *       else:
 *         if the instruction has a result, insert a mov from the
 *         simplified tmp to the original tmp and discard the instruction.
 *     if the last conditional branch was turned into a jump, remove the
 *     fall-through edge to the next block.
 */
void TraceBuilder::reoptimize() {
  m_enableCse = RuntimeOption::EvalHHIRCse;
  m_enableSimplification = RuntimeOption::EvalHHIRSimplification;
  if (!m_enableCse && !m_enableSimplification) return;
  if (m_trace->getBlocks().size() >
      RuntimeOption::EvalHHIRSimplificationMaxBlocks) {
    // TODO CSEHash::filter is very slow for large block sizes
    // t2135219 should address that
    return;
  }

  BlockList sortedBlocks = sortCfg(m_trace.get(), m_irFactory);
  IdomVector idoms = findDominators(sortedBlocks);
  clearTrackedState();

  auto blocks = std::move(m_trace->getBlocks());
  assert(m_trace->getBlocks().empty());
  while (!blocks.empty()) {
    Block* block = blocks.front();
    blocks.pop_front();
    assert(block->getTrace() == m_trace.get());
    m_trace->push_back(block);
    if (m_snapshots[block]) {
      useState(block);
      m_cseHash.filter(block, idoms);
    }

    auto instructions = std::move(block->getInstrs());
    assert(block->empty());
    while (!instructions.empty()) {
      auto* inst = &instructions.front();
      instructions.pop_front();

      SSATmp* tmp = optimizeWork(inst); // Can generate new instrs!
      if (!tmp) {
        // Could not optimize; keep the old instruction
        appendInstruction(inst, block);
        updateTrackedState(inst);
        continue;
      }
      SSATmp* dst = inst->getDst();
      if (dst->type() != Type::None && dst != tmp) {
        // The result of optimization has a different destination than the
        // inst. Generate a mov(tmp->dst) to get result into dst. If we get
        // here then assume the last instruction in the block isn't a guard.
        // If it was, we would have to insert the mov on the fall-through
        // edge.
        assert(!block->back()->isBlockEnd());
        IRInstruction* mov = m_irFactory.mov(dst, tmp);
        appendInstruction(mov, block);
        updateTrackedState(mov);
      }
      // Not re-adding inst; remove the inst->taken edge
      if (inst->getTaken()) inst->setTaken(nullptr);
    }

    if (block->back()->isTerminal()) {
      // Could have converted a conditional branch to Jmp; clear next.
      block->setNext(nullptr);
    } else {
      // if the last instruction was a branch, we already saved state
      // for the target in updateTrackedState(). Now save state for
      // the fall-through path.
      saveState(block->getNext());
    }
  }
}
/*
 * reoptimize() runs a trace through a second pass of TraceBuilder
 * optimizations, like this:
 *
 *   reset state.
 *   move all blocks to a temporary list.
 *   compute immediate dominators.
 *   for each block in trace order:
 *     if we have a snapshot state for this block:
 *       clear cse entries that don't dominate this block.
 *       use snapshot state.
 *     move all instructions to a temporary list.
 *     for each instruction:
 *       optimizeWork - do CSE and simplify again
 *       if not simplified:
 *         append existing instruction and update state.
 *       else:
 *         if the instruction has a result, insert a mov from the
 *         simplified tmp to the original tmp and discard the instruction.
 *     if the last conditional branch was turned into a jump, remove the
 *     fall-through edge to the next block.
 */
void TraceBuilder::reoptimize() {
  FTRACE(5, "ReOptimize:vvvvvvvvvvvvvvvvvvvv\n");
  SCOPE_EXIT { FTRACE(5, "ReOptimize:^^^^^^^^^^^^^^^^^^^^\n"); };
  assert(m_savedBlocks.empty());
  assert(!m_curWhere);

  m_state.setEnableCse(RuntimeOption::EvalHHIRCse);
  m_enableSimplification = RuntimeOption::EvalHHIRSimplification;
  if (!m_state.enableCse() && !m_enableSimplification) return;
  setConstrainGuards(false);

  BlockList sortedBlocks = rpoSortCfg(m_unit);
  auto const idoms = findDominators(m_unit, sortedBlocks);
  m_state.clear();

  for (auto* block : rpoSortCfg(m_unit)) {
    FTRACE(5, "Block: {}\n", block->id());

    m_state.startBlock(block);
    m_curBlock = block;

    auto instructions = std::move(block->instrs());
    assert(block->empty());
    while (!instructions.empty()) {
      auto* inst = &instructions.front();
      instructions.pop_front();

      // merging state looks at the current marker, and optimizeWork
      // below may create new instructions. Use the marker from this
      // instruction.
      assert(inst->marker().valid());
      setMarker(inst->marker());

      auto const tmp = optimizeWork(inst, idoms); // Can generate new instrs!
      if (!tmp) {
        // Could not optimize; keep the old instruction
        appendInstruction(inst);
        continue;
      }
      SSATmp* dst = inst->dst();
      if (dst->type() != Type::None && dst != tmp) {
        // The result of optimization has a different destination than the
        // inst. Generate a mov(tmp->dst) to get result into dst. If we get
        // here then assume the last instruction in the block isn't a guard.
        // If it was, we would have to insert the mov on the fall-through
        // edge.
        assert(block->empty() || !block->back().isBlockEnd());
        IRInstruction* mov = m_unit.mov(dst, tmp, inst->marker());
        appendInstruction(mov);
      }

      if (inst->isBlockEnd()) {
        // Not re-adding inst; replace it with a jump to the next block.
        auto next = inst->next();
        appendInstruction(m_unit.gen(Jmp, inst->marker(), next));
        inst->setTaken(nullptr);
        inst->setNext(nullptr);
      }
    }

    assert(!block->empty());
    m_state.finishBlock(block);
  }
}
void CodeGenerator::cgGuardRefs(IRInstruction* inst) {
  assert(inst->numSrcs() == 5);

  SSATmp* funcPtrTmp = inst->src(0);
  SSATmp* nParamsTmp = inst->src(1);
  SSATmp* firstBitNumTmp = inst->src(2);
  SSATmp* mask64Tmp = inst->src(3);
  SSATmp* vals64Tmp = inst->src(4);

  // Get values in place
  assert(funcPtrTmp->type() == Type::Func);
  auto funcPtrReg = x2a(m_regs[funcPtrTmp].reg());
  assert(funcPtrReg.IsValid());

  assert(nParamsTmp->type() == Type::Int);
  auto nParamsReg = x2a(m_regs[nParamsTmp].reg());
  assert(nParamsReg.IsValid() || nParamsTmp->isConst());

  assert(firstBitNumTmp->isConst() && firstBitNumTmp->type() == Type::Int);
  uint32_t firstBitNum = (uint32_t)(firstBitNumTmp->getValInt());

  assert(mask64Tmp->type() == Type::Int);
  assert(mask64Tmp->isConst());
  auto mask64Reg = x2a(m_regs[mask64Tmp].reg());
  assert(mask64Reg.IsValid() || mask64Tmp->inst()->op() != LdConst);
  uint64_t mask64 = mask64Tmp->getValInt();
  assert(mask64);

  assert(vals64Tmp->type() == Type::Int);
  assert(vals64Tmp->isConst());
  auto vals64Reg = x2a(m_regs[vals64Tmp].reg());
  assert(vals64Reg.IsValid() || vals64Tmp->inst()->op() != LdConst);
  uint64_t vals64 = vals64Tmp->getValInt();
  assert((vals64 & mask64) == vals64);

  auto const destSK = SrcKey(curFunc(), m_unit.bcOff());
  auto const destSR = m_tx64->getSrcRec(destSK);

  auto thenBody = [&] {
    auto bitsOff = sizeof(uint64_t) * (firstBitNum / 64);
    auto cond = CC_NE;
    auto bitsPtrReg = rAsm;

    if (firstBitNum == 0) {
      bitsOff = Func::refBitValOff();
      bitsPtrReg = funcPtrReg;
    } else {
      m_as.  Ldr  (bitsPtrReg, funcPtrReg[Func::sharedOff()]);
      bitsOff -= sizeof(uint64_t);
    }

    if (vals64 == 0 || (mask64 & (mask64 - 1)) == 0) {
      // If vals64 is zero, or we're testing a single bit, we can get away
      // with a single test, rather than mask-and-compare.
      m_as.  Ldr  (rAsm2, bitsPtrReg[bitsOff]);
      if (mask64Reg.IsValid()) {
        m_as.Tst  (rAsm2, mask64Reg);
      } else {
        assert(vixl::Assembler::IsImmLogical(mask64, vixl::kXRegSize));
        m_as.Tst  (rAsm2, mask64);
      }
      if (vals64) cond = CC_E;
    } else {
      auto bitsValReg = rAsm;
      m_as.  Ldr  (bitsValReg, bitsPtrReg[bitsOff]);
      if (debug) bitsPtrReg = Register();

      // bitsValReg <- bitsValReg & mask64
      // NB: these 'And' ops don't set flags. They don't need to.
      if (mask64Reg.IsValid()) {
        m_as.And  (bitsValReg, bitsValReg, mask64Reg);
      } else {
        // There are restrictions on the immediates that can be encoded into
        // logical ops. If the mask doesn't meet those restrictions, we have
        // to load it into a register first.
        if (vixl::Assembler::IsImmLogical(mask64, vixl::kXRegSize)) {
          m_as.And  (bitsValReg, bitsValReg, mask64);
        } else {
          m_as.Mov  (rAsm2, mask64);
          m_as.And  (bitsValReg, bitsValReg, rAsm2);
        }
      }

      // If bitsValReg != vals64, then goto Exit
      if (vals64Reg.IsValid()) {
        m_as.  Cmp  (bitsValReg, vals64Reg);
      } else {
        m_as.  Cmp  (bitsValReg, vals64);
      }
    }
    destSR->emitFallbackJump(m_mainCode, cond);
  };

  if (firstBitNum == 0) {
    assert(!nParamsReg.IsValid());
    // This is the first 64 bits. No need to check nParams.
    thenBody();
  } else {
    assert(nParamsReg.IsValid());
    // Check number of args...
    m_as.  Cmp  (nParamsReg, firstBitNum);

    if (vals64 != 0 && vals64 != mask64) {
      // If we're beyond nParams, then either all params are refs, or all
      // params are non-refs, so if vals64 isn't 0 and isn't mask64, there's
      // no possibility of a match.
      destSR->emitFallbackJump(m_mainCode, CC_LE);
      thenBody();
    } else {
      ifThenElse(m_as, vixl::gt, thenBody, /* else */ [&] {
        // If not special builtin...
        m_as.  Ldr  (rAsm, funcPtrReg[Func::attrsOff()]);
        m_as.  Tst  (rAsm, AttrVariadicByRef);
        destSR->emitFallbackJump(m_mainCode, vals64 ? CC_Z : CC_NZ);
      });
    }
  }
}