/**
 * Collect the direct loads found on the taken (cold) side of a guard.
 *
 * Starting at the guard's branch destination, every tree of each visited block
 * (from its first real treetop up to, but not including, its BBEnd) is handed to
 * collectDirectLoads. A successor is visited as well when it is not the CFG end
 * node and has exactly one predecessor, i.e. it has not merged back yet.
 *
 * @param firstBlock    The guard's branch destination
 * @param coldPathLoads BitVector of symbol reference numbers for any direct loads
 *                      seen until the merge back to mainline; it is emptied first
 */
static void collectColdPathLoads(TR::Block* firstBlock, TR_BitVector &coldPathLoads)
   {
   coldPathLoads.empty();

   TR::NodeChecklist visitedNodes(TR::comp());
   TR_Stack<TR::Block*> pending(TR::comp()->trMemory(), 8, false, stackAlloc);
   pending.push(firstBlock);

   while (!pending.isEmpty())
      {
      TR::Block *current = pending.pop();

      // Walk every tree in the block, stopping at its BBEnd
      TR::TreeTop *cursor = current->getFirstRealTreeTop();
      while (cursor->getNode()->getOpCodeValue() != TR::BBEnd)
         {
         collectDirectLoads(cursor->getNode(), coldPathLoads, visitedNodes);
         cursor = cursor->getNextTreeTop();
         }

      // Queue the successors that have not merged with the mainline
      auto edge = current->getSuccessors().begin();
      auto lastEdge = current->getSuccessors().end();
      while (edge != lastEdge)
         {
         TR::Block *successor = (*edge)->getTo()->asBlock();
         ++edge;

         if (successor == TR::comp()->getFlowGraph()->getEnd())
            continue;

         if (successor->getPredecessors().size() == 1)
            pending.push(successor);
         }
      }
   }
// This opt tries to reduce merge backs from cold code that are the result of inliner
// generated nopable virtual guards
// It looks for one basic pattern
//
// guard1 -> cold1
// BBEND
// BBSTART
// guard2 -> cold2
// if guard1 is the guard for a method which calls the method guard2 protects or cold1 is
// a predecessor of cold2 (a situation commonly created by virtual guard tail splitter) we
// can transform the guards as follows when guard1 and guard2 are adjacent, i.e. there are
// no trees between them
// guard1 -> cold1
// BBEND
// BBSTART
// guard2 -> cold1
// This is safe because there are no trees between the guards and calling the caller will
// result in the call to the callee if we need to patch guard2. cold2 and its mergebacks
// can then be eliminated
//
// In addition this opt will try to move guard2 up from the end of a block to the
// start of the block. We can do this if guard2 is an HCR guard and there is no GC point
// between BBSTART and guard2 since HCR is a stop-the-world event.
//
// Finally, there is a simple tail splitting step run before the analysis of a guard if we
// detect that the taken side of the guard merges back in the next block - this happens
// for some empty methods and is common for Object.<init> at the top of constructors.
/**
 * Walk every block of the method looking for a block that ends in a mergeable
 * guard (guard1), then try to redirect the mergeable guards in the blocks that
 * follow it (guard2) to guard1's cold block, moving the guard blocks (and any
 * privarg blocks) up to the appropriate insert points.
 *
 * Setting the TR_DisableVGHeadMergerTailSplitting environment option disables
 * the tail splitting step that runs before a guard is analysed.
 *
 * @return always 1
 */
int32_t TR_VirtualGuardHeadMerger::perform()
   {
   static char *disableVGHeadMergerTailSplitting = feGetEnv("TR_DisableVGHeadMergerTailSplitting");
   TR::CFG *cfg = comp()->getFlowGraph();

   // Cache the loads for the outer guard's cold path
   TR_BitVector coldPathLoads(comp()->trMemory()->currentStackRegion());
   TR_BitVector privArgSymRefs(comp()->trMemory()->currentStackRegion());
   bool evaluatedColdPathLoads = false;

   for (TR::Block *block = optimizer()->getMethodSymbol()->getFirstTreeTop()->getNode()->getBlock();
        block; block = block->getNextBlock())
      {
      TR::Node *guard1 = block->getLastRealTreeTop()->getNode();

      if (isMergeableGuard(guard1))
         {
         if (trace())
            traceMsg(comp(), "Found mergeable guard in block_%d\n", block->getNumber());
         TR::Block *cold1 = guard1->getBranchDestination()->getEnclosingBlock();

         // check for an immediate merge back from the cold block and
         // tail split one block if we can - we only handle splitting a block
         // ending in a fallthrough, a branch or a goto for now for simplicity
         if (!disableVGHeadMergerTailSplitting &&
             (cold1->getSuccessors().size() == 1) &&
             cold1->hasSuccessor(block->getNextBlock()) &&
             cold1->getLastRealTreeTop()->getNode()->getOpCode().isGoto())
            {
            // TODO handle moving code earlier in the block down below the guard

            // tail split
            // The structural test is grouped explicitly so that performTransformation
            // gates the split for BOTH shapes of the merge block. Without the grouping
            // '&&' binds tighter than '||' and the single-successor case would bypass
            // performTransformation entirely.
            TR::Block *mergeBlock = block->getNextBlock();
            bool canTailSplit =
               (mergeBlock->getSuccessors().size() == 1) ||
               ((mergeBlock->getSuccessors().size() == 2) &&
                mergeBlock->getLastRealTreeTop()->getNode()->getOpCode().isBranch());

            if (canTailSplit &&
                performTransformation(comp(), "%sCloning block_%d and placing clone after block_%d to reduce HCR guard nops\n", OPT_DETAILS, block->getNextBlock()->getNumber(), cold1->getNumber()))
               tailSplitBlock(block, cold1);
            }

         // guard motion is fairly complex but what we want to achieve around guard1 is a sequence
         // of relocated privarg blocks, followed by a sequence of runtime patchable guards going to
         // guard1's cold block, followed by a sequence of stop-the-world guards going to guard1's
         // cold block
         //
         // The following code is to setup the various insert points based on the following diagrams
         // of basic blocks:
         //
         // start:               setup:                           end result after moving runtime guard'
         //                       |       |                        +-------+ <-- privargIns
         //                       |       | <-- privargIns             |
         //                       +-------+ <-- runtimeIns         +-------+
         //   |       |               |                            | Guard'|
         //   |       |               V                            +-------+ <-- runtimeIns
         //   +-------+           +-------+                            |
         //   | Guard |           | Guard |                            V
         //   +-------+           +-------+ <-- HCRIns             +-------+
         //       |        ===>       |                    ===>    | Guard |
         //       V                   V                            +-------+ <-- HCRIns
         //   +-------+           +-------+                            |
         //   |       |           |       |                            V
         //   |       |           |       |                        +-------+
         //
         // Note we always split the block - this may create an empty block but preserves the incoming
         // control flow we leave the rest to block extension to fix later
         block = block->split(block->getLastRealTreeTop(), cfg, true, false);
         TR::Block *privargIns = block->getPrevBlock();
         TR::Block *runtimeIns = block->getPrevBlock();
         TR::Block *HCRIns = block;

         // New outer guard so cold paths must be evaluated
         evaluatedColdPathLoads = false;

         // scan for candidate guards to merge with guard1 identified above
         for (TR::Block *nextBlock = block->getNextBlock(); nextBlock; nextBlock = nextBlock->getNextBlock())
            {
            // A candidate must be reachable only from the current block
            if (!(nextBlock->getPredecessors().size() == 1) ||
                !nextBlock->hasPredecessor(block))
               {
               break;
               }

            // The candidate guard is either the first or the last real tree of the block
            TR::TreeTop *guard2Tree = NULL;
            if (isMergeableGuard(nextBlock->getFirstRealTreeTop()->getNode()))
               {
               guard2Tree = nextBlock->getFirstRealTreeTop();
               }
            else if (isMergeableGuard(nextBlock->getLastRealTreeTop()->getNode()))
               {
               guard2Tree = nextBlock->getLastRealTreeTop();
               }
            else
               break;

            TR::Node *guard2 = guard2Tree->getNode();
            TR::Block *guard2Block = nextBlock;

            // It is not possible to shift an OSR guard unless the destination is already an OSR point
            // as the necessary OSR state will not be available
            if (guard2->isOSRGuard() && !guard1->isOSRGuard())
               break;

            TR::Block *insertPoint = isStopTheWorldGuard(guard2) ? HCRIns : runtimeIns;
            if (!safeToMoveGuard(insertPoint, guard2Tree, guard1->getBranchDestination(), privArgSymRefs))
               break;

            // now we figure out if we can redirect guard2 to guard1's cold block
            // ie can we do the head merge
            TR::Block *cold2 = guard2->getBranchDestination()->getEnclosingBlock();
            if (guard1->getInlinedSiteIndex() == guard2->getInlinedSiteIndex())
               {
               if (trace())
                  traceMsg(comp(), " Guard1 [%p] is guarding the same call as Guard2 [%p] - proceeding with guard merging\n", guard1, guard2);
               }
            else if (guard2->getInlinedSiteIndex() > -1 &&
                     guard1->getInlinedSiteIndex() == TR::comp()->getInlinedCallSite(guard2->getInlinedSiteIndex())._byteCodeInfo.getCallerIndex())
               {
               if (trace())
                  traceMsg(comp(), " Guard1 [%p] is the caller of Guard2 [%p] - proceeding with guard merging\n", guard1, guard2);
               }
            else if ((cold1->getSuccessors().size() == 1) &&
                     cold1->hasSuccessor(cold2))
               {
               if (trace())
                  traceMsg(comp(), " Guard1 cold destination block_%d has guard2 cold destination block_%d as its only successor - proceeding with guard merging\n", cold1->getNumber(), cold2->getNumber());
               }
            else
               {
               if (trace())
                  traceMsg(comp(), " Cold1 block_%d and cold2 block_%d of guard2 [%p] in unknown relationship - abandon the merge attempt\n", cold1->getNumber(), cold2->getNumber(), guard2);
               break;
               }

            // Runtime guards will shift their privargs, so it is necessary to check such a move is safe
            // This is possible if a privarg temp was recycled for the inner call site, with a prior use as an
            // argument for the outer call site. As the privargs for the inner call site must be evaluated before
            // both guards, this would result in the recycled temp holding the incorrect value if the guard is ever
            // taken.
            if (!isStopTheWorldGuard(guard2))
               {
               // The cold path loads are computed lazily, once per outer guard
               if (!evaluatedColdPathLoads)
                  {
                  collectColdPathLoads(cold1, coldPathLoads);
                  evaluatedColdPathLoads = true;
                  }

               if (coldPathLoads.intersects(privArgSymRefs))
                  {
                  if (trace())
                     traceMsg(comp(), " Recycled temp live in cold1 block_%d and used as privarg before guard2 [%p] - stop guard merging\n", cold1->getNumber(), guard2);
                  break;
                  }
               }

            if (!performTransformation(comp(), "%sRedirecting %s guard [%p] in block_%d to parent guard cold block_%d\n", OPT_DETAILS, isStopTheWorldGuard(guard2) ? "stop the world" : "runtime", guard2, guard2Block->getNumber(), cold1->getNumber()))
               continue;

            if (guard2->getBranchDestination() != guard1->getBranchDestination())
               guard2Block->changeBranchDestination(guard1->getBranchDestination(), cfg);

            if (guard2Tree != guard2Block->getFirstRealTreeTop())
               {
               cfg->setStructure(NULL);

               // We should leave code ahead of an HCR guard in place because:
               // 1, it might have side effect to runtime guards after it, moving it up might cause us to falsely merge
               //    the subsequent runtime guards
               // 2, it might contain live monitor, moving it up above a guard can affect the monitor's live range
               if (!isStopTheWorldGuard(guard2))
                  {
                  // the block created above guard2 contains only privarg treetops or monitor stores if
                  // guard2 is a runtime-patchable guard and is safe to merge. We need to move the priv
                  // args up to the runtime insert point and leave the monitor stores in place
                  // It's safe to do so because there is no data dependency between the monitor store and
                  // the priv arg store, because the priv arg store does not load the value from the temp
                  // holding the monitored object

                  // Split priv arg stores from monitor stores
                  // Monitor store is generated for the caller of the method guard2 protects, so should appear before
                  // priv arg stores for the method guard2 protects
                  TR::Block *privargBlock = guard2Block;
                  guard2Block = splitRuntimeGuardBlock(comp(), guard2Block, cfg);
                  if (privargBlock != guard2Block)
                     {
                     if (trace())
                        traceMsg(comp(), " Moving privarg block_%d after block_%d\n", privargBlock->getNumber(), privargIns->getNumber());

                     moveBlockAfterDest(cfg, privargBlock, privargIns);

                     if (HCRIns == privargIns)
                        HCRIns = privargBlock;
                     if (runtimeIns == privargIns)
                        runtimeIns = privargBlock;
                     privargIns = privargBlock;

                     // refresh the insertPoint since it could be stale after the above updates
                     insertPoint = runtimeIns;
                     }
                  }

               // Remember which block the guard came from: guard2Block is about to be
               // replaced by the newly created block, so its number must be captured now
               // for the trace message to report the right source block.
               int32_t sourceBlockNumber = guard2Block->getNumber();
               guard2Block = guard2Block->split(guard2Tree, cfg, true, false);
               if (trace())
                  traceMsg(comp(), " Created new block_%d to hold guard [%p] from block_%d\n", guard2Block->getNumber(), guard2, sourceBlockNumber);
               }

            if (insertPoint != guard2Block->getPrevBlock())
               {
               TR::DebugCounter::incStaticDebugCounter(comp(), TR::DebugCounter::debugCounterName(comp(), "headMerger/%s_%s/(%s)", isStopTheWorldGuard(guard1) ? "stop the world" : "runtime", isStopTheWorldGuard(guard2) ? "stop the world" : "runtime", comp()->signature()));
               cfg->setStructure(NULL);

               // Resume the scan from the block that preceded guard2's block before it is moved
               block = nextBlock = guard2Block->getPrevBlock();
               if (trace())
                  traceMsg(comp(), " Moving guard2 block block_%d after block_%d\n", guard2Block->getNumber(), insertPoint->getNumber());

               moveBlockAfterDest(cfg, guard2Block, insertPoint);

               if (HCRIns == insertPoint)
                  HCRIns = guard2Block;
               if (runtimeIns == insertPoint)
                  runtimeIns = guard2Block;
               }
            else
               {
               block = guard2Block;
               }

            // The guard just merged becomes the reference guard for the next candidate
            guard1 = guard2;
            }
         }
      }
   return 1;
   }