// Add an async check into a block - MUST be at block entry
//
void TR_AsyncCheckInsertion::insertAsyncCheck(TR::Block *block, TR::Compilation *comp, const char *counterPrefix)
   {
   TR::TreeTop *lastTree = block->getLastRealTreeTop();
   TR::TreeTop *asyncTree =
      TR::TreeTop::create(comp,
         TR::Node::createWithSymRef(lastTree->getNode(), TR::asynccheck, 0,
            comp->getSymRefTab()->findOrCreateAsyncCheckSymbolRef(comp->getMethodSymbol())));

   if (lastTree->getNode()->getOpCode().isReturn())
      {
      TR::TreeTop *prevTree = lastTree->getPrevTreeTop();
      prevTree->join(asyncTree);
      asyncTree->join(lastTree);
      }
   else
      {
      TR::TreeTop *nextTree = block->getEntry()->getNextTreeTop();
      block->getEntry()->join(asyncTree);
      asyncTree->join(nextTree);
      }

   const char * const name = TR::DebugCounter::debugCounterName(comp,
      "asynccheck.insert/%s/(%s)/%s/block_%d",
      counterPrefix, comp->signature(), comp->getHotnessName(), block->getNumber());
   TR::DebugCounter::prependDebugCounter(comp, name, asyncTree->getNextTreeTop());
   }
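/*
 * Illustrative sketch only (not OMR code): TR::TreeTop::join links two
 * treetops directly, so each pair of join() calls above splices the new
 * asynccheck either immediately before a terminating return or immediately
 * after the block's entry. A minimal standalone model of that splice:
 *
 * \code
 * struct ToyTreeTop
 *    {
 *    ToyTreeTop *prev;
 *    ToyTreeTop *next;
 *    };
 *
 * // Model of first->join(second): point 'first' directly at 'second'.
 * static void toyJoin(ToyTreeTop *first, ToyTreeTop *second)
 *    {
 *    first->next = second;
 *    if (second)
 *       second->prev = first;
 *    }
 *
 * // Splice 'tt' between 'before' and its current successor, as the
 * // two join() calls in insertAsyncCheck do.
 * static void toyInsertAfter(ToyTreeTop *before, ToyTreeTop *tt)
 *    {
 *    ToyTreeTop *after = before->next;
 *    toyJoin(before, tt);
 *    toyJoin(tt, after);
 *    }
 * \endcode
 */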
void TR_ExpressionsSimplification::transformNode(TR::Node *srcNode, TR::Block *dstBlock)
   {
   TR::TreeTop *lastTree = dstBlock->getLastRealTreeTop();
   TR::TreeTop *prevTree = lastTree->getPrevTreeTop();
   TR::TreeTop *srcNodeTT = TR::TreeTop::create(comp(), srcNode);

   if (trace())
      comp()->getDebug()->print(comp()->getOutFile(), srcNode, 0, true);

   if (lastTree->getNode()->getOpCode().isBranch() ||
       (lastTree->getNode()->getOpCode().isJumpWithMultipleTargets() &&
        lastTree->getNode()->getOpCode().hasBranchChildren()))
      {
      srcNodeTT->join(lastTree);
      prevTree->join(srcNodeTT);
      }
   /*
   else if (dstBlock->getEntry()->getNode()->getOpCodeValue() == TR::BBStart)
      {
      srcNodeTT->join(dstBlock->getExit());
      dstBlock->getEntry()->join(srcNodeTT);
      }
   */
   else
      {
      srcNodeTT->join(dstBlock->getExit());
      lastTree->join(srcNodeTT);
      }

   return;
   }
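/*
 * Hypothetical usage sketch (the names below are assumptions, not taken from
 * this pass): hoisting a node that has been proven loop-invariant into a
 * chosen destination block would be written roughly as
 *
 * \code
 * // 'invariantNode' and 'preheader' are hypothetical placeholders.
 * transformNode(invariantNode, preheader);
 * \endcode
 *
 * The branch test above exists because a destination block ending in a
 * branch (or a multi-target jump with branch children) must still evaluate
 * the moved expression before control transfers; otherwise the new treetop
 * can simply become the last real treetop before the block's exit.
 */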
/**
 * A runtime guard block may have monitor stores and privarg stores along with the guard
 * itself. This method will rearrange these stores and split the block, managing any
 * uncommoning necessary for eventual block order.
 *
 * The provided block will become the privarg block, containing any privarg stores and additional
 * temps for uncommoning. It must be evaluated first. The returned block will contain monitor
 * stores and the guard. If no split is required, the provided block will be returned.
 *
 * @param comp Compilation object
 * @param block Block to manipulate
 * @param cfg Current CFG
 * @return The block containing the guard.
 */
static TR::Block* splitRuntimeGuardBlock(TR::Compilation *comp, TR::Block* block, TR::CFG *cfg)
   {
   TR::NodeChecklist checklist(comp);
   TR::TreeTop *start = block->getFirstRealTreeTop();
   TR::TreeTop *guard = block->getLastRealTreeTop();
   TR::TreeTop *firstPrivArg = NULL;
   TR::TreeTop *firstMonitor = NULL;

   // Manage the unexpected case that monitors and priv args are reversed
   bool privThenMonitor = false;

   TR_ASSERT(isMergeableGuard(guard->getNode()), "last node must be guard %p", guard->getNode());

   // Search for privarg and monitor stores
   // Only commoned nodes under the guard are required to be anchored, due to the guard being
   // evaluated before the monitor stores later on
   bool anchoredTemps = false;
   for (TR::TreeTop *tt = start; tt && tt->getNode()->getOpCodeValue() != TR::BBEnd; tt = tt->getNextTreeTop())
      {
      TR::Node * node = tt->getNode();

      if (node->getOpCode().hasSymbolReference() && node->getSymbol()->holdsMonitoredObject())
         firstMonitor = firstMonitor == NULL ? tt : firstMonitor;
      else if (node->chkIsPrivatizedInlinerArg())
         {
         if (firstPrivArg == NULL)
            {
            firstPrivArg = tt;
            privThenMonitor = (firstMonitor == NULL);
            }
         }
      else if (isMergeableGuard(node))
         anchoredTemps |= anchorCommonNodes(comp, node, start, checklist);
      else
         TR_ASSERT(0, "Node other than monitor or privarg store %p before runtime guard", node);
      }

   // If there are monitors then privargs, they must be swapped around, such that all privargs are
   // evaluated first
   if (firstPrivArg && firstMonitor && !privThenMonitor)
      {
      TR::TreeTop *monitorEnd = firstPrivArg->getPrevTreeTop();
      firstMonitor->getPrevTreeTop()->join(firstPrivArg);
      guard->getPrevTreeTop()->join(firstMonitor);
      monitorEnd->join(guard);
      }

   // If there were temps created or privargs in the block, perform a split
   TR::TreeTop *split = NULL;
   if (firstPrivArg)
      split = firstMonitor ? firstMonitor : guard;
   else if (anchoredTemps)
      split = start;

   if (split)
      return block->split(split, cfg, true /* fixupCommoning */, false /* copyExceptionSuccessors */);
   return block;
   }
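/*
 * Sketch of the reordering above (block shape assumed, not from the source):
 * given trees in the "reversed" order
 *
 *    entry, monitor1 .. monitorM, privarg1 .. privargN, guard, exit
 *
 * the three join() calls splice the monitor stores out and re-insert them
 * between the last privarg store and the guard, yielding
 *
 *    entry, privarg1 .. privargN, monitor1 .. monitorM, guard, exit
 *
 * The subsequent split at 'firstMonitor' then leaves the privarg stores in
 * the original block and returns a new block holding the monitor stores and
 * the guard.
 */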
static bool fixUpTree(TR::Node *node, TR::TreeTop *treeTop, TR::NodeChecklist &visited,
                      bool &highGlobalIndex, TR::Optimization *opt, vcount_t evaluatedVisitCount)
   {
   if (node->getVisitCount() == evaluatedVisitCount)
      return false;

   if (visited.contains(node))
      return false;

   visited.add(node);

   bool containsFloatingPoint = false;
   bool anchorLoadaddr = true;
   bool anchorArrayCmp = true;

   // For an arraycmp node, don't create its treetop anchor;
   // fold it into the if statement and save a jump instruction
   if (node->getOpCodeValue() == TR::arraycmp &&
       !node->isArrayCmpLen() &&
       TR::Compiler->target.cpu.isX86())
      {
      anchorArrayCmp = false;
      }

   if ((node->getReferenceCount() > 1) &&
       !node->getOpCode().isLoadConst() &&
       anchorLoadaddr &&
       anchorArrayCmp)
      {
      if (!opt->comp()->getOption(TR_ProcessHugeMethods))
         {
         int32_t nodeCount = opt->comp()->getNodeCount();
         int32_t nodeCountLimit = 3 * USHRT_MAX / 4;
         if (nodeCount > nodeCountLimit)
            {
            dumpOptDetails(opt->comp(),
               "%snode count %d exceeds limit %d\n",
               opt->optDetailString(), nodeCount, nodeCountLimit);
            highGlobalIndex = true;
            return containsFloatingPoint;
            }
         }

      if (node->getOpCode().isFloatingPoint())
         containsFloatingPoint = true;
      TR::TreeTop *nextTree = treeTop->getNextTreeTop();
      node->incFutureUseCount();
      TR::TreeTop *anchorTreeTop = TR::TreeTop::create(opt->comp(), TR::Node::create(TR::treetop, 1, node));
      anchorTreeTop->getNode()->setFutureUseCount(0);
      treeTop->join(anchorTreeTop);
      anchorTreeTop->join(nextTree);
      }
   else
      {
      for (int32_t i = 0; i < node->getNumChildren(); ++i)
         {
         TR::Node *child = node->getChild(i);
         if (fixUpTree(child, treeTop, visited, highGlobalIndex, opt, evaluatedVisitCount))
            containsFloatingPoint = true;
         }
      }
   return containsFloatingPoint;
   }
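/*
 * Illustrative sketch (tree shapes assumed) of the anchoring performed in
 * fixUpTree: a child 'n' with reference count > 1 gets a fresh treetop
 * parent spliced in right after the tree being eliminated, so in treetop
 * order we get
 *
 *    treeTop:        <tree about to be removed, referencing n>
 *    anchorTreeTop:  treetop
 *                      n        <- same node; refcount bumped by the new parent
 *    nextTree:       ...
 *
 * Later commoned references to 'n' therefore still see a value that is
 * evaluated at this point once the original tree is deleted.
 */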
int32_t TR::DeadTreesElimination::process(TR::TreeTop *startTree, TR::TreeTop *endTree)
   {
   TR::StackMemoryRegion stackRegion(*comp()->trMemory());
   LongestPathMap longestPaths(std::less<TR::Node*>(), stackRegion);

   typedef TR::typed_allocator<CRAnchor, TR::Region&> CRAnchorAlloc;
   typedef TR::forward_list<CRAnchor, CRAnchorAlloc> CRAnchorList;
   CRAnchorList anchors(stackRegion);

   vcount_t visitCount = comp()->incOrResetVisitCount();
   TR::TreeTop *treeTop;
   for (treeTop = startTree; (treeTop != endTree); treeTop = treeTop->getNextTreeTop())
      treeTop->getNode()->initializeFutureUseCounts(visitCount);

   TR::Block *block = NULL;
   bool delayedRegStoresBeforeThisPass = _delayedRegStores;

   // Update visitCount as they are used in this optimization and need to be
   // correct at all intermediate stages
   //
   visitCount = comp()->incOrResetVisitCount();
   for (TR::TreeTopIterator iter(startTree, comp()); iter != endTree; ++iter)
      {
      TR::Node *node = iter.currentTree()->getNode();

      if (node->getOpCodeValue() == TR::BBStart)
         {
         block = node->getBlock();
         if (!block->isExtensionOfPreviousBlock())
            longestPaths.clear();
         }

      int vcountLimit = MAX_VCOUNT - 3;
      if (comp()->getVisitCount() > vcountLimit)
         {
         dumpOptDetails(comp(),
            "%sVisit count %d exceeds limit %d; stopping\n",
            optDetailString(), comp()->getVisitCount(), vcountLimit);
         return 0;
         }

      // Update visitCount as they are used in this optimization and need to be
      // correct at all intermediate stages
      //
      if ((node->getOpCodeValue() != TR::treetop) &&
          (!node->getOpCode().isAnchor() || (node->getFirstChild()->getReferenceCount() != 1)) &&
          (!node->getOpCode().isStoreReg() || (node->getFirstChild()->getReferenceCount() != 1)) &&
          (delayedRegStoresBeforeThisPass ||
           (iter.currentTree() == block->getLastRealTreeTop()) ||
           !node->getOpCode().isStoreReg() ||
           (node->getVisitCount() == visitCount)))
         {
         if (node->getOpCode().isAnchor() && node->getFirstChild()->getOpCode().isLoadIndirect())
            anchors.push_front(CRAnchor(iter.currentTree(), block));

         TR::TransformUtil::recursivelySetNodeVisitCount(node, visitCount);
         continue;
         }

      if (node->getOpCode().isStoreReg())
         _delayedRegStores = true;

      TR::Node *child = node->getFirstChild();
      if (child->getOpCodeValue() == TR::PassThrough)
         {
         TR::Node *newChild = child->getFirstChild();
         node->setAndIncChild(0, newChild);
         newChild->incFutureUseCount();
         if (child->getReferenceCount() <= 1)
            optimizer()->prepareForNodeRemoval(child);
         child->recursivelyDecReferenceCount();
         recursivelyDecFutureUseCount(child);
         child = newChild;
         }

      bool treeTopCanBeEliminated = false;

      // If the treetop child has been seen before then it must be anchored
      // somewhere above already; so we don't need the treetop to be anchoring
      // this node (as the computation is already done at the first reference to
      // the node).
      //
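      /*
       * Illustrative example (assumed IL shapes, not from this file) of the
       * commoning case tested below. OMR debug listings print a commoned
       * reference as "==>":
       *
       *    istore t
       *      iadd        <- first reference; the add is evaluated here
       *    treetop
       *      ==>iadd     <- already evaluated above; this treetop only
       *                     anchors the node and may be removable
       */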
      if (visitCount == child->getVisitCount())
         {
         treeTopCanBeEliminated = true;
         }
      else
         {
         TR::ILOpCode &childOpCode = child->getOpCode();
         TR::ILOpCodes opCodeValue = childOpCode.getOpCodeValue();
         bool seenConditionalBranch = false;

         bool callWithNoSideEffects = child->getOpCode().isCall() &&
            child->getSymbolReference()->getSymbol()->isResolvedMethod() &&
            child->getSymbolReference()->getSymbol()->castToResolvedMethodSymbol()->isSideEffectFree();

         if (callWithNoSideEffects)
            {
            treeTopCanBeEliminated = true;
            }
         else if (!((childOpCode.isCall() && !callWithNoSideEffects) ||
                    childOpCode.isStore() ||
                    ((opCodeValue == TR::New ||
                      opCodeValue == TR::anewarray ||
                      opCodeValue == TR::newarray) &&
                     child->getReferenceCount() > 1) ||
                    opCodeValue == TR::multianewarray ||
                    opCodeValue == TR::MergeNew ||
                    opCodeValue == TR::checkcast ||
                    opCodeValue == TR::Prefetch ||
                    opCodeValue == TR::iu2l ||
                    ((childOpCode.isDiv() || childOpCode.isRem()) &&
                     child->getNumChildren() == 3)))
            {
            // Perform the rather complex check to see whether it's safe
            // to disconnect the child node from the treetop
            //
            bool safeToReplaceNode = false;
            if (child->getReferenceCount() == 1)
               {
               safeToReplaceNode = true;

#ifdef J9_PROJECT_SPECIFIC
               if (child->getOpCode().isPackedExponentiation())
                  {
                  // pdexp has a possible message side effect in truncating or no significant digits left cases
                  safeToReplaceNode = false;
                  }
#endif

               if (opCodeValue == TR::loadaddr)
                  treeTopCanBeEliminated = true;
               }
            else if (!_cannotBeEliminated)
               {
               safeToReplaceNode = isSafeToReplaceNode(
                  child, iter.currentTree(), &seenConditionalBranch,
                  visitCount, comp(), &_targetTrees, _cannotBeEliminated,
                  longestPaths);
               }

            if (safeToReplaceNode)
               {
               if (childOpCode.hasSymbolReference())
                  {
                  TR::SymbolReference *symRef = child->getSymbolReference();

                  if (symRef->getSymbol()->isAuto() || symRef->getSymbol()->isParm())
                     treeTopCanBeEliminated = true;
                  else
                     {
                     if (childOpCode.isLoad() ||
                         (opCodeValue == TR::loadaddr) ||
                         (opCodeValue == TR::instanceof) ||
                         (((opCodeValue == TR::New) ||
                           (opCodeValue == TR::anewarray ||
                            opCodeValue == TR::newarray)) &&
                          ///child->getFirstChild()->isNonNegative()))
                          child->markedAllocationCanBeRemoved()))
                         // opCodeValue == TR::multianewarray ||
                         // opCodeValue == TR::MergeNew)
                        treeTopCanBeEliminated = true;
                     }
                  }
               else
                  treeTopCanBeEliminated = true;
               }
            }

         // Fix for the case when a float to non-float conversion node swings
         // down past a branch on IA32; this would cause an FP value to be commoned
         // across a branch where there was none originally; this causes problems
         // as a value is left on the stack.
         //
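         /*
          * Illustrative IL shape for the comment above (assumed, not taken
          * from this file):
          *
          *    istore t
          *      f2i
          *        fload x
          *    ificmpeq -> block_7
          *
          * If the f2i tree swings below the ificmpeq, the float value feeding
          * it becomes live across the branch; on an x87-style FP stack that
          * leaves a value stacked on one path and not on the other.
          */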
         if (treeTopCanBeEliminated &&
             seenConditionalBranch)
            {
            if (!cg()->getSupportsJavaFloatSemantics())
               {
               if (child->getOpCode().isConversion() ||
                   child->getOpCode().isBooleanCompare())
                  {
                  if (child->getFirstChild()->getOpCode().isFloatingPoint() &&
                      !child->getOpCode().isFloatingPoint())
                     treeTopCanBeEliminated = false;
                  }
               }
            }

         if (treeTopCanBeEliminated)
            {
            TR::NodeChecklist visited(comp());
            bool containsFloatingPoint = false;
            for (int32_t i = 0; i < child->getNumChildren(); ++i)
               {
               // Anchor nodes with reference count > 1
               //
               bool highGlobalIndex = false;
               if (fixUpTree(child->getChild(i), iter.currentTree(), visited, highGlobalIndex, self(), visitCount))
                  containsFloatingPoint = true;
               if (highGlobalIndex)
                  {
                  dumpOptDetails(comp(),
                     "%sGlobal index limit exceeded; stopping\n",
                     optDetailString());
                  return 0;
                  }
               }

            if (seenConditionalBranch &&
                containsFloatingPoint)
               {
               if (!cg()->getSupportsJavaFloatSemantics())
                  treeTopCanBeEliminated = false;
               }
            }
         }

      // Update visitCount as they are used in this optimization and need to be
      // correct at all intermediate stages
      //
      if (!treeTopCanBeEliminated)
         TR::TransformUtil::recursivelySetNodeVisitCount(node, visitCount);

      if (treeTopCanBeEliminated)
         {
         TR::TreeTop *prevTree = iter.currentTree()->getPrevTreeTop();
         TR::TreeTop *nextTree = iter.currentTree()->getNextTreeTop();

         if (!node->getOpCode().isStoreReg() || (node->getFirstChild()->getReferenceCount() == 1))
            {
            // Actually going to remove the treetop now
            //
            if (performTransformation(comp(), "%sRemove tree : [" POINTER_PRINTF_FORMAT "] ([" POINTER_PRINTF_FORMAT "] = %s)\n", optDetailString(), node, node->getFirstChild(), node->getFirstChild()->getOpCode().getName()))
               {
               prevTree->join(nextTree);
               optimizer()->prepareForNodeRemoval(node);

               ///child->recursivelyDecReferenceCount();
               node->recursivelyDecReferenceCount();
               recursivelyDecFutureUseCount(child);

               iter.jumpTo(prevTree);

               if (child->getReferenceCount() == 1)
                  requestOpt(OMR::treeSimplification, true, block);

               if (nextTree->getNode()->getOpCodeValue() == TR::Goto &&
                   prevTree->getNode()->getOpCodeValue() == TR::BBStart &&
                   !prevTree->getNode()->getBlock()->isExtensionOfPreviousBlock())
                  {
                  requestOpt(
                     OMR::redundantGotoElimination,
                     prevTree->getNode()->getBlock());
                  }
               }
            }
         else
            {
            if (performTransformation(comp(), "%sMove tree : [" POINTER_PRINTF_FORMAT "]([" POINTER_PRINTF_FORMAT "] = %s) to end of block\n", optDetailString(), node, node->getFirstChild(), node->getFirstChild()->getOpCode().getName()))
               {
               prevTree->join(nextTree);
               node->setVisitCount(visitCount);

               TR::TreeTop *lastTree = findLastTreetop(block, prevTree);
               TR::TreeTop *prevLastTree = lastTree->getPrevTreeTop();

               TR::TreeTop *cursorTreeTop = nextTree;
               while (cursorTreeTop != lastTree)
                  {
                  if (cursorTreeTop->getNode()->getOpCode().isStoreReg() &&
                      (cursorTreeTop->getNode()->getGlobalRegisterNumber() ==
                       iter.currentTree()->getNode()->getGlobalRegisterNumber()))
                     {
                     lastTree = cursorTreeTop;
                     prevLastTree = lastTree->getPrevTreeTop();
                     break;
                     }

                  cursorTreeTop = cursorTreeTop->getNextTreeTop();
                  }

               if (lastTree->getNode()->getOpCodeValue() == TR::BBStart)
                  {
                  prevLastTree = lastTree;
                  lastTree = block->getExit();
                  }

               TR::Node *lastNode = lastTree->getNode();
               TR::Node *prevLastNode = prevLastTree->getNode();

               if (lastNode->getOpCode().isIf() && !lastNode->getOpCode().isCompBranchOnly() &&
                   prevLastNode->getOpCode().isStoreReg() &&
                   ((prevLastNode->getFirstChild() == lastNode->getFirstChild()) ||
                    (prevLastNode->getFirstChild() == lastNode->getSecondChild())))
                  {
                  lastTree = prevLastTree;
                  prevLastTree = lastTree->getPrevTreeTop();
                  }
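               /*
                * At this point (sketch of the invariant, as read from the
                * code above): lastTree/prevLastTree bracket the insertion
                * point near the end of the block, adjusted so the moved tree
                * does not slip past a conflicting regStore of the same
                * global register or past an if that consumes the stored
                * value. The two join() calls below splice the current tree
                * in at that point.
                */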
               prevLastTree->join(iter.currentTree());
               iter.currentTree()->join(lastTree);

               iter.jumpTo(prevTree);
               requestOpt(OMR::treeSimplification, true, block);
               }
            }
         }
      }

   for (auto it = anchors.begin(); it != anchors.end(); ++it)
      {
      TR::Node *anchor = it->tree->getNode();
      TR::Node *load = anchor->getChild(0);
      if (load->getReferenceCount() > 1)
         continue;

      // We can eliminate the indirect load immediately, but for the moment the
      // subtree providing the base object has to be anchored.

      TR::Node *heapBase = anchor->getChild(1);

      TR::Node::recreate(anchor, TR::treetop);
      anchor->setAndIncChild(0, load->getChild(0));
      anchor->setChild(1, NULL);
      anchor->setNumChildren(1);

      if (!heapBase->getOpCode().isLoadConst())
         {
         it->tree->insertAfter(
            TR::TreeTop::create(
               comp(),
               TR::Node::create(heapBase, TR::treetop, 1, heapBase)));
         }

      load->recursivelyDecReferenceCount();
      heapBase->recursivelyDecReferenceCount();

      // A later pass of dead trees can likely move (or even remove) the base
      // object expression.
      requestOpt(OMR::deadTreesElimination, true, it->block);
      }

   return 1; // actual cost
   }
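/*
 * Sketch of the anchor rewrite in the final loop of process() (tree shapes
 * assumed): a compressed-references anchor whose indirect load is otherwise
 * unreferenced,
 *
 *    anchor
 *      iloadi <field>      <- child 0: dead indirect load
 *        <base object>
 *      <heapBase>          <- child 1
 *
 * is recreated in place as a plain treetop over the base-object subtree,
 *
 *    treetop
 *      <base object>
 *
 * with <heapBase> re-anchored under its own new treetop when it is not a
 * load constant, so nothing commoned beneath it loses its evaluation point.
 */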