void TR::RegDepCopyRemoval::makeFreshCopy(TR_GlobalRegisterNumber reg) { RegDepInfo &dep = getRegDepInfo(reg); if (!performTransformation(comp(), "%schange %s in GlRegDeps n%un to an explicit copy of n%un\n", optDetailString(), registerName(reg), _regDeps->getGlobalIndex(), dep.value->getGlobalIndex())) return; // Split the block at fallthrough if necessary to avoid putting copies // between branches and BBEnd. TR::Node *curNode = _treetop->getNode(); if (curNode->getOpCodeValue() == TR::BBEnd) { TR::Block *curBlock = curNode->getBlock(); if (curBlock->getLastRealTreeTop() != curBlock->getLastNonControlFlowTreeTop()) { TR::Block *fallthrough = curBlock->getNextBlock(); fallthrough = curBlock->splitEdge(curBlock, fallthrough, comp()); TR_ASSERT(curBlock->getNextBlock() == fallthrough, "bad block placement from splitEdge\n"); fallthrough->setIsExtensionOfPreviousBlock(); _treetop = fallthrough->getExit(); TR::Node *newNode = _treetop->getNode(); newNode->setChild(0, _regDeps); newNode->setNumChildren(1); curNode->setNumChildren(0); if (trace()) traceMsg(comp(), "\tsplit fallthrough edge to insert copy, created block_%d\n", fallthrough->getNumber()); } } // Make and insert the copy TR::Node *copyNode = NULL; if (dep.value->getOpCode().isLoadConst()) { // No need to depend on the other register. // TODO heuristic for whether this is really better than a reg-reg move? generateRegcopyDebugCounter("const-remat"); copyNode = TR::Node::create(dep.value->getOpCodeValue(), 0); copyNode->setConstValue(dep.value->getConstValue()); } else { generateRegcopyDebugCounter("fresh-copy"); copyNode = TR::Node::create(TR::PassThrough, 1, dep.value); copyNode->setCopyToNewVirtualRegister(); } TR::Node *copyTreetopNode = TR::Node::create(TR::treetop, 1, copyNode); _treetop->insertBefore(TR::TreeTop::create(comp(), copyTreetopNode)); if (trace()) traceMsg(comp(), "\tcopy is n%un\n", copyNode->getGlobalIndex()); updateSingleRegDep(reg, copyNode); }
// Checks for syntactic equivalence and returns the side-table index // of the syntactically equivalent node if it found one; else it returns // -1 signifying that this is the first time any node similar syntactically // to this node has been seen. Adds the node to the hash table if seen for the // first time. // // int TR_LocalAnalysisInfo::hasOldExpressionOnRhs(TR::Node *node, bool recalcContainsCall, bool storeLhsContainsCall) { // // Get the relevant portion of the subtree // for this node; this is different for a null check // as its null check reference is the only // sub-expression that matters // TR::Node *relevantSubtree = NULL; if (node->getOpCodeValue() == TR::NULLCHK) relevantSubtree = node->getNullCheckReference(); else relevantSubtree = node; // containsCall checks whether the relevant node has some // sub-expression that cannot be commoned, e.g. call or a new // bool nodeContainsCall; if (!recalcContainsCall && (relevantSubtree == node)) { // can use pre-calculated value of containsCall and storeLhsContainsCall, to avoid visitCount overflow nodeContainsCall = node->containsCall(); } else { storeLhsContainsCall = false; nodeContainsCall = containsCall(relevantSubtree, storeLhsContainsCall); } if (nodeContainsCall) { // // If the node is not a store, a call-like sub-expression is inadmissable; // if the node is a store, a call-like sub-expression is allowed on the RHS // of the store as this does not inhibit privatization in any way as // the private temp store's RHS simply points at original RHS. But if a call-like // sub-expression is present in the LHS of the store, that is inadmissable // if (!node->getOpCode().isStore() || storeLhsContainsCall) return 0; } bool seenIndirectStore = false; #ifdef J9_PROJECT_SPECIFIC bool seenIndirectBCDStore = false; #endif bool seenWriteBarrier = false; int32_t storeNumChildren = node->getNumChildren(); // If node is a null check, compare the // null check reference only to establish syntactic equivalence // if (node->getOpCodeValue() == TR::NULLCHK) /////if (node->getOpCode().isNullCheck()) { int32_t k; for (k=0;k<_numNullChecks;k++) { if (!(_nullCheckNodesAsArray[k] == NULL)) { if (areSyntacticallyEquivalent(_nullCheckNodesAsArray[k]->getNullCheckReference(), node->getNullCheckReference())) return _nullCheckNodesAsArray[k]->getLocalIndex(); } } _nullCheckNodesAsArray[_numNullChecks++] = node; } else { // // If this node is a global store, then equivalence check is different. // We try to give a store to field (or static) o.f the same index as // a load of o.f. This is so that privatization happens for fields/statics. // So the store's opcode value is changed temporarily to be a load before // syntactic equivalence is checked; this enables matching stores/loads to // same global symbol. // if (node->getOpCode().isStore() && !node->getSymbolReference()->getSymbol()->isAutoOrParm()) { if (node->getOpCode().isWrtBar()) seenWriteBarrier = true; #ifdef J9_PROJECT_SPECIFIC seenIndirectBCDStore = node->getType().isBCD(); #endif if (node->getOpCode().isStoreIndirect()) { if (seenWriteBarrier) { TR::Node::recreate(node, _compilation->il.opCodeForIndirectArrayLoad(node->getDataType())); } else { TR::Node::recreate(node, _compilation->il.opCodeForCorrespondingIndirectStore(node->getOpCodeValue())); } node->setNumChildren(1); } else { TR::Node::recreate(node, _compilation->il.opCodeForDirectLoad(node->getDataType())); node->setNumChildren(0); } #ifdef J9_PROJECT_SPECIFIC if (seenIndirectBCDStore) node->setBCDStoreIsTemporarilyALoad(true); #endif seenIndirectStore = true; } int32_t hashValue = _hashTable->hash(node); HashTable::Cursor cursor(_hashTable, hashValue); TR::Node *other; for (other = cursor.firstNode(); other; other = cursor.nextNode()) { // Convert other node's opcode to be a load temporarily // (only for syntactic equivalence check; see explanation above) // to enable matching global stores/loads. // bool seenOtherIndirectStore = false; #ifdef J9_PROJECT_SPECIFIC bool seenOtherIndirectBCDStore = false; #endif bool seenOtherWriteBarrier = false; int32_t otherStoreNumChildren = other->getNumChildren(); if (other->getOpCode().isStore() && !other->getSymbolReference()->getSymbol()->isAutoOrParm()) { if (other->getOpCode().isWrtBar()) seenOtherWriteBarrier = true; #ifdef J9_PROJECT_SPECIFIC seenOtherIndirectBCDStore = other->getType().isBCD(); #endif if (other->getOpCode().isStoreIndirect()) { if (seenOtherWriteBarrier) { TR::Node::recreate(other, _compilation->il.opCodeForIndirectArrayLoad(other->getDataType())); } else { TR::Node::recreate(other, _compilation->il.opCodeForCorrespondingIndirectStore(other->getOpCodeValue())); } other->setNumChildren(1); } else { TR::Node::recreate(other, _compilation->il.opCodeForDirectLoad(other->getDataType())); other->setNumChildren(0); } #ifdef J9_PROJECT_SPECIFIC if (seenOtherIndirectBCDStore) other->setBCDStoreIsTemporarilyALoad(true); #endif seenOtherIndirectStore = true; } bool areSame = areSyntacticallyEquivalent(node, other); // Restore the other node's state to what it was originally // (if it was a global store) // if (seenOtherWriteBarrier) { other->setNumChildren(otherStoreNumChildren); if (otherStoreNumChildren == 3) TR::Node::recreate(other, TR::awrtbari); else TR::Node::recreate(other, TR::awrtbar); } else if (seenOtherIndirectStore) { other->setNumChildren(otherStoreNumChildren); #ifdef J9_PROJECT_SPECIFIC if (seenOtherIndirectBCDStore) other->setBCDStoreIsTemporarilyALoad(false); #endif if (other->getOpCode().isIndirect()) TR::Node::recreate(other, _compilation->il.opCodeForCorrespondingIndirectLoad(other->getOpCodeValue())); else TR::Node::recreate(other, _compilation->il.opCodeForDirectStore(other->getDataType())); } if (areSame) { if (seenWriteBarrier) { node->setNumChildren(storeNumChildren); if (storeNumChildren == 3) TR::Node::recreate(node, TR::awrtbari); else TR::Node::recreate(node, TR::awrtbar); } else if (seenIndirectStore) { node->setNumChildren(storeNumChildren); #ifdef J9_PROJECT_SPECIFIC if (seenIndirectBCDStore) node->setBCDStoreIsTemporarilyALoad(false); #endif if (node->getOpCode().isIndirect()) TR::Node::recreate(node, _compilation->il.opCodeForCorrespondingIndirectLoad(node->getOpCodeValue())); else TR::Node::recreate(node, _compilation->il.opCodeForDirectStore(node->getDataType())); } return other->getLocalIndex(); } } // No match from existing nodes in the hash table; // add this node to the hash table. // _hashTable->add(node, hashValue); } // Restore this node's state to what it was before // (if it was a global store) // if (seenWriteBarrier) { node->setNumChildren(storeNumChildren); if (storeNumChildren == 3) TR::Node::recreate(node, TR::awrtbari); else TR::Node::recreate(node, TR::awrtbar); } else if (seenIndirectStore) { node->setNumChildren(storeNumChildren); #ifdef J9_PROJECT_SPECIFIC if (seenIndirectBCDStore) node->setBCDStoreIsTemporarilyALoad(false); #endif if (node->getOpCode().isIndirect()) TR::Node::recreate(node, _compilation->il.opCodeForCorrespondingIndirectLoad(node->getOpCodeValue())); else TR::Node::recreate(node, _compilation->il.opCodeForDirectStore(node->getDataType())); } return -1; }
int32_t TR::DeadTreesElimination::process(TR::TreeTop *startTree, TR::TreeTop *endTree) { TR::StackMemoryRegion stackRegion(*comp()->trMemory()); LongestPathMap longestPaths(std::less<TR::Node*>(), stackRegion); typedef TR::typed_allocator<CRAnchor, TR::Region&> CRAnchorAlloc; typedef TR::forward_list<CRAnchor, CRAnchorAlloc> CRAnchorList; CRAnchorList anchors(stackRegion); vcount_t visitCount = comp()->incOrResetVisitCount(); TR::TreeTop *treeTop; for (treeTop = startTree; (treeTop != endTree); treeTop = treeTop->getNextTreeTop()) treeTop->getNode()->initializeFutureUseCounts(visitCount); TR::Block *block = NULL; bool delayedRegStoresBeforeThisPass = _delayedRegStores; // Update visitCount as they are used in this optimization and need to be visitCount = comp()->incOrResetVisitCount(); for (TR::TreeTopIterator iter(startTree, comp()); iter != endTree; ++iter) { TR::Node *node = iter.currentTree()->getNode(); if (node->getOpCodeValue() == TR::BBStart) { block = node->getBlock(); if (!block->isExtensionOfPreviousBlock()) longestPaths.clear(); } int vcountLimit = MAX_VCOUNT - 3; if (comp()->getVisitCount() > vcountLimit) { dumpOptDetails(comp(), "%sVisit count %d exceeds limit %d; stopping\n", optDetailString(), comp()->getVisitCount(), vcountLimit); return 0; } // correct at all intermediate stages // if ((node->getOpCodeValue() != TR::treetop) && (!node->getOpCode().isAnchor() || (node->getFirstChild()->getReferenceCount() != 1)) && (!node->getOpCode().isStoreReg() || (node->getFirstChild()->getReferenceCount() != 1)) && (delayedRegStoresBeforeThisPass || (iter.currentTree() == block->getLastRealTreeTop()) || !node->getOpCode().isStoreReg() || (node->getVisitCount() == visitCount))) { if (node->getOpCode().isAnchor() && node->getFirstChild()->getOpCode().isLoadIndirect()) anchors.push_front(CRAnchor(iter.currentTree(), block)); TR::TransformUtil::recursivelySetNodeVisitCount(node, visitCount); continue; } if (node->getOpCode().isStoreReg()) _delayedRegStores = true; TR::Node *child = node->getFirstChild(); if (child->getOpCodeValue() == TR::PassThrough) { TR::Node *newChild = child->getFirstChild(); node->setAndIncChild(0, newChild); newChild->incFutureUseCount(); if (child->getReferenceCount() <= 1) optimizer()->prepareForNodeRemoval(child); child->recursivelyDecReferenceCount(); recursivelyDecFutureUseCount(child); child = newChild; } bool treeTopCanBeEliminated = false; // If the treetop child has been seen before then it must be anchored // somewhere above already; so we don't need the treetop to be anchoring // this node (as the computation is already done at the first reference to // the node). // if (visitCount == child->getVisitCount()) { treeTopCanBeEliminated = true; } else { TR::ILOpCode &childOpCode = child->getOpCode(); TR::ILOpCodes opCodeValue = childOpCode.getOpCodeValue(); bool seenConditionalBranch = false; bool callWithNoSideEffects = child->getOpCode().isCall() && child->getSymbolReference()->getSymbol()->isResolvedMethod() && child->getSymbolReference()->getSymbol()->castToResolvedMethodSymbol()->isSideEffectFree(); if (callWithNoSideEffects) { treeTopCanBeEliminated = true; } else if (!((childOpCode.isCall() && !callWithNoSideEffects) || childOpCode.isStore() || ((opCodeValue == TR::New || opCodeValue == TR::anewarray || opCodeValue == TR::newarray) && child->getReferenceCount() > 1) || opCodeValue == TR::multianewarray || opCodeValue == TR::MergeNew || opCodeValue == TR::checkcast || opCodeValue == TR::Prefetch || opCodeValue == TR::iu2l || ((childOpCode.isDiv() || childOpCode.isRem()) && child->getNumChildren() == 3))) { // Perform the rather complex check to see whether its safe // to disconnect the child node from the treetop // bool safeToReplaceNode = false; if (child->getReferenceCount() == 1) { safeToReplaceNode = true; #ifdef J9_PROJECT_SPECIFIC if (child->getOpCode().isPackedExponentiation()) { // pdexp has a possible message side effect in truncating or no significant digits left cases safeToReplaceNode = false; } #endif if (opCodeValue == TR::loadaddr) treeTopCanBeEliminated = true; } else if (!_cannotBeEliminated) { safeToReplaceNode = isSafeToReplaceNode( child, iter.currentTree(), &seenConditionalBranch, visitCount, comp(), &_targetTrees, _cannotBeEliminated, longestPaths); } if (safeToReplaceNode) { if (childOpCode.hasSymbolReference()) { TR::SymbolReference *symRef = child->getSymbolReference(); if (symRef->getSymbol()->isAuto() || symRef->getSymbol()->isParm()) treeTopCanBeEliminated = true; else { if (childOpCode.isLoad() || (opCodeValue == TR::loadaddr) || (opCodeValue == TR::instanceof) || (((opCodeValue == TR::New) || (opCodeValue == TR::anewarray || opCodeValue == TR::newarray)) && ///child->getFirstChild()->isNonNegative())) child->markedAllocationCanBeRemoved())) // opCodeValue == TR::multianewarray || // opCodeValue == TR::MergeNew) treeTopCanBeEliminated = true; } } else treeTopCanBeEliminated = true; } } // Fix for the case when a float to non-float conversion node swings // down past a branch on IA32; this would cause a FP value to be commoned // across a branch where there was none originally; this causes pblms // as a value is left on the stack. // if (treeTopCanBeEliminated && seenConditionalBranch) { if (!cg()->getSupportsJavaFloatSemantics()) { if (child->getOpCode().isConversion() || child->getOpCode().isBooleanCompare()) { if (child->getFirstChild()->getOpCode().isFloatingPoint() && !child->getOpCode().isFloatingPoint()) treeTopCanBeEliminated = false; } } } if (treeTopCanBeEliminated) { TR::NodeChecklist visited(comp()); bool containsFloatingPoint = false; for (int32_t i = 0; i < child->getNumChildren(); ++i) { // Anchor nodes with reference count > 1 // bool highGlobalIndex = false; if (fixUpTree(child->getChild(i), iter.currentTree(), visited, highGlobalIndex, self(), visitCount)) containsFloatingPoint = true; if (highGlobalIndex) { dumpOptDetails(comp(), "%sGlobal index limit exceeded; stopping\n", optDetailString()); return 0; } } if (seenConditionalBranch && containsFloatingPoint) { if (!cg()->getSupportsJavaFloatSemantics()) treeTopCanBeEliminated = false; } } } // Update visitCount as they are used in this optimization and need to be // correct at all intermediate stages // if (!treeTopCanBeEliminated) TR::TransformUtil::recursivelySetNodeVisitCount(node, visitCount); if (treeTopCanBeEliminated) { TR::TreeTop *prevTree = iter.currentTree()->getPrevTreeTop(); TR::TreeTop *nextTree = iter.currentTree()->getNextTreeTop(); if (!node->getOpCode().isStoreReg() || (node->getFirstChild()->getReferenceCount() == 1)) { // Actually going to remove the treetop now // if (performTransformation(comp(), "%sRemove tree : [" POINTER_PRINTF_FORMAT "] ([" POINTER_PRINTF_FORMAT "] = %s)\n", optDetailString(), node, node->getFirstChild(), node->getFirstChild()->getOpCode().getName())) { prevTree->join(nextTree); optimizer()->prepareForNodeRemoval(node); ///child->recursivelyDecReferenceCount(); node->recursivelyDecReferenceCount(); recursivelyDecFutureUseCount(child); iter.jumpTo(prevTree); if (child->getReferenceCount() == 1) requestOpt(OMR::treeSimplification, true, block); if (nextTree->getNode()->getOpCodeValue() == TR::Goto && prevTree->getNode()->getOpCodeValue() == TR::BBStart && !prevTree->getNode()->getBlock()->isExtensionOfPreviousBlock()) { requestOpt( OMR::redundantGotoElimination, prevTree->getNode()->getBlock()); } } } else { if (performTransformation(comp(), "%sMove tree : [" POINTER_PRINTF_FORMAT "]([" POINTER_PRINTF_FORMAT "] = %s) to end of block\n", optDetailString(), node, node->getFirstChild(), node->getFirstChild()->getOpCode().getName())) { prevTree->join(nextTree); node->setVisitCount(visitCount); TR::TreeTop *lastTree = findLastTreetop(block, prevTree); TR::TreeTop *prevLastTree = lastTree->getPrevTreeTop(); TR::TreeTop *cursorTreeTop = nextTree; while (cursorTreeTop != lastTree) { if (cursorTreeTop->getNode()->getOpCode().isStoreReg() && (cursorTreeTop->getNode()->getGlobalRegisterNumber() == iter.currentTree()->getNode()->getGlobalRegisterNumber())) { lastTree = cursorTreeTop; prevLastTree = lastTree->getPrevTreeTop(); break; } cursorTreeTop = cursorTreeTop->getNextTreeTop(); } if (lastTree->getNode()->getOpCodeValue() == TR::BBStart) { prevLastTree = lastTree; lastTree = block->getExit(); } TR::Node *lastNode = lastTree->getNode(); TR::Node *prevLastNode = prevLastTree->getNode(); if (lastNode->getOpCode().isIf() && !lastNode->getOpCode().isCompBranchOnly() && prevLastNode->getOpCode().isStoreReg() && ((prevLastNode->getFirstChild() == lastNode->getFirstChild()) || (prevLastNode->getFirstChild() == lastNode->getSecondChild()))) { lastTree = prevLastTree; prevLastTree = lastTree->getPrevTreeTop(); } prevLastTree->join(iter.currentTree()); iter.currentTree()->join(lastTree); iter.jumpTo(prevTree); requestOpt(OMR::treeSimplification, true, block); } } } } for (auto it = anchors.begin(); it != anchors.end(); ++it) { TR::Node *anchor = it->tree->getNode(); TR::Node *load = anchor->getChild(0); if (load->getReferenceCount() > 1) continue; // We can eliminate the indirect load immediately, but for the moment the // subtree providing the base object has to be anchored. TR::Node *heapBase = anchor->getChild(1); TR::Node::recreate(anchor, TR::treetop); anchor->setAndIncChild(0, load->getChild(0)); anchor->setChild(1, NULL); anchor->setNumChildren(1); if (!heapBase->getOpCode().isLoadConst()) { it->tree->insertAfter( TR::TreeTop::create( comp(), TR::Node::create(heapBase, TR::treetop, 1, heapBase))); } load->recursivelyDecReferenceCount(); heapBase->recursivelyDecReferenceCount(); // A later pass of dead trees can likely move (or even remove) the base // object expression. requestOpt(OMR::deadTreesElimination, true, it->block); } return 1; // actual cost }