// Walk all blocks of the method and, for every block whose last real tree is
// a return, insert an asynccheck (yield point) via insertAsyncCheck().
// Returns the number of asyncchecks inserted.
int32_t TR_AsyncCheckInsertion::insertReturnAsyncChecks(TR::Optimization *opt, const char *counterPrefix)
   {
   TR::Compilation * const comp = opt->comp();
   if (opt->trace())
      traceMsg(comp, "Inserting return asyncchecks (%s)\n", counterPrefix);

   int insertedCount = 0;
   TR::TreeTop *cursor = comp->getStartTree();
   while (cursor != NULL)
      {
      TR::Block * const currentBlock = cursor->getNode()->getBlock();
      TR::Node * const lastRealNode = currentBlock->getLastRealTreeTop()->getNode();
      if (lastRealNode->getOpCode().isReturn()
          && performTransformation(comp, "%sInserting return asynccheck (%s) in block_%d\n", opt->optDetailString(), counterPrefix, currentBlock->getNumber()))
         {
         insertAsyncCheck(currentBlock, comp, counterPrefix);
         insertedCount++;
         }
      // Step over the entire block in one go; any trees the insertion added
      // inside this block are therefore not revisited.
      cursor = currentBlock->getExit()->getNextRealTreeTop();
      }
   return insertedCount;
   }
// Recursively walk the subtree rooted at 'node' and replace direct loads of
// auto (stack-local) symbols that are never stored anywhere in the method
// with a constant zero. A local with no store has no defined value, so the
// load can be folded away; this also stops the local from being kept alive.
// The visit count guarantees each node is processed exactly once per walk.
void OMR::CodeGenerator::eliminateLoadsOfLocalsThatAreNotStored( TR::Node *node, int32_t childNum)
   {
   // Already visited in this traversal — nothing to do.
   if (node->getVisitCount() == self()->comp()->getVisitCount())
      {
      return;
      }
   node->setVisitCount(self()->comp()->getVisitCount());

   if (node->getOpCode().isLoadVarDirect() &&
       node->getSymbolReference()->getSymbol()->isAuto() &&
       // Only trust the analysis for locals that existed when it was run.
       (node->getSymbolReference()->getReferenceNumber() < _numLocalsWhenStoreAnalysisWasDone) &&
       !node->getSymbol()->castToAutoSymbol()->isLiveLocalIndexUninitialized() &&
       // Skip locals that liveness marked as live-but-maybe-unreferenced.
       (!_liveButMaybeUnreferencedLocals ||
        !_liveButMaybeUnreferencedLocals->get(node->getSymbol()->castToAutoSymbol()->getLiveLocalIndex())) &&
       // The decisive test: no store to this symbol was recorded anywhere.
       !_localsThatAreStored->get(node->getSymbolReference()->getReferenceNumber()) &&
       performTransformation(self()->comp(), "%sRemoving dead load of sym ref %d at %p\n", OPT_DETAILS, node->getSymbolReference()->getReferenceNumber(), node))
      {
      // Recreate the load in place as a constant of the matching data type,
      // then zero its value (setLongInt writes the full value payload).
      TR::Node::recreate(node, self()->comp()->il.opCodeForConst(node->getSymbolReference()->getSymbol()->getDataType()));
      node->setLongInt(0);
      return;
      }

   int32_t i;
   for (i=0; i < node->getNumChildren(); i++)
      {
      self()->eliminateLoadsOfLocalsThatAreNotStored(node->getChild(i), i);
      }
   }
//--------------------------------------------------------------------- // Common routine to change a conditional branch into an unconditional one. // Change the node to be the unconditional branch or NULL if no branch taken. // Return true if blocks were removed as a result of the change // bool OMR::Simplifier::conditionalToUnconditional(TR::Node *&node, TR::Block * block, int takeBranch) { if (!performTransformation(comp(), "%s change conditional to unconditional n%in\n", optDetailString(), node->getNodePoolIndex())) { return false; } TR::CFGEdge* removedEdge = changeConditionalToUnconditional(node, block, takeBranch, _curTree, optDetailString()); bool blocksWereRemoved = removedEdge ? removedEdge->getTo()->nodeIsRemoved() : false; if (takeBranch) { TR_ASSERT(node->getOpCodeValue() == TR::Goto, "expecting the node to have been converted to a goto"); node = simplify(node, block); } if (blocksWereRemoved) { _invalidateUseDefInfo = true; _alteredBlock = true; _blockRemoved = true; } return blocksWereRemoved; }
// Replace the value of register 'reg' in the current GlRegDeps with an
// explicit, freshly created copy of it, inserted as a treetop before the
// current position. Constants are rematerialized; anything else becomes a
// PassThrough flagged to be copied into a new virtual register.
void TR::RegDepCopyRemoval::makeFreshCopy(TR_GlobalRegisterNumber reg)
   {
   RegDepInfo &dep = getRegDepInfo(reg);
   if (!performTransformation(comp(), "%schange %s in GlRegDeps n%un to an explicit copy of n%un\n", optDetailString(), registerName(reg), _regDeps->getGlobalIndex(), dep.value->getGlobalIndex()))
      return;

   // Split the block at fallthrough if necessary to avoid putting copies
   // between branches and BBEnd.
   TR::Node *curNode = _treetop->getNode();
   if (curNode->getOpCodeValue() == TR::BBEnd)
      {
      TR::Block *curBlock = curNode->getBlock();
      // A control-flow tree after the last non-control-flow tree means the
      // block ends in a branch; copies must not land between it and BBEnd.
      if (curBlock->getLastRealTreeTop() != curBlock->getLastNonControlFlowTreeTop())
         {
         TR::Block *fallthrough = curBlock->getNextBlock();
         fallthrough = curBlock->splitEdge(curBlock, fallthrough, comp());
         TR_ASSERT(curBlock->getNextBlock() == fallthrough, "bad block placement from splitEdge\n");
         fallthrough->setIsExtensionOfPreviousBlock();
         // Move the GlRegDeps from the old BBEnd onto the new block's exit
         // and continue working at the new exit.
         _treetop = fallthrough->getExit();
         TR::Node *newNode = _treetop->getNode();
         newNode->setChild(0, _regDeps);
         newNode->setNumChildren(1);
         curNode->setNumChildren(0);
         if (trace())
            traceMsg(comp(), "\tsplit fallthrough edge to insert copy, created block_%d\n", fallthrough->getNumber());
         }
      }

   // Make and insert the copy
   TR::Node *copyNode = NULL;
   if (dep.value->getOpCode().isLoadConst())
      {
      // No need to depend on the other register.
      // TODO heuristic for whether this is really better than a reg-reg move?
      generateRegcopyDebugCounter("const-remat");
      copyNode = TR::Node::create(dep.value->getOpCodeValue(), 0);
      copyNode->setConstValue(dep.value->getConstValue());
      }
   else
      {
      generateRegcopyDebugCounter("fresh-copy");
      copyNode = TR::Node::create(TR::PassThrough, 1, dep.value);
      copyNode->setCopyToNewVirtualRegister();
      }
   // Anchor the copy under its own treetop just before the current position.
   TR::Node *copyTreetopNode = TR::Node::create(TR::treetop, 1, copyNode);
   _treetop->insertBefore(TR::TreeTop::create(comp(), copyTreetopNode));
   if (trace())
      traceMsg(comp(), "\tcopy is n%un\n", copyNode->getGlobalIndex());
   updateSingleRegDep(reg, copyNode);
   }
//exitTree is the next treeTop after the relevant range of TreeTops (start of next block) bool TR_LocalLiveRangeReduction::transformExtendedBlock(TR::TreeTop *entryTree, TR::TreeTop *exitTree) { TR::TreeTop *currentTree = exitTree; if (!performTransformation(comp(), "%sBlock %d\n", OPT_DETAILS, entryTree->getNode()->getBlock()->getNumber())) return false; //Gather information for each tree regarding its first/mid/last-ref-nodes. //And populate list of TreesRefInfo collectInfo(entryTree, exitTree); /******************* pass 1 ********************************************/ //Move "interesting trees" down as close as possible to the last ref node. for (int32_t i =0; i< _numTreeTops; ) { bool movedFlag=false; TR_TreeRefInfo *currentTree = _treesRefInfoArray[i]; if (isNeedToBeInvestigated(currentTree)) movedFlag = investigateAndMove(currentTree,1); if (!movedFlag) i++; } //if none moved return; if (_movedTreesList.isEmpty()) return true; //Update DependenciesList after pass 1. //Remove dependencies which are not of interest (i.e. anchor didn't move). updateDepList(); //if in all pairs the anchor didn't move, no need for second pass. if (_depPairList.isEmpty()) return true; _movedTreesList.deleteAll(); /******************* pass 2 ********************************************/ //Try to move trees that were blcok by a tree that later has been moved. //Note that since the list adds elements to the head, we can iterate the list, and by this to cover chain of moves (the later one will be in front) //e.g. if a blocked by b and then b moved after c. The list will be b-c;a-b ListIterator<DepPair> listIt(&_depPairList); for (DepPair * depPair = listIt.getFirst(); depPair != NULL; depPair = listIt.getNext()) { TR_TreeRefInfo *treeToMove = depPair->getDep(); if (isNeedToBeInvestigated(treeToMove)) investigateAndMove(treeToMove,2); } return true; }
// Evaluate an i2l (int -> long sign extension) node on AMD64.
TR::Register *OMR::X86::AMD64::TreeEvaluator::i2lEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   TR::Compilation *comp = cg->comp();
   TR::Node *child = node->getFirstChild();

   if (child->getOpCode().isLoadConst())
      {
      // Constant child: materialize the (implicitly sign-extended) immediate
      // straight into the result register.
      TR::Register *result = cg->allocateRegister();
      generateRegImmInstruction(MOV8RegImm4, node, result, child->getInt(), cg);
      node->setRegister(result);
      cg->decReferenceCount(child);
      return result;
      }

   // In theory, because iRegStore has chosen to disregard needsSignExtension,
   // we must disregard skipSignExtension here for correctness.
   //
   // However, in fact, it is actually safe to obey skipSignExtension so
   // long as the optimizer only uses it on nodes known to be non-negative
   // when the i2l occurs. We do already have isNonNegative for that
   // purpose, but it may not always be set by the optimizer if a node known
   // to be non-negative at one point in a block is commoned up above the
   // BNDCHK or branch that determines the node's non-negativity. The
   // codegen does set the flag during tree evaluation, but the
   // skipSignExtension flag is set by the optimizer with more global
   // knowledge than the tree evaluator, so we will trust it.
   //
   const bool extensionUnneeded =
      node->isNonNegative() ||
      (node->skipSignExtension() &&
       performTransformation(comp, "TREE EVALUATION: skipping sign extension on node %s despite lack of isNonNegative\n", comp->getDebug()->getName(node)));

   TR_X86OpCodes memOp;
   TR_X86OpCodes regOp;
   if (extensionUnneeded)
      {
      // We prefer these plain (zero-extending) opcodes because the analyser
      // can often eliminate them.
      memOp = L4RegMem;
      regOp = MOVZXReg8Reg4;
      }
   else
      {
      memOp = MOVSXReg8Mem4;
      regOp = MOVSXReg8Reg4;
      }

   return TR::TreeEvaluator::conversionAnalyser(node, memOp, regOp, cg);
   }
// Fetch (and bump) the counters registered under 'name'.
// The static counter, when enabled, is incremented immediately at compile
// time by 'staticDelta'. The dynamic counter is returned to the caller for
// runtime increments, gated by performTransformation so individual counters
// can be selectively suppressed; NULL means no dynamic counting.
TR::DebugCounter * TR::DebugCounter::getDebugCounter(TR::Compilation *comp, const char *name, int8_t fidelity, int32_t staticDelta)
   {
   if (comp->getOptions()->staticDebugCounterIsEnabled(name, fidelity))
      {
      TR::DebugCounter *staticCounter = comp->getPersistentInfo()->getStaticCounters()->getCounter(comp, name, fidelity);
      staticCounter->increment(staticDelta);
      }

   if (!comp->getOptions()->dynamicDebugCounterIsEnabled(name, fidelity))
      return NULL;

   if (!performTransformation(comp, "O^O DEBUG COUNTER: '%s'\n", name))
      return NULL;

   return comp->getPersistentInfo()->getDynamicCounters()->getCounter(comp, name, fidelity);
   }
// Point the dependency for 'reg' at the copy that was already selected for
// it earlier, instead of creating another one.
void TR::RegDepCopyRemoval::reuseCopy(TR_GlobalRegisterNumber reg)
   {
   RegDepInfo &dep = getRegDepInfo(reg);
   NodeChoice &prevChoice = getNodeChoice(reg);

   // Sanity: the recorded choice must be a copy of this very value.
   TR_ASSERT(prevChoice.original == dep.value, "previous copy for %s doesn't match original\n", registerName(reg));
   TR_ASSERT(prevChoice.selected != dep.value, "previous copy is the same as original for %s\n", registerName(reg));

   if (!performTransformation(comp(), "%schange %s in GlRegDeps n%un to use previous copy n%un of n%un\n", optDetailString(), registerName(reg), _regDeps->getGlobalIndex(), prevChoice.selected->getGlobalIndex(), prevChoice.original->getGlobalIndex()))
      return;

   generateRegcopyDebugCounter("reuse-copy");
   updateSingleRegDep(reg, prevChoice.selected);
   }
int32_t OMR::RecognizedCallTransformer::perform() { TR::NodeChecklist visited(comp()); for (auto treetop = comp()->getMethodSymbol()->getFirstTreeTop(); treetop != NULL; treetop = treetop->getNextTreeTop()) { if (treetop->getNode()->getNumChildren() > 0) { auto node = treetop->getNode()->getFirstChild(); if (node && node->getOpCode().isCall() && !visited.contains(node)) { if (isInlineable(treetop) && performTransformation(comp(), "%s Transforming recognized call node [" POINTER_PRINTF_FORMAT "]\n", optDetailString(), node)) { visited.add(node); transform(treetop); } } } } return 0; }
// Hoist a loop-invariant store candidate out of the current region into its
// preheader. If no preheader can be found, '*isPreheaderBlockInvalid' is set
// and nothing is changed. Only canonicalized loops are transformed: their
// preheader is executed in the first iteration and is therefore protected.
void TR_ExpressionsSimplification::tranformStoreMotionCandidate(TR::TreeTop *treeTop, bool *isPreheaderBlockInvalid)
   {
   TR::Node *storeNode = treeTop->getNode();

   // This candidate should be valid: either a direct or indirect store that
   // is neither static nor a monitored-object store.
   TR_ASSERT(storeNode->getOpCode().isStore() && !storeNode->getSymbol()->isStatic() && !storeNode->getSymbol()->holdsMonitoredObject(),
             "node %p was expected to be a non-static non-monitored object store and was not.", storeNode);

   if (trace())
      comp()->getDebug()->print(comp()->getOutFile(), storeNode, 0, true);

   TR::Block *preheaderBlock = findPredecessorBlock(_currentRegion->getEntryBlock());
   if (preheaderBlock == NULL)
      {
      if (trace())
         traceMsg(comp(), "Fail to find a place to put the hoist code in\n");
      *isPreheaderBlockInvalid = true;
      return;
      }

   // The earlier post-dominance test should ensure the loop runs at least
   // once, but to be safe only canonicalized loops are transformed.
   if (!_currentRegion->isCanonicalizedLoop())
      {
      if (trace())
         traceMsg(comp(), "No canonicalized loop for this candidate\n");
      return;
      }

   if (performTransformation(comp(), "%sMove out loop-invariant store [%p] to block_%d\n", OPT_DETAILS, storeNode, preheaderBlock->getNumber()))
      {
      TR::Node *hoistedCopy = storeNode->duplicateTree();
      transformNode(hoistedCopy, preheaderBlock);
      TR::TransformUtil::removeTree(comp(), treeTop);
      }
   }
// Try to cancel 'node' against its child when the child is the inverse
// operation ('opcode', e.g. a2o over o2a), returning the grandchild that
// replaces the pair, or NULL when the cancellation is not legal. For
// aggregate-typed pairs the cancellation is refused when it would lose a
// truncation side-effect of the conversion (see the detailed cases below).
TR::Node *
OMR::Simplifier::unaryCancelOutWithChild(TR::Node * node, TR::Node * firstChild, TR::TreeTop *anchorTree, TR::ILOpCodes opcode, bool anchorChildren)
   {
   if (!isLegalToUnaryCancel(node, firstChild, opcode))
      return NULL;

   if (firstChild->getOpCodeValue() == opcode &&
       (node->getType().isAggregate() || firstChild->getType().isAggregate()) &&
       (node->getSize() > firstChild->getSize() || node->getSize() != firstChild->getFirstChild()->getSize()))
      {
      // ensure a truncation side-effect of a conversion is not lost
      // o2a size=3
      //   a2o size=3 // conversion truncates in addition to type cast so cannot be removed
      //     loadaddr size=4
      // This restriction could be loosened to only disallow intermediate truncations (see BCD case above) but then would require a node
      // op that would just correct for size (e.g. addrSizeMod size=3 to replace the o2a/a2o pair)
      //
      // Do allow cases when all three sizes are the same and when the middle node widens but the top and bottom node have the same size, e.g.
      //
      // i2o size=3
      //   o2i size=4
      //     oload size=3
      //
      // Also allow the special case where the grandchild is not really truncated as the 'truncated' bytes are known to be zero
      // (i.e. there really isn't an intermediate truncation of 4->3 even though it appears that way from looking at the sizes alone)
      // o2i
      //   i2o size=3
      //     iushr
      //       x
      //       iconst 8
      bool disallow = true;
      TR::Node *grandChild = firstChild->getFirstChild();
      size_t nodeSize = node->getSize();
      if (node->getType().isIntegral() &&
          nodeSize == grandChild->getSize() &&
          nodeSize > firstChild->getSize())
         {
         // Number of bits the middle node appears to truncate.
         size_t truncatedBits = (nodeSize - firstChild->getSize()) * 8;
         // A logical right shift by exactly that many bits guarantees the
         // truncated bytes are zero, so nothing is actually lost.
         if (grandChild->getOpCode().isRightShift() && grandChild->getOpCode().isShiftLogical() &&
             grandChild->getSecondChild()->getOpCode().isLoadConst() &&
             (grandChild->getSecondChild()->get64bitIntegralValue() == truncatedBits))
            {
            disallow = false;
            if (trace())
               traceMsg(comp(),"do allow unaryCancel of node %s (%p) and firstChild %s (%p) as grandChild %s (%p) zeros the %d truncated bytes\n",
                  node->getOpCode().getName(),node,firstChild->getOpCode().getName(),firstChild,
                  grandChild->getOpCode().getName(),grandChild,truncatedBits/8);
            }
         }

      if (disallow)
         {
         if (trace())
            traceMsg(comp(),"disallow unaryCancel of node %s (%p) and firstChild %s (%p) due to unequal sizes (nodeSize %d, firstChildSize %d, firstChild->childSize %d)\n",
               node->getOpCode().getName(),node,firstChild->getOpCode().getName(),firstChild,
               node->getSize(),firstChild->getSize(),firstChild->getFirstChild()->getSize());
         return NULL;
         }
      }

   if (firstChild->getOpCodeValue() == opcode &&
       performTransformation(comp(), "%sRemoving node [" POINTER_PRINTF_FORMAT "] %s and its child [" POINTER_PRINTF_FORMAT "] %s\n",
          optDetailString(), node, node->getOpCode().getName(), firstChild, firstChild->getOpCode().getName()))
      {
      // Keep the grandchild alive while the node/child pair is released.
      TR::Node *grandChild = firstChild->getFirstChild();
      grandChild->incReferenceCount();
      // Other children (or symbol references) of the removed pair may need
      // to stay anchored to preserve their side effects / evaluation point.
      bool anchorChildrenNeeded = anchorChildren &&
         (node->getNumChildren() > 1 ||
          firstChild->getNumChildren() > 1 ||
          node->getOpCode().hasSymbolReference() ||
          firstChild->getOpCode().hasSymbolReference());
      prepareToStopUsingNode(node, anchorTree, anchorChildrenNeeded);
      node->recursivelyDecReferenceCount();
      node->setVisitCount(0);
      return grandChild;
      }

   return NULL;
   }
// Value-propagation constraint handler for virtual calls. Applies the
// generic call constraints, then attempts a few call-specific
// transformations: System.arraycopy conversion (re-launching the node when
// it became a TR::arraycopy), Unsafe copyMemory conversion, string-append
// caching, and (J9 only) removal of redundant jitCheckIfFinalizeObject
// helper calls when the receiver provably needs no finalization.
TR::Node *constrainVcall(TR::ValuePropagation *vp, TR::Node *node)
   {
   constrainCall(vp, node);

   // Look for System.arraycopy call. If the node is transformed into an arraycopy
   // re-process it.
   //
   vp->transformArrayCopyCall(node);
   if (node->getOpCodeValue() == TR::arraycopy)
      {
      node->setVisitCount(0);
      vp->launchNode(node, vp->getCurrentParent(), 0);
      return node;
      }

   if (vp->transformUnsafeCopyMemoryCall(node))
      return node;

   cacheStringAppend(vp,node);

#ifdef J9_PROJECT_SPECIFIC
   TR::SymbolReference *finalizeSymRef = vp->comp()->getSymRefTab()->findOrCreateRuntimeHelper(TR_jitCheckIfFinalizeObject, true, true, true);
   if (node->getSymbolReference() == finalizeSymRef)
      {
      TR::Node *receiver = node->getFirstChild();
      bool isGlobal;
      TR::VPConstraint *type = vp->getConstraint(receiver, isGlobal);
      bool canBeRemoved = false;
      // ensure the type is really a fixedClass
      // resolvedClass is not sufficient because java.lang.Object has an
      // empty finalizer method (hasFinalizer returns false) and the call to
      // vm helper is incorrectly optimized in this case
      //
      if (type && type->getClassType() && type->getClassType()->asFixedClass())
         {
         TR_OpaqueClassBlock *klass = type->getClassType()->getClass();
         // Removable only when the exact class has no finalizer and is not
         // an ownable synchronizer.
         if (klass && !TR::Compiler->cls.hasFinalizer(vp->comp(), klass) && !vp->comp()->fej9()->isOwnableSyncClass(klass))
            {
            canBeRemoved = true;
            }
         }
      // If a class has a finalizer or is an ownableSync it won't be allocated on the stack. That's ensured
      // by virtue of (indirectly) calling bool J9::ObjectModel::canAllocateInlineClass(TR_OpaqueClassBlock *block)
      // Doesn't make sense to call jitCheckIfFinalizeObject for a stack
      // allocated object, so optimize
      else if (receiver->getOpCode().hasSymbolReference() && receiver->getSymbol()->isLocalObject())
         {
         canBeRemoved = true;
         }

      if (canBeRemoved &&
          performTransformation(vp->comp(), "%s Removing redundant call to jitCheckIfFinalize [%p]\n", OPT_DETAILS, node))
         {
         ///printf("found opportunity in %s to remove call to checkfinalize\n", vp->comp()->signature());fflush(stdout);
         ///traceMsg(vp->comp(), "found opportunity to remove call %p to checkfinalize\n", node);
         vp->removeNode(node);
         vp->_curTree->setNode(NULL);
         return node;
         }
      }
#endif

   return node;
   }
// Catch-block removal pass: drops exception edges that can never be taken
// (the try-block raises no exception the handler could catch) and then
// removes catch blocks all of whose incoming exception edges were proven
// dead. Returns the (fixed) cost of the pass.
int32_t TR_CatchBlockRemover::perform()
   {
   TR::CFG *cfg = comp()->getFlowGraph();
   if (cfg == NULL)
      {
      if (trace())
         traceMsg(comp(), "Can't do Catch Block Removal, no CFG\n");
      return 0;
      }

   if (trace())
      traceMsg(comp(), "Starting Catch Block Removal\n");

   bool thereMayBeRemovableCatchBlocks = false;

   {
   TR::StackMemoryRegion stackMemoryRegion(*trMemory());

   TR::Block *block;
   ListIterator<TR::CFGEdge> edgeIterator;

   // Go through all blocks that have exception successors and see if any of them
   // are not reached. Mark each of these edges with a visit count so they can
   // be identified later.
   //
   vcount_t visitCount = comp()->incOrResetVisitCount();

   TR::CFGNode *cfgNode;
   for (cfgNode = cfg->getFirstNode(); cfgNode; cfgNode = cfgNode->getNext())
      {
      if (cfgNode->getExceptionSuccessors().empty())
         continue;

      block = toBlock(cfgNode);
      // Accumulate the set of exceptions this block's trees can raise.
      uint32_t reachedExceptions = 0;
      TR::TreeTop *treeTop;
      for (treeTop = block->getEntry(); treeTop != block->getExit(); treeTop = treeTop->getNextTreeTop())
         {
         reachedExceptions |= treeTop->getNode()->exceptionsRaised();

         if (treeTop->getNode()->getOpCodeValue() == TR::monexitfence) // for live monitor metadata
            reachedExceptions |= TR::Block::CanCatchMonitorExit;
         }

      // User throws can reach any handler; nothing provable here.
      if (reachedExceptions & TR::Block::CanCatchUserThrows)
         continue;

      for (auto edge = block->getExceptionSuccessors().begin(); edge != block->getExceptionSuccessors().end();)
         {
         TR::CFGEdge * current = *(edge++);
         TR::Block *catchBlock = toBlock(current->getTo());
         // OSR blocks are left untouched.
         if (catchBlock->isOSRCodeBlock() || catchBlock->isOSRCatchBlock()) continue;
         if (!reachedExceptions &&
             performTransformation(comp(), "%sRemove redundant exception edge from block_%d at [%p] to catch block_%d at [%p]\n", optDetailString(), block->getNumber(), block, catchBlock->getNumber(), catchBlock))
            {
            // The block raises no exceptions at all: the edge is dead.
            cfg->removeEdge(block, catchBlock);
            thereMayBeRemovableCatchBlocks = true;
            }
         else
            {
            // The handler cannot catch what is actually raised: mark the
            // edge (via visit count) for the catch-block sweep below.
            if (!catchBlock->canCatchExceptions(reachedExceptions))
               {
               current->setVisitCount(visitCount);
               thereMayBeRemovableCatchBlocks = true;
               }
            }
         }
      }

   bool edgesRemoved = false;

   // Now look to see if there are any catch blocks for which all exception
   // predecessors have the visit count set. If so, the block is unreachable and
   // can be removed.
   // If only some of the exception predecessors are marked, these edges are
   // left in place to identify the try/catch structure properly.
   //
   while (thereMayBeRemovableCatchBlocks)
      {
      thereMayBeRemovableCatchBlocks = false;
      for (cfgNode = cfg->getFirstNode(); cfgNode; cfgNode = cfgNode->getNext())
         {
         if (cfgNode->getExceptionPredecessors().empty())
            continue;

         // Check whether every incoming exception edge was marked dead.
         auto edgeIt = cfgNode->getExceptionPredecessors().begin();
         for (; edgeIt != cfgNode->getExceptionPredecessors().end(); ++edgeIt)
            {
            if ((*edgeIt)->getVisitCount() != visitCount)
               break;
            }

         if (edgeIt == cfgNode->getExceptionPredecessors().end() &&
             performTransformation(comp(), "%sRemove redundant catch block_%d at [%p]\n", optDetailString(), cfgNode->getNumber(), cfgNode))
            {
            while (!cfgNode->getExceptionPredecessors().empty())
               {
               cfg->removeEdge(cfgNode->getExceptionPredecessors().front());
               }
            edgesRemoved = true;
            // Removing edges may expose further removable catch blocks.
            thereMayBeRemovableCatchBlocks = true;
            }
         }
      }

   // Any transformations invalidate use/def and value number information
   //
   if (edgesRemoved)
      {
      optimizer()->setUseDefInfo(NULL);
      optimizer()->setValueNumberInfo(NULL);
      requestOpt(OMR::treeSimplification, true);
      }

   } // scope of the stack memory region

   if (trace())
      traceMsg(comp(), "\nEnding Catch Block Removal\n");

   return 1; // actual cost
   }
// Try to transform a summation-reduction candidate (iadd/isub accumulation,
// or ixor/ineg flip) by hoisting a simplified equivalent into the loop
// preheader and removing the in-loop tree. Returns true when the candidate
// was recognized (expNode != NULL); sets '*isPreheaderBlockInvalid' and
// returns early when no preheader exists.
bool TR_ExpressionsSimplification::tranformSummationReductionCandidate(TR::TreeTop *treeTop, LoopInfo *loopInfo, bool *isPreheaderBlockInvalid)
   {
   TR::Node *node = treeTop->getNode();
   TR::Node *opNode = node->getFirstChild();
   TR::Node *expNode = NULL;
   int32_t expChildNumber = 0;
   bool removeOnly = false;
   bool replaceWithNewNode = false;

   if (opNode->getOpCodeValue() == TR::iadd || opNode->getOpCodeValue() == TR::isub)
      {
      // Pick the operand that is NOT the accumulator (the stored symbol);
      // the accumulator normally sits on the first-child side.
      if (opNode->getSecondChild()->getOpCode().hasSymbolReference() &&
          node->getSymbolReference() == opNode->getSecondChild()->getSymbolReference())
         {
         expChildNumber = 0;
         expNode = opNode->getFirstChild();
         }
      else
         {
         expChildNumber = 1;
         expNode = opNode->getSecondChild();
         }
      expNode = iaddisubSimplifier(expNode, loopInfo);
      replaceWithNewNode = true;
      }
   else if (opNode->getOpCodeValue() == TR::ixor || opNode->getOpCodeValue() == TR::ineg)
      {
      expNode = ixorinegSimplifier(opNode, loopInfo, &removeOnly);
      }

   if (expNode)
      {
      if (trace())
         comp()->getDebug()->print(comp()->getOutFile(), expNode, 0, true);

      TR::Block *entryBlock = _currentRegion->getEntryBlock();
      TR::Block *preheaderBlock = findPredecessorBlock(entryBlock);

      if (!preheaderBlock)
         {
         if (trace())
            traceMsg(comp(), "Fail to find a place to put the hoist code in\n");
         *isPreheaderBlockInvalid = true;
         return true;
         }

      if (loopInfo->getNumIterations() > 0 ||     // make sure that the loop is going to be executed at least once
          _currentRegion->isCanonicalizedLoop())  // or that the loop is canonicalized, in which case the preheader is
         {                                        // executed in its first iteration and is protected.
         if (performTransformation(comp(), "%sMove out loop-invariant node [%p] to block_%d\n", OPT_DETAILS, node, preheaderBlock->getNumber()))
            {
            // 'removeOnly' means the in-loop tree is simply deleted without
            // hoisting a replacement.
            if (!(removeOnly))
               {
               TR::Node *newNode = node->duplicateTree();
               if (replaceWithNewNode)
                  newNode->getFirstChild()->setAndIncChild(expChildNumber, expNode);
               transformNode(newNode, preheaderBlock);
               }
            TR::TransformUtil::removeTree(comp(), treeTop);
            }
         }
      }
   return (expNode != NULL);
   }
// Decide whether a conversion node is actually needed, and flag it as
// unneeded when the converted value is already (or can be forced to be)
// correctly extended at its source. Two paths:
//   1. The child is a global register load (isLoadReg): peek through every
//      reaching def via use-def info; the conversion is skippable only when
//      ALL defs agree, and extension is forced at each def's source load
//      when required.
//   2. Otherwise: consult canSkipConversion directly on the child.
//
// Fix: the use-def validity test previously passed the *comparison result*
// to isUseIndex — 'isUseIndex(useRegLoad->getUseDefIndex() != 0)' — so it
// always tested index 1 instead of the node's own use-def index. It now
// passes the index itself.
void TR_LoadExtensions::flagPreferredLoadExtensions(TR::Node* parent)
   {
   if (isSupportedType(parent) && parent->getOpCode().isConversion())
      {
      TR::Node* child = parent->getFirstChild();

      bool canSkipConversion = false;

      if (isSupportedType(child))
         {
         if (parent->getSize() == child->getSize())
            {
            // Same-width conversion: never changes the bits.
            TR::DebugCounter::incStaticDebugCounter(comp(), TR::DebugCounter::debugCounterName(comp(), "codegen/LoadExtensions/success/unneededConversion/%s", comp()->signature()));

            parent->setUnneededConversion(true);
            }
         else
            {
            TR::ILOpCode& childOpCode = child->getOpCode();

            // Only peek through register loads when the extended value fits
            // the platform (> 4 bytes on 32-bit is out) and the node was not
            // explicitly excluded earlier.
            if (childOpCode.isLoadReg() && !(parent->getSize() > 4 && TR::Compiler->target.is32Bit()) && excludedNodes->count(parent) == 0)
               {
               TR::Node* useRegLoad = child;

               TR_UseDefInfo* useDefInfo = optimizer()->getUseDefInfo();

               if (useDefInfo != NULL && useDefInfo->infoIsValid() && useRegLoad->getUseDefIndex() != 0 && useDefInfo->isUseIndex(useRegLoad->getUseDefIndex()))
                  {
                  TR_UseDefInfo::BitVector info(comp()->allocator());

                  if (useDefInfo->getUseDef(info, useRegLoad->getUseDefIndex()))
                     {
                     TR_UseDefInfo::BitVector::Cursor cursor(info);

                     int32_t firstDefIndex = useDefInfo->getFirstRealDefIndex();
                     int32_t firstUseIndex = useDefInfo->getFirstUseIndex();

                     canSkipConversion = true;

                     bool forceExtensionOnAnyLoads = false;
                     bool forceExtensionOnAllLoads = true;

                     for (cursor.SetToFirstOne(); cursor.Valid() && canSkipConversion; cursor.SetToNextOne())
                        {
                        int32_t defIndex = cursor;

                        // We've examined all the defs of this particular use
                        if (defIndex >= firstUseIndex)
                           {
                           break;
                           }

                        // Do not consider defs that correspond to method arguments as we cannot force extension on those
                        if (defIndex < firstDefIndex)
                           {
                           continue;
                           }

                        TR::Node* defRegLoad = useDefInfo->getNode(defIndex);

                        if (defRegLoad != NULL)
                           {
                           TR::Node* defRegLoadChild = defRegLoad->getFirstChild();

                           bool forceExtension = false;
                           canSkipConversion = TR_LoadExtensions::canSkipConversion(parent, defRegLoadChild, forceExtension);

                           forceExtensionOnAnyLoads |= forceExtension;
                           forceExtensionOnAllLoads &= forceExtension;

                           // If we have to force extension on any loads which feed a def of this use ensure we must also
                           // force extension on all such loads. Conversely the conversion can be skipped if none of the
                           // loads feeding the def of this use need to be extended. This ensures either all loads feeding
                           // into defs of this use should be extended or none of them.
                           canSkipConversion &= forceExtensionOnAllLoads == forceExtensionOnAnyLoads;

                           if (trace())
                              {
                              traceMsg(comp(), "\t\tPeeked through %s [%p] and found %s [%p] with child %s [%p] - conversion %s be skipped\n", useRegLoad->getOpCode().getName(), useRegLoad, defRegLoad->getOpCode().getName(), defRegLoad, defRegLoadChild->getOpCode().getName(), defRegLoadChild, canSkipConversion ? "can" : "cannot");
                              }
                           }
                        }

                     if (canSkipConversion && performTransformation(comp(), "%sSkipping conversion %s [%p] after RegLoad\n", optDetailString(), parent->getOpCode().getName(), parent))
                        {
                        TR::DebugCounter::incStaticDebugCounter(comp(), TR::DebugCounter::debugCounterName(comp(), "codegen/LoadExtensions/success/unneededConversion/GRA/%s", comp()->signature()));

                        parent->setUnneededConversion(true);

                        if (forceExtensionOnAllLoads)
                           {
                           // Re-walk the defs and force the agreed extension
                           // (sign vs. zero) at each source load.
                           TR_UseDefInfo::BitVector info(comp()->allocator());

                           if (useDefInfo->getUseDef(info, useRegLoad->getUseDefIndex()))
                              {
                              TR_UseDefInfo::BitVector::Cursor cursor(info);

                              for (cursor.SetToFirstOne(); cursor.Valid(); cursor.SetToNextOne())
                                 {
                                 int32_t defIndex = cursor;

                                 // We've examined all the defs of this particular use
                                 if (defIndex >= firstUseIndex)
                                    {
                                    break;
                                    }

                                 // Do not consider defs that correspond to method arguments as we cannot force extension on those
                                 if (defIndex < firstDefIndex)
                                    {
                                    continue;
                                    }

                                 TR::Node *defRegLoad = useDefInfo->getNode(defIndex);

                                 if (defRegLoad != NULL)
                                    {
                                    TR::Node* defRegLoadChild = defRegLoad->getFirstChild();

                                    const int32_t preference = getExtensionPreference(defRegLoadChild);

                                    if (preference > 0)
                                       {
                                       if (trace())
                                          {
                                          traceMsg(comp(), "\t\t\tForcing sign extension on %s [%p]\n", defRegLoadChild->getOpCode().getName(), defRegLoadChild);
                                          }

                                       if (parent->getSize() == 8 || parent->useSignExtensionMode())
                                          {
                                          defRegLoadChild->setSignExtendTo64BitAtSource(true);
                                          }
                                       else
                                          {
                                          defRegLoadChild->setSignExtendTo32BitAtSource(true);
                                          }
                                       }

                                    if (preference < 0)
                                       {
                                       if (trace())
                                          {
                                          traceMsg(comp(), "\t\t\tForcing zero extension on %s [%p]\n", defRegLoadChild->getOpCode().getName(), defRegLoadChild);
                                          }

                                       if (parent->getSize() == 8 || parent->useSignExtensionMode())
                                          {
                                          defRegLoadChild->setZeroExtendTo64BitAtSource(true);
                                          }
                                       else
                                          {
                                          defRegLoadChild->setZeroExtendTo32BitAtSource(true);
                                          }
                                       }
                                    }
                                 }
                              }
                           }

                        if (parent->getType().isInt64() && parent->getSize() > child->getSize())
                           {
                           if (trace())
                              {
                              traceMsg(comp(), "\t\t\tSet global register %s in getExtendedToInt64GlobalRegisters for child %s [%p] with parent node %s [%p]\n", comp()->getDebug()->getGlobalRegisterName(child->getGlobalRegisterNumber()), child->getOpCode().getName(), child, parent->getOpCode().getName(), parent);
                              }

                           // getExtendedToInt64GlobalRegisters is used by the evaluators to force a larger virtual register to be used when
                           // evaluating the regload so any instructions generated by local RA are the correct size to preserve the upper bits
                           cg()->getExtendedToInt64GlobalRegisters()[child->getGlobalRegisterNumber()] = true;
                           }
                        }
                     }
                  }
               }
            }
         }

      if (!canSkipConversion)
         {
         // Fallback: decide directly on the child.
         bool forceExtension = false;
         canSkipConversion = TR_LoadExtensions::canSkipConversion(parent, child, forceExtension);

         if (canSkipConversion && performTransformation(comp(), "%sSkipping conversion %s [%p]\n", optDetailString(), parent->getOpCode().getName(), parent))
            {
            TR::DebugCounter::incStaticDebugCounter(comp(), TR::DebugCounter::debugCounterName(comp(), "codegen/LoadExtensions/success/unneededConversion/%s", comp()->signature()));

            parent->setUnneededConversion(true);

            if (forceExtension)
               {
               const int32_t preference = getExtensionPreference(child);

               if (preference > 0)
                  {
                  if (trace())
                     {
                     traceMsg(comp(), "\t\t\tForcing sign extension on %s [%p]\n", child->getOpCode().getName(), child);
                     }

                  if (parent->getSize() == 8 || parent->useSignExtensionMode())
                     {
                     child->setSignExtendTo64BitAtSource(true);
                     }
                  else
                     {
                     child->setSignExtendTo32BitAtSource(true);
                     }
                  }

               if (preference < 0)
                  {
                  if (trace())
                     {
                     traceMsg(comp(), "\t\t\tForcing zero extension on %s [%p]\n", child->getOpCode().getName(), child);
                     }

                  if (parent->getSize() == 8 || parent->useSignExtensionMode())
                     {
                     child->setZeroExtendTo64BitAtSource(true);
                     }
                  else
                     {
                     child->setZeroExtendTo32BitAtSource(true);
                     }
                  }
               }
            }
         }
      }
   }
int32_t TR::DeadTreesElimination::process(TR::TreeTop *startTree, TR::TreeTop *endTree) { TR::StackMemoryRegion stackRegion(*comp()->trMemory()); LongestPathMap longestPaths(std::less<TR::Node*>(), stackRegion); typedef TR::typed_allocator<CRAnchor, TR::Region&> CRAnchorAlloc; typedef TR::forward_list<CRAnchor, CRAnchorAlloc> CRAnchorList; CRAnchorList anchors(stackRegion); vcount_t visitCount = comp()->incOrResetVisitCount(); TR::TreeTop *treeTop; for (treeTop = startTree; (treeTop != endTree); treeTop = treeTop->getNextTreeTop()) treeTop->getNode()->initializeFutureUseCounts(visitCount); TR::Block *block = NULL; bool delayedRegStoresBeforeThisPass = _delayedRegStores; // Update visitCount as they are used in this optimization and need to be visitCount = comp()->incOrResetVisitCount(); for (TR::TreeTopIterator iter(startTree, comp()); iter != endTree; ++iter) { TR::Node *node = iter.currentTree()->getNode(); if (node->getOpCodeValue() == TR::BBStart) { block = node->getBlock(); if (!block->isExtensionOfPreviousBlock()) longestPaths.clear(); } int vcountLimit = MAX_VCOUNT - 3; if (comp()->getVisitCount() > vcountLimit) { dumpOptDetails(comp(), "%sVisit count %d exceeds limit %d; stopping\n", optDetailString(), comp()->getVisitCount(), vcountLimit); return 0; } // correct at all intermediate stages // if ((node->getOpCodeValue() != TR::treetop) && (!node->getOpCode().isAnchor() || (node->getFirstChild()->getReferenceCount() != 1)) && (!node->getOpCode().isStoreReg() || (node->getFirstChild()->getReferenceCount() != 1)) && (delayedRegStoresBeforeThisPass || (iter.currentTree() == block->getLastRealTreeTop()) || !node->getOpCode().isStoreReg() || (node->getVisitCount() == visitCount))) { if (node->getOpCode().isAnchor() && node->getFirstChild()->getOpCode().isLoadIndirect()) anchors.push_front(CRAnchor(iter.currentTree(), block)); TR::TransformUtil::recursivelySetNodeVisitCount(node, visitCount); continue; } if (node->getOpCode().isStoreReg()) _delayedRegStores = 
true; TR::Node *child = node->getFirstChild(); if (child->getOpCodeValue() == TR::PassThrough) { TR::Node *newChild = child->getFirstChild(); node->setAndIncChild(0, newChild); newChild->incFutureUseCount(); if (child->getReferenceCount() <= 1) optimizer()->prepareForNodeRemoval(child); child->recursivelyDecReferenceCount(); recursivelyDecFutureUseCount(child); child = newChild; } bool treeTopCanBeEliminated = false; // If the treetop child has been seen before then it must be anchored // somewhere above already; so we don't need the treetop to be anchoring // this node (as the computation is already done at the first reference to // the node). // if (visitCount == child->getVisitCount()) { treeTopCanBeEliminated = true; } else { TR::ILOpCode &childOpCode = child->getOpCode(); TR::ILOpCodes opCodeValue = childOpCode.getOpCodeValue(); bool seenConditionalBranch = false; bool callWithNoSideEffects = child->getOpCode().isCall() && child->getSymbolReference()->getSymbol()->isResolvedMethod() && child->getSymbolReference()->getSymbol()->castToResolvedMethodSymbol()->isSideEffectFree(); if (callWithNoSideEffects) { treeTopCanBeEliminated = true; } else if (!((childOpCode.isCall() && !callWithNoSideEffects) || childOpCode.isStore() || ((opCodeValue == TR::New || opCodeValue == TR::anewarray || opCodeValue == TR::newarray) && child->getReferenceCount() > 1) || opCodeValue == TR::multianewarray || opCodeValue == TR::MergeNew || opCodeValue == TR::checkcast || opCodeValue == TR::Prefetch || opCodeValue == TR::iu2l || ((childOpCode.isDiv() || childOpCode.isRem()) && child->getNumChildren() == 3))) { // Perform the rather complex check to see whether its safe // to disconnect the child node from the treetop // bool safeToReplaceNode = false; if (child->getReferenceCount() == 1) { safeToReplaceNode = true; #ifdef J9_PROJECT_SPECIFIC if (child->getOpCode().isPackedExponentiation()) { // pdexp has a possible message side effect in truncating or no significant digits left cases 
safeToReplaceNode = false; } #endif if (opCodeValue == TR::loadaddr) treeTopCanBeEliminated = true; } else if (!_cannotBeEliminated) { safeToReplaceNode = isSafeToReplaceNode( child, iter.currentTree(), &seenConditionalBranch, visitCount, comp(), &_targetTrees, _cannotBeEliminated, longestPaths); } if (safeToReplaceNode) { if (childOpCode.hasSymbolReference()) { TR::SymbolReference *symRef = child->getSymbolReference(); if (symRef->getSymbol()->isAuto() || symRef->getSymbol()->isParm()) treeTopCanBeEliminated = true; else { if (childOpCode.isLoad() || (opCodeValue == TR::loadaddr) || (opCodeValue == TR::instanceof) || (((opCodeValue == TR::New) || (opCodeValue == TR::anewarray || opCodeValue == TR::newarray)) && ///child->getFirstChild()->isNonNegative())) child->markedAllocationCanBeRemoved())) // opCodeValue == TR::multianewarray || // opCodeValue == TR::MergeNew) treeTopCanBeEliminated = true; } } else treeTopCanBeEliminated = true; } } // Fix for the case when a float to non-float conversion node swings // down past a branch on IA32; this would cause a FP value to be commoned // across a branch where there was none originally; this causes pblms // as a value is left on the stack. 
// if (treeTopCanBeEliminated && seenConditionalBranch) { if (!cg()->getSupportsJavaFloatSemantics()) { if (child->getOpCode().isConversion() || child->getOpCode().isBooleanCompare()) { if (child->getFirstChild()->getOpCode().isFloatingPoint() && !child->getOpCode().isFloatingPoint()) treeTopCanBeEliminated = false; } } } if (treeTopCanBeEliminated) { TR::NodeChecklist visited(comp()); bool containsFloatingPoint = false; for (int32_t i = 0; i < child->getNumChildren(); ++i) { // Anchor nodes with reference count > 1 // bool highGlobalIndex = false; if (fixUpTree(child->getChild(i), iter.currentTree(), visited, highGlobalIndex, self(), visitCount)) containsFloatingPoint = true; if (highGlobalIndex) { dumpOptDetails(comp(), "%sGlobal index limit exceeded; stopping\n", optDetailString()); return 0; } } if (seenConditionalBranch && containsFloatingPoint) { if (!cg()->getSupportsJavaFloatSemantics()) treeTopCanBeEliminated = false; } } } // Update visitCount as they are used in this optimization and need to be // correct at all intermediate stages // if (!treeTopCanBeEliminated) TR::TransformUtil::recursivelySetNodeVisitCount(node, visitCount); if (treeTopCanBeEliminated) { TR::TreeTop *prevTree = iter.currentTree()->getPrevTreeTop(); TR::TreeTop *nextTree = iter.currentTree()->getNextTreeTop(); if (!node->getOpCode().isStoreReg() || (node->getFirstChild()->getReferenceCount() == 1)) { // Actually going to remove the treetop now // if (performTransformation(comp(), "%sRemove tree : [" POINTER_PRINTF_FORMAT "] ([" POINTER_PRINTF_FORMAT "] = %s)\n", optDetailString(), node, node->getFirstChild(), node->getFirstChild()->getOpCode().getName())) { prevTree->join(nextTree); optimizer()->prepareForNodeRemoval(node); ///child->recursivelyDecReferenceCount(); node->recursivelyDecReferenceCount(); recursivelyDecFutureUseCount(child); iter.jumpTo(prevTree); if (child->getReferenceCount() == 1) requestOpt(OMR::treeSimplification, true, block); if 
(nextTree->getNode()->getOpCodeValue() == TR::Goto && prevTree->getNode()->getOpCodeValue() == TR::BBStart && !prevTree->getNode()->getBlock()->isExtensionOfPreviousBlock()) { requestOpt( OMR::redundantGotoElimination, prevTree->getNode()->getBlock()); } } } else { if (performTransformation(comp(), "%sMove tree : [" POINTER_PRINTF_FORMAT "]([" POINTER_PRINTF_FORMAT "] = %s) to end of block\n", optDetailString(), node, node->getFirstChild(), node->getFirstChild()->getOpCode().getName())) { prevTree->join(nextTree); node->setVisitCount(visitCount); TR::TreeTop *lastTree = findLastTreetop(block, prevTree); TR::TreeTop *prevLastTree = lastTree->getPrevTreeTop(); TR::TreeTop *cursorTreeTop = nextTree; while (cursorTreeTop != lastTree) { if (cursorTreeTop->getNode()->getOpCode().isStoreReg() && (cursorTreeTop->getNode()->getGlobalRegisterNumber() == iter.currentTree()->getNode()->getGlobalRegisterNumber())) { lastTree = cursorTreeTop; prevLastTree = lastTree->getPrevTreeTop(); break; } cursorTreeTop = cursorTreeTop->getNextTreeTop(); } if (lastTree->getNode()->getOpCodeValue() == TR::BBStart) { prevLastTree = lastTree; lastTree = block->getExit(); } TR::Node *lastNode = lastTree->getNode(); TR::Node *prevLastNode = prevLastTree->getNode(); if (lastNode->getOpCode().isIf() && !lastNode->getOpCode().isCompBranchOnly() && prevLastNode->getOpCode().isStoreReg() && ((prevLastNode->getFirstChild() == lastNode->getFirstChild()) || (prevLastNode->getFirstChild() == lastNode->getSecondChild()))) { lastTree = prevLastTree; prevLastTree = lastTree->getPrevTreeTop(); } prevLastTree->join(iter.currentTree()); iter.currentTree()->join(lastTree); iter.jumpTo(prevTree); requestOpt(OMR::treeSimplification, true, block); } } } } for (auto it = anchors.begin(); it != anchors.end(); ++it) { TR::Node *anchor = it->tree->getNode(); TR::Node *load = anchor->getChild(0); if (load->getReferenceCount() > 1) continue; // We can eliminate the indirect load immediately, but for the moment the // 
subtree providing the base object has to be anchored. TR::Node *heapBase = anchor->getChild(1); TR::Node::recreate(anchor, TR::treetop); anchor->setAndIncChild(0, load->getChild(0)); anchor->setChild(1, NULL); anchor->setNumChildren(1); if (!heapBase->getOpCode().isLoadConst()) { it->tree->insertAfter( TR::TreeTop::create( comp(), TR::Node::create(heapBase, TR::treetop, 1, heapBase))); } load->recursivelyDecReferenceCount(); heapBase->recursivelyDecReferenceCount(); // A later pass of dead trees can likely move (or even remove) the base // object expression. requestOpt(OMR::deadTreesElimination, true, it->block); } return 1; // actual cost }
// This opt tries to reduce merge backs from cold code that are the result of inliner
// generated nopable virtual guards
// It looks for one basic pattern
//
// guard1 -> cold1
// BBEND
// BBSTART
// guard2 -> cold2
//
// if guard1 is the guard for a method which calls the method guard2 protects or cold1 is
// a predecessor of cold2 (a situation commonly created by virtual guard tail splitter) we
// can transform the guards as follows when guard1 and guard2 are mergeable guards:
//
// guard1 -> cold1
// BBEND
// BBSTART
// guard2 -> cold1
//
// This is safe because there are no trees between the guards and calling the caller will
// result in the call to the callee if we need to patch guard2. cold2 and its mergebacks
// can then be eliminated
//
// In addition this opt will try to move guard2 up from the end of a block to the
// start of the block. We can do this if guard2 is an HCR guard and there is no GC point
// between BBSTART and guard2 since HCR is a stop-the-world event.
//
// Finally, there is a simple tail splitting step run before the analysis of a guard if we
// detect that the taken side of the guard merges back in the next block - this happens
// for some empty methods and is common for Object.<init> at the top of constructors.
// Walk the method's blocks looking for mergeable virtual guards and try to
// (a) redirect an inner guard to the outer guard's cold block (head merge) and
// (b) hoist guards upward past code that is safe to bypass (see the banner
// comment above this function for the full rationale).
// Returns 1 (this pass's nominal cost) unconditionally.
int32_t TR_VirtualGuardHeadMerger::perform()
   {
   static char *disableVGHeadMergerTailSplitting = feGetEnv("TR_DisableVGHeadMergerTailSplitting");
   TR::CFG *cfg = comp()->getFlowGraph();

   // Cache the loads for the outer guard's cold path
   TR_BitVector coldPathLoads(comp()->trMemory()->currentStackRegion());
   TR_BitVector privArgSymRefs(comp()->trMemory()->currentStackRegion());
   bool evaluatedColdPathLoads = false;

   for (TR::Block *block = optimizer()->getMethodSymbol()->getFirstTreeTop()->getNode()->getBlock();
        block; block = block->getNextBlock())
      {
      // A candidate outer guard must be the last real tree of its block
      TR::Node *guard1 = block->getLastRealTreeTop()->getNode();

      if (isMergeableGuard(guard1))
         {
         if (trace())
            traceMsg(comp(), "Found mergeable guard in block_%d\n", block->getNumber());
         TR::Block *cold1 = guard1->getBranchDestination()->getEnclosingBlock();

         // check for an immediate merge back from the cold block and
         // tail split one block if we can - we only handle splitting a block
         // ending in a fallthrough, a branch or a goto for now for simplicity
         if (!disableVGHeadMergerTailSplitting &&
             (cold1->getSuccessors().size() == 1) &&
             cold1->hasSuccessor(block->getNextBlock()) &&
             cold1->getLastRealTreeTop()->getNode()->getOpCode().isGoto())
            {
            // TODO handle moving code earlier in the block down below the guard
            // tail split
            // NOTE(review): because && binds tighter than ||, when the next block has a
            // single successor the performTransformation() query is bypassed entirely and
            // tailSplitBlock() runs unconditionally - confirm this precedence is intended.
            if ((block->getNextBlock()->getSuccessors().size() == 1) ||
                ((block->getNextBlock()->getSuccessors().size() == 2) &&
                 block->getNextBlock()->getLastRealTreeTop()->getNode()->getOpCode().isBranch()) &&
                performTransformation(comp(), "%sCloning block_%d and placing clone after block_%d to reduce HCR guard nops\n", OPT_DETAILS, block->getNextBlock()->getNumber(), cold1->getNumber()))
               tailSplitBlock(block, cold1);
            }

         // guard motion is fairly complex but what we want to achieve around guard1 is a sequence
         // of relocated privarg blocks, followed by a sequence of runtime patchable guards going to
         // guard1's cold block, followed by a sequence of stop-the-world guards going to guard1's
         // cold block
         //
         // The following code is to setup the various insert points based on the following diagrams
         // of basic blocks:
         //
         // start:              setup:                       end result after moving runtime guard':
         //
         //                                                  +-------+ <-- privargIns
         //                      |       | <-- privargIns    |       |
         //                      +-------+ <-- runtimeIns    +-------+
         //                      |       |                   | Guard'|
         //                      |       |                   +-------+ <-- runtimeIns
         //  +-------+           +-------+                   |
         //  | Guard |           | Guard |                   V
         //  +-------+           +-------+ <-- HCRIns        +-------+
         //      |       ===>        |              ===>     | Guard |
         //      V                   V                       +-------+ <-- HCRIns
         //  +-------+           +-------+                   |
         //  |       |           |       |                   V
         //  |       |           |       |                   +-------+
         //
         // Note we always split the block - this may create an empty block but preserves the incoming
         // control flow we leave the rest to block extension to fix later
         block = block->split(block->getLastRealTreeTop(), cfg, true, false);
         TR::Block *privargIns = block->getPrevBlock();
         TR::Block *runtimeIns = block->getPrevBlock();
         TR::Block *HCRIns = block;

         // New outer guard so cold paths must be evaluated
         evaluatedColdPathLoads = false;

         // scan for candidate guards to merge with guard1 identified above
         for (TR::Block *nextBlock = block->getNextBlock(); nextBlock; nextBlock = nextBlock->getNextBlock())
            {
            // The chain of candidate blocks must be straight-line: each block's
            // only predecessor is the previous one
            if (!(nextBlock->getPredecessors().size() == 1) || !nextBlock->hasPredecessor(block))
               {
               break;
               }

            // The inner guard may sit at the top (already head-merged / empty block)
            // or at the bottom of the candidate block
            TR::TreeTop *guard2Tree = NULL;
            if (isMergeableGuard(nextBlock->getFirstRealTreeTop()->getNode()))
               {
               guard2Tree = nextBlock->getFirstRealTreeTop();
               }
            else if (isMergeableGuard(nextBlock->getLastRealTreeTop()->getNode()))
               {
               guard2Tree = nextBlock->getLastRealTreeTop();
               }
            else
               break;

            TR::Node *guard2 = guard2Tree->getNode();
            TR::Block *guard2Block = nextBlock;

            // It is not possible to shift an OSR guard unless the destination is already an OSR point
            // as the necessary OSR state will not be available
            if (guard2->isOSRGuard() && !guard1->isOSRGuard())
               break;

            // Stop-the-world (HCR) guards insert lower than runtime-patchable guards
            TR::Block *insertPoint = isStopTheWorldGuard(guard2) ? HCRIns : runtimeIns;
            if (!safeToMoveGuard(insertPoint, guard2Tree, guard1->getBranchDestination(), privArgSymRefs))
               break;

            // now we figure out if we can redirect guard2 to guard1's cold block
            // ie can we do the head merge
            TR::Block *cold2 = guard2->getBranchDestination()->getEnclosingBlock();
            if (guard1->getInlinedSiteIndex() == guard2->getInlinedSiteIndex())
               {
               if (trace())
                  traceMsg(comp(), " Guard1 [%p] is guarding the same call as Guard2 [%p] - proceeding with guard merging\n", guard1, guard2);
               }
            else if (guard2->getInlinedSiteIndex() > -1 &&
                     guard1->getInlinedSiteIndex() == TR::comp()->getInlinedCallSite(guard2->getInlinedSiteIndex())._byteCodeInfo.getCallerIndex())
               {
               if (trace())
                  traceMsg(comp(), " Guard1 [%p] is the caller of Guard2 [%p] - proceeding with guard merging\n", guard1, guard2);
               }
            else if ((cold1->getSuccessors().size() == 1) && cold1->hasSuccessor(cold2))
               {
               if (trace())
                  traceMsg(comp(), " Guard1 cold destination block_%d has guard2 cold destination block_%d as its only successor - proceeding with guard merging\n", cold1->getNumber(), cold2->getNumber());
               }
            else
               {
               if (trace())
                  traceMsg(comp(), " Cold1 block_%d and cold2 block_%d of guard2 [%p] in unknown relationship - abandon the merge attempt\n", cold1->getNumber(), cold2->getNumber(), guard2);
               break;
               }

            // Runtime guards will shift their privargs, so it is necessary to check such a move is safe
            // This is possible if a privarg temp was recycled for the inner call site, with a prior use as an
            // argument for the outer call site. As the privargs for the inner call site must be evaluated before
            // both guards, this would result in the recycled temp holding the incorrect value if the guard is ever
            // taken.
            if (!isStopTheWorldGuard(guard2))
               {
               if (!evaluatedColdPathLoads)
                  {
                  collectColdPathLoads(cold1, coldPathLoads);
                  evaluatedColdPathLoads = true;
                  }

               if (coldPathLoads.intersects(privArgSymRefs))
                  {
                  if (trace())
                     traceMsg(comp(), " Recycled temp live in cold1 block_%d and used as privarg before guard2 [%p] - stop guard merging", cold1->getNumber(), guard2);
                  break;
                  }
               }

            if (!performTransformation(comp(), "%sRedirecting %s guard [%p] in block_%d to parent guard cold block_%d\n", OPT_DETAILS, isStopTheWorldGuard(guard2) ? "stop the world" : "runtime", guard2, guard2Block->getNumber(), cold1->getNumber()))
               continue;

            if (guard2->getBranchDestination() != guard1->getBranchDestination())
               guard2Block->changeBranchDestination(guard1->getBranchDestination(), cfg);

            if (guard2Tree != guard2Block->getFirstRealTreeTop())
               {
               cfg->setStructure(NULL);

               // We should leave code ahead of an HCR guard in place because:
               // 1, it might have side effect to runtime guards after it, moving it up might cause us to falsely merge
               // the subsequent runtime guards
               // 2, it might contain live monitor, moving it up above a guard can affect the monitor's live range
               if (!isStopTheWorldGuard(guard2))
                  {
                  // the block created above guard2 contains only privarg treetops or monitor stores if
                  // guard2 is a runtime-patchable guard and is safe to merge. We need to move the priv
                  // args up to the runtime insert point and leave the monitor stores in place
                  // It's safe to do so because there is no data dependency between the monitor store and
                  // the priv arg store, because the priv arg store does not load the value from the temp
                  // holding the monitored object

                  // Split priv arg stores from monitor stores
                  // Monitor store is generated for the caller of the method guard2 protects, so should appear before
                  // priv arg stores for the method guard2 protects
                  TR::Block *privargBlock = guard2Block;
                  guard2Block = splitRuntimeGuardBlock(comp(), guard2Block, cfg);
                  if (privargBlock != guard2Block)
                     {
                     if (trace())
                        traceMsg(comp(), " Moving privarg block_%d after block_%d\n", privargBlock->getNumber(), privargIns->getNumber());

                     moveBlockAfterDest(cfg, privargBlock, privargIns);

                     if (HCRIns == privargIns)
                        HCRIns = privargBlock;
                     if (runtimeIns == privargIns)
                        runtimeIns = privargBlock;
                     privargIns = privargBlock;

                     // refresh the insertPoint since it could be stale after the above updates
                     insertPoint = runtimeIns;
                     }
                  }

               guard2Block = guard2Block->split(guard2Tree, cfg, true, false);
               if (trace())
                  traceMsg(comp(), " Created new block_%d to hold guard [%p] from block_%d\n", guard2Block->getNumber(), guard2, guard2Block->getNumber());
               }

            if (insertPoint != guard2Block->getPrevBlock())
               {
               TR::DebugCounter::incStaticDebugCounter(comp(), TR::DebugCounter::debugCounterName(comp(), "headMerger/%s_%s/(%s)", isStopTheWorldGuard(guard1) ? "stop the world" : "runtime", isStopTheWorldGuard(guard2) ? "stop the world" : "runtime", comp()->signature()));
               cfg->setStructure(NULL);

               // Rewind the outer scan so the blocks between the insert point and
               // guard2's old position are revisited after the move
               block = nextBlock = guard2Block->getPrevBlock();
               if (trace())
                  traceMsg(comp(), " Moving guard2 block block_%d after block_%d\n", guard2Block->getNumber(), insertPoint->getNumber());

               moveBlockAfterDest(cfg, guard2Block, insertPoint);

               if (HCRIns == insertPoint)
                  HCRIns = guard2Block;
               if (runtimeIns == insertPoint)
                  runtimeIns = guard2Block;
               }
            else
               {
               block = guard2Block;
               }
            // guard2 becomes the new outer guard for the next iteration
            guard1 = guard2;
            }
         }
      }
   return 1;
   }
/**
 * Move \p treeToMove's treetop so it immediately precedes \p anchor's treetop,
 * then update the parallel _treesRefInfoArray bookkeeping (reference-info lists
 * and array ordering) to match the new tree order.
 *
 * If the tree already immediately precedes the anchor, only a dependency pair
 * is recorded and no move happens.
 *
 * @param treeToMove ref-info wrapper of the tree being relocated
 * @param anchor     ref-info wrapper of the tree to move in front of
 * @param passNumber pass number, used only for the transformation trace message
 * @return true if the tree was moved; false if it was already in place or the
 *         transformation was vetoed by performTransformation()
 */
bool TR_LocalLiveRangeReduction::moveTreeBefore(TR_TreeRefInfo *treeToMove,TR_TreeRefInfo *anchor,int32_t passNumber)
   {
   TR::TreeTop *treeToMoveTT = treeToMove->getTreeTop();
   TR::TreeTop *anchorTT = anchor->getTreeTop();

   // Already immediately before the anchor - just record the dependency
   if (treeToMoveTT->getNextRealTreeTop() == anchorTT)
      {
      addDepPair(treeToMove, anchor);
      return false;
      }
   if (!performTransformation(comp(), "%sPass %d: moving tree [%p] before Tree %p\n", OPT_DETAILS, passNumber, treeToMoveTT->getNode(),anchorTT->getNode()))
      return false;

   // printf("Moving [%p] before Tree %p\n", treeToMoveTT->getNode(),anchorTT->getNode());

   //changing location in block
   // Unlink treeToMoveTT from its current position...
   TR::TreeTop *origPrevTree = treeToMoveTT->getPrevTreeTop();
   TR::TreeTop *origNextTree = treeToMoveTT->getNextTreeTop();
   origPrevTree->setNextTreeTop(origNextTree);
   origNextTree->setPrevTreeTop(origPrevTree);
   // ...and splice it in directly before the anchor
   TR::TreeTop *prevTree = anchorTT->getPrevTreeTop();
   anchorTT->setPrevTreeTop(treeToMoveTT);
   treeToMoveTT->setNextTreeTop(anchorTT);
   treeToMoveTT->setPrevTreeTop(prevTree);
   prevTree->setNextTreeTop(treeToMoveTT);

   //UPDATE REFINFO
   //find locations of treeTops in TreeTopsRefInfo array
   //startIndex points to the currentTree that has moved
   //endIndex points to the treeTop after which we moved the tree (nextTree)
   int32_t startIndex = getIndexInArray(treeToMove);
   int32_t endIndex = getIndexInArray(anchor)-1;
   int32_t i=0;

   // For every tree the moved tree jumped over, recompute the shared
   // reference-info between it and the moved tree
   for ( i = startIndex+1; i<= endIndex ; i++)
      {
      TR_TreeRefInfo *currentTreeRefInfo = _treesRefInfoArray[i];
      // NOTE(review): these six list handles appear unused below - presumably kept
      // for debugging alongside the trace output; confirm before removing.
      List<TR::Node> *firstList = currentTreeRefInfo->getFirstRefNodesList();
      List<TR::Node> *midList = currentTreeRefInfo->getMidRefNodesList();
      List<TR::Node> *lastList = currentTreeRefInfo->getLastRefNodesList();
      List<TR::Node> *M_firstList = treeToMove->getFirstRefNodesList();
      List<TR::Node> *M_midList = treeToMove->getMidRefNodesList();
      List<TR::Node> *M_lastList = treeToMove->getLastRefNodesList();

      if (trace())
         {
         traceMsg(comp(),"Before move:\n");
         printRefInfo(treeToMove);
         printRefInfo(currentTreeRefInfo);
         }
      updateRefInfo(treeToMove->getTreeTop()->getNode(), currentTreeRefInfo, treeToMove , false);
      treeToMove->resetSyms();
      currentTreeRefInfo->resetSyms();
      populatePotentialDeps(currentTreeRefInfo,currentTreeRefInfo->getTreeTop()->getNode());
      populatePotentialDeps(treeToMove,treeToMove->getTreeTop()->getNode());

      if (trace())
         {
         traceMsg(comp(),"After move:\n");
         printRefInfo(treeToMove);
         printRefInfo(currentTreeRefInfo);
         traceMsg(comp(),"------------------------\n");
         }
      }

   // Rotate the array slice [startIndex, endIndex] left by one so the entries
   // mirror the new treetop order
   TR_TreeRefInfo *temp = _treesRefInfoArray[startIndex];
   for (i = startIndex; i< endIndex ; i++)
      {
      _treesRefInfoArray[i] = _treesRefInfoArray[i+1];
      }
   _treesRefInfoArray[endIndex]=temp;

#if defined(DEBUG) || defined(PROD_WITH_ASSUMES)
   if (!(comp()->getOption(TR_EnableParanoidOptCheck) || debug("paranoidOptCheck")))
      return true;

   //verifier
   // Rebuild all reference info from scratch and assert it matches the
   // incrementally-maintained info (debug builds / paranoid checking only)
      {
      TR::StackMemoryRegion stackMemoryRegion(*trMemory());

      vcount_t visitCount = comp()->getVisitCount();
      int32_t maxRefCount = 0;
      TR::TreeTop *tt;
      TR_TreeRefInfo **treesRefInfoArrayTemp = (TR_TreeRefInfo**)trMemory()->allocateStackMemory(_numTreeTops*sizeof(TR_TreeRefInfo*));
      memset(treesRefInfoArrayTemp, 0, _numTreeTops*sizeof(TR_TreeRefInfo*));
      TR_TreeRefInfo *treeRefInfoTemp;

      //collect info
      for ( int32_t i = 0; i<_numTreeTops-1; i++)
         {
         tt =_treesRefInfoArray[i]->getTreeTop();
         treeRefInfoTemp = new (trStackMemory()) TR_TreeRefInfo(tt, trMemory());
         collectRefInfo(treeRefInfoTemp, tt->getNode(),visitCount,&maxRefCount);
         treesRefInfoArrayTemp[i] = treeRefInfoTemp;
         }

      comp()->setVisitCount(visitCount+maxRefCount);

      for ( int32_t i = 0; i<_numTreeTops-1; i++)
         {
         if (!verifyRefInfo(treesRefInfoArrayTemp[i]->getFirstRefNodesList(),_treesRefInfoArray[i]->getFirstRefNodesList()))
            {
            printOnVerifyError(_treesRefInfoArray[i],treesRefInfoArrayTemp[i]);
            TR_ASSERT(0,"fail to verify firstRefNodesList for %p\n",_treesRefInfoArray[i]->getTreeTop()->getNode());
            }
         if (!verifyRefInfo(treesRefInfoArrayTemp[i]->getMidRefNodesList(),_treesRefInfoArray[i]->getMidRefNodesList()))
            {
            printOnVerifyError(_treesRefInfoArray[i],treesRefInfoArrayTemp[i]);
            TR_ASSERT(0,"fail to verify midRefNodesList for %p\n",_treesRefInfoArray[i]->getTreeTop()->getNode());
            }
         if (!verifyRefInfo(treesRefInfoArrayTemp[i]->getLastRefNodesList(),_treesRefInfoArray[i]->getLastRefNodesList()))
            {
            printOnVerifyError(_treesRefInfoArray[i],treesRefInfoArrayTemp[i]);
            TR_ASSERT(0,"fail to verify lastRefNodesList for %p\n",_treesRefInfoArray[i]->getTreeTop()->getNode());
            }
         }
      } // scope of the stack memory region
#endif

   return true;
   }
/**
 * Attempt to replace a small, constant-length arraycopy node with an
 * equivalent scalar load/store pair of the matching integral width.
 *
 * @param comp                      the compilation object
 * @param node                      the TR::arraycopy node to scalarize
 * @param tt                        the treetop anchoring \p node, or NULL when
 *                                  called outside tree-walk context
 * @param useElementType            when true, only scalarize if the copy length
 *                                  equals the element size (use the element type)
 * @param didTransformArrayCopyNode set to true iff the trees were changed
 * @param sourceRef                 ignored: overwritten below with a generic
 *                                  int shadow symref (pointer passed by value,
 *                                  so the caller's pointer is unaffected)
 * @param targetRef                 ignored: same as \p sourceRef
 * @param castToIntegral            unused in this implementation
 * @return \p node (possibly recreated in place as a store)
 */
TR::Node *
OMR::TransformUtil::scalarizeArrayCopy(
      TR::Compilation *comp,
      TR::Node *node,
      TR::TreeTop *tt,
      bool useElementType,
      bool &didTransformArrayCopyNode,
      TR::SymbolReference *sourceRef,
      TR::SymbolReference *targetRef,
      bool castToIntegral)
   {
   TR::CodeGenerator *cg = comp->cg();

   didTransformArrayCopyNode = false;

   // Bail out unless this is a plain 3-child arraycopy with a constant length,
   // the option is enabled, and codegen can still accept IL changes
   if ((comp->getOptLevel() == noOpt) ||
       !comp->getOption(TR_ScalarizeSSOps) ||
       node->getOpCodeValue() != TR::arraycopy ||
       node->getNumChildren() != 3 ||
       comp->requiresSpineChecks() ||
       !node->getChild(2)->getOpCode().isLoadConst() ||
       cg->getOptimizationPhaseIsComplete())
      return node;

   int64_t byteLen = node->getChild(2)->get64bitIntegralValue();

   if (byteLen == 0)
      {
      // Zero-length copy: the whole tree is dead; anchor the address children
      // if needed and unlink the treetop
      if (tt)
         {
         // Anchor the first two children
         if (!node->getFirstChild()->safeToDoRecursiveDecrement())
            TR::TreeTop::create(comp, tt->getPrevTreeTop(),
                                TR::Node::create(TR::treetop, 1, node->getFirstChild()));

         if (!node->getSecondChild()->safeToDoRecursiveDecrement())
            TR::TreeTop::create(comp, tt->getPrevTreeTop(),
                                TR::Node::create(TR::treetop, 1, node->getSecondChild()));

         tt->getPrevTreeTop()->join(tt->getNextTreeTop());
         tt->getNode()->recursivelyDecReferenceCount();
         didTransformArrayCopyNode = true;
         }
      return node;
      }
   else if (byteLen < 0)
      {
      return node;
      }
   else if (byteLen > TR_MAX_OTYPE_SIZE)
      {
      return node;
      }

   TR::DataType dataType = TR::Aggregate;

   // Get the element datatype from the (hidden) 4th child
   TR::DataType elementType = node->getArrayCopyElementType();
   int32_t elementSize = TR::Symbol::convertTypeToSize(elementType);

   if (byteLen == elementSize)
      {
      dataType = elementType;
      }
   else if (!useElementType)
      {
      // Fall back to the integral type matching the copy width
      switch (byteLen)
         {
         case 1: dataType = TR::Int8; break;
         case 2: dataType = TR::Int16; break;
         case 4: dataType = TR::Int32; break;
         case 8: dataType = TR::Int64; break;
         }
      }
   else
      {
      return node;
      }

   // load/store double on 64-bit PPC requires offset to be word aligned
   // abort if this requirement is not met.
   // TODO: also need to check if the first two children are aload nodes
   bool cannot_use_load_store_long = false;
   if (TR::Compiler->target.cpu.isPower())
      if (dataType == TR::Int64 && TR::Compiler->target.is64Bit())
         {
         TR::Node * firstChild = node->getFirstChild();
         if (firstChild->getNumChildren() == 2)
            {
            TR::Node *offsetChild = firstChild->getSecondChild();
            TR_ASSERT(offsetChild->getOpCodeValue() != TR::iconst, "iconst shouldn't be used for 64-bit array indexing");
            if (offsetChild->getOpCodeValue() == TR::lconst)
               {
               if ((offsetChild->getLongInt() & 0x3) != 0)
                  cannot_use_load_store_long = true;
               }
            }
         TR::Node *secondChild = node->getSecondChild();
         if (secondChild->getNumChildren() == 2)
            {
            TR::Node *offsetChild = secondChild->getSecondChild();
            TR_ASSERT(offsetChild->getOpCodeValue() != TR::iconst, "iconst shouldn't be used for 64-bit array indexing");
            if (offsetChild->getOpCodeValue() == TR::lconst)
               {
               if ((offsetChild->getLongInt() & 0x3) != 0)
                  cannot_use_load_store_long = true;
               }
            }
         }
   if (cannot_use_load_store_long) return node;

   // Both accesses go through a generic int shadow; the incoming
   // sourceRef/targetRef arguments are deliberately replaced here
   targetRef = comp->getSymRefTab()->findOrCreateGenericIntShadowSymbolReference(0);
   sourceRef = targetRef;

   bool trace = comp->getOption(TR_TraceScalarizeSSOps);
   if (trace)
      traceMsg(comp,"scalarizeArrayCopy: node %p got targetRef (#%d) and sourceRef (#%d)\n",
         node,targetRef?targetRef->getReferenceNumber():-1,sourceRef?sourceRef->getReferenceNumber():-1);

   if (targetRef == NULL || sourceRef == NULL)
      {
      if (trace)
         traceMsg(comp,"do not scalarizeArrayCopy node %p : targetRef is NULL (%s) or sourceRef is NULL (%s)\n",node,targetRef?"no":"yes",sourceRef?"no":"yes");
      return node;
      }

#ifdef J9_PROJECT_SPECIFIC
   if (targetRef->getSymbol()->getDataType().isBCD() ||
       sourceRef->getSymbol()->getDataType().isBCD())
      {
      return node;
      }
#endif

   if (performTransformation(comp, "%sScalarize arraycopy 0x%p\n", OPT_DETAILS, node))
      {
      TR::Node *store = TR::TransformUtil::scalarizeAddressParameter(comp, node->getSecondChild(), byteLen, dataType, targetRef, true);
      TR::Node *load = TR::TransformUtil::scalarizeAddressParameter(comp, node->getFirstChild(), byteLen, dataType, sourceRef, false);

      if (tt)
         {
         // Transforming
         //    treetop
         //      arrayCopy   <-- node
         // into
         //    *store
         //
         node->recursivelyDecReferenceCount();
         tt->setNode(node);
         }
      else
         {
         for (int16_t c = node->getNumChildren() - 1; c >= 0; c--)
            cg->recursivelyDecReferenceCount(node->getChild(c));
         }

      // Recreate the arraycopy node in place as the store so that any parents
      // referencing it remain valid
      TR::Node::recreate(node, store->getOpCodeValue());
      node->setSymbolReference(store->getSymbolReference());

      if (store->getOpCode().isStoreIndirect())
         {
         node->setChild(0, store->getFirstChild());
         node->setAndIncChild(1, load);
         node->setNumChildren(2);
         }
      else
         {
         node->setAndIncChild(0, load);
         node->setNumChildren(1);
         }

      didTransformArrayCopyNode = true;
      }

   return node;
   }
/**
 * Generate z/Architecture instructions for a 64-bit integer subtract rooted at
 * \p root.
 *
 * Opcode selection: on 64-bit (or 64-bit-registers-on-32-bit) targets, SGR/SG
 * are used unless the condition code is needed (NEED_CC or lusubb), in which
 * case the logical forms SLGR/SLG are used; a borrow-consuming lusubb switches
 * to SLBGR/SLBG (or SLBR/SLB on 32-bit). On z14+ a halfword second operand of
 * the form s2l(sloadi) can be subtracted directly from memory with SGH.
 * Pure 32-bit codegen decomposes the subtract into high/low register-pair
 * operations with explicit borrow propagation.
 *
 * The result register is set on \p root and both children are decremented.
 *
 * @param root the subtract node (e.g. lsub / lusubb) to evaluate
 */
void
TR_S390BinaryAnalyser::longSubtractAnalyser(TR::Node * root)
   {
   TR::Node * firstChild;
   TR::Node * secondChild;
   TR::RegisterDependencyConditions * dependencies = NULL;
   bool setsOrReadsCC = NEED_CC(root) || (root->getOpCodeValue() == TR::lusubb);
   TR::InstOpCode::Mnemonic regToRegOpCode;
   TR::InstOpCode::Mnemonic memToRegOpCode;
   TR::Compilation *comp = TR::comp();

   if (TR::Compiler->target.is64Bit() || cg()->use64BitRegsOn32Bit())
      {
      if (!setsOrReadsCC)
         {
         regToRegOpCode = TR::InstOpCode::SGR;
         memToRegOpCode = TR::InstOpCode::SG;
         }
      else
         {
         // Logical forms leave a meaningful condition code for consumers
         regToRegOpCode = TR::InstOpCode::SLGR;
         memToRegOpCode = TR::InstOpCode::SLG;
         }
      }
   else
      {
      regToRegOpCode = TR::InstOpCode::SLR;
      memToRegOpCode = TR::InstOpCode::SL;
      }

   firstChild = root->getFirstChild();
   secondChild = root->getSecondChild();
   TR::Register * firstRegister = firstChild->getRegister();
   TR::Register * secondRegister = secondChild->getRegister();

   setInputs(firstChild, firstRegister, secondChild, secondRegister, false, false, comp);

   /** Attempt to use SGH to subtract halfword (64 <- 16).
    *  The second child is a halfword from memory */
   bool is16BitMemory2Operand = false;
   if (TR::Compiler->target.cpu.getS390SupportsZ14() &&
       secondChild->getOpCodeValue() == TR::s2l &&
       secondChild->getFirstChild()->getOpCodeValue() == TR::sloadi &&
       secondChild->isSingleRefUnevaluated() &&
       secondChild->getFirstChild()->isSingleRefUnevaluated())
      {
      setMem2();
      memToRegOpCode = TR::InstOpCode::SGH;
      is16BitMemory2Operand = true;
      }

   if (getEvalChild1())
      {
      firstRegister = cg()->evaluate(firstChild);
      }

   if (getEvalChild2())
      {
      secondRegister = cg()->evaluate(secondChild);
      }

   remapInputs(firstChild, firstRegister, secondChild, secondRegister);

   if ((root->getOpCodeValue() == TR::lusubb) &&
       TR_S390ComputeCC::setCarryBorrow(root->getChild(2), false, cg()))
      {
      // use SLBGR rather than SLGR/SGR
      //     SLBG rather than SLG/SG
      // or
      // use SLBR rather than SLR
      //     SLB rather than SL
      bool uses64bit = TR::Compiler->target.is64Bit() || cg()->use64BitRegsOn32Bit();
      regToRegOpCode = uses64bit ? TR::InstOpCode::SLBGR : TR::InstOpCode::SLBR;
      memToRegOpCode = uses64bit ? TR::InstOpCode::SLBG : TR::InstOpCode::SLB;
      }

   if (TR::Compiler->target.is64Bit() || cg()->use64BitRegsOn32Bit())
      {
      if (getCopyReg1())
         {
         // First operand must be preserved: copy it into a fresh register
         TR::Register * thirdReg = cg()->allocate64bitRegister();
         root->setRegister(thirdReg);
         generateRRInstruction(cg(), TR::InstOpCode::LGR, root, thirdReg, firstRegister);
         if (getBinaryReg3Reg2())
            {
            generateRRInstruction(cg(), regToRegOpCode, root, thirdReg, secondRegister);
            }
         else // assert getBinaryReg3Mem2() == true
            {
            TR::MemoryReference * longMR = generateS390MemoryReference(secondChild, cg());
            generateRXInstruction(cg(), memToRegOpCode, root, thirdReg, longMR);
            longMR->stopUsingMemRefRegister(cg());
            }
         }
      else if (getBinaryReg1Reg2())
         {
         generateRRInstruction(cg(), regToRegOpCode, root, firstRegister, secondRegister);
         root->setRegister(firstRegister);
         }
      else // assert getBinaryReg1Mem2() == true
         {
         TR_ASSERT( !getInvalid(), "TR_S390BinaryAnalyser::invalid case\n");

         // For SGH the memory operand is the sloadi under the s2l conversion
         TR::Node* baseAddrNode = is16BitMemory2Operand ? secondChild->getFirstChild() : secondChild;
         TR::MemoryReference * longMR = generateS390MemoryReference(baseAddrNode, cg());
         generateRXInstruction(cg(), memToRegOpCode, root, firstRegister, longMR);
         longMR->stopUsingMemRefRegister(cg());
         root->setRegister(firstRegister);

         if(is16BitMemory2Operand)
            {
            cg()->decReferenceCount(secondChild->getFirstChild());
            }
         }
      }
   else // if 32bit codegen...
      {
      bool zArchTrexsupported = performTransformation(comp, "O^O Use SL/SLB for long sub.");

      TR::Register * highDiff = NULL;
      TR::LabelSymbol * doneLSub = TR::LabelSymbol::create(cg()->trHeapMemory(),cg());
      if (getCopyReg1())
         {
         // Result goes into a new even/odd register pair copied from operand 1
         TR::Register * lowThird = cg()->allocateRegister();
         TR::Register * highThird = cg()->allocateRegister();
         TR::RegisterPair * thirdReg = cg()->allocateConsecutiveRegisterPair(lowThird, highThird);
         highDiff = highThird;
         dependencies = new (cg()->trHeapMemory()) TR::RegisterDependencyConditions(0, 9, cg());
         dependencies->addPostCondition(firstRegister, TR::RealRegister::EvenOddPair);
         dependencies->addPostCondition(firstRegister->getHighOrder(), TR::RealRegister::LegalEvenOfPair);
         dependencies->addPostCondition(firstRegister->getLowOrder(), TR::RealRegister::LegalOddOfPair);
         // If 2nd operand has ref count of 1 and can be accessed by a memory reference,
         // then second register will not be used.
         if(secondRegister == firstRegister && !setsOrReadsCC)
            {
            TR_ASSERT( false, "lsub with identical children - fix Simplifier");
            }
         if (secondRegister != NULL && firstRegister != secondRegister)
            {
            dependencies->addPostCondition(secondRegister, TR::RealRegister::EvenOddPair);
            dependencies->addPostCondition(secondRegister->getHighOrder(), TR::RealRegister::LegalEvenOfPair);
            dependencies->addPostCondition(secondRegister->getLowOrder(), TR::RealRegister::LegalOddOfPair);
            }
         dependencies->addPostCondition(highThird, TR::RealRegister::AssignAny);
         root->setRegister(thirdReg);
         generateRRInstruction(cg(), TR::InstOpCode::LR, root, highThird, firstRegister->getHighOrder());
         generateRRInstruction(cg(), TR::InstOpCode::LR, root, lowThird, firstRegister->getLowOrder());
         if (getBinaryReg3Reg2())
            {
            if ((ENABLE_ZARCH_FOR_32 && zArchTrexsupported) || setsOrReadsCC)
               {
               // SLR low then SLBR high: borrow propagated via condition code
               generateRRInstruction(cg(), regToRegOpCode, root, lowThird, secondRegister->getLowOrder());
               generateRRInstruction(cg(), TR::InstOpCode::SLBR, root, highThird, secondRegister->getHighOrder());
               }
            else
               {
               generateRRInstruction(cg(), TR::InstOpCode::SR, root, highThird, secondRegister->getHighOrder());
               generateRRInstruction(cg(), TR::InstOpCode::SLR, root, lowThird, secondRegister->getLowOrder());
               }
            }
         else // assert getBinaryReg3Mem2() == true
            {
            TR::MemoryReference * highMR = generateS390MemoryReference(secondChild, cg());
            TR::MemoryReference * lowMR = generateS390MemoryReference(*highMR, 4, cg());
            dependencies->addAssignAnyPostCondOnMemRef(highMR);
            if ((ENABLE_ZARCH_FOR_32 && zArchTrexsupported) || setsOrReadsCC)
               {
               generateRXInstruction(cg(), memToRegOpCode, root, lowThird, lowMR);
               generateRXInstruction(cg(), TR::InstOpCode::SLB, root, highThird, highMR);
               }
            else
               {
               generateRXInstruction(cg(), TR::InstOpCode::S, root, highThird, highMR);
               generateRXInstruction(cg(), TR::InstOpCode::SL, root, lowThird, lowMR);
               }
            highMR->stopUsingMemRefRegister(cg());
            lowMR->stopUsingMemRefRegister(cg());
            }
         }
      else if (getBinaryReg1Reg2())
         {
         dependencies = new (cg()->trHeapMemory()) TR::RegisterDependencyConditions(0, 6, cg());
         dependencies->addPostCondition(firstRegister, TR::RealRegister::EvenOddPair);
         dependencies->addPostCondition(firstRegister->getHighOrder(), TR::RealRegister::LegalEvenOfPair);
         dependencies->addPostCondition(firstRegister->getLowOrder(), TR::RealRegister::LegalOddOfPair);
         if(secondRegister == firstRegister)
            {
            TR_ASSERT( false, "lsub with identical children - fix Simplifier");
            }
         if (secondRegister != firstRegister)
            {
            dependencies->addPostCondition(secondRegister, TR::RealRegister::EvenOddPair);
            dependencies->addPostCondition(secondRegister->getHighOrder(), TR::RealRegister::LegalEvenOfPair);
            dependencies->addPostCondition(secondRegister->getLowOrder(), TR::RealRegister::LegalOddOfPair);
            }
         if ((ENABLE_ZARCH_FOR_32 && zArchTrexsupported) || setsOrReadsCC)
            {
            generateRRInstruction(cg(), regToRegOpCode, root, firstRegister->getLowOrder(), secondRegister->getLowOrder());
            generateRRInstruction(cg(), TR::InstOpCode::SLBR, root, firstRegister->getHighOrder(), secondRegister->getHighOrder());
            }
         else
            {
            generateRRInstruction(cg(), TR::InstOpCode::SR, root, firstRegister->getHighOrder(), secondRegister->getHighOrder());
            generateRRInstruction(cg(), TR::InstOpCode::SLR, root, firstRegister->getLowOrder(), secondRegister->getLowOrder());
            }
         highDiff = firstRegister->getHighOrder();
         root->setRegister(firstRegister);
         }
      else // assert getBinaryReg1Mem2() == true
         {
         TR_ASSERT( !getInvalid(),"TR_S390BinaryAnalyser::invalid case\n");
         dependencies = new (cg()->trHeapMemory()) TR::RegisterDependencyConditions(0, 5, cg());
         dependencies->addPostCondition(firstRegister, TR::RealRegister::EvenOddPair);
         dependencies->addPostCondition(firstRegister->getHighOrder(), TR::RealRegister::LegalEvenOfPair);
         dependencies->addPostCondition(firstRegister->getLowOrder(), TR::RealRegister::LegalOddOfPair);

         TR::MemoryReference * highMR = generateS390MemoryReference(secondChild, cg());
         TR::MemoryReference * lowMR = generateS390MemoryReference(*highMR, 4, cg());
         dependencies->addAssignAnyPostCondOnMemRef(highMR);
         if ((ENABLE_ZARCH_FOR_32 && zArchTrexsupported) || setsOrReadsCC)
            {
            generateRXInstruction(cg(), memToRegOpCode, root, firstRegister->getLowOrder(), lowMR);
            generateRXInstruction(cg(), TR::InstOpCode::SLB, root, firstRegister->getHighOrder(), highMR);
            }
         else
            {
            generateRXInstruction(cg(), TR::InstOpCode::S, root, firstRegister->getHighOrder(), highMR);
            generateRXInstruction(cg(), TR::InstOpCode::SL, root, firstRegister->getLowOrder(), lowMR);
            }
         highDiff = firstRegister->getHighOrder();
         root->setRegister(firstRegister);
         highMR->stopUsingMemRefRegister(cg());
         lowMR->stopUsingMemRefRegister(cg());
         }

      if (!((ENABLE_ZARCH_FOR_32 && zArchTrexsupported) || setsOrReadsCC))
         {
         // Check for overflow in LS int. If overflow, we are done.
         generateS390BranchInstruction(cg(), TR::InstOpCode::BRC,TR::InstOpCode::COND_MASK3, root, doneLSub);

         // Increment MS int due to overflow in LS int
         generateRIInstruction(cg(), TR::InstOpCode::AHI, root, highDiff, -1);

         generateS390LabelInstruction(cg(), TR::InstOpCode::LABEL, root, doneLSub, dependencies);
         }
      }

   cg()->decReferenceCount(firstChild);
   cg()->decReferenceCount(secondChild);
   return;
   }
// Per-pass setup for dead-trees elimination. Resets the pass-local state,
// then makes two sweeps over the method:
//   1. a linear walk over all treetops that removes trivially dead trees
//      (a TR::treetop over an already-seen node, or a check over a
//      single-referenced side-effect-free resolved call), and
//   2. a fixed-point loop that strips GlRegDeps children whose register
//      value is no longer referenced, mirroring each removal into the
//      corresponding dependency on every predecessor block.
void TR::DeadTreesElimination::prePerformOnBlocks()
   {
   // Reset per-pass flags and the list of target trees before analysis.
   _cannotBeEliminated = false;
   _delayedRegStores = false;
   _targetTrees.deleteAll();

   // Walk through all the blocks to remove trivial dead trees of the form
   // treetop
   //   => node
   // The problem with these trees is in the scenario where the earlier use
   // of 'node' is also dead. However, our analysis won't find that because
   // the reference count is > 1.
   vcount_t visitCount = comp()->incOrResetVisitCount();
   for (TR::TreeTop *tt = comp()->getStartTree(); tt != 0; tt = tt->getNextTreeTop())
      {
      bool removed = false;

      TR::Node *node = tt->getNode();
      // A TR::treetop whose child carries the current visit count has already
      // been walked under an earlier treetop, so this anchor is redundant.
      if (node->getOpCodeValue() == TR::treetop &&
          node->getFirstChild()->getVisitCount() == visitCount &&
          performTransformation(comp(), "%sRemove trivial dead tree: %p\n", optDetailString(), node))
         {
         TR::TransformUtil::removeTree(comp(), tt);
         removed = true;
         }
      else
         {
         // A check node over a call can go if the call is referenced only
         // here, resolves to a known method, and that method is marked
         // side-effect free — neither the check nor the call is observable.
         if (node->getOpCode().isCheck() &&
             node->getFirstChild()->getOpCode().isCall() &&
             node->getFirstChild()->getReferenceCount() == 1 &&
             node->getFirstChild()->getSymbolReference()->getSymbol()->isResolvedMethod() &&
             node->getFirstChild()->getSymbolReference()->getSymbol()->castToResolvedMethodSymbol()->isSideEffectFree() &&
             performTransformation(comp(), "%sRemove dead check of side-effect free call: %p\n", optDetailString(), node))
            {
            TR::TransformUtil::removeTree(comp(), tt);
            removed = true;
            }
         }

      // If removing this tree left a block that starts (non-extended) at a
      // BBStart and immediately falls into a Goto, ask for redundant-goto
      // elimination to clean that block up.
      if (removed
          && tt->getNextTreeTop()->getNode()->getOpCodeValue() == TR::Goto
          && tt->getPrevTreeTop()->getNode()->getOpCodeValue() == TR::BBStart
          && !tt->getPrevTreeTop()->getNode()->getBlock()->isExtensionOfPreviousBlock())
         {
         requestOpt(OMR::redundantGotoElimination, tt->getEnclosingBlock());
         }

      if (node->getVisitCount() >= visitCount)
         continue;
      // Mark this tree's nodes as seen so later treetop anchors over the
      // same commoned nodes are recognized as trivial above.
      TR::TransformUtil::recursivelySetNodeVisitCount(tt->getNode(), visitCount);
      }

   // If the last use of an iRegLoad has been removed, then remove the node from
   // the BBStart and remove the corresponding dependency node from each of the block's
   // predecessors.
   //
   // Repeats until no GlRegDep is removed in a full sweep: removing a dep on a
   // predecessor drops reference counts, which can expose further removals.
   while (1)
      {
      bool glRegDepRemoved = false;
      for (TR::Block * b = comp()->getStartBlock(); b; b = b->getNextBlock())
         {
         TR::TreeTop * startTT = b->getEntry();
         TR::Node * startNode = startTT->getNode();
         if (startNode->getNumChildren() > 0 && !debug("disableEliminationOfGlRegDeps"))
            {
            TR::Node * glRegDeps = startNode->getFirstChild();
            TR_ASSERT(glRegDeps->getOpCodeValue() == TR::GlRegDeps, "expected TR::GlRegDeps");
            // Iterate in reverse because removeChild(i) shifts later children down.
            for (int32_t i = glRegDeps->getNumChildren() - 1; i >= 0; --i)
               {
               TR::Node * dep = glRegDeps->getChild(i);
               // refcount == 1 means only this GlRegDeps references the value;
               // float deps are kept unless the CG supports Java float semantics.
               if (dep->getReferenceCount() == 1 &&
                   (!dep->getOpCode().isFloatingPoint() ||
                    cg()->getSupportsJavaFloatSemantics()) &&
                   performTransformation(comp(), "%sRemove GlRegDep : %p\n", optDetailString(), glRegDeps->getChild(i)))
                  {
                  glRegDeps->removeChild(i);
                  glRegDepRemoved = true;
                  TR_GlobalRegisterNumber registerNum = dep->getGlobalRegisterNumber();
                  // Mirror the removal into each predecessor's outgoing dependency.
                  for (auto e = b->getPredecessors().begin(); e != b->getPredecessors().end(); ++e)
                     {
                     TR::Block * pred = toBlock((*e)->getFrom());
                     if (pred == comp()->getFlowGraph()->getStart())
                        continue;

                     TR::Node * parent = pred->getLastRealTreeTop()->getNode();
                     if ( parent->getOpCode().isJumpWithMultipleTargets() && parent->getOpCode().hasBranchChildren())
                        {
                        // Multi-target branch (e.g. a switch): scan its case
                        // children for the one targeting this block's entry.
                        for (int32_t j = parent->getCaseIndexUpperBound() - 1; j > 0; --j)
                           {
                           TR::Node * caseNode = parent->getChild(j);
                           TR_ASSERT(caseNode->getOpCode().isCase() || caseNode->getOpCodeValue() == TR::branch, "having problems navigating a switch");
                           // NOTE: the trailing '&& 0' deliberately disables this
                           // removal — see the original comment below.
                           if (caseNode->getBranchDestination() == startTT &&
                               caseNode->getNumChildren() > 0 &&
                               0) // can't do this now that all glRegDeps are hung off the default branch
                              removeGlRegDep(caseNode, registerNum, pred, this);
                           }
                        }
                     else if (!parent->getOpCode().isReturn() &&
                              parent->getOpCodeValue() != TR::igoto &&
                              !( parent->getOpCode().isJumpWithMultipleTargets() && parent->getOpCode().hasBranchChildren()) &&
                              !(parent->getOpCodeValue()==TR::treetop && parent->getFirstChild()->getOpCode().isCall() && parent->getFirstChild()->getOpCode().isIndirect()))
                        {
                        // For a fall-through predecessor the dep hangs off the
                        // block's exit (BBEnd) node rather than the branch.
                        if (pred->getNextBlock() == b)
                           parent = pred->getExit()->getNode();
                        removeGlRegDep(parent, registerNum, pred, this);
                        }
                     }
                  }
               }
            // Drop the GlRegDeps node itself from BBStart once it is empty.
            if (glRegDeps->getNumChildren() == 0)
               startNode->removeChild(0);
            }
         }

      if (!glRegDepRemoved)
         break;
      }
   }