void TR::ILValidator::updateNodeState(Location &newLocation) {
  TR::Node *node = newLocation.currentNode();
  NodeState &state = _nodeStates[node];
  if (node->getReferenceCount() == state._futureReferenceCount) {
    // First occurrence -- do some bookkeeping
    //
    if (node->getReferenceCount() == 0) {
      validityRule(newLocation, node->getOpCode().isTreeTop(), "Only nodes with isTreeTop opcodes can have refcount == 0");
    } else {
      _liveNodes.add(node);
    }
  }

  if (_liveNodes.contains(node)) {
    validityRule(newLocation, state._futureReferenceCount >= 1, "Node already has reference count 0");
    if (--state._futureReferenceCount == 0) {
      _liveNodes.remove(node);
    }
  } else {
    validityRule(newLocation, node->getOpCode().isTreeTop(), "Node has already gone dead");
  }

  if (isLoggingEnabled()) {
    static const char *traceLiveNodesDuringValidation = feGetEnv("TR_traceLiveNodesDuringValidation");
    if (traceLiveNodesDuringValidation && !_liveNodes.isEmpty()) {
      traceMsg(comp(), "  -- Live nodes: {");
      char *separator = "";
      for (LiveNodeWindow::Iterator lnwi(_liveNodes); lnwi.currentNode(); ++lnwi) {
        traceMsg(comp(), "%sn%dn", separator, lnwi.currentNode()->getGlobalIndex());
        separator = ", ";
      }
      traceMsg(comp(), "}\n");
    }
  }
}
static void removeGlRegDep(TR::Node * parent, TR_GlobalRegisterNumber registerNum, TR::Block *containingBlock, TR::Optimization *opt) {
  if (parent->getNumChildren() == 0)
    return;

  TR_ASSERT(parent->getNumChildren() > 0, "expected TR::GlRegDeps %p", parent);

  TR::Node * predGlRegDeps = parent->getLastChild();

  if (predGlRegDeps->getOpCodeValue() != TR::GlRegDeps) // could be already removed
    return;

  TR_ASSERT(predGlRegDeps->getOpCodeValue() == TR::GlRegDeps, "expected TR::GlRegDeps");

  for (int32_t i = predGlRegDeps->getNumChildren() - 1; i >= 0; --i)
    if (predGlRegDeps->getChild(i)->getGlobalRegisterNumber() == registerNum) {
      dumpOptDetails(opt->comp(), "%sRemove GlRegDep : %p\n", opt->optDetailString(), predGlRegDeps->getChild(i));
      TR::Node *removedChild = predGlRegDeps->removeChild(i);
      if (removedChild->getReferenceCount() <= 1) {
        // The only remaining parent is the RegStore. Another pass of
        // deadTrees may be able to eliminate that.
        //
        opt->requestOpt(OMR::deadTreesElimination, true, containingBlock);
      }
      break;
    }

  if (predGlRegDeps->getNumChildren() == 0)
    parent->removeLastChild();
}
static TR::Register *l2fd(TR::Node *node, TR::RealRegister *target, TR_X86OpCodes opRegMem8, TR_X86OpCodes opRegReg8, TR::CodeGenerator *cg) {
  TR::Node *child = node->getFirstChild();
  TR::MemoryReference *tempMR;

  TR_ASSERT(cg->useSSEForSinglePrecision(), "assertion failure");

  if (child->getRegister() == NULL &&
      child->getReferenceCount() == 1 &&
      child->getOpCode().isLoadVar()) {
    tempMR = generateX86MemoryReference(child, cg);
    generateRegMemInstruction(opRegMem8, node, target, tempMR, cg);
    tempMR->decNodeReferenceCounts(cg);
  } else {
    TR::Register *intReg = cg->evaluate(child);
    generateRegRegInstruction(opRegReg8, node, target, intReg, cg);
    cg->decReferenceCount(child);
  }

  node->setRegister(target);
  return target;
}
void TR::ValidateNodeRefCountWithinBlock::validate(TR::TreeTop *firstTreeTop, TR::TreeTop *exitTreeTop) {
  _nodeChecklist.empty();
  for (TR::TreeTop *tt = firstTreeTop; tt != exitTreeTop->getNextTreeTop(); tt = tt->getNextTreeTop()) {
    TR::Node *node = tt->getNode();
    node->setLocalIndex(node->getReferenceCount());
    validateRefCountPass1(node);
  }

  /**
   * We start again from the start of the block, and check the localIndex to
   * make sure it is 0.
   *
   * NOTE: Walking the tree backwards causes huge stack usage in validateRefCountPass2.
   */
  _nodeChecklist.empty();
  for (TR::TreeTop *tt = firstTreeTop; tt != exitTreeTop->getNextTreeTop(); tt = tt->getNextTreeTop()) {
    validateRefCountPass2(tt->getNode());
  }
}
bool collectSymbolReferencesInNode(TR::Node *node, TR::SparseBitVector &symbolReferencesInNode,
                                   int32_t *numDeadSubNodes, vcount_t visitCount, TR::Compilation *comp,
                                   bool *seenInternalPointer, bool *seenArraylet, bool *cantMoveUnderBranch) {
  // The visit count in the node must be maintained by this method.
  //
  vcount_t oldVisitCount = node->getVisitCount();
  if (oldVisitCount == visitCount || oldVisitCount == comp->getVisitCount())
    return true;
  node->setVisitCount(comp->getVisitCount());

  //diagnostic("Walking node %p, height=%d, oldVisitCount=%d, visitCount=%d, compVisitCount=%d\n", node, *height, oldVisitCount, visitCount,comp->getVisitCount());

  // For all other subtrees collect all symbols that could be killed between
  // here and the next reference.
  //
  for (int32_t i = node->getNumChildren()-1; i >= 0; i--) {
    TR::Node *child = node->getChild(i);
    if (child->getFutureUseCount() == 1 &&
        child->getReferenceCount() > 1 &&
        !child->getOpCode().isLoadConst())
      *numDeadSubNodes = (*numDeadSubNodes) + 1;

    collectSymbolReferencesInNode(child, symbolReferencesInNode, numDeadSubNodes, visitCount,
                                  comp, seenInternalPointer, seenArraylet, cantMoveUnderBranch);
  }

  // detect if this is a direct load that shouldn't be moved under a branch (because an update was moved past
  // this load by treeSimplification)
  if (cantMoveUnderBranch &&
      (node->getOpCode().isLoadVarDirect() || node->getOpCode().isLoadReg()) &&
      node->isDontMoveUnderBranch())
    *cantMoveUnderBranch = true;

  if (seenInternalPointer && node->isInternalPointer() && node->getReferenceCount() > 1)
    *seenInternalPointer = true;

  if (seenArraylet) {
    if (node->getOpCode().hasSymbolReference() &&
        node->getSymbolReference()->getSymbol()->isArrayletShadowSymbol() &&
        node->getReferenceCount() > 1) {
      *seenArraylet = true;
    }
  }

  // Add this node's symbol reference to the set
  if (node->getOpCode().hasSymbolReference()) {
    symbolReferencesInNode[node->getSymbolReference()->getReferenceNumber()] = true;
  }

  return true;
}
// Returns true if the tree contains a first reference to a call, or a check.
bool TR_LocalLiveRangeReduction::containsCallOrCheck(TR_TreeRefInfo *treeRefInfo, TR::Node *node) {
  if ((node->getOpCode().isCall() &&
       (node->getReferenceCount() == 1 || treeRefInfo->getFirstRefNodesList()->find(node))) ||
      node->getOpCode().isCheck()) {
    return true;
  }

  for (int32_t i = 0; i < node->getNumChildren(); i++) {
    TR::Node *child = node->getChild(i);
    if (child->getReferenceCount() == 1 || treeRefInfo->getFirstRefNodesList()->find(child))
      return containsCallOrCheck(treeRefInfo, child);
  }
  return false;
}
void OMR::CodeGenerator::evaluateChildrenWithMultipleRefCount(TR::Node * node) {
  for (int i = 0; i < node->getNumChildren(); i++) {
    TR::Node *child = node->getChild(i);
    if (child->getRegister() == NULL) // not already evaluated
    {
      // Note: we assume things without a symbol reference don't
      // necessarily need to be evaluated here, and can wait
      // until they are actually needed.
      //
      // vft pointers are special - we need to evaluate the object in all cases
      // but for nopable virtual guards we can wait to load and mask the pointer
      // until we actually need to use it
      //
      if (child->getReferenceCount() > 1 &&
          (child->getOpCode().hasSymbolReference() ||
           (child->getOpCodeValue() == TR::l2a && child->getChild(0)->containsCompressionSequence()))) {
        TR::SymbolReference *vftPointerSymRef = TR::comp()->getSymRefTab()->element(TR::SymbolReferenceTable::vftSymbol);
        if (node->isNopableInlineGuard() && self()->getSupportsVirtualGuardNOPing() &&
            child->getOpCodeValue() == TR::aloadi &&
            child->getChild(0)->getOpCode().hasSymbolReference() &&
            child->getChild(0)->getSymbolReference() == vftPointerSymRef &&
            child->getChild(0)->getOpCodeValue() == TR::aloadi) {
          if (!child->getChild(0)->getChild(0)->getRegister() &&
              child->getChild(0)->getChild(0)->getReferenceCount() > 1)
            self()->evaluate(child->getChild(0)->getChild(0));
          else
            self()->evaluateChildrenWithMultipleRefCount(child->getChild(0)->getChild(0));
        } else {
          self()->evaluate(child);
        }
      } else {
        self()->evaluateChildrenWithMultipleRefCount(child);
      }
    }
  }
}
// TODO:AMD64: Could this be combined with istoreEvaluator without too much ugliness?
TR::Register *OMR::X86::AMD64::TreeEvaluator::lstoreEvaluator(TR::Node *node, TR::CodeGenerator *cg) {
  TR::Node *valueChild;
  TR::Compilation* comp = cg->comp();

  if (node->getOpCode().isIndirect())
    valueChild = node->getSecondChild();
  else
    valueChild = node->getFirstChild();

  // Handle special cases
  //
  if (valueChild->getRegister() == NULL &&
      valueChild->getReferenceCount() == 1) {
    // Special case storing a double value into long variable
    //
    if (valueChild->getOpCodeValue() == TR::dbits2l &&
        !valueChild->normalizeNanValues()) {
      if (node->getOpCode().isIndirect()) {
        node->setChild(1, valueChild->getFirstChild());
        TR::Node::recreate(node, TR::dstorei);
        TR::TreeEvaluator::floatingPointStoreEvaluator(node, cg);
        node->setChild(1, valueChild);
        TR::Node::recreate(node, TR::lstorei);
      } else {
        node->setChild(0, valueChild->getFirstChild());
        TR::Node::recreate(node, TR::dstore);
        TR::TreeEvaluator::floatingPointStoreEvaluator(node, cg);
        node->setChild(0, valueChild);
        TR::Node::recreate(node, TR::lstore);
      }
      cg->decReferenceCount(valueChild);
      return NULL;
    }
  }

  return TR::TreeEvaluator::integerStoreEvaluator(node, cg);
}
TR::Register *OMR::X86::AMD64::TreeEvaluator::l2iEvaluator(TR::Node *node, TR::CodeGenerator *cg) {
  TR::Node *child = node->getFirstChild();
  TR::Register *reg = cg->evaluate(child);

  if (child->getReferenceCount() > 1) {
    // This catches two scenarios:
    //
    // 1) A longClobberEvaluate (or any other register-clobbering logic) on
    //    the l2i node could see a refcount of 1, and hence won't make a copy.
    //    If child's refcount is more than 1, we do in fact need a copy, so we'd
    //    better do it here.
    //
    // 2) If the child is commoned, and the l2i node is also commoned, then
    //    we may end up with a situation where the last evaluation of the child
    //    is a clobberEvaluate. By that time, the child's refcount would be 1,
    //    so no copy is made, and the register would be clobbered. Therefore,
    //    the l2i node can't return that same register, or else the other uses
    //    of the node will end up getting the clobbered value.
    //
    // Note that case 2 is conservative, in that it presumes that the child's
    // register will be clobbered by another node. If this does not occur,
    // then the copy we're about to make is unnecessary.
    //
    TR::Register *childReg = reg;
    reg = cg->allocateRegister();
    // to support signExtension in GRA, need to preserve upper word
    // in this move
    generateRegRegInstruction(MOV8RegReg, node, reg, childReg, cg);
  }

  node->setRegister(reg);
  cg->decReferenceCount(child);

  if (cg->enableRegisterInterferences() && node->getOpCode().getSize() == 1)
    cg->getLiveRegisters(TR_GPR)->setByteRegisterAssociation(node->getRegister());

  return reg;
}
/**
 * In pass_1 (validateRefCountPass1), the Local Index (which is set to the Ref
 * Count) for each child is decremented for each visit. The second pass is to
 * make sure that the Local Index is zero by the end of the block. A non-zero
 * Local Index would indicate that the Ref count was wrong at the start
 * of the Validation Process.
 */
void TR::ValidateNodeRefCountWithinBlock::validateRefCountPass1(TR::Node *node) {
  /* If this is the first time through this node, verify the children. */
  if (!_nodeChecklist.isSet(node->getGlobalIndex())) {
    _nodeChecklist.set(node->getGlobalIndex());
    for (int32_t i = node->getNumChildren() - 1; i >= 0; --i) {
      TR::Node *child = node->getChild(i);
      if (_nodeChecklist.isSet(child->getGlobalIndex())) {
        /* If the child has already been visited, decrement its verifyRefCount. */
        child->decLocalIndex();
      } else {
        /* If the child has not yet been visited, set its localIndex and visit it. */
        child->setLocalIndex(child->getReferenceCount() - 1);
        validateRefCountPass1(child);
      }
    }
  }
}
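// The second pass referenced above is not shown in this excerpt. A minimal sketch of what it
// has to do, based purely on the description in the comment above (walk each node once with the
// same checklist and confirm every localIndex came back to zero), could look like the following.
// The TR_ASSERT call is a stand-in for whatever reporting mechanism the real validator uses, and
// the exact body of validateRefCountPass2 in the actual source may differ.
void TR::ValidateNodeRefCountWithinBlock::validateRefCountPass2(TR::Node *node) {
  if (!_nodeChecklist.isSet(node->getGlobalIndex())) {
    _nodeChecklist.set(node->getGlobalIndex());
    for (int32_t i = node->getNumChildren() - 1; i >= 0; --i)
      validateRefCountPass2(node->getChild(i));

    // A non-zero localIndex here means the declared reference count did not match the
    // number of references actually seen within the (extended) basic block.
    TR_ASSERT(node->getLocalIndex() == 0,
              "Node %p has a final localIndex of %d; its reference count does not match its uses in the block",
              node, node->getLocalIndex());
  }
}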
TR::Register *TR_X86FPCompareAnalyser::fpCompareAnalyser(TR::Node *root,
                                                         TR_X86OpCodes cmpRegRegOpCode,
                                                         TR_X86OpCodes cmpRegMemOpCode,
                                                         TR_X86OpCodes cmpiRegRegOpCode,
                                                         bool useFCOMIInstructions) {
  TR::Node *firstChild, *secondChild;
  TR::ILOpCodes cmpOp = root->getOpCodeValue();
  bool reverseMemOp = false;
  bool reverseCmpOp = false;
  TR::Compilation* comp = _cg->comp();
  TR_X86OpCodes cmpInstr = useFCOMIInstructions ? cmpiRegRegOpCode : cmpRegRegOpCode;

  // Some operators must have their operands swapped to improve the generated
  // code needed to evaluate the result of the comparison.
  //
  bool mustSwapOperands = (cmpOp == TR::iffcmple || cmpOp == TR::ifdcmple ||
                           cmpOp == TR::iffcmpgtu || cmpOp == TR::ifdcmpgtu ||
                           cmpOp == TR::fcmple || cmpOp == TR::dcmple ||
                           cmpOp == TR::fcmpgtu || cmpOp == TR::dcmpgtu ||
                           (useFCOMIInstructions &&
                            (cmpOp == TR::iffcmplt || cmpOp == TR::ifdcmplt ||
                             cmpOp == TR::iffcmpgeu || cmpOp == TR::ifdcmpgeu ||
                             cmpOp == TR::fcmplt || cmpOp == TR::dcmplt ||
                             cmpOp == TR::fcmpgeu || cmpOp == TR::dcmpgeu))) ? true : false;

  // Some operators should not have their operands swapped to improve the generated
  // code needed to evaluate the result of the comparison.
  //
  bool preventOperandSwapping = (cmpOp == TR::iffcmpltu || cmpOp == TR::ifdcmpltu ||
                                 cmpOp == TR::iffcmpge || cmpOp == TR::ifdcmpge ||
                                 cmpOp == TR::fcmpltu || cmpOp == TR::dcmpltu ||
                                 cmpOp == TR::fcmpge || cmpOp == TR::dcmpge ||
                                 (useFCOMIInstructions &&
                                  (cmpOp == TR::iffcmpgt || cmpOp == TR::ifdcmpgt ||
                                   cmpOp == TR::iffcmpleu || cmpOp == TR::ifdcmpleu ||
                                   cmpOp == TR::fcmpgt || cmpOp == TR::dcmpgt ||
                                   cmpOp == TR::fcmpleu || cmpOp == TR::dcmpleu))) ? true : false;

  // For correctness, don't swap operands of these operators.
  //
  if (cmpOp == TR::fcmpg || cmpOp == TR::fcmpl ||
      cmpOp == TR::dcmpg || cmpOp == TR::dcmpl) {
    preventOperandSwapping = true;
  }

  // Initial operand evaluation ordering.
  //
  if (preventOperandSwapping || (!mustSwapOperands && _cg->whichChildToEvaluate(root) == 0)) {
    firstChild  = root->getFirstChild();
    secondChild = root->getSecondChild();
    setReversedOperands(false);
  } else {
    firstChild  = root->getSecondChild();
    secondChild = root->getFirstChild();
    setReversedOperands(true);
  }

  TR::Register *firstRegister  = firstChild->getRegister();
  TR::Register *secondRegister = secondChild->getRegister();

  setInputs(firstChild, firstRegister, secondChild, secondRegister, useFCOMIInstructions,

            // If either 'preventOperandSwapping' or 'mustSwapOperands' is set then the
            // initial operand ordering set above must be maintained.
            //
            preventOperandSwapping || mustSwapOperands);

  // Make sure any required operand ordering is respected.
  //
  if ((getCmpReg2Reg1() || getCmpReg2Mem1()) &&
      (mustSwapOperands || preventOperandSwapping)) {
    reverseCmpOp = getCmpReg2Reg1() ? true : false;
    reverseMemOp = getCmpReg2Mem1() ? true : false;
  }

  // If we are not comparing with a memory operand, one of them evaluates
  // to a zero, and the zero is not already on the stack, then we can use
  // FTST to save a register.
  //
  // (With a memory operand, either the constant zero needs to be loaded
  // to use FCOM, or the memory operand needs to be loaded to use FTST,
  // so there is no gain in using FTST.)
  //
  // If the constant zero is in the target register, using FTST means the
  // comparison will be reversed. We cannot do this if the initial ordering
  // of the operands must be maintained.
  //
  // Finally, if FTST is used and this is the last use of the target, the
  // target register may need to be explicitly popped.
  //
  TR::Register *targetRegisterForFTST = NULL;
  TR::Node     *targetChildForFTST = NULL;

  if (getEvalChild1() && isUnevaluatedZero(firstChild)) // do we need getEvalChild1() here?
  {
    if (((getCmpReg1Reg2() || reverseCmpOp) && !(preventOperandSwapping || mustSwapOperands)) ||
        (getCmpReg2Reg1() && !reverseCmpOp)) {
      if (getEvalChild2()) {
        secondRegister = _cg->evaluate(secondChild);
      }
      targetRegisterForFTST = secondRegister;
      targetChildForFTST = secondChild;
      notReversedOperands();
    }
  } else if (getEvalChild2() && isUnevaluatedZero(secondChild)) // do we need getEvalChild2() here?
  {
    if ((getCmpReg1Reg2() || reverseCmpOp) ||
        (getCmpReg2Reg1() && !reverseCmpOp && !(preventOperandSwapping || mustSwapOperands))) {
      if (getEvalChild1()) {
        firstRegister = _cg->evaluate(firstChild);
      }
      targetRegisterForFTST = firstRegister;
      targetChildForFTST = firstChild;
    }
  }

  if (!targetRegisterForFTST) {
    // If we have a choice, evaluate the target operand last. By doing so, we
    // help out the register assigner because the target must be TOS. This
    // avoids an unnecessary FXCH for the target.
    //
    if (getEvalChild1() && getEvalChild2()) {
      if (getCmpReg1Reg2() || getCmpReg1Mem2()) {
        secondRegister = _cg->evaluate(secondChild);
        firstRegister  = _cg->evaluate(firstChild);
      } else {
        firstRegister  = _cg->evaluate(firstChild);
        secondRegister = _cg->evaluate(secondChild);
      }
    } else {
      if (getEvalChild1()) {
        firstRegister = _cg->evaluate(firstChild);
      }
      if (getEvalChild2()) {
        secondRegister = _cg->evaluate(secondChild);
      }
    }
  }

  // Adjust the FP precision of feeding operands.
  //
  if (firstRegister &&
      (firstRegister->needsPrecisionAdjustment() ||
       comp->getOption(TR_StrictFPCompares) ||
       (firstRegister->mayNeedPrecisionAdjustment() && secondChild->getOpCode().isLoadConst()) ||
       (firstRegister->mayNeedPrecisionAdjustment() && !secondRegister))) {
    TR::TreeEvaluator::insertPrecisionAdjustment(firstRegister, root, _cg);
  }

  if (secondRegister &&
      (secondRegister->needsPrecisionAdjustment() ||
       comp->getOption(TR_StrictFPCompares) ||
       (secondRegister->mayNeedPrecisionAdjustment() && firstChild->getOpCode().isLoadConst()) ||
       (secondRegister->mayNeedPrecisionAdjustment() && !firstRegister))) {
    TR::TreeEvaluator::insertPrecisionAdjustment(secondRegister, root, _cg);
  }

  // Generate the compare instruction.
  //
  if (targetRegisterForFTST) {
    generateFPRegInstruction(FTSTReg, root, targetRegisterForFTST, _cg);
  } else if (!useFCOMIInstructions && (getCmpReg1Mem2() || reverseMemOp)) {
    TR::MemoryReference *tempMR = generateX86MemoryReference(secondChild, _cg);
    generateFPRegMemInstruction(cmpRegMemOpCode, root, firstRegister, tempMR, _cg);
    tempMR->decNodeReferenceCounts(_cg);
  } else if (!useFCOMIInstructions && getCmpReg2Mem1()) {
    TR::MemoryReference *tempMR = generateX86MemoryReference(firstChild, _cg);
    generateFPRegMemInstruction(cmpRegMemOpCode, root, secondRegister, tempMR, _cg);
    notReversedOperands();
    tempMR->decNodeReferenceCounts(_cg);
  } else if (getCmpReg1Reg2() || reverseCmpOp) {
    generateFPCompareRegRegInstruction(cmpInstr, root, firstRegister, secondRegister, _cg);
  } else if (getCmpReg2Reg1()) {
    generateFPCompareRegRegInstruction(cmpInstr, root, secondRegister, firstRegister, _cg);
    notReversedOperands();
  }

  _cg->decReferenceCount(firstChild);
  _cg->decReferenceCount(secondChild);

  // Evaluate the comparison.
  //
  if (getReversedOperands()) {
    cmpOp = TR::ILOpCode(cmpOp).getOpCodeForSwapChildren();
    TR::Node::recreate(root, cmpOp);
  }

  if (useFCOMIInstructions && !targetRegisterForFTST) {
    return NULL;
  }

  // We must manually move the FP condition flags to the EFLAGS register if we don't
  // use the FCOMI instructions.
  //
  TR::Register *accRegister = _cg->allocateRegister();
  TR::RegisterDependencyConditions *dependencies = generateRegisterDependencyConditions((uint8_t)1, 1, _cg);
  dependencies->addPreCondition(accRegister, TR::RealRegister::eax, _cg);
  dependencies->addPostCondition(accRegister, TR::RealRegister::eax, _cg);
  generateRegInstruction(STSWAcc, root, accRegister, dependencies, _cg);

  // Pop the FTST target register if it is not used any more.
  //
  if (targetRegisterForFTST &&
      targetChildForFTST && targetChildForFTST->getReferenceCount() == 0) {
    generateFPSTiST0RegRegInstruction(FSTRegReg, root, targetRegisterForFTST, targetRegisterForFTST, _cg);
  }

  return accRegister;
}
int32_t OMR::X86::I386::CodeGenerator::getMaximumNumberOfGPRsAllowedAcrossEdge(TR::Node *node) {
  // TODO: Currently, lookupEvaluator doesn't deal properly with different
  // glRegDeps on different cases of a lookupswitch.
  //
  static const char *enableLookupswitch = feGetEnv("TR_enableGRAAcrossLookupSwitch");
  if (!enableLookupswitch && node->getOpCode().getOpCodeValue() == TR::lookup)
    return 1;

  if (node->getOpCode().getOpCodeValue() == TR::table) {
    // 1 for jump table base reg, which is not apparent in the trees
    // 1 for ebp when it is needed for the VMThread
    //
    return self()->getNumberOfGlobalGPRs() - 2;
  }

  if (node->getOpCode().isIf()) {
    // we run out of all but one/two registers in these cases
    //
    if (node->getFirstChild()->getType().isInt64()) {
      if (node->getOpCode().isBranch()) {
        TR::Node *firstChild = node->getFirstChild();
        TR::Node *secondChild = node->getSecondChild();
        int extraRegsAvailable = 0;

        if (firstChild->getOpCodeValue() == TR::d2l ||
            secondChild->getOpCodeValue() == TR::d2l) {
          return 1;
        }

        if ((firstChild->getReferenceCount() == 1 && firstChild->getOpCode().isLoadVarDirect()) ||
            (secondChild->getReferenceCount() == 1 && firstChild->getOpCode().isLoadVarDirect()))
          extraRegsAvailable += 0; // TODO: put it back to 2 when looking at GRA, GRA pushes allocation of 8 registers

        return 2 + extraRegsAvailable;
      } else {
        // TR_lcmpXX opcodes take up 5 regs
        //
        return 1;
      }
    }

    // we run out of all but one register in these cases....last time I tried....
    //
    if (node->getFirstChild()->getOpCodeValue() == TR::instanceof) {
      if (!TR::TreeEvaluator::instanceOfOrCheckCastNeedSuperTest(node->getFirstChild(), self()) &&
          TR::TreeEvaluator::instanceOfOrCheckCastNeedEqualityTest(node->getFirstChild(), self()))
        return self()->getNumberOfGlobalGPRs() - 4; // ebp plus three other regs if vft masking is enabled
      else
        return 0;
    }

    // All other conditional branches, we usually need one reg for the compare and possibly one for the vmthread
    //return getNumberOfGlobalGPRs() - 1 - (node->isVMThreadRequired()? 1 : 0);
    // vmThread required might be set on a node after GRA has run
    return self()->getNumberOfGlobalGPRs() - 2;
  }

  return INT_MAX;
}
TR::Register *OMR::CodeGenerator::evaluate(TR::Node * node) {
  TR::Register *reg;

  bool trace = self()->comp()->getOptions()->getTraceCGOption(TR_TraceCGEvaluation);

  TR::ILOpCodes opcode = node->getOpCodeValue();

  TR_ASSERT(!self()->comp()->getOption(TR_EnableParanoidRefCountChecks) ||
            node->getOpCode().isTreeTop() || node->getReferenceCount() > 0,
            "OMR::CodeGenerator::evaluate invoked for nontreetop node [%s] with count == 0",
            node->getName(self()->comp()->getDebug()));

  if (opcode != TR::BBStart && node->getRegister()) {
    reg = node->getRegister();
    if (trace) {
      self()->getDebug()->printNodeEvaluation(node, ": ", reg);
    }
  } else {
    if (trace) {
      self()->getDebug()->printNodeEvaluation(node);
      _indentation += 2;
    }

    // Evaluation of a TR IL tree can be performed by many functions:
    //
    // 1) evaluate(...)
    // 2) populateMemoryReference(...)
    // 3) populateAddTree(...)
    // ...
    //
    // However all of these functions can be categorized into two classes:
    //
    // A) functions which completely evaluate their subtree.
    // B) functions which partially evaluate their subtree.
    //
    // Because functions of class A and class B can be used interchangeably to
    // perform a recursive descent of a TR IL tree, and because A or B functions
    // can perform a destructive evaluation of their subtree, a bug can occur where
    // the results of a partial evaluation are destructively overwritten before
    // being completely evaluated.
    //
    // Ex: the motivating case is the following evaluation pattern:
    //
    //   node_A evaluate
    //   node_B populateMemoryReference
    //   node_C evaluate
    //
    // where
    //
    // a) there is a common node between the subtrees of node_B and node_C.
    // b) calling populateMemoryReference on node_B reduces the reference count of one
    //    of the base or index nodes to 1, creating the "opportunity" for a destructive
    //    evaluation.
    //
    // The following chain of events occurs:
    //
    // 1) evaluate is called on node_A. This evaluator can produce instructions of RX form,
    //    and chooses to do so for this node.
    //
    // 2) populateMemoryReference is called on node_B, and evaluates the subtree, returning
    //    a TR_MemoryReference to node_A's evaluator. This memory reference has not been
    //    dereferenced yet, and the base (and optionally index) nodes may have registers
    //    assigned to them.
    //
    // 3) evaluate is called on node_C, which chooses to destructively evaluate the commoned
    //    base node. The memory reference's base register now contains a garbage value.
    //
    // 4) control passes to node_A's evaluator, which emits an RX instruction using node_B's
    //    memory reference and node_C's register.
    //
    // In the past, the fix for this was to switch the order of evaluation: call evaluate
    // on node_C and then call populateMemoryReference on node_B. This fixes this scenario, but
    // the capability of another tree and evaluation pattern to create this bug still exists.
    //
    // As well, more insidious trees exist:
    //
    //   ificmpeq
    //     iiload
    //       i2l
    //         x
    //     iiload
    //       ishl
    //         ==> i2l
    //         4
    //
    // The evaluation pattern here could be:
    //
    //   evaluate
    //   populateMemoryReference
    //   evaluate
    //   populateMemoryReference
    //   evaluate
    //
    // If the commoned node's reference count is 2 coming into ificmpeq's evaluator, then
    // the second sub-evaluate call could be destructive to the first populateMemoryReference.
    //
    // Even worse, if either subtree could be destructive, then there would be no correct order to
    // perform the function calls:
    //
    //   ificmpeq
    //     iiload
    //       ishl
    //         ==> x
    //         7
    //     iiload
    //       ishl
    //         ==> x
    //         4
    //
    // Generally, two conditions must be true for this bug to be possible:
    //
    // 1) the following two classes of recursive descent functions must exist:
    //    A) functions which completely evaluate their subtree.
    //    B) functions which partially evaluate their subtree.
    //
    // 2) destructive evaluation by either class of function must be possible.
    //
    // This code implements changes to eliminate the second condition for this bug by performing
    // the following check and fixup:
    //
    // If in a function which partially evaluates its subtree, note all non-restricted nodes that
    // have a reference count > 1. If any of those node's reference counts reach 1, then artificially
    // inflate those reference counts by 1 for the lifetime of the parent evaluation.
    //
    int32_t topOfNodeStackBeforeEvaluation = _stackOfArtificiallyInflatedNodes.topIndex();

    // Memory references are not like registers, and should not be allowed to escape their evaluator.
    // Explicitly note memory references that are not loaded into registers and automatically call
    // stopUsingMemRefRegister on all memory references that have "escaped".
    //
    // Only the s390 memory references are tracked in this way.
    //
    int32_t topOfMemRefStackBeforeEvaluation = _stackOfMemoryReferencesCreatedDuringEvaluation.topIndex();

    reg = _nodeToInstrEvaluators[opcode](node, self());

    if (self()->comp()->getOptions()->getTraceCGOption(TR_TraceCGEvaluation)) {
      self()->getDebug()->printNodeEvaluation(node, "<- ", reg, false);
      _indentation -= 2;
    }

    if (self()->comp()->getOption(TR_TraceRegisterPressureDetails)) {
      traceMsg(self()->comp(), "   evaluated %s", self()->getDebug()->getName(node));
      self()->getDebug()->dumpLiveRegisters();
      traceMsg(self()->comp(), "\n");
    }

    // Pop off and decrement tracked nodes
    //
    while (_stackOfArtificiallyInflatedNodes.topIndex() > topOfNodeStackBeforeEvaluation) {
      TR::Node * artificiallyInflatedNode = _stackOfArtificiallyInflatedNodes.pop();

      if (artificiallyInflatedNode->getReferenceCount() == 1) {
        // When inflating reference counts, two cases exist:
        //
        // 1) N's ref count reaches 1 in a populate* call, which is then inc'ed to 2.
        //
        // 1a) N is never evaluated, so the ref count never goes down to 1. (node was not commoned in another subtree)
        //
        //     - no tree difference should be seen in this case.
        //
        // 1b) N is evaluated, so the ref count then goes down to 1. (node was commoned in another subtree)
        //
        //     - register shuffling _could_ be seen in this case.
        //     - but a bug might have been avoided: partial and complete evaluation of a commoned node occurred.
        //
        if (self()->comp()->getOption(TR_TraceCG)) {
          self()->comp()->getDebug()->trace(" _stackOfArtificiallyInflatedNodes.pop(): node %p part of commoned case, might have avoided a bug!\n", artificiallyInflatedNode);
        }
      }

      self()->decReferenceCount(artificiallyInflatedNode);

#ifdef J9_PROJECT_SPECIFIC
#if defined(TR_TARGET_S390)
      if (artificiallyInflatedNode->getOpaquePseudoRegister()) {
        TR_OpaquePseudoRegister *reg = artificiallyInflatedNode->getOpaquePseudoRegister();
        TR_StorageReference *ref = reg->getStorageReference();
        self()->processUnusedStorageRef(ref);
      }
#endif
#endif

      if (self()->comp()->getOption(TR_TraceCG)) {
        self()->comp()->getDebug()->trace(" _stackOfArtificiallyInflatedNodes.pop() %p, decReferenceCount(...) called. reg=%s\n",
                                          artificiallyInflatedNode,
                                          artificiallyInflatedNode->getRegister() ? artificiallyInflatedNode->getRegister()->getRegisterName(self()->comp()) : "null");
      }
    }

#if defined(TR_TARGET_S390)
    self()->StopUsingEscapedMemRefsRegisters(topOfMemRefStackBeforeEvaluation);
#endif

    bool checkRefCount = (node->getReferenceCount() <= 1 ||
                          (reg && reg == node->getRegister()));
    // for anchor mode, if node is an indirect store, it can have
    // ref count <= 2
    // but for compressedRefs, the indirect store must be an address
    if (self()->comp()->useAnchors()) {
      if (((node->getOpCode().isStoreIndirect() &&
            (self()->comp()->useCompressedPointers() &&
             (node->getSymbolReference()->getSymbol()->getDataType() == TR::Address))) ||
           opcode == TR::wrtbari) &&
          node->getReferenceCount() <= 2 &&
          !checkRefCount)
        checkRefCount = true;
    }

    TR_ASSERT(checkRefCount,
              "evaluate: the node's register wasn't set (node [%s])",
              node->getName(self()->comp()->getDebug()));
  }

  return reg;
}
void TR::DeadTreesElimination::prePerformOnBlocks() {
  _cannotBeEliminated = false;
  _delayedRegStores = false;

  _targetTrees.deleteAll();

  // Walk through all the blocks to remove trivial dead trees of the form
  //   treetop
  //     => node
  // The problem with these trees is in the scenario where the earlier use
  // of 'node' is also dead. However, our analysis won't find that because
  // the reference count is > 1.
  vcount_t visitCount = comp()->incOrResetVisitCount();
  for (TR::TreeTop *tt = comp()->getStartTree(); tt != 0; tt = tt->getNextTreeTop()) {
    bool removed = false;

    TR::Node *node = tt->getNode();
    if (node->getOpCodeValue() == TR::treetop &&
        node->getFirstChild()->getVisitCount() == visitCount &&
        performTransformation(comp(), "%sRemove trivial dead tree: %p\n", optDetailString(), node)) {
      TR::TransformUtil::removeTree(comp(), tt);
      removed = true;
    } else {
      if (node->getOpCode().isCheck() &&
          node->getFirstChild()->getOpCode().isCall() &&
          node->getFirstChild()->getReferenceCount() == 1 &&
          node->getFirstChild()->getSymbolReference()->getSymbol()->isResolvedMethod() &&
          node->getFirstChild()->getSymbolReference()->getSymbol()->castToResolvedMethodSymbol()->isSideEffectFree() &&
          performTransformation(comp(), "%sRemove dead check of side-effect free call: %p\n", optDetailString(), node)) {
        TR::TransformUtil::removeTree(comp(), tt);
        removed = true;
      }
    }

    if (removed &&
        tt->getNextTreeTop()->getNode()->getOpCodeValue() == TR::Goto &&
        tt->getPrevTreeTop()->getNode()->getOpCodeValue() == TR::BBStart &&
        !tt->getPrevTreeTop()->getNode()->getBlock()->isExtensionOfPreviousBlock()) {
      requestOpt(OMR::redundantGotoElimination, tt->getEnclosingBlock());
    }

    if (node->getVisitCount() >= visitCount)
      continue;
    TR::TransformUtil::recursivelySetNodeVisitCount(tt->getNode(), visitCount);
  }

  // If the last use of an iRegLoad has been removed, then remove the node from
  // the BBStart and remove the corresponding dependency node from each of the block's
  // predecessors.
  //
  while (1) {
    bool glRegDepRemoved = false;

    for (TR::Block * b = comp()->getStartBlock(); b; b = b->getNextBlock()) {
      TR::TreeTop * startTT = b->getEntry();
      TR::Node * startNode = startTT->getNode();
      if (startNode->getNumChildren() > 0 && !debug("disableEliminationOfGlRegDeps")) {
        TR::Node * glRegDeps = startNode->getFirstChild();
        TR_ASSERT(glRegDeps->getOpCodeValue() == TR::GlRegDeps, "expected TR::GlRegDeps");

        for (int32_t i = glRegDeps->getNumChildren() - 1; i >= 0; --i) {
          TR::Node * dep = glRegDeps->getChild(i);

          if (dep->getReferenceCount() == 1 &&
              (!dep->getOpCode().isFloatingPoint() || cg()->getSupportsJavaFloatSemantics()) &&
              performTransformation(comp(), "%sRemove GlRegDep : %p\n", optDetailString(), glRegDeps->getChild(i))) {
            glRegDeps->removeChild(i);
            glRegDepRemoved = true;
            TR_GlobalRegisterNumber registerNum = dep->getGlobalRegisterNumber();
            for (auto e = b->getPredecessors().begin(); e != b->getPredecessors().end(); ++e) {
              TR::Block * pred = toBlock((*e)->getFrom());
              if (pred == comp()->getFlowGraph()->getStart())
                continue;

              TR::Node * parent = pred->getLastRealTreeTop()->getNode();
              if (parent->getOpCode().isJumpWithMultipleTargets() && parent->getOpCode().hasBranchChildren()) {
                for (int32_t j = parent->getCaseIndexUpperBound() - 1; j > 0; --j) {
                  TR::Node * caseNode = parent->getChild(j);
                  TR_ASSERT(caseNode->getOpCode().isCase() || caseNode->getOpCodeValue() == TR::branch,
                            "having problems navigating a switch");
                  if (caseNode->getBranchDestination() == startTT &&
                      caseNode->getNumChildren() > 0 &&
                      0) // can't do this now that all glRegDeps are hung off the default branch
                    removeGlRegDep(caseNode, registerNum, pred, this);
                }
              } else if (!parent->getOpCode().isReturn() &&
                         parent->getOpCodeValue() != TR::igoto &&
                         !(parent->getOpCode().isJumpWithMultipleTargets() && parent->getOpCode().hasBranchChildren()) &&
                         !(parent->getOpCodeValue() == TR::treetop &&
                           parent->getFirstChild()->getOpCode().isCall() &&
                           parent->getFirstChild()->getOpCode().isIndirect())) {
                if (pred->getNextBlock() == b)
                  parent = pred->getExit()->getNode();
                removeGlRegDep(parent, registerNum, pred, this);
              }
            }
          }
        }

        if (glRegDeps->getNumChildren() == 0)
          startNode->removeChild(0);
      }
    }

    if (!glRegDepRemoved)
      break;
  }
}
int32_t TR::DeadTreesElimination::process(TR::TreeTop *startTree, TR::TreeTop *endTree) {
  TR::StackMemoryRegion stackRegion(*comp()->trMemory());

  LongestPathMap longestPaths(std::less<TR::Node*>(), stackRegion);

  typedef TR::typed_allocator<CRAnchor, TR::Region&> CRAnchorAlloc;
  typedef TR::forward_list<CRAnchor, CRAnchorAlloc> CRAnchorList;
  CRAnchorList anchors(stackRegion);

  vcount_t visitCount = comp()->incOrResetVisitCount();
  TR::TreeTop *treeTop;
  for (treeTop = startTree; (treeTop != endTree); treeTop = treeTop->getNextTreeTop())
    treeTop->getNode()->initializeFutureUseCounts(visitCount);

  TR::Block *block = NULL;
  bool delayedRegStoresBeforeThisPass = _delayedRegStores;

  // Update visitCount as they are used in this optimization and need to be
  // correct at all intermediate stages
  //
  visitCount = comp()->incOrResetVisitCount();
  for (TR::TreeTopIterator iter(startTree, comp()); iter != endTree; ++iter) {
    TR::Node *node = iter.currentTree()->getNode();

    if (node->getOpCodeValue() == TR::BBStart) {
      block = node->getBlock();
      if (!block->isExtensionOfPreviousBlock())
        longestPaths.clear();
    }

    int vcountLimit = MAX_VCOUNT - 3;
    if (comp()->getVisitCount() > vcountLimit) {
      dumpOptDetails(comp(), "%sVisit count %d exceeds limit %d; stopping\n",
                     optDetailString(), comp()->getVisitCount(), vcountLimit);
      return 0;
    }

    if ((node->getOpCodeValue() != TR::treetop) &&
        (!node->getOpCode().isAnchor() || (node->getFirstChild()->getReferenceCount() != 1)) &&
        (!node->getOpCode().isStoreReg() || (node->getFirstChild()->getReferenceCount() != 1)) &&
        (delayedRegStoresBeforeThisPass ||
         (iter.currentTree() == block->getLastRealTreeTop()) ||
         !node->getOpCode().isStoreReg() ||
         (node->getVisitCount() == visitCount))) {
      if (node->getOpCode().isAnchor() && node->getFirstChild()->getOpCode().isLoadIndirect())
        anchors.push_front(CRAnchor(iter.currentTree(), block));

      TR::TransformUtil::recursivelySetNodeVisitCount(node, visitCount);
      continue;
    }

    if (node->getOpCode().isStoreReg())
      _delayedRegStores = true;

    TR::Node *child = node->getFirstChild();
    if (child->getOpCodeValue() == TR::PassThrough) {
      TR::Node *newChild = child->getFirstChild();
      node->setAndIncChild(0, newChild);
      newChild->incFutureUseCount();
      if (child->getReferenceCount() <= 1)
        optimizer()->prepareForNodeRemoval(child);
      child->recursivelyDecReferenceCount();
      recursivelyDecFutureUseCount(child);
      child = newChild;
    }

    bool treeTopCanBeEliminated = false;

    // If the treetop child has been seen before then it must be anchored
    // somewhere above already; so we don't need the treetop to be anchoring
    // this node (as the computation is already done at the first reference to
    // the node).
    //
    if (visitCount == child->getVisitCount()) {
      treeTopCanBeEliminated = true;
    } else {
      TR::ILOpCode &childOpCode = child->getOpCode();
      TR::ILOpCodes opCodeValue = childOpCode.getOpCodeValue();
      bool seenConditionalBranch = false;

      bool callWithNoSideEffects = child->getOpCode().isCall() &&
                                   child->getSymbolReference()->getSymbol()->isResolvedMethod() &&
                                   child->getSymbolReference()->getSymbol()->castToResolvedMethodSymbol()->isSideEffectFree();

      if (callWithNoSideEffects) {
        treeTopCanBeEliminated = true;
      } else if (!((childOpCode.isCall() && !callWithNoSideEffects) ||
                   childOpCode.isStore() ||
                   ((opCodeValue == TR::New || opCodeValue == TR::anewarray || opCodeValue == TR::newarray) &&
                    child->getReferenceCount() > 1) ||
                   opCodeValue == TR::multianewarray ||
                   opCodeValue == TR::MergeNew ||
                   opCodeValue == TR::checkcast ||
                   opCodeValue == TR::Prefetch ||
                   opCodeValue == TR::iu2l ||
                   ((childOpCode.isDiv() || childOpCode.isRem()) && child->getNumChildren() == 3))) {
        // Perform the rather complex check to see whether it's safe
        // to disconnect the child node from the treetop
        //
        bool safeToReplaceNode = false;
        if (child->getReferenceCount() == 1) {
          safeToReplaceNode = true;
#ifdef J9_PROJECT_SPECIFIC
          if (child->getOpCode().isPackedExponentiation()) {
            // pdexp has a possible message side effect in truncating or no significant digits left cases
            safeToReplaceNode = false;
          }
#endif
          if (opCodeValue == TR::loadaddr)
            treeTopCanBeEliminated = true;
        } else if (!_cannotBeEliminated) {
          safeToReplaceNode = isSafeToReplaceNode(child, iter.currentTree(), &seenConditionalBranch,
                                                  visitCount, comp(), &_targetTrees, _cannotBeEliminated,
                                                  longestPaths);
        }

        if (safeToReplaceNode) {
          if (childOpCode.hasSymbolReference()) {
            TR::SymbolReference *symRef = child->getSymbolReference();

            if (symRef->getSymbol()->isAuto() || symRef->getSymbol()->isParm())
              treeTopCanBeEliminated = true;
            else {
              if (childOpCode.isLoad() ||
                  (opCodeValue == TR::loadaddr) ||
                  (opCodeValue == TR::instanceof) ||
                  (((opCodeValue == TR::New) ||
                    (opCodeValue == TR::anewarray || opCodeValue == TR::newarray)) &&
                   ///child->getFirstChild()->isNonNegative()))
                   child->markedAllocationCanBeRemoved()))
                  // opCodeValue == TR::multianewarray ||
                  // opCodeValue == TR::MergeNew)
                treeTopCanBeEliminated = true;
            }
          } else
            treeTopCanBeEliminated = true;
        }
      }

      // Fix for the case when a float to non-float conversion node swings
      // down past a branch on IA32; this would cause a FP value to be commoned
      // across a branch where there was none originally; this causes problems
      // as a value is left on the stack.
      //
      if (treeTopCanBeEliminated && seenConditionalBranch) {
        if (!cg()->getSupportsJavaFloatSemantics()) {
          if (child->getOpCode().isConversion() || child->getOpCode().isBooleanCompare()) {
            if (child->getFirstChild()->getOpCode().isFloatingPoint() &&
                !child->getOpCode().isFloatingPoint())
              treeTopCanBeEliminated = false;
          }
        }
      }

      if (treeTopCanBeEliminated) {
        TR::NodeChecklist visited(comp());
        bool containsFloatingPoint = false;
        for (int32_t i = 0; i < child->getNumChildren(); ++i) {
          // Anchor nodes with reference count > 1
          //
          bool highGlobalIndex = false;
          if (fixUpTree(child->getChild(i), iter.currentTree(), visited, highGlobalIndex, self(), visitCount))
            containsFloatingPoint = true;
          if (highGlobalIndex) {
            dumpOptDetails(comp(), "%sGlobal index limit exceeded; stopping\n", optDetailString());
            return 0;
          }
        }

        if (seenConditionalBranch && containsFloatingPoint) {
          if (!cg()->getSupportsJavaFloatSemantics())
            treeTopCanBeEliminated = false;
        }
      }
    }

    // Update visitCount as they are used in this optimization and need to be
    // correct at all intermediate stages
    //
    if (!treeTopCanBeEliminated)
      TR::TransformUtil::recursivelySetNodeVisitCount(node, visitCount);

    if (treeTopCanBeEliminated) {
      TR::TreeTop *prevTree = iter.currentTree()->getPrevTreeTop();
      TR::TreeTop *nextTree = iter.currentTree()->getNextTreeTop();

      if (!node->getOpCode().isStoreReg() || (node->getFirstChild()->getReferenceCount() == 1)) {
        // Actually going to remove the treetop now
        //
        if (performTransformation(comp(), "%sRemove tree : [" POINTER_PRINTF_FORMAT "] ([" POINTER_PRINTF_FORMAT "] = %s)\n",
                                  optDetailString(), node, node->getFirstChild(), node->getFirstChild()->getOpCode().getName())) {
          prevTree->join(nextTree);
          optimizer()->prepareForNodeRemoval(node);
          ///child->recursivelyDecReferenceCount();
          node->recursivelyDecReferenceCount();
          recursivelyDecFutureUseCount(child);
          iter.jumpTo(prevTree);
          if (child->getReferenceCount() == 1)
            requestOpt(OMR::treeSimplification, true, block);

          if (nextTree->getNode()->getOpCodeValue() == TR::Goto &&
              prevTree->getNode()->getOpCodeValue() == TR::BBStart &&
              !prevTree->getNode()->getBlock()->isExtensionOfPreviousBlock()) {
            requestOpt(OMR::redundantGotoElimination, prevTree->getNode()->getBlock());
          }
        }
      } else {
        if (performTransformation(comp(), "%sMove tree : [" POINTER_PRINTF_FORMAT "]([" POINTER_PRINTF_FORMAT "] = %s) to end of block\n",
                                  optDetailString(), node, node->getFirstChild(), node->getFirstChild()->getOpCode().getName())) {
          prevTree->join(nextTree);
          node->setVisitCount(visitCount);

          TR::TreeTop *lastTree = findLastTreetop(block, prevTree);
          TR::TreeTop *prevLastTree = lastTree->getPrevTreeTop();

          TR::TreeTop *cursorTreeTop = nextTree;
          while (cursorTreeTop != lastTree) {
            if (cursorTreeTop->getNode()->getOpCode().isStoreReg() &&
                (cursorTreeTop->getNode()->getGlobalRegisterNumber() ==
                 iter.currentTree()->getNode()->getGlobalRegisterNumber())) {
              lastTree = cursorTreeTop;
              prevLastTree = lastTree->getPrevTreeTop();
              break;
            }

            cursorTreeTop = cursorTreeTop->getNextTreeTop();
          }

          if (lastTree->getNode()->getOpCodeValue() == TR::BBStart) {
            prevLastTree = lastTree;
            lastTree = block->getExit();
          }

          TR::Node *lastNode = lastTree->getNode();
          TR::Node *prevLastNode = prevLastTree->getNode();

          if (lastNode->getOpCode().isIf() && !lastNode->getOpCode().isCompBranchOnly() &&
              prevLastNode->getOpCode().isStoreReg() &&
              ((prevLastNode->getFirstChild() == lastNode->getFirstChild()) ||
               (prevLastNode->getFirstChild() == lastNode->getSecondChild()))) {
            lastTree = prevLastTree;
            prevLastTree = lastTree->getPrevTreeTop();
          }

          prevLastTree->join(iter.currentTree());
          iter.currentTree()->join(lastTree);

          iter.jumpTo(prevTree);
          requestOpt(OMR::treeSimplification, true, block);
        }
      }
    }
  }

  for (auto it = anchors.begin(); it != anchors.end(); ++it) {
    TR::Node *anchor = it->tree->getNode();
    TR::Node *load = anchor->getChild(0);
    if (load->getReferenceCount() > 1)
      continue;

    // We can eliminate the indirect load immediately, but for the moment the
    // subtree providing the base object has to be anchored.

    TR::Node *heapBase = anchor->getChild(1);

    TR::Node::recreate(anchor, TR::treetop);
    anchor->setAndIncChild(0, load->getChild(0));
    anchor->setChild(1, NULL);
    anchor->setNumChildren(1);

    if (!heapBase->getOpCode().isLoadConst()) {
      it->tree->insertAfter(TR::TreeTop::create(comp(),
                                                TR::Node::create(heapBase, TR::treetop, 1, heapBase)));
    }

    load->recursivelyDecReferenceCount();
    heapBase->recursivelyDecReferenceCount();

    // A later pass of dead trees can likely move (or even remove) the base
    // object expression.
    requestOpt(OMR::deadTreesElimination, true, it->block);
  }

  return 1; // actual cost
}
// Returns true if there is any constraint to the move
bool TR_LocalLiveRangeReduction::isAnySymInDefinedOrUsedBy(TR_TreeRefInfo *currentTreeRefInfo, TR::Node *currentNode, TR_TreeRefInfo *movingTreeRefInfo) {
  TR::Node *movingNode = movingTreeRefInfo->getTreeTop()->getNode();
  // ignore anchors
  //
  if (movingNode->getOpCode().isAnchor())
    movingNode = movingNode->getFirstChild();

  TR::ILOpCode &opCode = currentNode->getOpCode();

  ////if ((opCode.getOpCodeValue() == TR::monent) || (opCode.getOpCodeValue() == TR::monexit))
  if (nodeMaybeMonitor(currentNode)) {
    if (trace())
      traceMsg(comp(), "cannot move %p beyond monitor %p\n", movingNode, currentNode);
    return true;
  }

  // Don't move gc points or things across gc points
  //
  if (movingNode->canGCandReturn() || currentNode->canGCandReturn()) {
    if (trace())
      traceMsg(comp(), "cannot move gc points %p past %p\n", movingNode, currentNode);
    return true;
  }

  // Don't move checks or calls at all
  //
  if (containsCallOrCheck(movingTreeRefInfo, movingNode)) {
    if (trace())
      traceMsg(comp(), "cannot move check or call %s\n", getDebug()->getName(movingNode));
    return true;
  }

  // Don't move object header store past a GC point
  //
  if ((currentNode->getOpCode().isWrtBar() || currentNode->canCauseGC()) && mayBeObjectHeaderStore(movingNode, fe())) {
    if (trace())
      traceMsg(comp(), "cannot move possible object header store %s past GC point %s\n",
               getDebug()->getName(movingNode), getDebug()->getName(currentNode));
    return true;
  }

  if (TR::Compiler->target.cpu.isPower() && opCode.getOpCodeValue() == TR::allocationFence) {
    // Can't move allocations past flushes
    if (movingNode->getOpCodeValue() == TR::treetop &&
        movingNode->getFirstChild()->getOpCode().isNew() &&
        (currentNode->getAllocation() == NULL ||
         currentNode->getAllocation() == movingNode->getFirstChild())) {
      if (trace()) {
        traceMsg(comp(), "cannot move %p beyond flush %p - ", movingNode, currentNode);
        if (currentNode->getAllocation() == NULL)
          traceMsg(comp(), "(flush with null allocation)\n");
        else
          traceMsg(comp(), "(flush for allocation %p)\n", currentNode->getAllocation());
      }
      return true;
    }

    // Can't move certain stores past flushes
    // Exclude all indirect stores, they may be for stack allocs, in which case the flush is needed at least as a scheduling barrier
    // Direct stores to autos and parms are the only safe candidates
    if (movingNode->getOpCode().isStoreIndirect() ||
        (movingNode->getOpCode().isStoreDirect() && !movingNode->getSymbol()->isParm() && !movingNode->getSymbol()->isAuto())) {
      if (trace())
        traceMsg(comp(), "cannot move %p beyond flush %p - (flush for possible stack alloc)", movingNode, currentNode);
      return true;
    }
  }

  for (int32_t i = 0; i < currentNode->getNumChildren(); i++) {
    TR::Node *child = currentNode->getChild(i);

    // Any node that has side effects (like calls and newarray) cannot be evaluated in the middle of the tree.
    if (movingTreeRefInfo->getFirstRefNodesList()->find(child)) {
      // for calls and unresolved symbols that are not under a check
      if (child->exceptionsRaised() ||
          (child->getOpCode().hasSymbolReference() && child->getSymbolReference()->isUnresolved())) {
        if (trace())
          traceMsg(comp(), "cannot move %p beyond %p - cannot change evaluation point of %p\n ",
                   movingNode, currentTreeRefInfo->getTreeTop()->getNode(), child);
        return true;
      } else if (movingNode->getOpCode().isStore()) {
        TR::SymbolReference *stSymRef = movingNode->getSymbolReference();
        int32_t stSymRefNum = stSymRef->getReferenceNumber();
        //TR::SymbolReference *stSymRef = movingNode->getSymbolReference();
        int32_t numHelperSymbols = comp()->getSymRefTab()->getNumHelperSymbols();
        if ((comp()->getSymRefTab()->isNonHelper(stSymRefNum, TR::SymbolReferenceTable::vftSymbol)) ||
            (comp()->getSymRefTab()->isNonHelper(stSymRefNum, TR::SymbolReferenceTable::contiguousArraySizeSymbol)) ||
            (comp()->getSymRefTab()->isNonHelper(stSymRefNum, TR::SymbolReferenceTable::discontiguousArraySizeSymbol)) ||
            (stSymRef == comp()->getSymRefTab()->findHeaderFlagsSymbolRef()) ||
            (stSymRef->getSymbol() == comp()->getSymRefTab()->findGenericIntShadowSymbol()))
          return true;
      } else if (movingNode->getOpCode().isResolveOrNullCheck()) {
        if (trace())
          traceMsg(comp(), "cannot move %p beyond %p - node %p under ResolveOrNullCheck",
                   movingNode, currentTreeRefInfo->getTreeTop()->getNode(), currentNode);
        return true;
      } else if (TR::Compiler->target.is64Bit() &&
                 movingNode->getOpCode().isBndCheck() &&
                 ((opCode.getOpCodeValue() == TR::i2l) || (opCode.getOpCodeValue() == TR::iu2l)) &&
                 !child->isNonNegative()) {
        if (trace())
          traceMsg(comp(), "cannot move %p beyond %p - changing the eval point of %p will cause an extra cg instruction ",
                   movingNode, currentTreeRefInfo->getTreeTop()->getNode(), currentNode);
        return true;
      }
    }

    // don't recurse over nodes that are not the first reference
    if (child->getReferenceCount() == 1 || currentTreeRefInfo->getFirstRefNodesList()->find(child))
      if (isAnySymInDefinedOrUsedBy(currentTreeRefInfo, child, movingTreeRefInfo))
        return true;
  }

  return false;
}
void TR_LocalLiveRangeReduction::populatePotentialDeps(TR_TreeRefInfo *treeRefInfo, TR::Node *node) {
  TR::ILOpCode &opCode = node->getOpCode();
  if (node->getOpCode().hasSymbolReference()) {
    TR::SymbolReference *symRef = node->getSymbolReference();
    int32_t symRefNum = symRef->getReferenceNumber();

    // set defSym - all symbols that might be written
    if (opCode.isCall() || opCode.isResolveCheck() || opCode.isStore() || node->mightHaveVolatileSymbolReference()) {
      bool isCallDirect = false;
      if (node->getOpCode().isCallDirect())
        isCallDirect = true;

      if (!symRef->getUseDefAliases(isCallDirect).isZero(comp())) {
        TR::SparseBitVector useDefAliases(comp()->allocator());
        symRef->getUseDefAliases(isCallDirect).getAliases(useDefAliases);
        TR::SparseBitVector::Cursor aliasCursor(useDefAliases);
        for (aliasCursor.SetToFirstOne(); aliasCursor.Valid(); aliasCursor.SetToNextOne()) {
          int32_t nextAlias = aliasCursor;
          treeRefInfo->getDefSym()->set(nextAlias);
        }
      }

      if (opCode.isStore())
        treeRefInfo->getDefSym()->set(symRefNum);
    }

    // set useSym - all symbols that are used
    if (opCode.canRaiseException()) {
      TR::SparseBitVector useAliases(comp()->allocator());
      symRef->getUseonlyAliases().getAliases(useAliases);
      {
        TR::SparseBitVector::Cursor aliasesCursor(useAliases);
        for (aliasesCursor.SetToFirstOne(); aliasesCursor.Valid(); aliasesCursor.SetToNextOne()) {
          int32_t nextAlias = aliasesCursor;
          treeRefInfo->getUseSym()->set(nextAlias);
        }
      }
    }

    if (opCode.isLoadVar() || (opCode.getOpCodeValue() == TR::loadaddr)) {
      treeRefInfo->getUseSym()->set(symRefNum);
    }
  }

  for (int32_t i = 0; i < node->getNumChildren(); i++) {
    TR::Node *child = node->getChild(i);

    // don't recurse over references (nodes which are not the first reference)
    //
    if (child->getReferenceCount() == 1 || treeRefInfo->getFirstRefNodesList()->find(child))
      populatePotentialDeps(treeRefInfo, child);
  }
  return;
}
// Build arguments for system linkage dispatch.
//
int32_t TR::AMD64SystemLinkage::buildArgs(TR::Node *callNode, TR::RegisterDependencyConditions *deps) {
  TR::SymbolReference *methodSymRef = callNode->getSymbolReference();
  TR::MethodSymbol *methodSymbol = methodSymRef->getSymbol()->castToMethodSymbol();
  TR::RealRegister::RegNum noReg = TR::RealRegister::NoReg;
  TR::RealRegister *espReal = machine()->getX86RealRegister(TR::RealRegister::esp);
  int32_t firstNodeArgument = callNode->getFirstArgumentIndex();
  int32_t lastNodeArgument = callNode->getNumChildren() - 1;
  int32_t offset = 0;
  int32_t sizeOfOutGoingArgs = 0;
  uint16_t numIntArgs = 0, numFloatArgs = 0;
  int32_t first, last, direction;
  int32_t numCopiedRegs = 0;
  TR::Register *copiedRegs[TR::X86LinkageProperties::MaxArgumentRegisters];

  if (getProperties().passArgsRightToLeft()) {
    first = lastNodeArgument;
    last = firstNodeArgument - 1;
    direction = -1;
  } else {
    first = firstNodeArgument;
    last = lastNodeArgument + 1;
    direction = 1;
  }

  // If the dispatch is indirect we must add the VFT register to the preconditions
  // so that it gets register assigned with the other preconditions to the call.
  //
  if (callNode->getOpCode().isIndirect()) {
    TR::Node *vftChild = callNode->getFirstChild();
    TR_ASSERT(vftChild->getRegister(), "expecting VFT child to be evaluated");
    TR::RealRegister::RegNum scratchRegIndex = getProperties().getIntegerScratchRegister(1);
    deps->addPreCondition(vftChild->getRegister(), scratchRegIndex, cg());
  }

  int32_t i;
  for (i = first; i != last; i += direction) {
    TR::parmLayoutResult layoutResult;
    TR::RealRegister::RegNum rregIndex = noReg;
    TR::Node *child = callNode->getChild(i);

    layoutParm(child, sizeOfOutGoingArgs, numIntArgs, numFloatArgs, layoutResult);

    if (layoutResult.abstract & TR::parmLayoutResult::IN_LINKAGE_REG_PAIR) {
      // TODO: the AMD64 SysV ABI might put a struct into a pair of linkage registers
      TR_ASSERT(false, "haven't supported linkage_reg_pair yet.\n");
    } else if (layoutResult.abstract & TR::parmLayoutResult::IN_LINKAGE_REG) {
      TR_RegisterKinds regKind = layoutResult.regs[0].regKind;
      uint32_t regIndex = layoutResult.regs[0].regIndex;
      TR_ASSERT(regKind == TR_GPR || regKind == TR_FPR, "linkage registers includes TR_GPR and TR_FPR\n");
      rregIndex = (regKind == TR_FPR) ? getProperties().getFloatArgumentRegister(regIndex) : getProperties().getIntegerArgumentRegister(regIndex);
    } else {
      offset = layoutResult.offset;
    }

    TR::Register *vreg;
    vreg = cg()->evaluate(child);

    bool needsStackOffsetUpdate = false;
    if (rregIndex != noReg) {
      // For NULL JNI reference parameters, it is possible that the NULL value will be evaluated into
      // a different register than the child. In that case it is not necessary to copy the temporary scratch
      // register across the call.
      //
      if ((child->getReferenceCount() > 1) &&
          (vreg == child->getRegister())) {
        TR::Register *argReg = cg()->allocateRegister();
        if (vreg->containsCollectedReference())
          argReg->setContainsCollectedReference();
        generateRegRegInstruction(TR::Linkage::movOpcodes(RegReg, movType(child->getDataType())), child, argReg, vreg, cg());
        vreg = argReg;
        copiedRegs[numCopiedRegs++] = vreg;
      }

      deps->addPreCondition(vreg, rregIndex, cg());
    } else {
      // Ideally, we would like to push rather than move
      generateMemRegInstruction(TR::Linkage::movOpcodes(MemReg, fullRegisterMovType(vreg)),
                                child,
                                generateX86MemoryReference(espReal, offset, cg()),
                                vreg,
                                cg());
    }

    cg()->decReferenceCount(child);
  }

  // Now that we're finished making the preconditions, all the interferences
  // are established and we can kill these regs.
  //
  for (i = 0; i < numCopiedRegs; i++)
    cg()->stopUsingRegister(copiedRegs[i]);

  deps->stopAddingPreConditions();

  return sizeOfOutGoingArgs;
}
TR::Register * TR::AMD64SystemLinkage::buildVolatileAndReturnDependencies( TR::Node *callNode, TR::RegisterDependencyConditions *deps) { if (callNode->getOpCode().isIndirect()) { TR::Node *vftChild = callNode->getFirstChild(); if (vftChild->getRegister() && (vftChild->getReferenceCount() > 1)) { } else { // VFT child dies here; decrement it early so it doesn't interfere with dummy regs. cg()->recursivelyDecReferenceCount(vftChild); } } TR_ASSERT(deps != NULL, "expected register dependencies"); // Figure out which is the return register. // TR::RealRegister::RegNum returnRegIndex; TR_RegisterKinds returnKind; switch (callNode->getDataType()) { case TR::NoType: returnRegIndex = TR::RealRegister::NoReg; returnKind = TR_NoRegister; break; case TR::Int8: case TR::Int16: case TR::Int32: case TR::Int64: case TR::Address: returnRegIndex = getProperties().getIntegerReturnRegister(); returnKind = TR_GPR; break; case TR::Float: case TR::Double: returnRegIndex = getProperties().getFloatReturnRegister(); returnKind = TR_FPR; break; case TR::Aggregate: default: TR_ASSERT(false, "Unrecognized call node data type: #%d", (int)callNode->getDataType()); break; } // Kill all non-preserved int and float regs besides the return register. // int32_t i; TR::RealRegister::RegNum scratchIndex = getProperties().getIntegerScratchRegister(1); for (i=0; i<getProperties().getNumVolatileRegisters(); i++) { TR::RealRegister::RegNum regIndex = getProperties()._volatileRegisters[i]; if (regIndex != returnRegIndex) { TR_RegisterKinds rk = (i < getProperties()._numberOfVolatileGPRegisters) ? TR_GPR : TR_FPR; TR::Register *dummy = cg()->allocateRegister(rk); deps->addPostCondition(dummy, regIndex, cg()); // Note that we don't setPlaceholderReg here. If this volatile reg is also volatile // in the caller's linkage, then that flag doesn't matter much anyway. If it's preserved // in the caller's linkage, then we don't want to set that flag because we want this // use of the register to count as a "real" use, thereby motivating the prologue to // preserve the register. // A scratch register is necessary to call the native without a trampoline. // if (callNode->getOpCode().isIndirect() || (regIndex != scratchIndex)) cg()->stopUsingRegister(dummy); } } #if defined (PYTHON) && 0 // Evict the preserved registers across the call // for (i=0; i<getProperties().getNumberOfPreservedGPRegisters(); i++) { TR::RealRegister::RegNum regIndex = getProperties()._preservedRegisters[i]; TR::Register *dummy = cg()->allocateRegister(TR_GPR); deps->addPostCondition(dummy, regIndex, cg()); // Note that we don't setPlaceholderReg here. If this volatile reg is also volatile // in the caller's linkage, then that flag doesn't matter much anyway. If it's preserved // in the caller's linkage, then we don't want to set that flag because we want this // use of the register to count as a "real" use, thereby motivating the prologue to // preserve the register. // A scratch register is necessary to call the native without a trampoline. // if (callNode->getOpCode().isIndirect() || (regIndex != scratchIndex)) cg()->stopUsingRegister(dummy); } #endif if (callNode->getOpCode().isIndirect()) { TR::Node *vftChild = callNode->getFirstChild(); if (vftChild->getRegister() && (vftChild->getReferenceCount() > 1)) { // VFT child survives the call, so we must include it in the postconditions. 
deps->addPostCondition(vftChild->getRegister(), TR::RealRegister::NoReg, cg()); cg()->recursivelyDecReferenceCount(vftChild); } } // Now that everything is dead, we can allocate the return register without // interference // TR::Register *returnRegister; if (returnRegIndex) { TR_ASSERT(returnKind != TR_NoRegister, "assertion failure"); if (callNode->getDataType() == TR::Address) returnRegister = cg()->allocateCollectedReferenceRegister(); else { returnRegister = cg()->allocateRegister(returnKind); if (callNode->getDataType() == TR::Float) returnRegister->setIsSinglePrecision(); } deps->addPostCondition(returnRegister, returnRegIndex, cg()); } else returnRegister = NULL; // The register dependency conditions are intentionally left open; the caller is responsible // for closing them. A subclass may call this method and then add further register dependencies; // if we closed the dependencies here, the subclass would be unable to add any. return returnRegister; }
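As a reference for the dependency bookkeeping above, this standalone sketch lists the volatile (caller-saved) and preserved (callee-saved) GPRs of the AMD64 SysV linkage and its return registers; these are the registers for which buildVolatileAndReturnDependencies adds dummy post-conditions or the return-value post-condition. The snippet is illustrative only and does not use OMR API.

// Illustrative sketch only (not OMR API): the AMD64 SysV volatile/preserved split
// that motivates the dummy post-conditions above.
#include <cstdio>

int main()
   {
   // Caller-saved (volatile) GPRs: the callee may clobber them, so each one other
   // than the return register is killed across the call with a dummy dependency.
   const char *volatileGPRs[]  = { "rax", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11" };
   // Callee-saved (preserved) GPRs: survive the call and need no dependency.
   const char *preservedGPRs[] = { "rbx", "rbp", "r12", "r13", "r14", "r15" };

   for (const char *r : volatileGPRs)  printf("volatile:  %s\n", r);
   for (const char *r : preservedGPRs) printf("preserved: %s\n", r);

   // All XMM registers are volatile under SysV. rax carries integer/pointer return
   // values and xmm0 carries float/double return values, matching the
   // getIntegerReturnRegister()/getFloatReturnRegister() choices above.
   return 0;
   }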
void TR_ExpressionsSimplification::setSummationReductionCandidates(TR::Node *node, TR::TreeTop *tt) { // Must be a store node // if (node->getOpCodeValue() != TR::istore /* || node->getOpCodeValue() != TR::astore */) { if (trace()) traceMsg(comp(), "Node %p: The opcode is not istore so not a summation reduction candidate\n",node); return; } TR::Node *opNode = node->getFirstChild(); if (opNode->getOpCodeValue() == TR::iadd || opNode->getOpCodeValue() == TR::isub) { TR::Node *firstNode = opNode->getFirstChild(); TR::Node *secondNode = opNode->getSecondChild(); if (firstNode->getOpCode().hasSymbolReference() && node->getSymbolReference() == firstNode->getSymbolReference() && opNode->getReferenceCount() == 1 && firstNode->getReferenceCount() == 1) { // The second node must be loop invariant // if (!_currentRegion->isExprInvariant(secondNode)) { if (trace()) { traceMsg(comp(), "The node %p is not loop invariant\n",secondNode); // This can be the arithmetic series case // only when the node is an induction variable if (secondNode->getNumChildren() == 1 && secondNode->getOpCode().hasSymbolReference()) { TR_InductionVariable *indVar = _currentRegion->findMatchingIV(secondNode->getSymbolReference()); if (indVar) { //printf("Found Candidate of arithmetic series\n" ); } } } return; } _candidateTTs->add(tt); } else if (secondNode->getOpCode().hasSymbolReference() && node->getSymbolReference() == secondNode->getSymbolReference() && opNode->getReferenceCount() == 1 && secondNode->getReferenceCount() == 1 && _currentRegion->isExprInvariant(firstNode)) { _candidateTTs->add(tt); } } else if (opNode->getOpCodeValue() == TR::ixor || opNode->getOpCodeValue() == TR::ineg) { if (opNode->getFirstChild()->getOpCode().hasSymbolReference() && node->getSymbolReference() == opNode->getFirstChild()->getSymbolReference() && opNode->getReferenceCount() == 1 && opNode->getFirstChild()->getReferenceCount() == 1 && (opNode->getOpCodeValue() == TR::ineg || _currentRegion->isExprInvariant(opNode->getSecondChild()))) _candidateTTs->add(tt); else if (opNode->getOpCodeValue() == TR::ixor && opNode->getSecondChild()->getOpCode().hasSymbolReference() && node->getSymbolReference() == opNode->getSecondChild()->getSymbolReference() && opNode->getReferenceCount() == 1 && opNode->getSecondChild()->getReferenceCount() == 1 && _currentRegion->isExprInvariant(opNode->getFirstChild())) _candidateTTs->add(tt); } }
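At the source level, the candidates collected above correspond to loops that repeatedly fold a loop-invariant value into the same variable through iadd, isub, ixor, or ineg. The fragment below is an illustrative example (not taken from the compiler) of the simplest such shape, a summation that later phases can reduce to a single multiply once the trip count is known.

// Illustrative example of a summation-reduction candidate: an istore whose child is
// an iadd of a load of the same symbol (sum) and a loop-invariant operand (k).
int summationLoop(int n, int k)
   {
   int sum = 0;
   for (int i = 0; i < n; ++i)
      sum = sum + k;   // istore(iadd(iload sum, k)) with k loop invariant
   return sum;         // reducible to k * n for n >= 0 once the trip count is known
   }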
TR_ExpressionsSimplification::LoopInfo* TR_ExpressionsSimplification::findLoopInfo(TR_RegionStructure* region) { ListIterator<TR::CFGEdge> exitEdges(&region->getExitEdges()); if (region->getExitEdges().getSize() != 1) { if (trace()) traceMsg(comp(), "A region with more than 1 exit edge can't be handled\n"); return 0; } TR_StructureSubGraphNode* exitNode = toStructureSubGraphNode(exitEdges.getFirst()->getFrom()); if (!exitNode->getStructure()->asBlock()) { if (trace()) traceMsg(comp(), "The exit block can't be found\n"); return 0; } TR::Block *exitBlock = exitNode->getStructure()->asBlock()->getBlock(); TR::Node *lastTreeInExitBlock = exitBlock->getLastRealTreeTop()->getNode(); if (trace()) { traceMsg(comp(), "The exit block is %d\n", exitBlock->getNumber()); traceMsg(comp(), "The branch node is %p\n", lastTreeInExitBlock); } if (!lastTreeInExitBlock->getOpCode().isBranch()) { if (trace()) traceMsg(comp(), "The branch node couldn't be found\n"); return 0; } if (lastTreeInExitBlock->getNumChildren() < 2) { if (trace()) traceMsg(comp(), "The branch node has fewer than 2 children\n"); return 0; } TR::Node *firstChildOfLastTree = lastTreeInExitBlock->getFirstChild(); TR::Node *secondChildOfLastTree = lastTreeInExitBlock->getSecondChild(); if (!firstChildOfLastTree->getOpCode().hasSymbolReference()) { if (trace()) traceMsg(comp(), "The branch node's first child node %p - its opcode does not have a symbol reference\n", firstChildOfLastTree); return 0; } TR::SymbolReference *firstChildSymRef = firstChildOfLastTree->getSymbolReference(); if (trace()) traceMsg(comp(), "Symbol Reference: %p Symbol: %p\n", firstChildSymRef, firstChildSymRef->getSymbol()); // Locate the induction variable that matches with the exit node symbol // TR_InductionVariable *indVar = region->findMatchingIV(firstChildSymRef); if (!indVar) return 0; if (!indVar->getIncr()->asIntConst()) { if (trace()) traceMsg(comp(), "Increment is not a constant\n"); return 0; } int32_t increment = indVar->getIncr()->getLowInt(); _visitCount = comp()->incVisitCount(); bool indVarWrittenAndUsedUnexpectedly = false; if (firstChildOfLastTree->getReferenceCount() > 1) { TR::TreeTop *cursorTreeTopInExitBlock = exitBlock->getEntry(); TR::TreeTop *exitTreeTopInExitBlock = exitBlock->getExit(); bool loadSeen = false; while (cursorTreeTopInExitBlock != exitTreeTopInExitBlock) { TR::Node *cursorNode = cursorTreeTopInExitBlock->getNode(); if (checkForLoad(cursorNode, firstChildOfLastTree)) loadSeen = true; if (!cursorNode->getOpCode().isStore() && (cursorNode->getNumChildren() > 0)) cursorNode = cursorNode->getFirstChild(); if (cursorNode->getOpCode().isStore() && (cursorNode->getSymbolReference() == firstChildSymRef)) { indVarWrittenAndUsedUnexpectedly = true; if ((cursorNode->getFirstChild() == firstChildOfLastTree) || !loadSeen) indVarWrittenAndUsedUnexpectedly = false; else break; } cursorTreeTopInExitBlock = cursorTreeTopInExitBlock->getNextTreeTop(); } } if (indVarWrittenAndUsedUnexpectedly) { return 0; } int32_t lowerBound; int32_t upperBound = 0; TR::Node *bound = 0; bool equals = false; switch(lastTreeInExitBlock->getOpCodeValue()) { case TR::ificmplt: case TR::ificmpgt: equals = true; case TR::ificmple: case TR::ificmpge: if (!(indVar->getEntry() && indVar->getEntry()->asIntConst())) { if (trace()) traceMsg(comp(), "Entry value is not a constant\n"); return 0; } lowerBound = indVar->getEntry()->getLowInt(); if (secondChildOfLastTree->getOpCode().isLoadConst()) { upperBound = secondChildOfLastTree->getInt(); } else if
(secondChildOfLastTree->getOpCode().isLoadVar()) { bound = secondChildOfLastTree; } else { if (trace()) traceMsg(comp(), "Second child is not a const or a load\n"); return 0; } return new (trStackMemory()) LoopInfo(bound, lowerBound, upperBound, increment, equals); default: if (trace()) traceMsg(comp(), "The condition has not been implemented\n"); return 0; } return 0; }
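The LoopInfo assembled above (constant entry value, constant or variable bound, constant increment, and whether the exit comparison admits equality) is what later phases use to derive the loop's trip count. The sketch below is one generic closed-form derivation for counted loops of that shape; it is an illustration under those assumptions, not necessarily the exact formula the optimizer applies, and the inclusiveBound parameter is only loosely analogous to the LoopInfo equals flag.

// Illustrative sketch only: closed-form trip count for a counted loop
// "for (i = lower; i <(=) upper; i += incr)" with a constant increment.
#include <cstdint>

static int64_t tripCount(int32_t lower, int32_t upper, int32_t incr, bool inclusiveBound)
   {
   if (incr == 0)
      return -1;                              // not a counted loop
   int64_t distance = (int64_t)upper - lower;
   if (inclusiveBound)
      distance += (incr > 0) ? 1 : -1;        // "<=" / ">=" runs one extra iteration
   if ((incr > 0 && distance <= 0) || (incr < 0 && distance >= 0))
      return 0;                               // loop body never executes
   // Round away from zero so a partial final step still counts as an iteration.
   return (distance + incr + ((incr > 0) ? -1 : 1)) / incr;
   }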