/**
 * Examines \p parent and, if it is a conversion of a supported type, decides whether the conversion can be flagged
 * as unneeded (via setUnneededConversion).
 *
 * Three situations are handled:
 *   1. The conversion and its child have the same size: the conversion is flagged as unneeded.
 *   2. The sizes differ and the child is a register load which was not excluded: the defs reaching the register load
 *      are looked up through the use-def info and canSkipConversion is asked about the first child of every def. The
 *      conversion is skipped only if every examined def allows it and either all or none of them require a forced
 *      extension. When extension has to be forced, the first child of each def is flagged for sign or zero extension
 *      at source according to getExtensionPreference.
 *   3. Whenever case 2 did not establish that the conversion can be skipped, canSkipConversion is asked about the
 *      child directly and, if it requests a forced extension, the child is flagged for sign or zero extension at
 *      source according to getExtensionPreference.
 *
 * \param parent
 *    The node to examine. Nothing is done unless it is a conversion of a supported type.
 */
void TR_LoadExtensions::flagPreferredLoadExtensions(TR::Node* parent)
   {
   if (isSupportedType(parent) && parent->getOpCode().isConversion())
      {
      TR::Node* child = parent->getFirstChild();

      bool canSkipConversion = false;

      if (isSupportedType(child))
         {
         if (parent->getSize() == child->getSize())
            {
            TR::DebugCounter::incStaticDebugCounter(comp(), TR::DebugCounter::debugCounterName(comp(), "codegen/LoadExtensions/success/unneededConversion/%s", comp()->signature()));

            parent->setUnneededConversion(true);
            }
         else
            {
            TR::ILOpCode& childOpCode = child->getOpCode();

            if (childOpCode.isLoadReg() &&
               !(parent->getSize() > 4 && TR::Compiler->target.is32Bit()) &&
               excludedNodes->count(parent) == 0)
               {
               TR::Node* useRegLoad = child;

               TR_UseDefInfo* useDefInfo = optimizer()->getUseDefInfo();

               // Note that isUseIndex must be handed the use-def index itself. The index is already known to be
               // non-zero at that point because of the check which precedes it.
               if (useDefInfo != NULL &&
                  useDefInfo->infoIsValid() &&
                  useRegLoad->getUseDefIndex() != 0 &&
                  useDefInfo->isUseIndex(useRegLoad->getUseDefIndex()))
                  {
                  TR_UseDefInfo::BitVector info(comp()->allocator());

                  if (useDefInfo->getUseDef(info, useRegLoad->getUseDefIndex()))
                     {
                     TR_UseDefInfo::BitVector::Cursor cursor(info);

                     int32_t firstDefIndex = useDefInfo->getFirstRealDefIndex();
                     int32_t firstUseIndex = useDefInfo->getFirstUseIndex();

                     canSkipConversion = true;

                     bool forceExtensionOnAnyLoads = false;
                     bool forceExtensionOnAllLoads = true;

                     // First pass: decide whether the conversion can be skipped for every def of this use
                     for (cursor.SetToFirstOne(); cursor.Valid() && canSkipConversion; cursor.SetToNextOne())
                        {
                        int32_t defIndex = cursor;

                        // We've examined all the defs of this particular use
                        if (defIndex >= firstUseIndex)
                           {
                           break;
                           }

                        // Do not consider defs that correspond to method arguments as we cannot force extension on those
                        if (defIndex < firstDefIndex)
                           {
                           continue;
                           }

                        TR::Node* defRegLoad = useDefInfo->getNode(defIndex);

                        if (defRegLoad != NULL)
                           {
                           TR::Node* defRegLoadChild = defRegLoad->getFirstChild();

                           bool forceExtension = false;
                           canSkipConversion = TR_LoadExtensions::canSkipConversion(parent, defRegLoadChild, forceExtension);

                           forceExtensionOnAnyLoads |= forceExtension;
                           forceExtensionOnAllLoads &= forceExtension;

                           // If we have to force extension on any loads which feed a def of this use ensure we must also
                           // force extension on all such loads. Conversely the conversion can be skipped if none of the
                           // loads feeding the def of this use need to be extended. This ensures either all loads feeding
                           // into defs of this use should be extended or none of them.
                           canSkipConversion &= forceExtensionOnAllLoads == forceExtensionOnAnyLoads;

                           if (trace())
                              {
                              traceMsg(comp(), "\t\tPeeked through %s [%p] and found %s [%p] with child %s [%p] - conversion %s be skipped\n",
                                 useRegLoad->getOpCode().getName(),
                                 useRegLoad,
                                 defRegLoad->getOpCode().getName(),
                                 defRegLoad,
                                 defRegLoadChild->getOpCode().getName(),
                                 defRegLoadChild,
                                 canSkipConversion ? "can" : "cannot");
                              }
                           }
                        }

                     if (canSkipConversion && performTransformation(comp(), "%sSkipping conversion %s [%p] after RegLoad\n", optDetailString(), parent->getOpCode().getName(), parent))
                        {
                        TR::DebugCounter::incStaticDebugCounter(comp(), TR::DebugCounter::debugCounterName(comp(), "codegen/LoadExtensions/success/unneededConversion/GRA/%s", comp()->signature()));

                        parent->setUnneededConversion(true);

                        if (forceExtensionOnAllLoads)
                           {
                           // Second pass: flag the first child of every def of this use for extension at source
                           TR_UseDefInfo::BitVector defsToExtend(comp()->allocator());

                           if (useDefInfo->getUseDef(defsToExtend, useRegLoad->getUseDefIndex()))
                              {
                              TR_UseDefInfo::BitVector::Cursor extendCursor(defsToExtend);

                              for (extendCursor.SetToFirstOne(); extendCursor.Valid(); extendCursor.SetToNextOne())
                                 {
                                 int32_t defIndex = extendCursor;

                                 // We've examined all the defs of this particular use
                                 if (defIndex >= firstUseIndex)
                                    {
                                    break;
                                    }

                                 // Do not consider defs that correspond to method arguments as we cannot force extension on those
                                 if (defIndex < firstDefIndex)
                                    {
                                    continue;
                                    }

                                 TR::Node *defRegLoad = useDefInfo->getNode(defIndex);

                                 if (defRegLoad != NULL)
                                    {
                                    TR::Node* defRegLoadChild = defRegLoad->getFirstChild();

                                    const int32_t preference = getExtensionPreference(defRegLoadChild);

                                    if (preference > 0)
                                       {
                                       if (trace())
                                          {
                                          traceMsg(comp(), "\t\t\tForcing sign extension on %s [%p]\n",
                                             defRegLoadChild->getOpCode().getName(),
                                             defRegLoadChild);
                                          }

                                       if (parent->getSize() == 8 || parent->useSignExtensionMode())
                                          {
                                          defRegLoadChild->setSignExtendTo64BitAtSource(true);
                                          }
                                       else
                                          {
                                          defRegLoadChild->setSignExtendTo32BitAtSource(true);
                                          }
                                       }

                                    if (preference < 0)
                                       {
                                       if (trace())
                                          {
                                          traceMsg(comp(), "\t\t\tForcing zero extension on %s [%p]\n",
                                             defRegLoadChild->getOpCode().getName(),
                                             defRegLoadChild);
                                          }

                                       if (parent->getSize() == 8 || parent->useSignExtensionMode())
                                          {
                                          defRegLoadChild->setZeroExtendTo64BitAtSource(true);
                                          }
                                       else
                                          {
                                          defRegLoadChild->setZeroExtendTo32BitAtSource(true);
                                          }
                                       }
                                    }
                                 }
                              }
                           }

                        if (parent->getType().isInt64() && parent->getSize() > child->getSize())
                           {
                           if (trace())
                              {
                              traceMsg(comp(), "\t\t\tSet global register %s in getExtendedToInt64GlobalRegisters for child %s [%p] with parent node %s [%p]\n",
                                 comp()->getDebug()->getGlobalRegisterName(child->getGlobalRegisterNumber()),
                                 child->getOpCode().getName(),
                                 child,
                                 parent->getOpCode().getName(),
                                 parent);
                              }

                           // getExtendedToInt64GlobalRegisters is used by the evaluators to force a larger virtual register to be used when
                           // evaluating the regload so any instructions generated by local RA are the correct size to preserve the upper bits
                           cg()->getExtendedToInt64GlobalRegisters()[child->getGlobalRegisterNumber()] = true;
                           }
                        }
                     }
                  }
               }
            }
         }

      // Fall back to examining the child directly if peeking through a register load did not succeed (or did not apply)
      if (!canSkipConversion)
         {
         bool forceExtension = false;
         canSkipConversion = TR_LoadExtensions::canSkipConversion(parent, child, forceExtension);

         if (canSkipConversion && performTransformation(comp(), "%sSkipping conversion %s [%p]\n", optDetailString(), parent->getOpCode().getName(), parent))
            {
            TR::DebugCounter::incStaticDebugCounter(comp(), TR::DebugCounter::debugCounterName(comp(), "codegen/LoadExtensions/success/unneededConversion/%s", comp()->signature()));

            parent->setUnneededConversion(true);

            if (forceExtension)
               {
               const int32_t preference = getExtensionPreference(child);

               if (preference > 0)
                  {
                  if (trace())
                     {
                     traceMsg(comp(), "\t\t\tForcing sign extension on %s [%p]\n",
                        child->getOpCode().getName(),
                        child);
                     }

                  if (parent->getSize() == 8 || parent->useSignExtensionMode())
                     {
                     child->setSignExtendTo64BitAtSource(true);
                     }
                  else
                     {
                     child->setSignExtendTo32BitAtSource(true);
                     }
                  }

               if (preference < 0)
                  {
                  if (trace())
                     {
                     traceMsg(comp(), "\t\t\tForcing zero extension on %s [%p]\n",
                        child->getOpCode().getName(),
                        child);
                     }

                  if (parent->getSize() == 8 || parent->useSignExtensionMode())
                     {
                     child->setZeroExtendTo64BitAtSource(true);
                     }
                  else
                     {
                     child->setZeroExtendTo32BitAtSource(true);
                     }
                  }
               }
            }
         }
      }
   }
const bool TR_LoadExtensions::canSkipConversion(TR::Node* conversion, TR::Node* child, bool& forceExtension) { bool result = false; // Assume we are not forcing the load (if the child is really a load) to be zero/sign extended forceExtension = false; if (trace()) { traceMsg(comp(), "\t\tExamining conversion %s [%p]\n", conversion->getOpCode().getName(), conversion); } if (isSupportedType(child) && excludedNodes->count(child) == 0) { const int32_t preference = getExtensionPreference(child); const bool loadPrefersSignExtension = preference > 0; const bool loadPrefersZeroExtension = preference < 0; TR::ILOpCode& conversionOpCode = conversion->getOpCode(); if (isSupportedLoad(child) && // Only consider widening conversions conversion->getSize() > child->getSize() && // Ensure we do not use register pairs for 64-bit loads on 32-bit platforms (TR::Compiler->target.is64Bit() || comp()->cg()->use64BitRegsOn32Bit() || conversion->getSize() != 8) && // Ensure the conversion matches our preferred extension on the load (loadPrefersSignExtension && loadPrefersSignExtension == conversionOpCode.isSignExtension() || loadPrefersZeroExtension && loadPrefersZeroExtension == conversion->isZeroExtension())) { if (trace()) { traceMsg(comp(), "\t\tDetected sign extension pattern on widening conversion %s [%p] and load %s [%p]\n", conversion->getOpCode().getName(), conversion, child->getOpCode().getName(), child); } forceExtension = true; result = true; } if (conversion->getSize() < child->getSize()) { // TODO (Issue #2213): Determine whether this case is ever needed and why? Shouldn't the simplifier have eliminated such IL? if (child->getOpCode().isConversion()) { TR::Node* grandChild = child->getFirstChild(); if (isSupportedLoad(grandChild) && // Conversion is narrowing down to the original width (i.e. 
stacked conversion which is a NOP) conversion->getSize() == grandChild->getSize()) { if (trace()) { traceMsg(comp(), "\t\tDetected sign extension pattern on narrowing conversion %s [%p] and load %s [%p]\n", conversion->getOpCode().getName(), conversion, child->getOpCode().getName(), child); } result = true; } } } } return result; }
/**
 * Records how loads are consumed by conversions so that an extension preference can later be queried for each load,
 * and records the nodes which must not take part in the optimization.
 *
 * For a conversion \p parent of a supported type whose child has a supported type and a different size:
 *   - if the child is a supported load, setExtensionPreference(child, parent) is called
 *   - if the child is a register load, the defs reaching it are looked up through the use-def info. Every def which is
 *     a register store of a supported load is counted through setExtensionPreference. If any def is something else, if
 *     a def index below the first real def index is encountered, or if use-def info cannot be used, \p parent is added
 *     to the excluded nodes.
 *
 * Independently of the above, if \p parent is a register store which needs sign extension and whose first child is a
 * load, that child is added to the excluded nodes.
 *
 * \param parent
 *    The node to examine.
 */
void TR_LoadExtensions::findPreferredLoadExtensions(TR::Node* parent)
   {
   TR::ILOpCode& parentOpCode = parent->getOpCode();

   // Count how a load is being used. As a signed or unsigned number?
   if (isSupportedType(parent) && parentOpCode.isConversion())
      {
      TR::Node* child = parent->getFirstChild();

      // Only examine non-trivial conversions
      if (isSupportedType(child) && parent->getSize() != child->getSize())
         {
         if (isSupportedLoad(child))
            {
            setExtensionPreference(child, parent);
            }
         else if (child->getOpCode().isLoadReg())
            {
            TR::Node* useRegLoad = child;

            TR_UseDefInfo* useDefInfo = optimizer()->getUseDefInfo();

            // If we have usedef info we can traverse all defs of this particular use and if all the defs are stores
            // of supported counted loads then we can count such loads as well. If this criteria is not met then there
            // exists at least one def (store) of this particular use which feeds from a non-load operation (an
            // addition for example). These are not candidates for skipping extension because we cannot easily extend
            // a non-load operation.
            //
            // Note that isUseIndex must be handed the use-def index itself. The index is already known to be non-zero
            // at that point because of the check which precedes it.
            if (useDefInfo != NULL &&
               useDefInfo->infoIsValid() &&
               useRegLoad->getUseDefIndex() != 0 &&
               useDefInfo->isUseIndex(useRegLoad->getUseDefIndex()))
               {
               TR_UseDefInfo::BitVector info(comp()->allocator());

               if (useDefInfo->getUseDef(info, useRegLoad->getUseDefIndex()))
                  {
                  if (trace())
                     {
                     traceMsg(comp(), "\t\tPeeking through RegLoad %p for conversion %s [%p]\n",
                        useRegLoad,
                        parentOpCode.getName(),
                        parent);
                     }

                  TR_UseDefInfo::BitVector::Cursor cursor(info);

                  int32_t firstDefIndex = useDefInfo->getFirstRealDefIndex();
                  int32_t firstUseIndex = useDefInfo->getFirstUseIndex();

                  for (cursor.SetToFirstOne(); cursor.Valid(); cursor.SetToNextOne())
                     {
                     int32_t defIndex = cursor;

                     // We've examined all the defs of this particular use
                     if (defIndex >= firstUseIndex)
                        {
                        break;
                        }

                     // Do not consider defs that correspond to method arguments as we cannot force extension on those
                     if (defIndex < firstDefIndex)
                        {
                        (*excludedNodes)[parent] = true;
                        break;
                        }

                     TR::Node* defRegLoad = useDefInfo->getNode(defIndex);

                     if (defRegLoad != NULL)
                        {
                        TR::Node* defRegLoadChild = defRegLoad->getFirstChild();

                        if (defRegLoad->getOpCode().isStoreReg() &&
                           isSupportedType(defRegLoadChild) &&
                           isSupportedLoad(defRegLoadChild))
                           {
                           if (trace())
                              {
                              traceMsg(comp(), "\t\tPeeked through use %s [%p] and found def %s [%p] with child %s [%p] - Counting [%p]\n",
                                 useRegLoad->getOpCode().getName(),
                                 useRegLoad,
                                 defRegLoad->getOpCode().getName(),
                                 defRegLoad,
                                 defRegLoadChild->getOpCode().getName(),
                                 defRegLoadChild,
                                 defRegLoadChild);
                              }

                           setExtensionPreference(defRegLoadChild, parent);
                           }
                        else
                           {
                           if (trace())
                              {
                              traceMsg(comp(), "\t\tPeeked through use %s [%p] and found def %s [%p] with child %s [%p] - Excluding [%p]\n",
                                 useRegLoad->getOpCode().getName(),
                                 useRegLoad,
                                 defRegLoad->getOpCode().getName(),
                                 defRegLoad,
                                 defRegLoadChild != NULL ? defRegLoadChild->getOpCode().getName() : "NULL",
                                 defRegLoadChild,
                                 parent);
                              }

                           (*excludedNodes)[parent] = true;
                           }
                        }
                     }
                  }
               }
            else
               {
               // Without usable use-def info the defs cannot be inspected, so this conversion is excluded
               (*excludedNodes)[parent] = true;
               }
            }
         }
      }

   // Exclude all loads which feed into global register stores which require sign extensions. This must be done
   // because Load Extensions is a local optimization and it must respect global sign extension decisions made
   // by GRA. Excluding such loads prevents a situation where GRA decided that a particular global register
   // should be sign extended at its definitions however Load Extensions has determined that the same load
   // should be zero extended. If local RA were to pick the same register for the global register as well as
   // the load then we have a conflicting decision which will result in a conversion to be skipped when it is
   // not supposed to be.
   if (parentOpCode.isStoreReg() && parent->needsSignExtension() && parent->getFirstChild()->getOpCode().isLoadVar())
      {
      (*excludedNodes)[parent->getFirstChild()] = true;
      }
   }
/**
 * Tries to remove \p node together with \p firstChild when \p firstChild has the opcode \p opcode, replacing the pair
 * by the child of \p firstChild.
 *
 * The transformation is rejected when isLegalToUnaryCancel says so, and also when an aggregate type is involved and
 * removing the pair would lose a truncation (see the size checks in the body).
 *
 * \param node
 *    The outer node of the pair.
 *
 * \param firstChild
 *    The first child of \p node; the inner node of the pair.
 *
 * \param anchorTree
 *    Passed through to prepareToStopUsingNode.
 *
 * \param opcode
 *    The opcode \p firstChild must have for the pair to be removed.
 *
 * \param anchorChildren
 *    Whether anchoring of children is requested. Anchoring is only asked of prepareToStopUsingNode if, in addition,
 *    either node of the pair has more than one child or has a symbol reference.
 *
 * \return
 *    The child of \p firstChild (with its reference count incremented) if the pair was removed; NULL otherwise.
 */
TR::Node *
OMR::Simplifier::unaryCancelOutWithChild(TR::Node * node, TR::Node * firstChild, TR::TreeTop *anchorTree, TR::ILOpCodes opcode, bool anchorChildren)
   {
   if (!isLegalToUnaryCancel(node, firstChild, opcode))
      return NULL;

   if (firstChild->getOpCodeValue() == opcode &&
       (node->getType().isAggregate() || firstChild->getType().isAggregate()) &&
       (node->getSize() > firstChild->getSize() || node->getSize() != firstChild->getFirstChild()->getSize()))
      {
      // ensure a truncation side-effect of a conversion is not lost
      // o2a size=3
      //   a2o size=3 // conversion truncates in addition to type cast so cannot be removed
      //     loadaddr size=4
      // This restriction could be loosened to only disallow intermediate truncations (see BCD case above) but then would require a node
      // op that would just correct for size (e.g. addrSizeMod size=3 to replace the o2a/a2o pair)
      //
      // Do allow cases when all three sizes are the same and when the middle node widens but the top and bottom node have the same size, e.g.
      //
      // i2o size=3
      //   o2i size=4
      //     oload size=3
      //
      // Also allow the special case where the grandchild is not really truncated as the 'truncated' bytes are known to be zero
      // (i.e. there really isn't an intermediate truncation of 4->3 even though it appears that way from looking at the sizes alone)
      // o2i
      //   i2o size=3
      //     iushr
      //       x
      //       iconst 8
      bool disallow = true;
      TR::Node *grandChild = firstChild->getFirstChild();
      size_t nodeSize = node->getSize();
      if (node->getType().isIntegral() &&
          nodeSize == grandChild->getSize() &&
          nodeSize > firstChild->getSize())
         {
         // Number of high-order bits which the intermediate node appears to drop
         size_t truncatedBits = (nodeSize - firstChild->getSize()) * 8;
         if (grandChild->getOpCode().isRightShift() && grandChild->getOpCode().isShiftLogical() &&
             grandChild->getSecondChild()->getOpCode().isLoadConst() &&
             (grandChild->getSecondChild()->get64bitIntegralValue() == truncatedBits))
            {
            disallow = false;
            // The byte count is a size_t; convert it explicitly so that it matches the %d conversion in the message
            if (trace())
               traceMsg(comp(),"do allow unaryCancel of node %s (%p) and firstChild %s (%p) as grandChild %s (%p) zeros the %d truncated bytes\n",
                        node->getOpCode().getName(),node,firstChild->getOpCode().getName(),firstChild,
                        grandChild->getOpCode().getName(),grandChild,static_cast<int>(truncatedBits/8));
            }
         }

      if (disallow)
         {
         if (trace())
            traceMsg(comp(),"disallow unaryCancel of node %s (%p) and firstChild %s (%p) due to unequal sizes (nodeSize %d, firstChildSize %d, firstChild->childSize %d)\n",
                     node->getOpCode().getName(),node,firstChild->getOpCode().getName(),firstChild,
                     node->getSize(),firstChild->getSize(),firstChild->getFirstChild()->getSize());
         return NULL;
         }
      }

   if (firstChild->getOpCodeValue() == opcode &&
       performTransformation(comp(), "%sRemoving node [" POINTER_PRINTF_FORMAT "] %s and its child [" POINTER_PRINTF_FORMAT "] %s\n",
                             optDetailString(), node, node->getOpCode().getName(), firstChild, firstChild->getOpCode().getName()))
      {
      TR::Node *grandChild = firstChild->getFirstChild();

      // Keep the grandchild alive: it is handed back to the caller while the nodes above it are released below
      grandChild->incReferenceCount();

      bool anchorChildrenNeeded = anchorChildren &&
         (node->getNumChildren() > 1 ||
          firstChild->getNumChildren() > 1 ||
          node->getOpCode().hasSymbolReference() ||
          firstChild->getOpCode().hasSymbolReference());

      prepareToStopUsingNode(node, anchorTree, anchorChildrenNeeded);
      node->recursivelyDecReferenceCount();
      node->setVisitCount(0);
      return grandChild;
      }

   return NULL;
   }