bool TR::CompilationController::init(TR::CompilationInfo *compInfo)
   {
   TR::Options *options = TR::Options::getCmdLineOptions();
   char *strategyName = options->getCompilationStrategyName();
      {
      _compInfo = compInfo;
      _compilationStrategy = new (PERSISTENT_NEW) TR::DefaultCompilationStrategy();
         {
         TR_OptimizationPlan::_optimizationPlanMonitor = TR::Monitor::create("OptimizationPlanMonitor");
         _useController = (TR_OptimizationPlan::_optimizationPlanMonitor != 0);
         if (_useController)
            {
            static char *verboseController = feGetEnv("TR_VerboseController");
            if (verboseController)
               setVerbose(atoi(verboseController));
            if (verbose() >= LEVEL1)
               fprintf(stderr, "Using %s comp strategy\n", strategyName);
            }
         }
      }
   tlsAlloc(OMR::compilation);
   return _useController;
   }
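// Determine whether the guard at guardCandidate can be moved up to the exit of
// destination (or to the method's start tree when destination is NULL).
// HCR guards may only move past trees that cannot GC and return; OSR guards may
// only move past trees that are not potential OSR points; other (runtime,
// nopable) guards may only move past privatized inliner argument stores, live
// monitor stores, and other nopable guards branching to the same destination.
// The symbol references of any privatized inliner args encountered are recorded
// in privArgSymRefs for the caller.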
static bool safeToMoveGuard(TR::Block *destination, TR::TreeTop *guardCandidate,
   TR::TreeTop *branchDest, TR_BitVector &privArgSymRefs)
   {
   static char *disablePrivArgMovement = feGetEnv("TR_DisableRuntimeGuardPrivArgMovement");
   TR::TreeTop *start = destination ? destination->getExit() : TR::comp()->getStartTree();
   if (guardCandidate->getNode()->isHCRGuard())
      {
      for (TR::TreeTop *tt = start; tt && tt != guardCandidate; tt = tt->getNextTreeTop())
         {
         if (tt->getNode()->canGCandReturn())
            return false;
         }
      }
   else if (guardCandidate->getNode()->isOSRGuard())
      {
      for (TR::TreeTop *tt = start; tt && tt != guardCandidate; tt = tt->getNextTreeTop())
         {
         if (TR::comp()->isPotentialOSRPoint(tt->getNode(), NULL, true))
            return false;
         }
      }
   else
      {
      privArgSymRefs.empty();
      for (TR::TreeTop *tt = start; tt && tt != guardCandidate; tt = tt->getNextTreeTop())
         {
         // It's safe to move the guard if there are only priv arg stores and live monitor stores
         // ahead of the guard
         if (tt->getNode()->getOpCodeValue() != TR::BBStart
             && tt->getNode()->getOpCodeValue() != TR::BBEnd
             && !tt->getNode()->chkIsPrivatizedInlinerArg()
             && !(tt->getNode()->getOpCode().hasSymbolReference() && tt->getNode()->getSymbol()->holdsMonitoredObject())
             && !tt->getNode()->isNopableInlineGuard())
            return false;

         if (tt->getNode()->chkIsPrivatizedInlinerArg()
             && (disablePrivArgMovement ||
                 // If the priv arg is not for this guard
                 (guardCandidate->getNode()->getInlinedSiteIndex() > -1
                  // If the priv arg store does not have the same inlined site index as the guard's caller,
                  // it is not a priv arg for this guard, so we cannot move the guard and its priv args up
                  // across other calls' priv args
                  && tt->getNode()->getInlinedSiteIndex() != TR::comp()->getInlinedCallSite(guardCandidate->getNode()->getInlinedSiteIndex())._byteCodeInfo.getCallerIndex())))
            return false;

         if (tt->getNode()->chkIsPrivatizedInlinerArg())
            privArgSymRefs.set(tt->getNode()->getSymbolReference()->getReferenceNumber());

         if (tt->getNode()->isNopableInlineGuard()
             && tt->getNode()->getBranchDestination() != branchDest)
            return false;
         }
      }
   return true;
   }
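// Track the validator's view of node liveness as the trees are walked: on a
// node's first occurrence it is added to the live-node window (a refcount-0
// node is only legal for treetop opcodes), each subsequent reference decrements
// the outstanding future reference count, and the node is removed once that
// count reaches zero. Optionally traces the live-node set when
// TR_traceLiveNodesDuringValidation is set.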
void TR::ILValidator::updateNodeState(Location &newLocation)
   {
   TR::Node *node = newLocation.currentNode();
   NodeState &state = _nodeStates[node];
   if (node->getReferenceCount() == state._futureReferenceCount)
      {
      // First occurrence -- do some bookkeeping
      //
      if (node->getReferenceCount() == 0)
         {
         validityRule(newLocation, node->getOpCode().isTreeTop(), "Only nodes with isTreeTop opcodes can have refcount == 0");
         }
      else
         {
         _liveNodes.add(node);
         }
      }

   if (_liveNodes.contains(node))
      {
      validityRule(newLocation, state._futureReferenceCount >= 1, "Node already has reference count 0");
      if (--state._futureReferenceCount == 0)
         {
         _liveNodes.remove(node);
         }
      }
   else
      {
      validityRule(newLocation, node->getOpCode().isTreeTop(), "Node has already gone dead");
      }

   if (isLoggingEnabled())
      {
      static const char *traceLiveNodesDuringValidation = feGetEnv("TR_traceLiveNodesDuringValidation");
      if (traceLiveNodesDuringValidation && !_liveNodes.isEmpty())
         {
         traceMsg(comp(), " -- Live nodes: {");
         char *separator = "";
         for (LiveNodeWindow::Iterator lnwi(_liveNodes); lnwi.currentNode(); ++lnwi)
            {
            traceMsg(comp(), "%sn%dn", separator, lnwi.currentNode()->getGlobalIndex());
            separator = ", ";
            }
         traceMsg(comp(), "}\n");
         }
      }
   }
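// Upper bound on the number of global GPRs that GRA may keep live across the
// outgoing edges of this node on AMD64: lookupswitches are limited to one
// register unless TR_enableGRAAcrossLookupSwitch is set, instanceof-based ifs
// and switches reserve several registers for their evaluators, and every other
// node places no limit (INT_MAX).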
int32_t OMR::X86::AMD64::CodeGenerator::getMaximumNumberOfGPRsAllowedAcrossEdge(TR::Node *node)
   {
   // TODO: Currently, lookupEvaluator doesn't deal properly with different
   // glRegDeps on different cases of a lookupswitch.
   //
   static const char *enableLookupswitch = feGetEnv("TR_enableGRAAcrossLookupSwitch");
   if (!enableLookupswitch && node->getOpCode().getOpCodeValue() == TR::lookup)
      return 1;

   if (node->getOpCode().isIf() && node->getFirstChild()->getOpCodeValue() == TR::instanceof)
      return self()->getNumberOfGlobalGPRs() - 6;
   else if (node->getOpCode().isSwitch())
      return self()->getNumberOfGlobalGPRs() - 3; // A memref in a jump for a MemTable might need a base, index and address register.

   return INT_MAX;
   }
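// TestCompiler front-end hook for the start of binary encoding: records the
// first instruction and jit-to-jit entry point, reserves padding for JIT entry
// point alignment unless TR_DisableAlignJITEP is set, accumulates the length
// estimate up to the PROC pseudo-instruction, and then asks the linkage to
// create the prologue. Note that the binary encoding data is treated as
// TR_S390BinaryEncodingData here.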
void
TestCompiler::FrontEnd::generateBinaryEncodingPrologue(
      TR_BinaryEncodingData *beData,
      TR::CodeGenerator *cg)
   {
   TR::Compilation *comp = cg->comp();
   TR_S390BinaryEncodingData *data = (TR_S390BinaryEncodingData *)beData;

   data->cursorInstruction = comp->getFirstInstruction();
   data->estimate = 0;
   data->preProcInstruction = data->cursorInstruction;
   data->jitTojitStart = data->cursorInstruction;
   data->cursorInstruction = NULL;

   TR::Instruction *preLoadArgs, *endLoadArgs;
   preLoadArgs = data->preProcInstruction;
   endLoadArgs = preLoadArgs;

   TR::Instruction *oldFirstInstruction = data->cursorInstruction;
   data->cursorInstruction = comp->getFirstInstruction();

   static char *disableAlignJITEP = feGetEnv("TR_DisableAlignJITEP");

   // Padding for JIT Entry Point
   if (!disableAlignJITEP)
      {
      data->estimate += 256;
      }

   while (data->cursorInstruction && data->cursorInstruction->getOpCodeValue() != TR::InstOpCode::PROC)
      {
      data->estimate = data->cursorInstruction->estimateBinaryLength(data->estimate);
      data->cursorInstruction = data->cursorInstruction->getNext();
      }

   cg->getLinkage()->createPrologue(data->cursorInstruction);
   //cg->getLinkage()->analyzePrologue();
   }
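// Decide whether this method needs yield points inserted before its returns:
// either it is a large acyclic method (node count above TR_LargeMethodNodes,
// default NUMBER_OF_NODES_IN_LARGE_METHOD), or it is a loopy method whose
// asyncchecks were versioned out and whose entry frequency is still within the
// configured limit. When either holds, async checks are inserted at returns
// and 1 is returned; otherwise 0.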
int32_t TR_AsyncCheckInsertion::perform()
   {
   TR::StackMemoryRegion stackMemoryRegion(*trMemory());

   // If this is a large acyclic method - add a yield point at each return from this method
   // so that sampling will realize that we are actually in this method.
   //
   static const char *p;
   static uint32_t numNodesInLargeMethod = (p = feGetEnv("TR_LargeMethodNodes")) ? atoi(p) : NUMBER_OF_NODES_IN_LARGE_METHOD;
   const bool largeAcyclicMethod =
      !comp()->mayHaveLoops() && comp()->getNodeCount() > numNodesInLargeMethod;

   // If this method has loops whose asyncchecks were versioned out, it may
   // still spend a significant amount of time in each invocation without
   // yielding. In this case, insert yield points before returns whenever there
   // is a sufficiently frequent block somewhere in the method.
   //
   bool loopyMethodWithVersionedAsyncChecks = false;
   if (!largeAcyclicMethod && comp()->getLoopWasVersionedWrtAsyncChecks())
      {
      // The max (normalized) block frequency is fixed, but very frequent
      // blocks push down the frequency of method entry.
      int32_t entry = comp()->getStartTree()->getNode()->getBlock()->getFrequency();
      int32_t limit = comp()->getOptions()->getLoopyAsyncCheckInsertionMaxEntryFreq();
      loopyMethodWithVersionedAsyncChecks = 0 <= entry && entry <= limit;
      }

   if (largeAcyclicMethod || loopyMethodWithVersionedAsyncChecks)
      {
      const char *counterPrefix = largeAcyclicMethod ? "acyclic" : "loopy";
      int32_t numAsyncChecksInserted = insertReturnAsyncChecks(this, counterPrefix);
      if (trace())
         traceMsg(comp(), "Inserted %d async checks\n", numAsyncChecksInserted);
      return 1;
      }
   return 0;
   }
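// Evaluate a dbits2l node on AMD64: move the double's bit pattern from the
// source register into a GPR (MOVQReg8Reg). When the node requires NaN
// normalization, all NaN bit patterns are collapsed to the canonical
// DOUBLE_NAN value, either with a simple compare/branch sequence
// (TR_disableFastNormalizeNaNs) or with a faster path that outlines the NaN
// case.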
TR::Register *OMR::X86::AMD64::TreeEvaluator::dbits2lEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   // TODO:AMD64: Peepholing
   TR::Node     *child = node->getFirstChild();
   TR::Register *sreg  = cg->evaluate(child);
   TR::Register *treg  = cg->allocateRegister(TR_GPR);
   generateRegRegInstruction(MOVQReg8Reg, node, treg, sreg, cg);
   if (node->normalizeNanValues())
      {
      static char *disableFastNormalizeNaNs = feGetEnv("TR_disableFastNormalizeNaNs");
      if (disableFastNormalizeNaNs)
         {
         // This one is not clever, but it is simple, and it's based directly
         // on the IA32 version which is known to work, so is safer.
         //
         TR::RegisterDependencyConditions *deps = generateRegisterDependencyConditions((uint8_t)0, (uint8_t)1, cg);
         deps->addPostCondition(treg, TR::RealRegister::NoReg, cg);

         TR::IA32ConstantDataSnippet *nan1Snippet = cg->findOrCreate8ByteConstant(node, DOUBLE_NAN_1_LOW);
         TR::IA32ConstantDataSnippet *nan2Snippet = cg->findOrCreate8ByteConstant(node, DOUBLE_NAN_2_LOW);
         TR::MemoryReference *nan1MR = generateX86MemoryReference(nan1Snippet, cg);
         TR::MemoryReference *nan2MR = generateX86MemoryReference(nan2Snippet, cg);

         TR::LabelSymbol *startLabel     = TR::LabelSymbol::create(cg->trHeapMemory(), cg);
         TR::LabelSymbol *normalizeLabel = TR::LabelSymbol::create(cg->trHeapMemory(), cg);
         TR::LabelSymbol *endLabel       = TR::LabelSymbol::create(cg->trHeapMemory(), cg);
         startLabel->setStartInternalControlFlow();
         endLabel  ->setEndInternalControlFlow();

         generateLabelInstruction(   LABEL,        node, startLabel,       cg);
         generateRegMemInstruction(  CMP8RegMem,   node, treg, nan1MR,     cg);
         generateLabelInstruction(   JGE4,         node, normalizeLabel,   cg);
         generateRegMemInstruction(  CMP8RegMem,   node, treg, nan2MR,     cg);
         generateLabelInstruction(   JB4,          node, endLabel,         cg);
         generateLabelInstruction(   LABEL,        node, normalizeLabel,   cg);
         generateRegImm64Instruction(MOV8RegImm64, node, treg, DOUBLE_NAN, cg);
         generateLabelInstruction(   LABEL,        node, endLabel, deps,   cg);
         }
      else
         {
         // A bunch of bookkeeping
         //
         uint64_t nanDetector = DOUBLE_NAN_2_LOW;

         TR::RegisterDependencyConditions *internalControlFlowDeps = generateRegisterDependencyConditions((uint8_t)0, (uint8_t)1, cg);
         internalControlFlowDeps->addPostCondition(treg, TR::RealRegister::NoReg, cg);

         TR::RegisterDependencyConditions *helperDeps = generateRegisterDependencyConditions((uint8_t)1, (uint8_t)1, cg);
         helperDeps->addPreCondition( treg, TR::RealRegister::eax, cg);
         helperDeps->addPostCondition(treg, TR::RealRegister::eax, cg);

         TR::IA32ConstantDataSnippet *nanDetectorSnippet = cg->findOrCreate8ByteConstant(node, nanDetector);
         TR::MemoryReference *nanDetectorMR = generateX86MemoryReference(nanDetectorSnippet, cg);

         TR::LabelSymbol *startLabel     = TR::LabelSymbol::create(cg->trHeapMemory(), cg);
         TR::LabelSymbol *slowPathLabel  = TR::LabelSymbol::create(cg->trHeapMemory(), cg);
         TR::LabelSymbol *normalizeLabel = TR::LabelSymbol::create(cg->trHeapMemory(), cg);
         TR::LabelSymbol *endLabel       = TR::LabelSymbol::create(cg->trHeapMemory(), cg);
         startLabel->setStartInternalControlFlow();
         endLabel  ->setEndInternalControlFlow();

         // Fast path: if subtracting nanDetector leaves CF=0 or OF=1, then it
         // must be a NaN.
         //
         generateLabelInstruction( LABEL,      node, startLabel,          cg);
         generateRegMemInstruction(CMP8RegMem, node, treg, nanDetectorMR, cg);
         generateLabelInstruction( JAE4,       node, slowPathLabel,       cg);
         generateLabelInstruction( JO4,        node, slowPathLabel,       cg);

         // Slow path
         //
         TR_OutlinedInstructions *slowPath = new (cg->trHeapMemory()) TR_OutlinedInstructions(slowPathLabel, cg);
         cg->getOutlinedInstructionsList().push_front(slowPath);
         slowPath->swapInstructionListsWithCompilation();
         generateLabelInstruction(NULL, LABEL, slowPathLabel, cg)->setNode(node);
         generateRegImm64Instruction(MOV8RegImm64, node, treg, DOUBLE_NAN, cg);
         generateLabelInstruction(JMP4, node, endLabel, cg);
         slowPath->swapInstructionListsWithCompilation();

         // Merge point
         //
         generateLabelInstruction(LABEL, node, endLabel, internalControlFlowDeps, cg);
         }
      }
   node->setRegister(treg);
   cg->decReferenceCount(child);
   return treg;
   }
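// A guard is a candidate for head merging if it is nopable; the
// TR_MergeOnlyHCRGuards environment variable restricts merging to HCR guards.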
static bool isMergeableGuard(TR::Node *node)
   {
   static char *mergeOnlyHCRGuards = feGetEnv("TR_MergeOnlyHCRGuards");
   return mergeOnlyHCRGuards ? node->isHCRGuard() : node->isNopableInlineGuard();
   }
// This opt tries to reduce merge backs from cold code that are the result of inliner
// generated nopable virtual guards
// It looks for one basic pattern
//
// guard1 -> cold1
// BBEND
// BBSTART
// guard2 -> cold2
// if guard1 is the guard for a method which calls the method guard2 protects or cold1 is
// a predecessor of cold2 (a situation commonly created by virtual guard tail splitter) we
// can transform the guards as follows when guard1 and guard2 are mergeable guards
// guard1 -> cold1
// BBEND
// BBSTART
// guard2 -> cold1
// This is safe because there are no trees between the guards and calling the caller will
// result in the call to the callee if we need to patch guard2. cold2 and its mergebacks
// can then be eliminated
//
// In addition this opt will try to move guard2 up from the end of a block to the
// start of the block. We can do this if guard2 is an HCR guard and there is no GC point
// between BBSTART and guard2 since HCR is a stop-the-world event.
//
// Finally, there is a simple tail splitting step run before the analysis of a guard if we
// detect that the taken side of the guard merges back in the next block - this happens
// for some empty methods and is common for Object.<init> at the top of constructors.
int32_t TR_VirtualGuardHeadMerger::perform()
   {
   static char *disableVGHeadMergerTailSplitting = feGetEnv("TR_DisableVGHeadMergerTailSplitting");
   TR::CFG *cfg = comp()->getFlowGraph();

   // Cache the loads for the outer guard's cold path
   TR_BitVector coldPathLoads(comp()->trMemory()->currentStackRegion());
   TR_BitVector privArgSymRefs(comp()->trMemory()->currentStackRegion());
   bool evaluatedColdPathLoads = false;

   for (TR::Block *block = optimizer()->getMethodSymbol()->getFirstTreeTop()->getNode()->getBlock();
        block; block = block->getNextBlock())
      {
      TR::Node *guard1 = block->getLastRealTreeTop()->getNode();

      if (isMergeableGuard(guard1))
         {
         if (trace())
            traceMsg(comp(), "Found mergeable guard in block_%d\n", block->getNumber());
         TR::Block *cold1 = guard1->getBranchDestination()->getEnclosingBlock();

         // Check for an immediate merge back from the cold block and
         // tail split one block if we can - we only handle splitting a block
         // ending in a fallthrough, a branch or a goto for now for simplicity
         if (!disableVGHeadMergerTailSplitting &&
             (cold1->getSuccessors().size() == 1) &&
             cold1->hasSuccessor(block->getNextBlock()) &&
             cold1->getLastRealTreeTop()->getNode()->getOpCode().isGoto())
            {
            // TODO handle moving code earlier in the block down below the guard

            // Tail split
            if ((block->getNextBlock()->getSuccessors().size() == 1) ||
                ((block->getNextBlock()->getSuccessors().size() == 2) &&
                 block->getNextBlock()->getLastRealTreeTop()->getNode()->getOpCode().isBranch()) &&
                performTransformation(comp(), "%sCloning block_%d and placing clone after block_%d to reduce HCR guard nops\n",
                   OPT_DETAILS, block->getNextBlock()->getNumber(), cold1->getNumber()))
               tailSplitBlock(block, cold1);
            }

         // Guard motion is fairly complex but what we want to achieve around guard1 is a sequence
         // of relocated privarg blocks, followed by a sequence of runtime patchable guards going to
         // guard1's cold block, followed by a sequence of stop-the-world guards going to guard1's
         // cold block
         //
         // The following code is to setup the various insert points based on the following diagrams
         // of basic blocks:
         //
         // start:                  setup:                          end result after moving runtime guard'
         //                          |       |                       +-------+ <-- privargIns
         //                          |       | <-- privargIns            |
         //                          +-------+ <-- runtimeIns        +-------+
         //   |       |              |       |                       | Guard'|
         //   |       |                                          V   +-------+ <-- runtimeIns
         //   +-------+              +-------+                           |
         //   | Guard |              | Guard |                           V
         //   +-------+              +-------+ <-- HCRIns            +-------+
         //       |        ===>          |               ===>        | Guard |
         //       V                      V                           +-------+ <-- HCRIns
         //   +-------+              +-------+                           |
         //   |       |              |       |                           V
         //   |       |              |       |                       +-------+
         //
         // Note we always split the block - this may create an empty block but preserves the incoming
         // control flow we leave the rest to block extension to fix later
         block = block->split(block->getLastRealTreeTop(), cfg, true, false);
         TR::Block *privargIns = block->getPrevBlock();
         TR::Block *runtimeIns = block->getPrevBlock();
         TR::Block *HCRIns = block;

         // New outer guard so cold paths must be evaluated
         evaluatedColdPathLoads = false;

         // Scan for candidate guards to merge with guard1 identified above
         for (TR::Block *nextBlock = block->getNextBlock(); nextBlock; nextBlock = nextBlock->getNextBlock())
            {
            if (!(nextBlock->getPredecessors().size() == 1) || !nextBlock->hasPredecessor(block))
               {
               break;
               }

            TR::TreeTop *guard2Tree = NULL;
            if (isMergeableGuard(nextBlock->getFirstRealTreeTop()->getNode()))
               {
               guard2Tree = nextBlock->getFirstRealTreeTop();
               }
            else if (isMergeableGuard(nextBlock->getLastRealTreeTop()->getNode()))
               {
               guard2Tree = nextBlock->getLastRealTreeTop();
               }
            else
               break;

            TR::Node *guard2 = guard2Tree->getNode();
            TR::Block *guard2Block = nextBlock;

            // It is not possible to shift an OSR guard unless the destination is already an OSR point
            // as the necessary OSR state will not be available
            if (guard2->isOSRGuard() && !guard1->isOSRGuard())
               break;

            TR::Block *insertPoint = isStopTheWorldGuard(guard2) ? HCRIns : runtimeIns;
            if (!safeToMoveGuard(insertPoint, guard2Tree, guard1->getBranchDestination(), privArgSymRefs))
               break;

            // Now we figure out if we can redirect guard2 to guard1's cold block
            // ie can we do the head merge
            TR::Block *cold2 = guard2->getBranchDestination()->getEnclosingBlock();
            if (guard1->getInlinedSiteIndex() == guard2->getInlinedSiteIndex())
               {
               if (trace())
                  traceMsg(comp(), " Guard1 [%p] is guarding the same call as Guard2 [%p] - proceeding with guard merging\n", guard1, guard2);
               }
            else if (guard2->getInlinedSiteIndex() > -1 &&
                     guard1->getInlinedSiteIndex() == TR::comp()->getInlinedCallSite(guard2->getInlinedSiteIndex())._byteCodeInfo.getCallerIndex())
               {
               if (trace())
                  traceMsg(comp(), " Guard1 [%p] is the caller of Guard2 [%p] - proceeding with guard merging\n", guard1, guard2);
               }
            else if ((cold1->getSuccessors().size() == 1) && cold1->hasSuccessor(cold2))
               {
               if (trace())
                  traceMsg(comp(), " Guard1 cold destination block_%d has guard2 cold destination block_%d as its only successor - proceeding with guard merging\n", cold1->getNumber(), cold2->getNumber());
               }
            else
               {
               if (trace())
                  traceMsg(comp(), " Cold1 block_%d and cold2 block_%d of guard2 [%p] in unknown relationship - abandon the merge attempt\n", cold1->getNumber(), cold2->getNumber(), guard2);
               break;
               }

            // Runtime guards will shift their privargs, so it is necessary to check such a move is safe
            // This is possible if a privarg temp was recycled for the inner call site, with a prior use as an
            // argument for the outer call site. As the privargs for the inner call site must be evaluated before
            // both guards, this would result in the recycled temp holding the incorrect value if the guard is ever
            // taken.
            if (!isStopTheWorldGuard(guard2))
               {
               if (!evaluatedColdPathLoads)
                  {
                  collectColdPathLoads(cold1, coldPathLoads);
                  evaluatedColdPathLoads = true;
                  }

               if (coldPathLoads.intersects(privArgSymRefs))
                  {
                  if (trace())
                     traceMsg(comp(), " Recycled temp live in cold1 block_%d and used as privarg before guard2 [%p] - stop guard merging", cold1->getNumber(), guard2);
                  break;
                  }
               }

            if (!performTransformation(comp(), "%sRedirecting %s guard [%p] in block_%d to parent guard cold block_%d\n",
                   OPT_DETAILS, isStopTheWorldGuard(guard2) ? "stop the world" : "runtime", guard2, guard2Block->getNumber(), cold1->getNumber()))
               continue;

            if (guard2->getBranchDestination() != guard1->getBranchDestination())
               guard2Block->changeBranchDestination(guard1->getBranchDestination(), cfg);

            if (guard2Tree != guard2Block->getFirstRealTreeTop())
               {
               cfg->setStructure(NULL);

               // We should leave code ahead of an HCR guard in place because:
               // 1. it might have side effects visible to the runtime guards after it; moving it up might cause
               //    us to falsely merge the subsequent runtime guards
               // 2. it might contain a live monitor; moving it up above a guard can affect the monitor's live range
               if (!isStopTheWorldGuard(guard2))
                  {
                  // The block created above guard2 contains only privarg treetops or monitor stores if
                  // guard2 is a runtime-patchable guard and is safe to merge. We need to move the priv
                  // args up to the runtime insert point and leave the monitor stores in place.
                  // It's safe to do so because there is no data dependency between the monitor store and
                  // the priv arg store: the priv arg store does not load the value from the temp
                  // holding the monitored object.
                  //
                  // Split priv arg stores from monitor stores.
                  // The monitor store is generated for the caller of the method guard2 protects, so it should
                  // appear before the priv arg stores for the method guard2 protects.
                  TR::Block *privargBlock = guard2Block;
                  guard2Block = splitRuntimeGuardBlock(comp(), guard2Block, cfg);
                  if (privargBlock != guard2Block)
                     {
                     if (trace())
                        traceMsg(comp(), " Moving privarg block_%d after block_%d\n", privargBlock->getNumber(), privargIns->getNumber());

                     moveBlockAfterDest(cfg, privargBlock, privargIns);

                     if (HCRIns == privargIns)
                        HCRIns = privargBlock;
                     if (runtimeIns == privargIns)
                        runtimeIns = privargBlock;
                     privargIns = privargBlock;

                     // Refresh the insertPoint since it could be stale after the above updates
                     insertPoint = runtimeIns;
                     }
                  }

               guard2Block = guard2Block->split(guard2Tree, cfg, true, false);
               if (trace())
                  traceMsg(comp(), " Created new block_%d to hold guard [%p] from block_%d\n", guard2Block->getNumber(), guard2, guard2Block->getNumber());
               }

            if (insertPoint != guard2Block->getPrevBlock())
               {
               TR::DebugCounter::incStaticDebugCounter(comp(), TR::DebugCounter::debugCounterName(comp(), "headMerger/%s_%s/(%s)",
                  isStopTheWorldGuard(guard1) ? "stop the world" : "runtime",
                  isStopTheWorldGuard(guard2) ? "stop the world" : "runtime",
                  comp()->signature()));
               cfg->setStructure(NULL);

               block = nextBlock = guard2Block->getPrevBlock();
               if (trace())
                  traceMsg(comp(), " Moving guard2 block block_%d after block_%d\n", guard2Block->getNumber(), insertPoint->getNumber());

               moveBlockAfterDest(cfg, guard2Block, insertPoint);

               if (HCRIns == insertPoint)
                  HCRIns = guard2Block;
               if (runtimeIns == insertPoint)
                  runtimeIns = guard2Block;
               }
            else
               {
               block = guard2Block;
               }
            guard1 = guard2;
            }
         }
      }
   return 1;
   }
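// AMD64 code generator construction: configure trap-based implicit exception
// checking (unless TR_DisableTraps), double-word CAS/set support, GRA-related
// capabilities, and the sets of global GPRs/FPRs preserved across calls, then
// clamp maxObjectSizeGuaranteedNotToOverflow so the runtime comparison does
// not need a sign extension.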
OMR::X86::AMD64::CodeGenerator::CodeGenerator() :
   OMR::X86::CodeGenerator()
   {
   if (self()->comp()->getOption(TR_DisableTraps))
      {
      _numberBytesReadInaccessible = 0;
      _numberBytesWriteInaccessible = 0;
      }
   else
      {
      _numberBytesReadInaccessible = 4096;
      _numberBytesWriteInaccessible = 4096;
      self()->setHasResumableTrapHandler();
      self()->setEnableImplicitDivideCheck();
      }

   self()->setSupportsDivCheck();

   static char *c = feGetEnv("TR_disableAMD64ValueProfiling");
   if (c)
      self()->comp()->setOption(TR_DisableValueProfiling);

   static char *accessStaticsIndirectly = feGetEnv("TR_AccessStaticsIndirectly");
   if (accessStaticsIndirectly)
      self()->setAccessStaticsIndirectly(true);

   self()->setSupportsDoubleWordCAS();
   self()->setSupportsDoubleWordSet();

   self()->setSupportsGlRegDepOnFirstBlock();
   self()->setConsiderAllAutosAsTacticalGlobalRegisterCandidates();

   // Interpreter frame shape requires all autos to occupy an 8-byte slot on 64-bit.
   //
   if (self()->comp()->getOption(TR_MimicInterpreterFrameShape))
      self()->setMapAutosTo8ByteSlots();

   // Common X86 initialization
   //
   self()->initialize(self()->comp());

   self()->initLinkageToGlobalRegisterMap();

   self()->setRealVMThreadRegister(self()->machine()->getRealRegister(TR::RealRegister::ebp));

   // GRA-related initialization is done after calling initialize() so we can
   // use such things as getNumberOfGlobal[FG]PRs().
   _globalGPRsPreservedAcrossCalls.init(self()->getNumberOfGlobalRegisters(), self()->trMemory());
   _globalFPRsPreservedAcrossCalls.init(self()->getNumberOfGlobalRegisters(), self()->trMemory());

   int16_t i;
   TR_GlobalRegisterNumber grn;
   for (i = 0; i < self()->getNumberOfGlobalGPRs(); i++)
      {
      grn = self()->getFirstGlobalGPR() + i;
      if (self()->getProperties().isPreservedRegister((TR::RealRegister::RegNum)self()->getGlobalRegister(grn)))
         _globalGPRsPreservedAcrossCalls.set(grn);
      }
   for (i = 0; i < self()->getNumberOfGlobalFPRs(); i++)
      {
      grn = self()->getFirstGlobalFPR() + i;
      if (self()->getProperties().isPreservedRegister((TR::RealRegister::RegNum)self()->getGlobalRegister(grn)))
         _globalFPRsPreservedAcrossCalls.set(grn);
      }

   // Reduce the maxObjectSizeGuaranteedNotToOverflow value on 64-bit such that the
   // runtime comparison does not sign extend (saves an instruction on array allocations).
   // INT_MAX should be sufficiently large.
   //
   if ((uint32_t)_maxObjectSizeGuaranteedNotToOverflow > (uint32_t)INT_MAX)
      {
      _maxObjectSizeGuaranteedNotToOverflow = (uint32_t)INT_MAX;
      }
   }
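// IA32 code generator construction: per-platform (Windows/Linux) trap
// configuration and VM thread register setup, register pairs for longs,
// double-word CAS/set support, and (unless TR_dontConsiderAllAutosForGRA)
// treating all autos as tactical global register candidates.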
OMR::X86::I386::CodeGenerator::CodeGenerator() :
   OMR::X86::CodeGenerator()
   {
   // Common X86 initialization
   //
   self()->initialize(self()->comp());

   self()->setUsesRegisterPairsForLongs();

   if (debug("supportsArrayTranslateAndTest"))
      self()->setSupportsArrayTranslateAndTest();
   if (debug("supportsArrayCmp"))
      self()->setSupportsArrayCmp();

   self()->setSupportsDoubleWordCAS();
   self()->setSupportsDoubleWordSet();

   if (TR::Compiler->target.isWindows())
      {
      if (self()->comp()->getOption(TR_DisableTraps))
         {
         _numberBytesReadInaccessible = 0;
         _numberBytesWriteInaccessible = 0;
         }
      else
         {
         _numberBytesReadInaccessible = 4096;
         _numberBytesWriteInaccessible = 4096;
         self()->setHasResumableTrapHandler();
         self()->setEnableImplicitDivideCheck();
         }
      self()->setSupportsDivCheck();
      self()->setJNILinkageCalleeCleanup();
      self()->setRealVMThreadRegister(self()->machine()->getX86RealRegister(TR::RealRegister::ebp));
      }
   else if (TR::Compiler->target.isLinux())
      {
      if (self()->comp()->getOption(TR_DisableTraps))
         {
         _numberBytesReadInaccessible = 0;
         _numberBytesWriteInaccessible = 0;
         }
      else
         {
         _numberBytesReadInaccessible = 4096;
         _numberBytesWriteInaccessible = 4096;
         self()->setHasResumableTrapHandler();
         self()->setEnableImplicitDivideCheck();
         }
      self()->setRealVMThreadRegister(self()->machine()->getX86RealRegister(TR::RealRegister::ebp));
      self()->setSupportsDivCheck();
      }
   else
      {
      TR_ASSERT(0, "unknown target");
      }

   self()->setSupportsInlinedAtomicLongVolatiles();

   static char *dontConsiderAllAutosForGRA = feGetEnv("TR_dontConsiderAllAutosForGRA");
   if (!dontConsiderAllAutosForGRA)
      self()->setConsiderAllAutosAsTacticalGlobalRegisterCandidates();
   }
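// IA32 variant of the limit on global GPRs kept live across a node's outgoing
// edges. The 32-bit target is much more register constrained than AMD64: long
// compares, instanceof-based ifs, switches, and lookupswitches each reserve
// several of the eight GPRs for their evaluators, so the returned budget is
// correspondingly smaller.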
int32_t OMR::X86::I386::CodeGenerator::getMaximumNumberOfGPRsAllowedAcrossEdge(TR::Node *node)
   {
   // TODO: Currently, lookupEvaluator doesn't deal properly with different
   // glRegDeps on different cases of a lookupswitch.
   //
   static const char *enableLookupswitch = feGetEnv("TR_enableGRAAcrossLookupSwitch");
   if (!enableLookupswitch && node->getOpCode().getOpCodeValue() == TR::lookup)
      return 1;

   if (node->getOpCode().getOpCodeValue() == TR::table)
      {
      // 1 for jump table base reg, which is not apparent in the trees
      // 1 for ebp when it is needed for the VMThread
      //
      return self()->getNumberOfGlobalGPRs() - 2;
      }

   if (node->getOpCode().isIf())
      {
      // We run out of all but one/two registers in these cases
      //
      if (node->getFirstChild()->getType().isInt64())
         {
         if (node->getOpCode().isBranch())
            {
            TR::Node *firstChild = node->getFirstChild();
            TR::Node *secondChild = node->getSecondChild();
            int extraRegsAvailable = 0;

            if (firstChild->getOpCodeValue() == TR::d2l
                || secondChild->getOpCodeValue() == TR::d2l)
               {
               return 1;
               }

            if ((firstChild->getReferenceCount() == 1 && firstChild->getOpCode().isLoadVarDirect())
                || (secondChild->getReferenceCount() == 1 && firstChild->getOpCode().isLoadVarDirect()))
               extraRegsAvailable += 0; // TODO: put it back to 2 when looking at GRA, GRA pushes allocation of 8 registers

            return 2 + extraRegsAvailable;
            }
         else
            {
            // TR_lcmpXX opcodes take up 5 regs
            //
            return 1;
            }
         }

      // We run out of all but one register in these cases....last time I tried....
      //
      if (node->getFirstChild()->getOpCodeValue() == TR::instanceof)
         {
         if (!TR::TreeEvaluator::instanceOfOrCheckCastNeedSuperTest(node->getFirstChild(), self()) &&
             TR::TreeEvaluator::instanceOfOrCheckCastNeedEqualityTest(node->getFirstChild(), self()))
            return self()->getNumberOfGlobalGPRs() - 4; // ebp plus three other regs if vft masking is enabled
         else
            return 0;
         }

      // All other conditional branches: we usually need one reg for the compare and possibly one for the vmthread
      //return getNumberOfGlobalGPRs() - 1 - (node->isVMThreadRequired() ? 1 : 0); // vmThread required might be set on a node after GRA has run
      return self()->getNumberOfGlobalGPRs() - 2;
      }

   return INT_MAX;
   }
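// IA32-specific global register selection for a register candidate. With
// register pressure simulation enabled this defers to the common
// OMR::CodeGenerator::pickRegister (masking out the VMThread register when
// TR_AssignEveryGlobalRegister is set); otherwise it uses a hand-tuned
// heuristic: FP candidates pick from global register numbers 7-12, integer
// candidates prefer esi, ecx, then ebx, and edi/edx are only handed out when
// an estimate of register pressure over the candidate's live blocks shows
// spare registers.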
TR_GlobalRegisterNumber OMR::X86::I386::CodeGenerator::pickRegister(
      TR_RegisterCandidate *rc,
      TR::Block **allBlocks,
      TR_BitVector &availableRegisters,
      TR_GlobalRegisterNumber &highRegisterNumber,
      TR_LinkHead<TR_RegisterCandidate> *candidates)
   {
   if (!self()->comp()->getOption(TR_DisableRegisterPressureSimulation))
      {
      if (self()->comp()->getOption(TR_AssignEveryGlobalRegister))
         {
         // This is not really necessary except for testing purposes.
         // Conceptually, the common pickRegister code should be free to make
         // its choices based only on performance considerations, and shouldn't
         // need to worry about correctness. When SupportsVMThreadGRA is not set,
         // it is incorrect to choose the VMThread register. Therefore we mask
         // it out here.
         //
         // Having said that, the common code *does* already mask out the
         // VMThread register for convenience, so under normal circumstances,
         // this code is redundant. It is only necessary when
         // TR_AssignEveryGlobalRegister is set.
         //
         availableRegisters -= *self()->getGlobalRegisters(TR_vmThreadSpill, self()->comp()->getMethodSymbol()->getLinkageConvention());
         }
      return OMR::CodeGenerator::pickRegister(rc, allBlocks, availableRegisters, highRegisterNumber, candidates);
      }

   if ((rc->getSymbol()->getDataType() == TR::Float) ||
       (rc->getSymbol()->getDataType() == TR::Double))
      {
      if (availableRegisters.get(7))
         return 7;
      if (availableRegisters.get(8))
         return 8;
      if (availableRegisters.get(9))
         return 9;
      if (availableRegisters.get(10))
         return 10;
      if (availableRegisters.get(11))
         return 11;
      if (availableRegisters.get(12))
         return 12;
      return -1;
      }

   if (!_assignedGlobalRegisters)
      _assignedGlobalRegisters = new (self()->trStackMemory()) TR_BitVector(self()->comp()->getSymRefCount(), self()->trMemory(), stackAlloc, growable);

   if (availableRegisters.get(5))
      return 5; // esi

   if (availableRegisters.get(2))
      return 2; // ecx

   static char *dontUseEBXasGPR = feGetEnv("dontUseEBXasGPR");
   if (!dontUseEBXasGPR && availableRegisters.get(1))
      return 1;

#ifdef J9_PROJECT_SPECIFIC
   TR::RecognizedMethod rm = self()->comp()->getMethodSymbol()->getRecognizedMethod();
   if (rm == TR::java_util_HashtableHashEnumerator_hasMoreElements)
      {
      if (availableRegisters.get(4))
         return 4; // edi
      if (availableRegisters.get(3))
         return 3; // edx
      }
   else
#endif
      {
      int32_t numExtraRegs = 0;
      int32_t maxRegisterPressure = 0;

      vcount_t visitCount = self()->comp()->incVisitCount();
      TR_BitVectorIterator bvi(rc->getBlocksLiveOnEntry());
      int32_t maxFrequency = 0;
      while (bvi.hasMoreElements())
         {
         int32_t liveBlockNum = bvi.getNextElement();
         TR::Block *block = allBlocks[liveBlockNum];
         if (block->getFrequency() > maxFrequency)
            maxFrequency = block->getFrequency();
         }

      int32_t maxStaticFrequency = 0;
      if (maxFrequency == 0)
         {
         bvi.setBitVector(rc->getBlocksLiveOnEntry());
         while (bvi.hasMoreElements())
            {
            int32_t liveBlockNum = bvi.getNextElement();
            TR::Block *block = allBlocks[liveBlockNum];
            TR_BlockStructure *blockStructure = block->getStructureOf();
            int32_t blockWeight = 1;
            if (blockStructure && !block->isCold())
               {
               blockStructure->calculateFrequencyOfExecution(&blockWeight);
               if (blockWeight > maxStaticFrequency)
                  maxStaticFrequency = blockWeight;
               }
            }
         }

      bool assigningEDX = false;
      if (!availableRegisters.get(4) && availableRegisters.get(3))
         assigningEDX = true;

      bool vmThreadUsed = false;

      bvi.setBitVector(rc->getBlocksLiveOnEntry());
      while (bvi.hasMoreElements())
         {
         int32_t liveBlockNum = bvi.getNextElement();
         TR::Block *block = allBlocks[liveBlockNum];

         _assignedGlobalRegisters->empty();
         int32_t numAssignedGlobalRegs = 0;
         TR_RegisterCandidate *prev;
         for (prev = candidates->getFirst(); prev; prev = prev->getNext())
            {
            bool gprCandidate = true;
            if ((prev->getSymbol()->getDataType() == TR::Float) ||
                (prev->getSymbol()->getDataType() == TR::Double))
               gprCandidate = false;
            if (gprCandidate && prev->getBlocksLiveOnEntry().get(liveBlockNum))
               {
               numAssignedGlobalRegs++;
               if (prev->getDataType() == TR::Int64)
                  numAssignedGlobalRegs++;
               _assignedGlobalRegisters->set(prev->getSymbolReference()->getReferenceNumber());
               }
            }
         maxRegisterPressure = self()->estimateRegisterPressure(block, visitCount, maxStaticFrequency, maxFrequency, vmThreadUsed, numAssignedGlobalRegs, _assignedGlobalRegisters, rc->getSymbolReference(), assigningEDX);

         if (maxRegisterPressure >= self()->getMaximumNumbersOfAssignableGPRs())
            break;
         }

      // Determine if we can spare any extra registers for this candidate without spilling
      // in any hot (critical) blocks
      //
      if (maxRegisterPressure < self()->getMaximumNumbersOfAssignableGPRs())
         numExtraRegs = self()->getMaximumNumbersOfAssignableGPRs() - maxRegisterPressure;

      //dumpOptDetails("For global register candidate %d reg pressure is %d maxRegs %d numExtraRegs %d\n", rc->getSymbolReference()->getReferenceNumber(), maxRegisterPressure, comp()->cg()->getMaximumNumbersOfAssignableGPRs(), numExtraRegs);

      if (numExtraRegs > 0)
         {
         if (availableRegisters.get(4))
            return 4; // edi
         if (availableRegisters.get(3))
            return 3; // edx
         }
      }
   return -1; // -1 ==> don't use a global register
   }