bool TR::CompilationController::init(TR::CompilationInfo *compInfo)
   {
   TR::Options *options = TR::Options::getCmdLineOptions();
   char *strategyName = options->getCompilationStrategyName();

      {
      _compInfo = compInfo;
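      // The default strategy is allocated from persistent memory so it outlives any single compilation.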
      _compilationStrategy = new (PERSISTENT_NEW) TR::DefaultCompilationStrategy();

         {
         TR_OptimizationPlan::_optimizationPlanMonitor = TR::Monitor::create("OptimizationPlanMonitor");
         _useController = (TR_OptimizationPlan::_optimizationPlanMonitor != 0);
         if (_useController)
            {
            static char *verboseController = feGetEnv("TR_VerboseController");
            if (verboseController)
               setVerbose(atoi(verboseController));
            if (verbose() >= LEVEL1)
               fprintf(stderr, "Using %s comp strategy\n", strategyName);
            }
         }
      }

   tlsAlloc(OMR::compilation);

   return _useController;
   }
Example #2
static bool safeToMoveGuard(TR::Block *destination, TR::TreeTop *guardCandidate,
   TR::TreeTop *branchDest, TR_BitVector &privArgSymRefs)
   {
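   // Walk the trees between the tentative insertion point and the guard: an HCR guard must not
   // move across a GC point, an OSR guard must not move across a potential OSR point, and any
   // other guard may only move across priv arg stores and live monitor stores.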
   static char *disablePrivArgMovement = feGetEnv("TR_DisableRuntimeGuardPrivArgMovement");
   TR::TreeTop *start = destination ? destination->getExit() : TR::comp()->getStartTree();
   if (guardCandidate->getNode()->isHCRGuard())
      {
      for (TR::TreeTop *tt = start; tt && tt != guardCandidate; tt = tt->getNextTreeTop())
         {
         if (tt->getNode()->canGCandReturn())
            return false;
         }
      }
   else if (guardCandidate->getNode()->isOSRGuard())
      {
      for (TR::TreeTop *tt = start; tt && tt != guardCandidate; tt = tt->getNextTreeTop())
         {
         if (TR::comp()->isPotentialOSRPoint(tt->getNode(), NULL, true))
            return false;
         }
      }
   else
      {
      privArgSymRefs.empty();
      for (TR::TreeTop *tt = start; tt && tt != guardCandidate; tt = tt->getNextTreeTop())
         {
         // It's safe to move the guard if there are only priv arg stores and live monitor stores
         // ahead of the guard
         if (tt->getNode()->getOpCodeValue() != TR::BBStart
             && tt->getNode()->getOpCodeValue() != TR::BBEnd
             && !tt->getNode()->chkIsPrivatizedInlinerArg()
             && !(tt->getNode()->getOpCode().hasSymbolReference() && tt->getNode()->getSymbol()->holdsMonitoredObject())
             && !tt->getNode()->isNopableInlineGuard())
                return false;

         if (tt->getNode()->chkIsPrivatizedInlinerArg() && (disablePrivArgMovement ||
             // If the priv arg is not for this guard
             (guardCandidate->getNode()->getInlinedSiteIndex() > -1 &&
             // If the priv arg store does not have the same inlined site index as the guard's caller, it is not
             // a priv arg for this guard, so we cannot move the guard and its priv args up across other calls' priv args
             tt->getNode()->getInlinedSiteIndex() != TR::comp()->getInlinedCallSite(guardCandidate->getNode()->getInlinedSiteIndex())._byteCodeInfo.getCallerIndex())))
            return false;

         if (tt->getNode()->chkIsPrivatizedInlinerArg())
            privArgSymRefs.set(tt->getNode()->getSymbolReference()->getReferenceNumber());

         if (tt->getNode()->isNopableInlineGuard()
             && tt->getNode()->getBranchDestination() != branchDest)
            return false;
         }
      }
   return true;
   }
Example #3
void TR::ILValidator::updateNodeState(Location &newLocation)
   {
   TR::Node  *node = newLocation.currentNode();
   NodeState &state = _nodeStates[node];
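   // A node is encountered for the first time when its remaining future reference count
   // still equals its declared reference count.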
   if (node->getReferenceCount() == state._futureReferenceCount)
      {
      // First occurrence -- do some bookkeeping
      //
      if (node->getReferenceCount() == 0)
         {
         validityRule(newLocation, node->getOpCode().isTreeTop(), "Only nodes with isTreeTop opcodes can have refcount == 0");
         }
      else
         {
         _liveNodes.add(node);
         }
      }

   if (_liveNodes.contains(node))
      {
      validityRule(newLocation, state._futureReferenceCount >= 1, "Node already has reference count 0");
      if (--state._futureReferenceCount == 0)
         {
         _liveNodes.remove(node);
         }
      }
   else
      {
      validityRule(newLocation, node->getOpCode().isTreeTop(), "Node has already gone dead");
      }

   if (isLoggingEnabled())
      {
      static const char *traceLiveNodesDuringValidation = feGetEnv("TR_traceLiveNodesDuringValidation");
      if (traceLiveNodesDuringValidation && !_liveNodes.isEmpty())
         {
         traceMsg(comp(), "    -- Live nodes: {");
         char *separator = "";
         for (LiveNodeWindow::Iterator lnwi(_liveNodes); lnwi.currentNode(); ++lnwi)
            {
            traceMsg(comp(), "%sn%dn", separator, lnwi.currentNode()->getGlobalIndex());
            separator = ", ";
            }
         traceMsg(comp(), "}\n");
         }
      }

   }
Example #4
int32_t
OMR::X86::AMD64::CodeGenerator::getMaximumNumberOfGPRsAllowedAcrossEdge(TR::Node *node)
   {
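   // Conservative limits on how many GPRs GRA may keep live across this branch or switch node.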
   // TODO: Currently, lookupEvaluator doesn't deal properly with different
   // glRegDeps on different cases of a lookupswitch.
   //
   static const char *enableLookupswitch = feGetEnv("TR_enableGRAAcrossLookupSwitch");
   if (!enableLookupswitch && node->getOpCode().getOpCodeValue()==TR::lookup)
      return 1;

   if (node->getOpCode().isIf() && node->getFirstChild()->getOpCodeValue() == TR::instanceof)
      return self()->getNumberOfGlobalGPRs() - 6;
   else if(node->getOpCode().isSwitch())
      return self()->getNumberOfGlobalGPRs() - 3; // A memref in a jump for a MemTable might need a base, index and address register.

   return INT_MAX;
   }
Example #5
void
TestCompiler::FrontEnd::generateBinaryEncodingPrologue(
      TR_BinaryEncodingData *beData,
      TR::CodeGenerator *cg)
   {
   TR::Compilation* comp = cg->comp();
   TR_S390BinaryEncodingData *data = (TR_S390BinaryEncodingData *)beData;

   data->cursorInstruction = comp->getFirstInstruction();
   data->estimate = 0;
   data->preProcInstruction = data->cursorInstruction;
   data->jitTojitStart = data->cursorInstruction;
   data->cursorInstruction = NULL;

   TR::Instruction * preLoadArgs, * endLoadArgs;
   preLoadArgs = data->preProcInstruction;
   endLoadArgs = preLoadArgs;

   TR::Instruction * oldFirstInstruction = data->cursorInstruction;

   data->cursorInstruction = comp->getFirstInstruction();

   static char *disableAlignJITEP = feGetEnv("TR_DisableAlignJITEP");

   // Padding for JIT Entry Point
   if (!disableAlignJITEP)
      {
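      // Reserve a conservative upper bound in the size estimate for entry-point alignment padding.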
      data->estimate += 256;
      }

   while (data->cursorInstruction && data->cursorInstruction->getOpCodeValue() != TR::InstOpCode::PROC)
      {
      data->estimate = data->cursorInstruction->estimateBinaryLength(data->estimate);
      data->cursorInstruction = data->cursorInstruction->getNext();
      }

   cg->getLinkage()->createPrologue(data->cursorInstruction);
   //cg->getLinkage()->analyzePrologue();
   }
Example #6
int32_t TR_AsyncCheckInsertion::perform()
   {
   TR::StackMemoryRegion stackMemoryRegion(*trMemory());

   // If this is a large acyclic method - add a yield point at each return from this method
   // so that sampling will realize that we are actually in this method.
   //
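   // The TR_LargeMethodNodes environment variable overrides the default node-count threshold.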
   static const char *p;
   static uint32_t numNodesInLargeMethod = (p = feGetEnv("TR_LargeMethodNodes")) ? atoi(p) : NUMBER_OF_NODES_IN_LARGE_METHOD;
   const bool largeAcyclicMethod =
      !comp()->mayHaveLoops() && comp()->getNodeCount() > numNodesInLargeMethod;

   // If this method has loops whose asyncchecks were versioned out, it may
   // still spend a significant amount of time in each invocation without
   // yielding. In this case, insert yield points before returns whenever there
   // is a sufficiently frequent block somewhere in the method.
   //
   bool loopyMethodWithVersionedAsyncChecks = false;
   if (!largeAcyclicMethod && comp()->getLoopWasVersionedWrtAsyncChecks())
      {
      // The max (normalized) block frequency is fixed, but very frequent
      // blocks push down the frequency of method entry.
      int32_t entry = comp()->getStartTree()->getNode()->getBlock()->getFrequency();
      int32_t limit = comp()->getOptions()->getLoopyAsyncCheckInsertionMaxEntryFreq();
      loopyMethodWithVersionedAsyncChecks = 0 <= entry && entry <= limit;
      }

   if (largeAcyclicMethod || loopyMethodWithVersionedAsyncChecks)
      {
      const char * counterPrefix = largeAcyclicMethod ? "acyclic" : "loopy";
      int32_t numAsyncChecksInserted = insertReturnAsyncChecks(this, counterPrefix);
      if (trace())
         traceMsg(comp(), "Inserted %d async checks\n", numAsyncChecksInserted);
      return 1;
      }

   return 0;
   }
Example #7
TR::Register *OMR::X86::AMD64::TreeEvaluator::dbits2lEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   // TODO:AMD64: Peepholing
   TR::Node      *child  = node->getFirstChild();
   TR::Register  *sreg   = cg->evaluate(child);
   TR::Register  *treg   = cg->allocateRegister(TR_GPR);
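   // Move the raw 64-bit pattern of the double into a general-purpose register.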
   generateRegRegInstruction(MOVQReg8Reg, node, treg, sreg, cg);
   if (node->normalizeNanValues())
      {
      static char *disableFastNormalizeNaNs = feGetEnv("TR_disableFastNormalizeNaNs");
      if (disableFastNormalizeNaNs)
         {
          // This one is not clever, but it is simple, and it is based directly
          // on the IA32 version which is known to work, so it is safer.
         //
         TR::RegisterDependencyConditions  *deps = generateRegisterDependencyConditions((uint8_t)0, (uint8_t)1, cg);
         deps->addPostCondition(treg, TR::RealRegister::NoReg, cg);

         TR::IA32ConstantDataSnippet *nan1Snippet = cg->findOrCreate8ByteConstant(node, DOUBLE_NAN_1_LOW);
         TR::IA32ConstantDataSnippet *nan2Snippet = cg->findOrCreate8ByteConstant(node, DOUBLE_NAN_2_LOW);
         TR::MemoryReference      *nan1MR      = generateX86MemoryReference(nan1Snippet, cg);
         TR::MemoryReference      *nan2MR      = generateX86MemoryReference(nan2Snippet, cg);

         TR::LabelSymbol *startLabel     = TR::LabelSymbol::create(cg->trHeapMemory(),cg);
         TR::LabelSymbol *normalizeLabel = TR::LabelSymbol::create(cg->trHeapMemory(),cg);
         TR::LabelSymbol *endLabel       = TR::LabelSymbol::create(cg->trHeapMemory(),cg);
         startLabel->setStartInternalControlFlow();
         endLabel  ->setEndInternalControlFlow();

         generateLabelInstruction(   LABEL,       node, startLabel,               cg);
         generateRegMemInstruction(  CMP8RegMem,  node, treg, nan1MR,             cg);
         generateLabelInstruction(   JGE4,        node, normalizeLabel,           cg);
         generateRegMemInstruction(  CMP8RegMem,  node, treg, nan2MR,             cg);
         generateLabelInstruction(   JB4,         node, endLabel,                 cg);
         generateLabelInstruction(   LABEL,       node, normalizeLabel,           cg);
         generateRegImm64Instruction( MOV8RegImm64, node, treg, DOUBLE_NAN,         cg);
         generateLabelInstruction(   LABEL,       node, endLabel,           deps, cg);
         }
      else
         {
         // A bunch of bookkeeping
         //
         uint64_t nanDetector = DOUBLE_NAN_2_LOW;

         TR::RegisterDependencyConditions  *internalControlFlowDeps = generateRegisterDependencyConditions((uint8_t)0, (uint8_t)1, cg);
         internalControlFlowDeps->addPostCondition(treg, TR::RealRegister::NoReg, cg);

         TR::RegisterDependencyConditions  *helperDeps = generateRegisterDependencyConditions((uint8_t)1, (uint8_t)1, cg);
         helperDeps->addPreCondition( treg, TR::RealRegister::eax, cg);
         helperDeps->addPostCondition(treg, TR::RealRegister::eax, cg);

         TR::IA32ConstantDataSnippet *nanDetectorSnippet  = cg->findOrCreate8ByteConstant(node, nanDetector);
         TR::MemoryReference      *nanDetectorMR       = generateX86MemoryReference(nanDetectorSnippet,  cg);

         TR::LabelSymbol *startLabel     = TR::LabelSymbol::create(cg->trHeapMemory(),cg);
         TR::LabelSymbol *slowPathLabel  = TR::LabelSymbol::create(cg->trHeapMemory(),cg);
         TR::LabelSymbol *normalizeLabel = TR::LabelSymbol::create(cg->trHeapMemory(),cg);
         TR::LabelSymbol *endLabel       = TR::LabelSymbol::create(cg->trHeapMemory(),cg);
         startLabel->setStartInternalControlFlow();
         endLabel  ->setEndInternalControlFlow();

         // Fast path: if subtracting nanDetector leaves CF=0 or OF=1, then it
         // must be a NaN.
         //
         generateLabelInstruction(  LABEL,       node, startLabel,           cg);
         generateRegMemInstruction( CMP8RegMem,  node, treg, nanDetectorMR,  cg);
         generateLabelInstruction(  JAE4,        node, slowPathLabel,        cg);
         generateLabelInstruction(  JO4,         node, slowPathLabel,        cg);

         // Slow path
         //
         TR_OutlinedInstructions *slowPath = new (cg->trHeapMemory()) TR_OutlinedInstructions(slowPathLabel, cg);
         cg->getOutlinedInstructionsList().push_front(slowPath);
         slowPath->swapInstructionListsWithCompilation();
         generateLabelInstruction(NULL, LABEL,       slowPathLabel,          cg)->setNode(node);
         generateRegImm64Instruction(MOV8RegImm64, node, treg, DOUBLE_NAN, cg);
         generateLabelInstruction(      JMP4,        node, endLabel,         cg);
         slowPath->swapInstructionListsWithCompilation();

         // Merge point
         //
         generateLabelInstruction(LABEL, node, endLabel, internalControlFlowDeps, cg);
         }
      }
   node->setRegister(treg);
   cg->decReferenceCount(child);
   return treg;
   }
Example #8
static bool isMergeableGuard(TR::Node *node)
   {
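   // TR_MergeOnlyHCRGuards restricts head merging to HCR guards; by default any nopable inline guard qualifies.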
   static char *mergeOnlyHCRGuards = feGetEnv("TR_MergeOnlyHCRGuards");
   return mergeOnlyHCRGuards ? node->isHCRGuard() : node->isNopableInlineGuard();
   }
Example #9
// This opt tries to reduce merge backs from cold code that are the result of inliner
// generated nopable virtual guards
// It looks for one basic pattern
//
// guard1 -> cold1
// BBEND
// BBSTART
// guard2 -> cold2
// if guard1 is the guard for a method which calls the method guard2 protects, or cold1 is
// a predecessor of cold2 (a situation commonly created by the virtual guard tail splitter), we
// can transform the guards as follows when guard1 and guard2 are mergeable guards
// guard1 -> cold1
// BBEND
// BBSTART
// guard2 -> cold1
// This is safe because there are no trees between the guards, and calling the caller will
// result in the call to the callee if we need to patch guard2. cold2 and its mergebacks
// can then be eliminated
//
// In addition this opt will try to move guard2 up from the end of a block to the
// start of the block. We can do this if guard2 is an HCR guard and there is no GC point
// between BBSTART and guard2 since HCR is a stop-the-world event.
//
// Finally, there is a simple tail splitting step run before the analysis of a guard if we
// detect that the taken side of the guard merges back in the next block - this happens
// for some empty methods and is common for Object.<init> at the top of constructors.
int32_t TR_VirtualGuardHeadMerger::perform() {
   static char *disableVGHeadMergerTailSplitting = feGetEnv("TR_DisableVGHeadMergerTailSplitting");
   TR::CFG *cfg = comp()->getFlowGraph();

   // Cache the loads for the outer guard's cold path
   TR_BitVector coldPathLoads(comp()->trMemory()->currentStackRegion());
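   // Symbol references of privatized argument stores seen ahead of a candidate guard (populated by safeToMoveGuard)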
   TR_BitVector privArgSymRefs(comp()->trMemory()->currentStackRegion());
   bool evaluatedColdPathLoads = false;

   for (TR::Block *block = optimizer()->getMethodSymbol()->getFirstTreeTop()->getNode()->getBlock();
        block; block = block->getNextBlock())
      {
      TR::Node *guard1 = block->getLastRealTreeTop()->getNode();

      if (isMergeableGuard(guard1))
         {
         if (trace())
            traceMsg(comp(), "Found mergeable guard in block_%d\n", block->getNumber());
         TR::Block *cold1 = guard1->getBranchDestination()->getEnclosingBlock();

         // check for an immediate merge back from the cold block and
         // tail split one block if we can - we only handle splitting a block
         // ending in a fallthrough, a branch or a goto for now for simplicity
         if (!disableVGHeadMergerTailSplitting &&
             (cold1->getSuccessors().size() == 1) &&
             cold1->hasSuccessor(block->getNextBlock()) &&
             cold1->getLastRealTreeTop()->getNode()->getOpCode().isGoto())
            {
            // TODO handle moving code earlier in the block down below the guard
            // tail split
            if ((block->getNextBlock()->getSuccessors().size() == 1) ||
                ((block->getNextBlock()->getSuccessors().size() == 2) &&
                 block->getNextBlock()->getLastRealTreeTop()->getNode()->getOpCode().isBranch()) &&
                performTransformation(comp(), "%sCloning block_%d and placing clone after block_%d to reduce HCR guard nops\n", OPT_DETAILS, block->getNextBlock()->getNumber(), cold1->getNumber()))
               tailSplitBlock(block, cold1);
            }

         // guard motion is fairly complex but what we want to achieve around guard1 is a sequence
         // of relocated privarg blocks, followed by a sequence of runtime patchable guards going to
         // guard1's cold block, followed by a sequence of stop-the-world guards going to guard1's
         // cold block
         //
         // The following code is to setup the various insert points based on the following diagrams
         // of basic blocks:
         //
         // start:               setup:                          end result after moving runtime guard'
         //                       |       |                        +-------+ <-- privargIns
         //                       |       | <-- privargIns             |
         //                       +-------+ <-- runtimeIns         +-------+
         //   |       |               |                            | Guard'|
         //   |       |               V                            +-------+ <-- runtimeIns
         //   +-------+           +-------+                            |
         //   | Guard |           | Guard |                            V
         //   +-------+           +-------+ <-- HCRIns             +-------+
         //       |        ===>       |                    ===>    | Guard |
         //       V                   V                            +-------+ <-- HCRIns
         //   +-------+           +-------+                            |
         //   |       |           |       |                            V
         //   |       |           |       |                        +-------+
         //
         // Note we always split the block - this may create an empty block but it preserves the incoming
         // control flow; we leave the rest to block extension to fix later

         block = block->split(block->getLastRealTreeTop(), cfg, true, false);
         TR::Block *privargIns = block->getPrevBlock();
         TR::Block *runtimeIns = block->getPrevBlock();
         TR::Block *HCRIns = block;

         // New outer guard so cold paths must be evaluated
         evaluatedColdPathLoads = false;

         // scan for candidate guards to merge with guard1 identified above
         for (TR::Block *nextBlock = block->getNextBlock(); nextBlock; nextBlock = nextBlock->getNextBlock())
            {
            if (!(nextBlock->getPredecessors().size() == 1) ||
                !nextBlock->hasPredecessor(block))
               {
               break;
               }

            TR::TreeTop *guard2Tree = NULL;
            if (isMergeableGuard(nextBlock->getFirstRealTreeTop()->getNode()))
               {
               guard2Tree = nextBlock->getFirstRealTreeTop();
               }
            else if (isMergeableGuard(nextBlock->getLastRealTreeTop()->getNode()))
               {
               guard2Tree = nextBlock->getLastRealTreeTop();
               }
            else
               break;

            TR::Node *guard2 = guard2Tree->getNode();
            TR::Block *guard2Block = nextBlock;

            // It is not possible to shift an OSR guard unless the destination is already an OSR point
            // as the necessary OSR state will not be available
            if (guard2->isOSRGuard() && !guard1->isOSRGuard())
               break;

            TR::Block *insertPoint = isStopTheWorldGuard(guard2) ? HCRIns : runtimeIns;
            if (!safeToMoveGuard(insertPoint, guard2Tree, guard1->getBranchDestination(), privArgSymRefs))
               break;

            // Now we figure out if we can redirect guard2 to guard1's cold block,
            // i.e., whether we can do the head merge
            TR::Block *cold2 = guard2->getBranchDestination()->getEnclosingBlock();
            if (guard1->getInlinedSiteIndex() == guard2->getInlinedSiteIndex())
               {
               if (trace())
                  traceMsg(comp(), "  Guard1 [%p] is guarding the same call as Guard2 [%p] - proceeding with guard merging\n", guard1, guard2);
               }
            else if (guard2->getInlinedSiteIndex() > -1 &&
                guard1->getInlinedSiteIndex() == TR::comp()->getInlinedCallSite(guard2->getInlinedSiteIndex())._byteCodeInfo.getCallerIndex())
               {
               if (trace())
                  traceMsg(comp(), "  Guard1 [%p] is the caller of Guard2 [%p] - proceeding with guard merging\n", guard1, guard2);
               }
            else if ((cold1->getSuccessors().size() == 1) &&
                     cold1->hasSuccessor(cold2))
               {
               if (trace())
                  traceMsg(comp(), "  Guard1 cold destination block_%d has guard2 cold destination block_%d as its only successor - proceeding with guard merging\n", cold1->getNumber(), cold2->getNumber());
               }
            else
               {
               if (trace())
                  traceMsg(comp(), "  Cold1 block_%d and cold2 block_%d of guard2 [%p] in unknown relationship - abandon the merge attempt\n", cold1->getNumber(), cold2->getNumber(), guard2);
               break;
               }

            // Runtime guards will shift their privargs, so it is necessary to check such a move is safe
            // This is possible if a privarg temp was recycled for the inner call site, with a prior use as an
            // argument for the outer call site. As the privargs for the inner call site must be evaluated before
            // both guards, this would result in the recycled temp holding the incorrect value if the guard is ever
            // taken.
            if (!isStopTheWorldGuard(guard2))
               {
               if (!evaluatedColdPathLoads)
                  {
                  collectColdPathLoads(cold1, coldPathLoads);
                  evaluatedColdPathLoads = true;
                  }

               if (coldPathLoads.intersects(privArgSymRefs))
                  {
                  if (trace())
                     traceMsg(comp(), "  Recycled temp live in cold1 block_%d and used as privarg before guard2 [%p] - stop guard merging", cold1->getNumber(), guard2);
                  break;
                  }
               }

            if (!performTransformation(comp(), "%sRedirecting %s guard [%p] in block_%d to parent guard cold block_%d\n", OPT_DETAILS, isStopTheWorldGuard(guard2) ? "stop the world" : "runtime", guard2, guard2Block->getNumber(), cold1->getNumber()))
                  continue;

            if (guard2->getBranchDestination() != guard1->getBranchDestination())
               guard2Block->changeBranchDestination(guard1->getBranchDestination(), cfg);

            if (guard2Tree != guard2Block->getFirstRealTreeTop())
               {
               cfg->setStructure(NULL);

               // We should leave code ahead of an HCR guard in place because:
               // 1. it might have side effects on runtime guards after it; moving it up might cause us to
               //    falsely merge the subsequent runtime guards
               // 2. it might contain a live monitor store; moving it up above a guard can affect the monitor's live range
               if (!isStopTheWorldGuard(guard2))
                  {
                  // The block created above guard2 contains only privarg treetops or monitor stores if
                  // guard2 is a runtime-patchable guard and is safe to merge. We need to move the priv
                  // args up to the runtime insert point and leave the monitor stores in place.
                  // It's safe to do so because there is no data dependency between the monitor store and
                  // the priv arg store: the priv arg store does not load the value from the temp
                  // holding the monitored object.

                  // Split priv arg stores from monitor stores.
                  // The monitor store is generated for the caller of the method guard2 protects, so it should
                  // appear before the priv arg stores for the method guard2 protects.
                  TR::Block *privargBlock = guard2Block;
                  guard2Block = splitRuntimeGuardBlock(comp(), guard2Block, cfg);
                  if (privargBlock != guard2Block)
                     {
                     if (trace())
                        traceMsg(comp(), "  Moving privarg block_%d after block_%d\n", privargBlock->getNumber(), privargIns->getNumber());

                     moveBlockAfterDest(cfg, privargBlock, privargIns);

                     if (HCRIns == privargIns)
                        HCRIns = privargBlock;
                     if (runtimeIns == privargIns)
                        runtimeIns = privargBlock;
                     privargIns = privargBlock;

                     // refresh the insertPoint since it could be stale after the above updates
                     insertPoint = runtimeIns;
                     }
                  }

               guard2Block = guard2Block->split(guard2Tree, cfg, true, false);
               if (trace())
                  traceMsg(comp(), "  Created new block_%d to hold guard [%p] from block_%d\n", guard2Block->getNumber(), guard2, guard2Block->getNumber());
               }

            if (insertPoint != guard2Block->getPrevBlock())
               {
               TR::DebugCounter::incStaticDebugCounter(comp(), TR::DebugCounter::debugCounterName(comp(), "headMerger/%s_%s/(%s)", isStopTheWorldGuard(guard1) ? "stop the world" : "runtime", isStopTheWorldGuard(guard2) ? "stop the world" : "runtime", comp()->signature()));
               cfg->setStructure(NULL);

               block = nextBlock = guard2Block->getPrevBlock();
               if (trace())
                  traceMsg(comp(), "  Moving guard2 block block_%d after block_%d\n", guard2Block->getNumber(), insertPoint->getNumber());

               moveBlockAfterDest(cfg, guard2Block, insertPoint);

               if (HCRIns == insertPoint)
                  HCRIns = guard2Block;
               if (runtimeIns == insertPoint)
                  runtimeIns = guard2Block;
               }
            else
               {
               block = guard2Block;
               }
            guard1 = guard2;
            }
         }
      }
   return 1;
}
Example #10
OMR::X86::AMD64::CodeGenerator::CodeGenerator() :
   OMR::X86::CodeGenerator()
   {
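   // With traps enabled, the low 4K page is treated as inaccessible so implicit null checks and divide checks can rely on hardware traps.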

   if (self()->comp()->getOption(TR_DisableTraps))
      {
      _numberBytesReadInaccessible  = 0;
      _numberBytesWriteInaccessible = 0;
      }
   else
      {
      _numberBytesReadInaccessible  = 4096;
      _numberBytesWriteInaccessible = 4096;
      self()->setHasResumableTrapHandler();
      self()->setEnableImplicitDivideCheck();
      }

   self()->setSupportsDivCheck();

   static char *c = feGetEnv("TR_disableAMD64ValueProfiling");
   if (c)
      self()->comp()->setOption(TR_DisableValueProfiling);

   static char *accessStaticsIndirectly = feGetEnv("TR_AccessStaticsIndirectly");
   if (accessStaticsIndirectly)
      self()->setAccessStaticsIndirectly(true);

   self()->setSupportsDoubleWordCAS();
   self()->setSupportsDoubleWordSet();

   self()->setSupportsGlRegDepOnFirstBlock();
   self()->setConsiderAllAutosAsTacticalGlobalRegisterCandidates();

   // Interpreter frame shape requires all autos to occupy an 8-byte slot on 64-bit.
   //
   if (self()->comp()->getOption(TR_MimicInterpreterFrameShape))
      self()->setMapAutosTo8ByteSlots();

   // Common X86 initialization
   //
   self()->initialize(self()->comp());

   self()->initLinkageToGlobalRegisterMap();

   self()->setRealVMThreadRegister(self()->machine()->getRealRegister(TR::RealRegister::ebp));

   // GRA-related initialization is done after calling initialize() so we can
   // use such things as getNumberOfGlobal[FG]PRs().

   _globalGPRsPreservedAcrossCalls.init(self()->getNumberOfGlobalRegisters(), self()->trMemory());
   _globalFPRsPreservedAcrossCalls.init(self()->getNumberOfGlobalRegisters(), self()->trMemory());

   int16_t i;
   TR_GlobalRegisterNumber grn;
   for (i=0; i < self()->getNumberOfGlobalGPRs(); i++)
      {
      grn = self()->getFirstGlobalGPR() + i;
      if (self()->getProperties().isPreservedRegister((TR::RealRegister::RegNum)self()->getGlobalRegister(grn)))
         _globalGPRsPreservedAcrossCalls.set(grn);
      }
   for (i=0; i < self()->getNumberOfGlobalFPRs(); i++)
      {
      grn = self()->getFirstGlobalFPR() + i;
      if (self()->getProperties().isPreservedRegister((TR::RealRegister::RegNum)self()->getGlobalRegister(grn)))
         _globalFPRsPreservedAcrossCalls.set(grn);
      }

   // Reduce the maxObjectSizeGuaranteedNotToOverflow value on 64-bit such that the
   // runtime comparison does not sign extend (saves an instruction on array allocations).
   // INT_MAX should be sufficiently large.
   //
   if ((uint32_t)_maxObjectSizeGuaranteedNotToOverflow > (uint32_t)INT_MAX)
      {
      _maxObjectSizeGuaranteedNotToOverflow = (uint32_t)INT_MAX;
      }
   }
Example #11
OMR::X86::I386::CodeGenerator::CodeGenerator() :
   OMR::X86::CodeGenerator()
   {
   // Common X86 initialization
   //
   self()->initialize( self()->comp() );

   self()->setUsesRegisterPairsForLongs();

   if (debug("supportsArrayTranslateAndTest"))
      self()->setSupportsArrayTranslateAndTest();
   if (debug("supportsArrayCmp"))
      self()->setSupportsArrayCmp();

   self()->setSupportsDoubleWordCAS();
   self()->setSupportsDoubleWordSet();

   if (TR::Compiler->target.isWindows())
      {
      if (self()->comp()->getOption(TR_DisableTraps))
         {
         _numberBytesReadInaccessible = 0;
         _numberBytesWriteInaccessible = 0;
         }
      else
         {
         _numberBytesReadInaccessible = 4096;
         _numberBytesWriteInaccessible = 4096;
         self()->setHasResumableTrapHandler();
         self()->setEnableImplicitDivideCheck();
         }
      self()->setSupportsDivCheck();
      self()->setJNILinkageCalleeCleanup();
      self()->setRealVMThreadRegister(self()->machine()->getX86RealRegister(TR::RealRegister::ebp));
      }
   else if (TR::Compiler->target.isLinux())
      {
      if (self()->comp()->getOption(TR_DisableTraps))
         {
         _numberBytesReadInaccessible = 0;
         _numberBytesWriteInaccessible = 0;
         }
      else
         {
         _numberBytesReadInaccessible = 4096;
         _numberBytesWriteInaccessible = 4096;
         self()->setHasResumableTrapHandler();
         self()->setEnableImplicitDivideCheck();
         }
      self()->setRealVMThreadRegister(self()->machine()->getX86RealRegister(TR::RealRegister::ebp));
      self()->setSupportsDivCheck();
      }
   else
      {
      TR_ASSERT(0, "unknown target");
      }

   self()->setSupportsInlinedAtomicLongVolatiles();

   static char *dontConsiderAllAutosForGRA = feGetEnv("TR_dontConsiderAllAutosForGRA");
   if (!dontConsiderAllAutosForGRA)
      self()->setConsiderAllAutosAsTacticalGlobalRegisterCandidates();

   }
Example #12
int32_t
OMR::X86::I386::CodeGenerator::getMaximumNumberOfGPRsAllowedAcrossEdge(TR::Node *node)
   {
   // TODO: Currently, lookupEvaluator doesn't deal properly with different
   // glRegDeps on different cases of a lookupswitch.
   //
   static const char *enableLookupswitch = feGetEnv("TR_enableGRAAcrossLookupSwitch");
   if (!enableLookupswitch && node->getOpCode().getOpCodeValue()==TR::lookup)
      return 1;

   if (node->getOpCode().getOpCodeValue()==TR::table)
      {
      // 1 for jump table base reg, which is not apparent in the trees
      // 1 for ebp when it is needed for the VMThread
      //
      return self()->getNumberOfGlobalGPRs() - 2;
      }

   if (node->getOpCode().isIf())
      {
      // we run out of all but one/two registers in these cases
      //
      if (node->getFirstChild()->getType().isInt64())
         {
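         // 64-bit compares use register pairs on IA32, leaving few GPRs free across the edge.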
         if (node->getOpCode().isBranch())
            {
            TR::Node *firstChild = node->getFirstChild();
            TR::Node *secondChild = node->getSecondChild();
            int extraRegsAvailable = 0;


            if(firstChild->getOpCodeValue() == TR::d2l ||
               secondChild->getOpCodeValue() == TR::d2l)
               {
               return 1;
               }

            if ((firstChild->getReferenceCount() == 1 &&
                firstChild->getOpCode().isLoadVarDirect()) ||
                (secondChild->getReferenceCount() == 1 &&
                 firstChild->getOpCode().isLoadVarDirect()))
               extraRegsAvailable += 0; // TODO: put it back to 2 when looking at GRA, GRA pushes allocation of 8 registers

            return 2 + extraRegsAvailable;
            }
         else
            {
            // TR_lcmpXX opcodes take up 5 regs
            //
            return 1;
            }
         }

      // we run out of all but one register in these cases....last time I tried....
      //

      if (node->getFirstChild()->getOpCodeValue() == TR::instanceof)
         {
         if (!TR::TreeEvaluator::instanceOfOrCheckCastNeedSuperTest(node->getFirstChild(), self())  &&
             TR::TreeEvaluator::instanceOfOrCheckCastNeedEqualityTest(node->getFirstChild(), self()))
            return self()->getNumberOfGlobalGPRs() - 4; // ebp plus three other regs if vft masking is enabled
         else
            return 0;
         }

      // For all other conditional branches, we usually need one reg for the compare and possibly one for the vmthread
      //return getNumberOfGlobalGPRs() - 1 - (node->isVMThreadRequired()? 1 : 0);
      // vmThread required might be set on a node after GRA has run
      return self()->getNumberOfGlobalGPRs() - 2;
      }

   return INT_MAX;
   }
Example #13
TR_GlobalRegisterNumber
OMR::X86::I386::CodeGenerator::pickRegister(
      TR_RegisterCandidate *rc,
      TR::Block **allBlocks,
      TR_BitVector &availableRegisters,
      TR_GlobalRegisterNumber &highRegisterNumber,
      TR_LinkHead<TR_RegisterCandidate> *candidates)
   {
   if (!self()->comp()->getOption(TR_DisableRegisterPressureSimulation))
      {
      if (self()->comp()->getOption(TR_AssignEveryGlobalRegister))
         {
         // This is not really necessary except for testing purposes.
         // Conceptually, the common pickRegister code should be free to make
         // its choices based only on performance considerations, and shouldn't
         // need to worry about correctness.  When SupportsVMThreadGRA is not set,
         // it is incorrect to choose the VMThread register.  Therefore we mask
         // it out here.
         //
         // Having said that, the common code *does* already mask out the
         // VMThread register for convenience, so under normal circumstances,
         // this code is redundant.  It is only necessary when
         // TR_AssignEveryGlobalRegister is set.
         //
         availableRegisters -= *self()->getGlobalRegisters(TR_vmThreadSpill, self()->comp()->getMethodSymbol()->getLinkageConvention());
         }
      return OMR::CodeGenerator::pickRegister(rc, allBlocks, availableRegisters, highRegisterNumber, candidates);
      }

   if ((rc->getSymbol()->getDataType() == TR::Float) ||
       (rc->getSymbol()->getDataType() == TR::Double))
      {
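      // Floating-point candidates: return the first available global FPR (global register numbers 7 through 12).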
      if (availableRegisters.get(7))
         return 7;
      if (availableRegisters.get(8))
         return 8;
      if (availableRegisters.get(9))
         return 9;
      if (availableRegisters.get(10))
         return 10;
      if (availableRegisters.get(11))
         return 11;
      if (availableRegisters.get(12))
         return 12;

      return -1;
      }


   if (!_assignedGlobalRegisters)
      _assignedGlobalRegisters = new (self()->trStackMemory()) TR_BitVector(self()->comp()->getSymRefCount(), self()->trMemory(), stackAlloc, growable);
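   // When not simulating register pressure, prefer esi, then ecx, then (unless disabled via env var) ebx.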

   if (availableRegisters.get(5))
      return 5; // esi

   if (availableRegisters.get(2))
      return 2; // ecx

   static char *dontUseEBXasGPR = feGetEnv("dontUseEBXasGPR");
   if (!dontUseEBXasGPR && availableRegisters.get(1))
      return 1;

#ifdef J9_PROJECT_SPECIFIC
   TR::RecognizedMethod rm = self()->comp()->getMethodSymbol()->getRecognizedMethod();
   if (rm == TR::java_util_HashtableHashEnumerator_hasMoreElements)
      {
      if (availableRegisters.get(4))
         return 4; // edi
      if (availableRegisters.get(3))
         return 3; // edx
      }
   else
#endif
      {
      int32_t numExtraRegs = 0;
      int32_t maxRegisterPressure = 0;

      vcount_t visitCount = self()->comp()->incVisitCount();
      TR_BitVectorIterator bvi(rc->getBlocksLiveOnEntry());
      int32_t maxFrequency = 0;
      while (bvi.hasMoreElements())
         {
         int32_t liveBlockNum = bvi.getNextElement();
         TR::Block *block = allBlocks[liveBlockNum];
         if (block->getFrequency() > maxFrequency)
             maxFrequency = block->getFrequency();
         }

      int32_t maxStaticFrequency = 0;
      if (maxFrequency == 0)
         {
         bvi.setBitVector(rc->getBlocksLiveOnEntry());
         while (bvi.hasMoreElements())
            {
            int32_t liveBlockNum = bvi.getNextElement();
            TR::Block *block = allBlocks[liveBlockNum];
            TR_BlockStructure *blockStructure = block->getStructureOf();
            int32_t blockWeight = 1;
            if (blockStructure &&
                !block->isCold())
               {
               blockStructure->calculateFrequencyOfExecution(&blockWeight);
               if (blockWeight > maxStaticFrequency)
                  maxStaticFrequency = blockWeight;
               }
            }
         }

      bool assigningEDX = false;
      if (!availableRegisters.get(4) &&
          availableRegisters.get(3))
         assigningEDX = true;

      bool vmThreadUsed = false;

      bvi.setBitVector(rc->getBlocksLiveOnEntry());
      while (bvi.hasMoreElements())
         {
         int32_t liveBlockNum = bvi.getNextElement();
         TR::Block *block = allBlocks[liveBlockNum];

         _assignedGlobalRegisters->empty();
         int32_t numAssignedGlobalRegs = 0;
         TR_RegisterCandidate *prev;
         for (prev = candidates->getFirst(); prev; prev = prev->getNext())
            {
            bool gprCandidate = true;
            if ((prev->getSymbol()->getDataType() == TR::Float) ||
                (prev->getSymbol()->getDataType() == TR::Double))
               gprCandidate = false;
            if (gprCandidate && prev->getBlocksLiveOnEntry().get(liveBlockNum))
               {
               numAssignedGlobalRegs++;
               if (prev->getDataType() == TR::Int64)
                  numAssignedGlobalRegs++;
               _assignedGlobalRegisters->set(prev->getSymbolReference()->getReferenceNumber());
               }
            }

         maxRegisterPressure = self()->estimateRegisterPressure(block, visitCount, maxStaticFrequency, maxFrequency, vmThreadUsed, numAssignedGlobalRegs, _assignedGlobalRegisters, rc->getSymbolReference(), assigningEDX);

         if (maxRegisterPressure >= self()->getMaximumNumbersOfAssignableGPRs())
            break;
         }

      // Determine if we can spare any extra registers for this candidate without spilling
      // in any hot (critical) blocks
      //
      if (maxRegisterPressure < self()->getMaximumNumbersOfAssignableGPRs())
         numExtraRegs = self()->getMaximumNumbersOfAssignableGPRs() - maxRegisterPressure;

      //dumpOptDetails("For global register candidate %d reg pressure is %d maxRegs %d numExtraRegs %d\n", rc->getSymbolReference()->getReferenceNumber(), maxRegisterPressure, comp()->cg()->getMaximumNumbersOfAssignableGPRs(), numExtraRegs);

      if (numExtraRegs > 0)
         {
         if (availableRegisters.get(4))
            return 4; // edi

         if (availableRegisters.get(3))
            return 3; // edx
         }
      }

   return -1; // -1 ==> don't use a global register
   }