Exemple #1
0
static TR::Register *l2fd(TR::Node *node, TR::RealRegister *target, TR_X86OpCodes opRegMem8, TR_X86OpCodes opRegReg8, TR::CodeGenerator *cg)
   {
   TR::Node                *child = node->getFirstChild();
   TR::MemoryReference  *tempMR;

   TR_ASSERT(cg->useSSEForSinglePrecision(), "assertion failure");

   if (child->getRegister() == NULL &&
       child->getReferenceCount() == 1 &&
       child->getOpCode().isLoadVar())
      {
      tempMR = generateX86MemoryReference(child, cg);
      generateRegMemInstruction(opRegMem8, node, target, tempMR, cg);
      tempMR->decNodeReferenceCounts(cg);
      }
   else
      {
      TR::Register *intReg = cg->evaluate(child);
      generateRegRegInstruction(opRegReg8, node, target, intReg, cg);
      cg->decReferenceCount(child);
      }

   node->setRegister(target);
   return target;
   }
Exemple #2
0
TR::Register* OMR::X86::TreeEvaluator::SIMDsplatsEvaluator(TR::Node* node, TR::CodeGenerator* cg)
   {
   TR::Node* childNode = node->getChild(0);
   TR::Register* childReg = cg->evaluate(childNode);

   TR::Register* resultReg = cg->allocateRegister(TR_VRF);
   switch (node->getDataType())
      {
      case TR::VectorInt32:
         generateRegRegInstruction(MOVDRegReg4, node, resultReg, childReg, cg);
         generateRegRegImmInstruction(PSHUFDRegRegImm1, node, resultReg, resultReg, 0x00, cg); // 00 00 00 00 shuffle xxxA to AAAA
         break;
      case TR::VectorInt64:
         if (TR::Compiler->target.is32Bit())
            {
            TR::Register* tempVectorReg = cg->allocateRegister(TR_VRF);
            generateRegRegInstruction(MOVDRegReg4, node, tempVectorReg, childReg->getHighOrder(), cg);
            generateRegImmInstruction(PSLLQRegImm1, node, tempVectorReg, 0x20, cg);
            generateRegRegInstruction(MOVDRegReg4, node, resultReg, childReg->getLowOrder(), cg);
            generateRegRegInstruction(PORRegReg, node, resultReg, tempVectorReg, cg);
            cg->stopUsingRegister(tempVectorReg);
            }
         else
            {
            generateRegRegInstruction(MOVQRegReg8, node, resultReg, childReg, cg);
            }
         generateRegRegImmInstruction(PSHUFDRegRegImm1, node, resultReg, resultReg, 0x44, cg); // 01 00 01 00 shuffle xxBA to BABA
         break;
      case TR::VectorFloat:
         generateRegRegImmInstruction(PSHUFDRegRegImm1, node, resultReg, childReg, 0x00, cg); // 00 00 00 00 shuffle xxxA to AAAA
         break;
      case TR::VectorDouble:
         generateRegRegImmInstruction(PSHUFDRegRegImm1, node, resultReg, childReg, 0x44, cg); // 01 00 01 00 shuffle xxBA to BABA
         break;
      default:
         if (cg->comp()->getOption(TR_TraceCG))
            traceMsg(cg->comp(), "Unsupported data type, Node = %p\n", node);
         TR_ASSERT(false, "Unsupported data type");
         break;
      }

   node->setRegister(resultReg);
   cg->decReferenceCount(childNode);
   return resultReg;
   }
Exemple #3
0
TR::Register *OMR::X86::AMD64::TreeEvaluator::lbits2dEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   // TODO:AMD64: Peepholing
   TR::Node      *child  = node->getFirstChild();
   TR::Register  *sreg   = cg->evaluate(child);
   TR::Register  *treg   = cg->allocateRegister(TR_FPR);
   generateRegRegInstruction(MOVQRegReg8, node, treg, sreg, cg);
   node->setRegister(treg);
   cg->decReferenceCount(child);
   return treg;
   }
Exemple #4
0
TR::Register *
OMR::X86::I386::CodeGenerator::longClobberEvaluate(TR::Node *node)
   {
   TR_ASSERT(node->getOpCode().is8Byte(), "assertion failure");
   if (node->getReferenceCount() > 1)
      {
      TR::Register     *temp    = self()->evaluate(node);
      TR::Register     *lowReg  = self()->allocateRegister();
      TR::Register     *highReg = self()->allocateRegister();
      TR::RegisterPair *longReg = self()->allocateRegisterPair(lowReg, highReg);

      generateRegRegInstruction(MOV4RegReg, node, lowReg, temp->getLowOrder(), self());
      generateRegRegInstruction(MOV4RegReg, node, highReg, temp->getHighOrder(), self());
      return longReg;
      }
   else
      {
      return self()->evaluate(node);
      }
   }
Exemple #5
0
TR::Register *OMR::X86::AMD64::TreeEvaluator::lcmpEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   TR::Node *firstChild  = node->getFirstChild();
   TR::Node *secondChild = node->getSecondChild();

   TR::Register *leftRegister  = cg->evaluate(firstChild);
   TR::Register *rightRegister = cg->evaluate(secondChild);

   // Compare left and right operands, all finished with the operands after this.
   generateRegRegInstruction(CMP8RegReg, node, leftRegister, rightRegister, cg);
   cg->decReferenceCount(firstChild);
   cg->decReferenceCount(secondChild);

   TR::Register *isLessThanReg = cg->allocateRegister();
   TR::Register *isNotEqualReg = cg->allocateRegister();
   cg->getLiveRegisters(TR_GPR)->setByteRegisterAssociation(isLessThanReg);
   cg->getLiveRegisters(TR_GPR)->setByteRegisterAssociation(isNotEqualReg);

   // The state of things in each possible case after each instruction:
   //                       left < right    left = right    left > right
   // Processor flags:      NE=1 LT=1       NE=0  LT=0      NE=1 LT=0
   generateRegInstruction(SETL1Reg, node, isLessThanReg, cg);
   // isLessThanReg:        00000001        00000000        00000000
   generateRegInstruction(SETNE1Reg, node, isNotEqualReg, cg);
   // isNotEqualReg:        00000001        00000000        00000001
   generateRegInstruction(NEG1Reg, node, isLessThanReg, cg);
   // isLessThanReg:        11111111        00000000        00000000
   generateRegRegInstruction(OR1RegReg, node, isNotEqualReg, isLessThanReg, cg);
   // isNotEqualReg:        11111111        00000000        00000001

   generateRegRegInstruction(MOVSXReg4Reg1, node, isNotEqualReg, isNotEqualReg, cg);

   node->setRegister(isNotEqualReg);
   cg->stopUsingRegister(isLessThanReg);

   return isNotEqualReg;
   }
Exemple #6
0
TR::Register *OMR::X86::AMD64::TreeEvaluator::l2iEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   TR::Node     *child = node->getFirstChild();
   TR::Register *reg   = cg->evaluate(child);
   if (child->getReferenceCount() > 1)
      {
      // This catches two scenarios:
      //
      // 1) A longClobberEvaluate (or any other register-clobbering logic) on
      // the l2i node could see a refcount of 1, and hence won't make a copy.
      // If child's refcount is more than 1, we do in fact need a copy, so we'd
      // better do it here.
      //
      // 2) If the child is commoned, and the l2i node is also commoned, then
      // we may end up with a situation where the last evaluation of the child
      // is a clobberEvaluate.  By that time, the child's refcount would be 1,
      // so no copy is made, and the register would be clobbered.  Therefore,
      // the l2i node can't return that same register, or else the other uses
      // of the node will end up getting the clobbered value.
      //
      // Note that case 2 is conservative, in that it presumes that the child's
      // register will be clobbered by another node.  If this does not occur,
      // then the copy we're about to make is unnecessary.
      //
      TR::Register *childReg = reg;
      reg = cg->allocateRegister();
      // to support signExtension in GRA, need to preserve upper word
      // in this move
      generateRegRegInstruction(MOV8RegReg, node, reg, childReg, cg);
      }

   node->setRegister(reg);
   cg->decReferenceCount(child);

   if (cg->enableRegisterInterferences() && node->getOpCode().getSize() == 1)
      cg->getLiveRegisters(TR_GPR)->setByteRegisterAssociation(node->getRegister());

   return reg;
   }
Exemple #7
0
/*
 * users should call the integerSubtractAnalyser or integerSubtractAnalyserWithExplicitOperands APIs instead of calling this one directly 
 */
TR::Register* TR_X86SubtractAnalyser::integerSubtractAnalyserImpl(TR::Node     *root,
                                                                  TR::Node     *firstChild,
                                                                  TR::Node     *secondChild,
                                                                  TR_X86OpCodes regRegOpCode,
                                                                  TR_X86OpCodes regMemOpCode,
                                                                  TR_X86OpCodes copyOpCode,
                                                                  bool needsEflags, 
                                                                  TR::Node *borrow)  
   {
   TR::Register *targetRegister = NULL;
   TR::Register *firstRegister = firstChild->getRegister();
   TR::Register *secondRegister = secondChild->getRegister();
   setInputs(firstChild, firstRegister, secondChild, secondRegister);

   bool loadedConst = false;

   needsEflags = needsEflags || NEED_CC(root);

   if (getEvalChild1())
      {
      // if firstChild and secondChild are the same node, then we should
      // evaluate (take the else path) so that the evaluate for the secondChild
      // below will get the correct/already-allocated register.
      if (firstRegister == 0 && firstChild->getOpCodeValue() == TR::iconst && (firstChild != secondChild))
         {
	   // An iconst may have to be generated.  The iconst will be generated after the
	   //    secondChild is evaluated.  Set the loadedConst flag to true.
	   loadedConst = true;
         }
      else
         {
         firstRegister = _cg->evaluate(firstChild);
         }
      }

   if (getEvalChild2())
      {
      secondRegister = _cg->evaluate(secondChild);
      if (firstChild->getRegister())
         {
         firstRegister = firstChild->getRegister();
         }
      else if (!loadedConst)
         {
         firstRegister = _cg->evaluate(firstChild);
         }
      }

   if (loadedConst)
      {
      if (firstRegister == 0)
         {
         // firstchild is an inconst and it has not been evaluated.
         //   Generate the code for an iconst.
         firstRegister = _cg->allocateRegister();
         TR::TreeEvaluator::insertLoadConstant(firstChild, firstRegister, firstChild->getInt(), TR_RematerializableInt, _cg);
         }
      else
         {
         // firstchild was evaluated.  The code for an iconst does not need to be generated.
         //   Set the loadConst flag to false.
         loadedConst = false;
         }
      }

   if (borrow != 0)
      TR_X86ComputeCC::setCarryBorrow(borrow, true, _cg);

   if (getCopyReg1())
      {
      if (firstChild->getReferenceCount() > 1)
         {
         TR::Register *thirdReg;
         if (firstChild->getOpCodeValue() == TR::iconst && loadedConst)
            {
            thirdReg = firstRegister;
            }
         else
            {
            if (secondChild->getReferenceCount() == 1 && secondRegister != 0 && !needsEflags && (borrow == 0))
               {
               // use one fewer registers if we negate the clobberable secondRegister and add
               // Don't do this though if condition codes are needed.  The sequence
               // depends on the carry flag being valid as if a sub was done.
               //
               bool nodeIs64Bit = TR_X86OpCode(regRegOpCode).hasLongSource();
               generateRegInstruction(NEGReg(nodeIs64Bit), secondChild, secondRegister, _cg);
               thirdReg       = secondRegister;
               secondRegister = firstRegister;
               regRegOpCode   = ADDRegReg(nodeIs64Bit);
               }
            else
               {
               thirdReg = _cg->allocateRegister();
               generateRegRegInstruction(copyOpCode, root, thirdReg, firstRegister, _cg);
               }
            }
         targetRegister = thirdReg;
         if (getSubReg3Reg2())
            {
            generateRegRegInstruction(regRegOpCode, root, thirdReg, secondRegister, _cg);
            }
         else // assert getSubReg3Mem2() == true
            {
            TR::MemoryReference  *tempMR = generateX86MemoryReference(secondChild, _cg);
            generateRegMemInstruction(regMemOpCode, root, thirdReg, tempMR, _cg);
            tempMR->decNodeReferenceCounts(_cg);
            }
         }
      else
         {
         if (getSubReg3Reg2())
            {
            generateRegRegInstruction(regRegOpCode, root, firstRegister, secondRegister, _cg);
            }
         else // assert getSubReg3Mem2() == true
            {
            TR::MemoryReference  *tempMR = generateX86MemoryReference(secondChild, _cg);
            generateRegMemInstruction(regMemOpCode, root, firstRegister, tempMR, _cg);
            tempMR->decNodeReferenceCounts(_cg);
            }
         targetRegister = firstRegister;
         }
      }
   else if (getSubReg1Reg2())
      {
      generateRegRegInstruction(regRegOpCode, root, firstRegister, secondRegister, _cg);
      targetRegister = firstRegister;
      }
   else // assert getSubReg1Mem2() == true
      {
      TR::MemoryReference  *tempMR = generateX86MemoryReference(secondChild, _cg);
      generateRegMemInstruction(regMemOpCode, root, firstRegister, tempMR, _cg);
      targetRegister = firstRegister;
      tempMR->decNodeReferenceCounts(_cg);
      }
   return targetRegister;
   }
Exemple #8
0
/*
 * users should call the longSubtractAnalyser or longSubtractAnalyserWithExplicitOperands APIs instead of calling this one directly 
 */
TR::Register* TR_X86SubtractAnalyser::longSubtractAnalyserImpl(TR::Node *root, TR::Node *&firstChild, TR::Node *&secondChild)
   {
   TR::Register *firstRegister  = firstChild->getRegister();
   TR::Register *secondRegister = secondChild->getRegister();
   TR::Register *targetRegister = NULL;

   bool firstHighZero      = false;
   bool secondHighZero     = false; bool useSecondHighOrder = false;

   TR_X86OpCodes regRegOpCode = SUB4RegReg;
   TR_X86OpCodes regMemOpCode = SUB4RegMem;

   bool needsEflags = NEED_CC(root) || (root->getOpCodeValue() == TR::lusubb);

   // Can generate better code for long adds when one or more children have a high order zero word
   // can avoid the evaluation when we don't need the result of such nodes for another parent.
   //
   if (firstChild->isHighWordZero() && !needsEflags)
      {
      firstHighZero = true;
      }

   if (secondChild->isHighWordZero() && !needsEflags)
      {
      secondHighZero = true;
      TR::ILOpCodes secondOp = secondChild->getOpCodeValue();
      if (secondChild->getReferenceCount() == 1 && secondRegister == 0)
         {
         if (secondOp == TR::iu2l || secondOp == TR::su2l ||
             secondOp == TR::bu2l ||
             (secondOp == TR::lushr &&
              secondChild->getSecondChild()->getOpCodeValue() == TR::iconst &&
              (secondChild->getSecondChild()->getInt() & TR::TreeEvaluator::shiftMask(true)) == 32))
            {
            secondChild    = secondChild->getFirstChild();
            secondRegister = secondChild->getRegister();
            if (secondOp == TR::lushr)
               {
               useSecondHighOrder = true;
               }
            }
         }
      }

   setInputs(firstChild, firstRegister, secondChild, secondRegister);

   if (isVolatileMemoryOperand(firstChild))
      resetMem1();

   if (isVolatileMemoryOperand(secondChild))
      resetMem2();

   if (getEvalChild1())
      {
      firstRegister = _cg->evaluate(firstChild);
      }

   if (getEvalChild2())
      {
      secondRegister = _cg->evaluate(secondChild);
      }

   if (secondHighZero && secondRegister && secondRegister->getRegisterPair())
      {
      if (!useSecondHighOrder)
         {
         secondRegister = secondRegister->getLowOrder();
         }
      else
         {
         secondRegister = secondRegister->getHighOrder();
         }
      }

   if (root->getOpCodeValue() == TR::lusubb &&
       TR_X86ComputeCC::setCarryBorrow(root->getChild(2), true, _cg))
      {
      // use SBB rather than SUB
      //
      regRegOpCode = SBB4RegReg;
      regMemOpCode = SBB4RegMem;
      }

   if (getCopyReg1())
      {
      TR::Register     *lowThird  = _cg->allocateRegister();
      TR::Register     *highThird = _cg->allocateRegister();
      TR::RegisterPair *thirdReg  = _cg->allocateRegisterPair(lowThird, highThird);
      targetRegister = thirdReg;
      generateRegRegInstruction(MOV4RegReg, root, lowThird, firstRegister->getLowOrder(), _cg);

      if (firstHighZero)
         {
         generateRegRegInstruction(XOR4RegReg, root, highThird, highThird, _cg);
         }
      else
         {
         generateRegRegInstruction(MOV4RegReg, root, highThird, firstRegister->getHighOrder(), _cg);
         }

      if (getSubReg3Reg2())
         {
         if (secondHighZero)
            {
            generateRegRegInstruction(regRegOpCode, root, lowThird, secondRegister, _cg);
            generateRegImmInstruction(SBB4RegImms, root, highThird, 0, _cg);
            }
         else
            {
            generateRegRegInstruction(regRegOpCode, root, lowThird, secondRegister->getLowOrder(), _cg);
            generateRegRegInstruction(SBB4RegReg, root, highThird, secondRegister->getHighOrder(), _cg);
            }
         }
      else // assert getSubReg3Mem2() == true
         {
         TR::MemoryReference  *lowMR = generateX86MemoryReference(secondChild, _cg);
         /**
          * The below code is needed to ensure correct behaviour when the subtract analyser encounters a lushr bytecode that shifts
          * by 32 bits. This is the only case where the useSecondHighOrder bit is set.
          * When the first child of the lushr is in a register, code above handles the shift. When the first child of the lushr is in
          * memory, the below ensures that the upper part of the first child of the lushr is used as lowMR.
          */
         if (useSecondHighOrder)
            {
            TR_ASSERT(secondHighZero, "useSecondHighOrder should be consistent with secondHighZero. useSecondHighOrder subsumes secondHighZero");
        	   lowMR = generateX86MemoryReference(*lowMR, 4, _cg);
            }

         generateRegMemInstruction(regMemOpCode, root, lowThird, lowMR, _cg);
         if (secondHighZero)
            {
            generateRegImmInstruction(SBB4RegImms, root, highThird, 0, _cg);
            }
         else
            {
            TR::MemoryReference  *highMR = generateX86MemoryReference(*lowMR, 4, _cg);
            generateRegMemInstruction(SBB4RegMem, root, highThird, highMR, _cg);
            }
         lowMR->decNodeReferenceCounts(_cg);
         }
      }
   else if (getSubReg1Reg2())
      {
      if (secondHighZero)
         {
         generateRegRegInstruction(regRegOpCode, root, firstRegister->getLowOrder(), secondRegister, _cg);
         generateRegImmInstruction(SBB4RegImms, root, firstRegister->getHighOrder(), 0, _cg);
         }
      else
         {
         generateRegRegInstruction(regRegOpCode, root, firstRegister->getLowOrder(), secondRegister->getLowOrder(), _cg);
         generateRegRegInstruction(SBB4RegReg, root, firstRegister->getHighOrder(), secondRegister->getHighOrder(), _cg);
         }
      targetRegister = firstRegister;
      }
   else // assert getSubReg1Mem2() == true
      {
      TR::MemoryReference  *lowMR = generateX86MemoryReference(secondChild, _cg);
      /**
       * The below code is needed to ensure correct behaviour when the subtract analyser encounters a lushr bytecode that shifts
       * by 32 bits. This is the only case where the useSecondHighOrder bit is set.
       * When the first child of the lushr is in a register, code above handles the shift. When the first child of the lushr is in
       * memory, the below ensures that the upper part of the first child of the lushr is used as lowMR.
       */
      if (useSecondHighOrder)
     	 lowMR = generateX86MemoryReference(*lowMR, 4, _cg);

      generateRegMemInstruction(regMemOpCode, root, firstRegister->getLowOrder(), lowMR, _cg);

      if (secondHighZero)
         {
         generateRegImmInstruction(SBB4RegImms, root, firstRegister->getHighOrder(), 0, _cg);
         }
      else
         {
         TR::MemoryReference  *highMR = generateX86MemoryReference(*lowMR, 4, _cg);
         generateRegMemInstruction(SBB4RegMem, root, firstRegister->getHighOrder(), highMR, _cg);
         }

      targetRegister = firstRegister;
      lowMR->decNodeReferenceCounts(_cg);
      }

   return targetRegister;
   }
Exemple #9
0
TR::Register *OMR::X86::AMD64::TreeEvaluator::dbits2lEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   // TODO:AMD64: Peepholing
   TR::Node      *child  = node->getFirstChild();
   TR::Register  *sreg   = cg->evaluate(child);
   TR::Register  *treg   = cg->allocateRegister(TR_GPR);
   generateRegRegInstruction(MOVQReg8Reg, node, treg, sreg, cg);
   if (node->normalizeNanValues())
      {
      static char *disableFastNormalizeNaNs = feGetEnv("TR_disableFastNormalizeNaNs");
      if (disableFastNormalizeNaNs)
         {
         // This one is not clever, but it is simple, and it's based directly
         // on the IA32 version which is known to work, so is safer.
         //
         TR::RegisterDependencyConditions  *deps = generateRegisterDependencyConditions((uint8_t)0, (uint8_t)1, cg);
         deps->addPostCondition(treg, TR::RealRegister::NoReg, cg);

         TR::IA32ConstantDataSnippet *nan1Snippet = cg->findOrCreate8ByteConstant(node, DOUBLE_NAN_1_LOW);
         TR::IA32ConstantDataSnippet *nan2Snippet = cg->findOrCreate8ByteConstant(node, DOUBLE_NAN_2_LOW);
         TR::MemoryReference      *nan1MR      = generateX86MemoryReference(nan1Snippet, cg);
         TR::MemoryReference      *nan2MR      = generateX86MemoryReference(nan2Snippet, cg);

         TR::LabelSymbol *startLabel     = TR::LabelSymbol::create(cg->trHeapMemory(),cg);
         TR::LabelSymbol *normalizeLabel = TR::LabelSymbol::create(cg->trHeapMemory(),cg);
         TR::LabelSymbol *endLabel       = TR::LabelSymbol::create(cg->trHeapMemory(),cg);
         startLabel->setStartInternalControlFlow();
         endLabel  ->setEndInternalControlFlow();

         generateLabelInstruction(   LABEL,       node, startLabel,               cg);
         generateRegMemInstruction(  CMP8RegMem,  node, treg, nan1MR,             cg);
         generateLabelInstruction(   JGE4,        node, normalizeLabel,           cg);
         generateRegMemInstruction(  CMP8RegMem,  node, treg, nan2MR,             cg);
         generateLabelInstruction(   JB4,         node, endLabel,                 cg);
         generateLabelInstruction(   LABEL,       node, normalizeLabel,           cg);
         generateRegImm64Instruction( MOV8RegImm64, node, treg, DOUBLE_NAN,         cg);
         generateLabelInstruction(   LABEL,       node, endLabel,           deps, cg);
         }
      else
         {
         // A bunch of bookkeeping
         //
         uint64_t nanDetector = DOUBLE_NAN_2_LOW;

         TR::RegisterDependencyConditions  *internalControlFlowDeps = generateRegisterDependencyConditions((uint8_t)0, (uint8_t)1, cg);
         internalControlFlowDeps->addPostCondition(treg, TR::RealRegister::NoReg, cg);

         TR::RegisterDependencyConditions  *helperDeps = generateRegisterDependencyConditions((uint8_t)1, (uint8_t)1, cg);
         helperDeps->addPreCondition( treg, TR::RealRegister::eax, cg);
         helperDeps->addPostCondition(treg, TR::RealRegister::eax, cg);

         TR::IA32ConstantDataSnippet *nanDetectorSnippet  = cg->findOrCreate8ByteConstant(node, nanDetector);
         TR::MemoryReference      *nanDetectorMR       = generateX86MemoryReference(nanDetectorSnippet,  cg);

         TR::LabelSymbol *startLabel     = TR::LabelSymbol::create(cg->trHeapMemory(),cg);
         TR::LabelSymbol *slowPathLabel  = TR::LabelSymbol::create(cg->trHeapMemory(),cg);
         TR::LabelSymbol *normalizeLabel = TR::LabelSymbol::create(cg->trHeapMemory(),cg);
         TR::LabelSymbol *endLabel       = TR::LabelSymbol::create(cg->trHeapMemory(),cg);
         startLabel->setStartInternalControlFlow();
         endLabel  ->setEndInternalControlFlow();

         // Fast path: if subtracting nanDetector leaves CF=0 or OF=1, then it
         // must be a NaN.
         //
         generateLabelInstruction(  LABEL,       node, startLabel,           cg);
         generateRegMemInstruction( CMP8RegMem,  node, treg, nanDetectorMR,  cg);
         generateLabelInstruction(  JAE4,        node, slowPathLabel,        cg);
         generateLabelInstruction(  JO4,         node, slowPathLabel,        cg);

         // Slow path
         //
         TR_OutlinedInstructions *slowPath = new (cg->trHeapMemory()) TR_OutlinedInstructions(slowPathLabel, cg);
         cg->getOutlinedInstructionsList().push_front(slowPath);
         slowPath->swapInstructionListsWithCompilation();
         generateLabelInstruction(NULL, LABEL,       slowPathLabel,          cg)->setNode(node);
         generateRegImm64Instruction(MOV8RegImm64, node, treg, DOUBLE_NAN, cg);
         generateLabelInstruction(      JMP4,        node, endLabel,         cg);
         slowPath->swapInstructionListsWithCompilation();

         // Merge point
         //
         generateLabelInstruction(LABEL, node, endLabel, internalControlFlowDeps, cg);
         }
      }
   node->setRegister(treg);
   cg->decReferenceCount(child);
   return treg;
   }
Exemple #10
0
// Build arguments for system linkage dispatch.
//
int32_t TR::AMD64SystemLinkage::buildArgs(
      TR::Node *callNode,
      TR::RegisterDependencyConditions *deps)
   {
   TR::SymbolReference *methodSymRef = callNode->getSymbolReference();
   TR::MethodSymbol *methodSymbol = methodSymRef->getSymbol()->castToMethodSymbol();
   TR::RealRegister::RegNum noReg = TR::RealRegister::NoReg;
   TR::RealRegister *espReal = machine()->getX86RealRegister(TR::RealRegister::esp);
   int32_t firstNodeArgument = callNode->getFirstArgumentIndex();
   int32_t lastNodeArgument = callNode->getNumChildren() - 1;
   int32_t offset = 0;
   int32_t sizeOfOutGoingArgs= 0;
   uint16_t numIntArgs = 0,
            numFloatArgs = 0;
   int32_t first, last, direction;
   int32_t numCopiedRegs = 0;
   TR::Register *copiedRegs[TR::X86LinkageProperties::MaxArgumentRegisters];

   if (getProperties().passArgsRightToLeft())
      {
      first = lastNodeArgument;
      last  = firstNodeArgument - 1;
      direction = -1;
      }
   else
      {
      first = firstNodeArgument;
      last  = lastNodeArgument + 1;
      direction = 1;
      }

   // If the dispatch is indirect we must add the VFT register to the preconditions
   // so that it gets register assigned with the other preconditions to the call.
   //
   if (callNode->getOpCode().isIndirect())
      {
      TR::Node *vftChild = callNode->getFirstChild();
      TR_ASSERT(vftChild->getRegister(), "expecting VFT child to be evaluated");
      TR::RealRegister::RegNum scratchRegIndex = getProperties().getIntegerScratchRegister(1);
      deps->addPreCondition(vftChild->getRegister(), scratchRegIndex, cg());
      }

   int32_t i;
   for (i = first; i != last; i += direction)
      {
      TR::parmLayoutResult layoutResult;
      TR::RealRegister::RegNum rregIndex = noReg;
      TR::Node *child = callNode->getChild(i);

      layoutParm(child, sizeOfOutGoingArgs, numIntArgs, numFloatArgs, layoutResult);

      if (layoutResult.abstract & TR::parmLayoutResult::IN_LINKAGE_REG_PAIR)
         {
         // TODO: AMD64 SysV ABI might put a struct into a pair of linkage registerr
         TR_ASSERT(false, "haven't support linkage_reg_pair yet.\n");
         }
      else if (layoutResult.abstract & TR::parmLayoutResult::IN_LINKAGE_REG)
         {
         TR_RegisterKinds regKind = layoutResult.regs[0].regKind;
         uint32_t regIndex = layoutResult.regs[0].regIndex;
         TR_ASSERT(regKind == TR_GPR || regKind == TR_FPR, "linkage registers includes TR_GPR and TR_FPR\n");
         rregIndex = (regKind == TR_FPR) ? getProperties().getFloatArgumentRegister(regIndex): getProperties().getIntegerArgumentRegister(regIndex);
         }
      else
         {
         offset = layoutResult.offset;
         }

      TR::Register *vreg;
      vreg = cg()->evaluate(child);

      bool needsStackOffsetUpdate = false;
      if (rregIndex != noReg)
         {
         // For NULL JNI reference parameters, it is possible that the NULL value will be evaluated into
         // a different register than the child.  In that case it is not necessary to copy the temporary scratch
         // register across the call.
         //
         if ((child->getReferenceCount() > 1) &&
             (vreg == child->getRegister()))
            {
            TR::Register *argReg = cg()->allocateRegister();
            if (vreg->containsCollectedReference())
               argReg->setContainsCollectedReference();
            generateRegRegInstruction(TR::Linkage::movOpcodes(RegReg, movType(child->getDataType())), child, argReg, vreg, cg());
            vreg = argReg;
            copiedRegs[numCopiedRegs++] = vreg;
            }

         deps->addPreCondition(vreg, rregIndex, cg());
         }
      else
         {
         // Ideally, we would like to push rather than move
         generateMemRegInstruction(TR::Linkage::movOpcodes(MemReg, fullRegisterMovType(vreg)),
                                   child,
                                   generateX86MemoryReference(espReal, offset, cg()),
                                   vreg,
                                   cg());
         }

      cg()->decReferenceCount(child);
      }

   // Now that we're finished making the preconditions, all the interferences
   // are established and we can kill these regs.
   //
   for (i = 0; i < numCopiedRegs; i++)
      cg()->stopUsingRegister(copiedRegs[i]);

   deps->stopAddingPreConditions();

   return sizeOfOutGoingArgs;
   }
Exemple #11
0
// Copies parameters from where they enter the method (either on stack or in a
// linkage register) to their "home location" where the method body will expect
// to find them (either on stack or in a global register).
//
TR::Instruction *
TR::X86SystemLinkage::copyParametersToHomeLocation(TR::Instruction *cursor)
   {
   TR::Machine *machine = cg()->machine();
   TR::RealRegister *framePointer = machine->getX86RealRegister(TR::RealRegister::vfp);

   TR::ResolvedMethodSymbol             *bodySymbol = comp()->getJittedMethodSymbol();
   ListIterator<TR::ParameterSymbol>  paramIterator(&(bodySymbol->getParameterList()));
   TR::ParameterSymbol               *paramCursor;

   const TR::RealRegister::RegNum noReg = TR::RealRegister::NoReg;
   TR_ASSERT(noReg == 0, "noReg must be zero so zero-initializing movStatus will work");

   TR::MovStatus movStatus[TR::RealRegister::NumRegisters] = {{(TR::RealRegister::RegNum)0,(TR::RealRegister::RegNum)0,(TR_MovDataTypes)0}};

   // We must always do the stores first, then the reg-reg copies, then the
   // loads, so that we never clobber a register we will need later.  However,
   // the logic is simpler if we do the loads and stores in the same loop.
   // Therefore, we maintain a separate instruction cursor for the loads.
   //
   // We defer the initialization of loadCursor until we generate the first
   // load.  Otherwise, if we happen to generate some stores first, then the
   // store cursor would get ahead of the loadCursor, and the instructions
   // would end up in the wrong order despite our efforts.
   //
   TR::Instruction *loadCursor = NULL;

   // Phase 1: generate RegMem and MemReg movs, and collect information about
   // the required RegReg movs.
   //
   for (paramCursor = paramIterator.getFirst();
       paramCursor != NULL;
       paramCursor = paramIterator.getNext())
      {
      int8_t lri = paramCursor->getLinkageRegisterIndex();     // How the parameter enters the method
      TR::RealRegister::RegNum ai                              // Where method body expects to find it
         = (TR::RealRegister::RegNum)paramCursor->getAllocatedIndex();
      int32_t offset = paramCursor->getParameterOffset();      // Location of the parameter's stack slot
      TR_MovDataTypes movDataType = paramMovType(paramCursor); // What sort of MOV instruction does it need?

      // Copy the parameter to wherever it should be
      //
      if (lri == NOT_LINKAGE) // It's on the stack
         {
         if (ai == NOT_ASSIGNED) // It only needs to be on the stack
            {
            // Nothing to do
            }
         else // Method body expects it to be in the ai register
            {
            if (loadCursor == NULL)
               loadCursor = cursor;

            if (debug("traceCopyParametersToHomeLocation"))
               diagnostic("copyParametersToHomeLocation: Loading %d\n", ai);
            // ai := stack
            loadCursor = generateRegMemInstruction(
               loadCursor,
               TR::Linkage::movOpcodes(RegMem, movDataType),
               machine->getX86RealRegister(ai),
               generateX86MemoryReference(framePointer, offset, cg()),
               cg()
               );
            }
         }
      else // It's in a linkage register
         {
         TR::RealRegister::RegNum sourceIndex = getProperties().getArgumentRegister(lri, isFloat(movDataType));

         // Copy to the stack if necessary
         //
         if (ai == NOT_ASSIGNED || hasToBeOnStack(paramCursor))
            {
            if (comp()->getOption(TR_TraceCG))
              traceMsg(comp(), "copyToHomeLocation param %p, linkage reg index %d, allocated index %d, parameter offset %d, hasToBeOnStack %d, parm->isParmHasToBeOnStack() %d.\n", paramCursor, lri, ai, offset, hasToBeOnStack(paramCursor), paramCursor->isParmHasToBeOnStack());
            if (debug("traceCopyParametersToHomeLocation"))
               diagnostic("copyParametersToHomeLocation: Storing %d\n", sourceIndex);
            // stack := lri
            cursor = generateMemRegInstruction(
               cursor,
               TR::Linkage::movOpcodes(MemReg, movDataType),
               generateX86MemoryReference(framePointer, offset, cg()),
               machine->getX86RealRegister(sourceIndex),
               cg()
               );
            }

         // Copy to the ai register if necessary
         //
         if (ai != NOT_ASSIGNED && ai != sourceIndex)
            {
            // This parameter needs a RegReg move.  We don't know yet whether
            // we need the value in the target register, so for now we just
            // remember that we need to do this and keep going.
            //
            TR_ASSERT(movStatus[ai         ].sourceReg == noReg, "Each target reg must have only one source");
            TR_ASSERT(movStatus[sourceIndex].targetReg == noReg, "Each source reg must have only one target");
            if (debug("traceCopyParametersToHomeLocation"))
               diagnostic("copyParametersToHomeLocation: Planning to move %d to %d\n", sourceIndex, ai);
            movStatus[ai].sourceReg                  = sourceIndex;
            movStatus[sourceIndex].targetReg         = ai;
            movStatus[sourceIndex].outgoingDataType  = movDataType;
            }

         if (debug("traceCopyParametersToHomeLocation") && ai == sourceIndex)
            {
            diagnostic("copyParametersToHomeLocation: Parameter #%d already in register %d\n", lri, ai);
            }
         }
      }

   // Phase 2: Iterate through the parameters again to insert the RegReg moves.
   //
   for (paramCursor = paramIterator.getFirst();
       paramCursor != NULL;
       paramCursor = paramIterator.getNext())
      {
      if (paramCursor->getLinkageRegisterIndex() == NOT_LINKAGE)
         continue;

      const TR::RealRegister::RegNum paramReg =
         getProperties().getArgumentRegister(paramCursor->getLinkageRegisterIndex(), isFloat(paramMovType(paramCursor)));

      if (movStatus[paramReg].targetReg == 0)
         {
         // This parameter does not need to be copied anywhere
         if (debug("traceCopyParametersToHomeLocation"))
            diagnostic("copyParametersToHomeLocation: Not moving %d\n", paramReg);
         }
      else
         {
         if (debug("traceCopyParametersToHomeLocation"))
            diagnostic("copyParametersToHomeLocation: Preparing to move %d\n", paramReg);

         // If a mov's target register is the source for another mov, we need
         // to do that other mov first.  The idea is to find the end point of
         // the chain of movs starting with paramReg and ending with a
         // register whose current value is not needed; then do that chain of
         // movs in reverse order.
         //
         TR_ASSERT(noReg == 0, "noReg must be zero (not %d) for zero-filled initialization to work", noReg);

         TR::RealRegister::RegNum regCursor;

         // Find the last target in the chain
         //
         regCursor = movStatus[paramReg].targetReg;
         while(movStatus[regCursor].targetReg != noReg)
            {
            // Haven't found the end yet
            regCursor = movStatus[regCursor].targetReg;
            TR_ASSERT(regCursor != paramReg, "Can't yet handle cyclic dependencies");

            // TODO:AMD64 Use scratch register to break cycles
            // A properly-written pickRegister should never
            // cause cycles to occur in the first place.  However, we may want
            // to consider adding cycle-breaking logic so that (1) pickRegister
            // has more flexibility, and (2) we're more robust against
            // otherwise harmless bugs in pickRegister.
            }

         // Work our way backward along the chain, generating all the necessary movs
         //
         while(movStatus[regCursor].sourceReg != noReg)
            {
            TR::RealRegister::RegNum source = movStatus[regCursor].sourceReg;
            if (debug("traceCopyParametersToHomeLocation"))
               diagnostic("copyParametersToHomeLocation: Moving %d to %d\n", source, regCursor);
            // regCursor := regCursor.sourceReg
            cursor = generateRegRegInstruction(
               cursor,
               TR::Linkage::movOpcodes(RegReg, movStatus[source].outgoingDataType),
               machine->getX86RealRegister(regCursor),
               machine->getX86RealRegister(source),
               cg()
               );
            // Update movStatus as we go so we don't generate redundant movs
            movStatus[regCursor].sourceReg = noReg;
            movStatus[source   ].targetReg = noReg;
            // Continue with the next register in the chain
            regCursor = source;
            }
         }
      }

   // Return the last instruction we inserted, whether or not it was a load.
   //
   return loadCursor? loadCursor : cursor;
   }
Exemple #12
0
TR::Register *TR_IA32XMMCompareAnalyser::xmmCompareAnalyser(TR::Node       *root,
                                                           TR_X86OpCodes cmpRegRegOpCode,
                                                           TR_X86OpCodes cmpRegMemOpCode)
   {
   TR::Node      *firstChild,
                *secondChild;
   TR::ILOpCodes  cmpOp = root->getOpCodeValue();
   bool          reverseMemOp = false;
   bool          reverseCmpOp = false;

   // Some operators must have their operands swapped to improve the generated
   // code needed to evaluate the result of the comparison.
   //
   bool mustSwapOperands = (cmpOp == TR::iffcmple ||
                            cmpOp == TR::ifdcmple ||
                            cmpOp == TR::iffcmpgtu ||
                            cmpOp == TR::ifdcmpgtu ||
                            cmpOp == TR::fcmple ||
                            cmpOp == TR::dcmple ||
                            cmpOp == TR::fcmpgtu ||
                            cmpOp == TR::dcmpgtu ||
                            cmpOp == TR::iffcmplt ||
                            cmpOp == TR::ifdcmplt ||
                            cmpOp == TR::iffcmpgeu ||
                            cmpOp == TR::ifdcmpgeu ||
                            cmpOp == TR::fcmplt ||
                            cmpOp == TR::dcmplt ||
                            cmpOp == TR::fcmpgeu ||
                            cmpOp == TR::dcmpgeu) ? true : false;

   // Some operators should not have their operands swapped to improve the generated
   // code needed to evaluate the result of the comparison.
   //
   bool preventOperandSwapping = (cmpOp == TR::iffcmpltu ||
                                  cmpOp == TR::ifdcmpltu ||
                                  cmpOp == TR::iffcmpge ||
                                  cmpOp == TR::ifdcmpge ||
                                  cmpOp == TR::fcmpltu ||
                                  cmpOp == TR::dcmpltu ||
                                  cmpOp == TR::fcmpge ||
                                  cmpOp == TR::dcmpge ||
                                  cmpOp == TR::iffcmpgt ||
                                  cmpOp == TR::ifdcmpgt ||
                                  cmpOp == TR::iffcmpleu ||
                                  cmpOp == TR::ifdcmpleu ||
                                  cmpOp == TR::fcmpgt ||
                                  cmpOp == TR::dcmpgt ||
                                  cmpOp == TR::fcmpleu ||
                                  cmpOp == TR::dcmpleu) ? true : false;

   // For correctness, don't swap operands of these operators.
   //
   if (cmpOp == TR::fcmpg || cmpOp == TR::fcmpl ||
       cmpOp == TR::dcmpg || cmpOp == TR::dcmpl)
      {
      preventOperandSwapping = true;
      }

   // Initial operand evaluation ordering.
   //
   if (preventOperandSwapping || (!mustSwapOperands && _cg->whichChildToEvaluate(root) == 0))
      {
      firstChild  = root->getFirstChild();
      secondChild = root->getSecondChild();
      setReversedOperands(false);
      }
   else
      {
      firstChild  = root->getSecondChild();
      secondChild = root->getFirstChild();
      setReversedOperands(true);
      }

   TR::Register *firstRegister  = firstChild->getRegister();
   TR::Register *secondRegister = secondChild->getRegister();

   setInputs(firstChild,
             firstRegister,
             secondChild,
             secondRegister,
             false,

             // If either 'preventOperandSwapping' or 'mustSwapOperands' is set then the
             // initial operand ordering set above must be maintained.
             //
             preventOperandSwapping || mustSwapOperands);

   // Make sure any required operand ordering is respected.
   //
   if ((getCmpReg2Reg1() || getCmpReg2Mem1()) &&
       (mustSwapOperands || preventOperandSwapping))
      {
      reverseCmpOp = getCmpReg2Reg1() ? true : false;
      reverseMemOp = getCmpReg2Mem1() ? true : false;
      }

   // Evaluate the children if necessary.
   //
   if (getEvalChild1())
      {
      _cg->evaluate(firstChild);
      }

   if (getEvalChild2())
      {
      _cg->evaluate(secondChild);
      }

   TR::TreeEvaluator::coerceFPOperandsToXMMRs(root, _cg);

   firstRegister  = firstChild->getRegister();
   secondRegister = secondChild->getRegister();

   // Generate the compare instruction.
   //
   if (getCmpReg1Mem2() || reverseMemOp)
      {
      TR::MemoryReference  *tempMR = generateX86MemoryReference(secondChild, _cg);
      generateRegMemInstruction(cmpRegMemOpCode, root, firstRegister, tempMR, _cg);
      tempMR->decNodeReferenceCounts(_cg);
      }
   else if (getCmpReg2Mem1())
      {
      TR::MemoryReference  *tempMR = generateX86MemoryReference(firstChild, _cg);
      generateRegMemInstruction(cmpRegMemOpCode, root, secondRegister, tempMR, _cg);
      notReversedOperands();
      tempMR->decNodeReferenceCounts(_cg);
      }
   else if (getCmpReg1Reg2() || reverseCmpOp)
      {
      generateRegRegInstruction(cmpRegRegOpCode, root, firstRegister, secondRegister, _cg);
      }
   else if (getCmpReg2Reg1())
      {
      generateRegRegInstruction(cmpRegRegOpCode, root, secondRegister, firstRegister, _cg);
      notReversedOperands();
      }

   _cg->decReferenceCount(firstChild);
   _cg->decReferenceCount(secondChild);

   // Update the opcode on the root node if we have swapped its children.
   // TODO: Reverse the children too, or else this looks wrong in the log file
   //
   if (getReversedOperands())
      {
      cmpOp = TR::ILOpCode(cmpOp).getOpCodeForSwapChildren();
      TR::Node::recreate(root, cmpOp);
      }

   return NULL;
   }
Exemple #13
0
TR::Register* OMR::X86::TreeEvaluator::SIMDgetvelemEvaluator(TR::Node* node, TR::CodeGenerator* cg)
   {
   TR::Node* firstChild = node->getChild(0);
   TR::Node* secondChild = node->getChild(1);

   TR::Register* srcVectorReg = cg->evaluate(firstChild);
   TR::Register* resReg = 0;
   TR::Register* lowResReg = 0;
   TR::Register* highResReg = 0;

   int32_t elementCount = -1;
   switch (firstChild->getDataType())
      {
      case TR::VectorInt8:
      case TR::VectorInt16:
         TR_ASSERT(false, "unsupported vector type %s in SIMDgetvelemEvaluator.\n", firstChild->getDataType().toString());
         break;
      case TR::VectorInt32:
         elementCount = 4;
         resReg = cg->allocateRegister();
         break;
      case TR::VectorInt64:
         elementCount = 2;
         if (TR::Compiler->target.is32Bit())
            {
            lowResReg = cg->allocateRegister();
            highResReg = cg->allocateRegister();
            resReg = cg->allocateRegisterPair(lowResReg, highResReg);
            }
         else
            {
            resReg = cg->allocateRegister();
            }
         break;
      case TR::VectorFloat:
         elementCount = 4;
         resReg = cg->allocateSinglePrecisionRegister(TR_FPR);
         break;
      case TR::VectorDouble:
         elementCount = 2;
         resReg = cg->allocateRegister(TR_FPR);
         break;
      default:
         TR_ASSERT(false, "unrecognized vector type %s in SIMDgetvelemEvaluator.\n", firstChild->getDataType().toString());
      }

   if (secondChild->getOpCode().isLoadConst())
      {
      int32_t elem = secondChild->getInt();

      TR_ASSERT(elem >= 0 && elem < elementCount, "Element can only be 0 to %u\n", elementCount - 1);

      uint8_t shufconst = 0x00;
      TR::Register* dstReg = 0;
      if (4 == elementCount)
         {
         /*
          * if elem = 0, access the most significant 32 bits (set shufconst to 0x03)
          * if elem = 1, access the second most significant 32 bits (set shufconst to 0x02)
          * if elem = 2, access the third most significant 32 bits (set shufconst to 0x01)
          * if elem = 3, access the least significant 32 bits (set shufconst to 0x00)
          */
         shufconst = (uint8_t)((3 - elem) & 0x03);

         /*
          * the value to be read (indicated by shufconst) from srcVectorReg is splatted into all 4 slots in the dstReg
          * this puts the value we want in the least significant bits and the other bits should never be read.
          * for float, dstReg and resReg are the same because PSHUFD can work directly with TR_FPR registers
          * for Int32, the result needs to be moved from the dstReg to a TR_GPR resReg.
          */
         if (TR::VectorInt32 == firstChild->getDataType())
            {
            dstReg = cg->allocateRegister(TR_VRF);
            }
         else //TR::VectorFloat == firstChild->getDataType()
            {
            dstReg = resReg;
            }

         /*
          * if elem = 3, the value we want is already in the least significant 32 bits
          * as a result, a mov instruction is good enough and splatting the value is unnecessary
          */
         if (3 == elem)
            {
            generateRegRegInstruction(MOVDQURegReg, node, dstReg, srcVectorReg, cg);
            }
         else
            {
            generateRegRegImmInstruction(PSHUFDRegRegImm1, node, dstReg, srcVectorReg, shufconst, cg);
            }

         if (TR::VectorInt32 == firstChild->getDataType())
            {
            generateRegRegInstruction(MOVDReg4Reg, node, resReg, dstReg, cg);
            cg->stopUsingRegister(dstReg);
            }
         }
      else //2 == elementCount
         {
         /*
          * for double, dstReg and resReg are the same because PSHUFD can work directly with TR_FPR registers
          * for Int64, the result needs to be moved from the dstReg to a TR_GPR resReg.
          */
         if (TR::VectorInt64 == firstChild->getDataType())
            {
            dstReg = cg->allocateRegister(TR_VRF);
            }
         else //TR::VectorDouble == firstChild->getDataType()
            {
            dstReg = resReg;
            }

         /*
          * the value to be read needs to be in the least significant 64 bits.
          * if elem = 0, the value we want is in the most significant 64 bits and needs to be splatted into
          * the least significant 64 bits (the other bits affected by the splat are never read)
          * if elem = 1, the value we want is already in the least significant 64 bits
          * as a result, a mov instruction is good enough and splatting the value is unnecessary
          */
         if (1 == elem)
            {
            generateRegRegInstruction(MOVDQURegReg, node, dstReg, srcVectorReg, cg);
            }
         else //0 == elem
            {
            generateRegRegImmInstruction(PSHUFDRegRegImm1, node, dstReg, srcVectorReg, 0x0e, cg);
            }

         if (TR::VectorInt64 == firstChild->getDataType())
            {
            if (TR::Compiler->target.is32Bit())
               {
               generateRegRegInstruction(MOVDReg4Reg, node, lowResReg, dstReg, cg);
               generateRegRegImmInstruction(PSHUFDRegRegImm1, node, dstReg, srcVectorReg, (0 == elem) ? 0x03 : 0x01, cg);
               generateRegRegInstruction(MOVDReg4Reg, node, highResReg, dstReg, cg);
               }
            else
               {
               generateRegRegInstruction(MOVQReg8Reg, node, resReg, dstReg, cg);
               }
            cg->stopUsingRegister(dstReg);
            }
         }
      }
   else
      {
      //TODO: handle non-constant second child case
      TR_ASSERT(false, "non-const second child not currently supported in SIMDgetvelemEvaluator.\n");
      }

   node->setRegister(resReg);
   cg->decReferenceCount(firstChild);
   cg->decReferenceCount(secondChild);

   return resReg;
   }
void TR_OutlinedInstructions::generateOutlinedInstructionsDispatch()
   {
   // Switch to cold helper instruction stream.
   //
   TR::Register    *vmThreadReg = _cg->getMethodMetaDataRegister();
   TR::Instruction *savedFirstInstruction = comp()->getFirstInstruction();
   TR::Instruction *savedAppendInstruction = comp()->getAppendInstruction();
   comp()->setFirstInstruction(NULL);
   comp()->setAppendInstruction(NULL);

   new (_cg->trHeapMemory()) TR::X86LabelInstruction(NULL, LABEL, _entryLabel, _cg);

   if (_rematerializeVMThread)
      {
      generateRegInstruction(PUSHReg, _callNode, vmThreadReg, _cg);
      generateRestoreVMThreadInstruction ( _callNode, _cg);
      TR::MemoryReference  *vmThreadMR = generateX86MemoryReference(vmThreadReg, (TR::Compiler->target.is64Bit()) ? 16 : 8, _cg);
      generateRegMemInstruction (LRegMem(), _callNode, vmThreadReg, vmThreadMR, _cg);
      }
   TR::Register *resultReg=NULL;
   if (_callNode->getOpCode().isCallIndirect())
      resultReg = TR::TreeEvaluator::performCall(_callNode, true, false, _cg);
   else
      resultReg = TR::TreeEvaluator::performCall(_callNode, false, false, _cg);

   if (_rematerializeVMThread)
      {
      generateRegInstruction(POPReg, _callNode, vmThreadReg, _cg);
      }

   if (_targetReg)
      {
      TR_ASSERT(resultReg, "assertion failure");
      TR::RegisterPair *targetRegPair = _targetReg->getRegisterPair();
      TR::RegisterPair *resultRegPair =  resultReg->getRegisterPair();
      if (targetRegPair)
         {
         TR_ASSERT(resultRegPair, "OutlinedInstructions: targetReg is a register pair and resultReg is not");
         generateRegRegInstruction(_targetRegMovOpcode, _callNode, targetRegPair->getLowOrder(),  resultRegPair->getLowOrder(),  _cg);
         generateRegRegInstruction(_targetRegMovOpcode, _callNode, targetRegPair->getHighOrder(), resultRegPair->getHighOrder(), _cg);
         }
      else
         {
         TR_ASSERT(!resultRegPair, "OutlinedInstructions: resultReg is a register pair and targetReg is not");
         generateRegRegInstruction(_targetRegMovOpcode, _callNode, _targetReg, resultReg, _cg);
         }
      }

   _cg->decReferenceCount(_callNode);

   if (_restartLabel)
      generateLabelInstruction(JMP4, _callNode, _restartLabel, _cg);
   else
      {
      // Java-specific.
      // No restart label implies we're not coming back from this call,
      // so it's safe to put data after the call.  In the case of calling a throw
      // helper, there's an ancient busted handshake that expects to find a 4-byte
      // offset here, so we have to comply...
      //
      // When the handshake is removed, we can delete this zero.
      //
      generateImmInstruction(DDImm4, _callNode, 0, _cg);
      }

   // Dummy label to delimit the end of the helper call dispatch sequence (for exception ranges).
   //
   generateLabelInstruction(LABEL, _callNode, TR::LabelSymbol::create(_cg->trHeapMemory(),_cg), _cg);

   // Switch from cold helper instruction stream.
   //
   _firstInstruction = comp()->getFirstInstruction();
   _appendInstruction = comp()->getAppendInstruction();
   comp()->setFirstInstruction(savedFirstInstruction);
   comp()->setAppendInstruction(savedAppendInstruction);
   }