// Pushes a 4-byte float argument onto the IA-32 call stack.
// Tries the cheap encodings first (immediate push for fconst, direct push
// from memory for a single-use load, forwarding for ibits2f); otherwise
// evaluates the child and stores it into freshly reserved stack space.
// Returns the register holding the value, or NULL when no register was needed.
TR::Register *IA32LinkageUtils::pushFloatArg(
      TR::Node *child,
      TR::CodeGenerator *cg)
   {
   TR::Register *valueReg;
   if (child->getRegister() == NULL)
      {
      if (child->getOpCodeValue() == TR::fconst)
         {
         // Constant: push the raw float bit pattern as an immediate,
         // using the short (sign-extended byte) form when it fits.
         int32_t bits = child->getFloatBits();
         TR_X86OpCodes pushOp = (bits >= -128 && bits <= 127) ? PUSHImms : PUSHImm4;
         generateImmInstruction(pushOp, child, bits, cg);
         cg->decReferenceCount(child);
         return NULL;
         }
      if (child->getReferenceCount() == 1)
         {
         if (child->getOpCode().isLoad())
            {
            // Single-use, unevaluated load: push straight from memory.
            TR::MemoryReference *sourceMR = generateX86MemoryReference(child, cg);
            generateMemInstruction(PUSHMem, child, sourceMR, cg);
            sourceMR->decNodeReferenceCounts(cg);
            cg->decReferenceCount(child);
            return NULL;
            }
         if (child->getOpCodeValue() == TR::ibits2f)
            {
            // Bit-reinterpreted int: push the integer operand directly.
            valueReg = pushIntegerWordArg(child->getFirstChild(), cg);
            cg->decReferenceCount(child);
            return valueReg;
            }
         }
      }

   // General case: evaluate, carve 4 bytes off the stack, and store the value.
   valueReg = cg->evaluate(child);
   TR::RealRegister *espReal = cg->machine()->getRealRegister(TR::RealRegister::esp);
   generateRegImmInstruction(SUB4RegImms, child, espReal, 4, cg);
   if (cg->useSSEForSinglePrecision() && valueReg->getKind() == TR_FPR)
      generateMemRegInstruction(MOVSSMemReg, child, generateX86MemoryReference(espReal, 0, cg), valueReg, cg);
   else
      generateFPMemRegInstruction(FSTMemReg, child, generateX86MemoryReference(espReal, 0, cg), valueReg, cg);
   cg->decReferenceCount(child);
   return valueReg;
   }
// Unsigned int -> long conversion on AMD64. A 32-bit register write
// implicitly zero-extends into the upper half, so a plain 4-byte move
// is all that is required.
TR::Register *OMR::X86::AMD64::TreeEvaluator::iu2lEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   TR::Node *source = node->getFirstChild();

   if (source->getOpCode().isLoadConst())
      {
      // Constant operand: materialize with a 32-bit immediate move,
      // which the hardware zero-extends for free.
      TR::Register *result = cg->allocateRegister();
      generateRegImmInstruction(MOV4RegImm4, node, result, source->getInt(), cg);
      node->setRegister(result);
      cg->decReferenceCount(source);
      return result;
      }

   // Non-constant: let the conversion analyser choose between a 4-byte load
   // (zero-extending on AMD64) and an explicit MOVZX.
   return TR::TreeEvaluator::conversionAnalyser(node, L4RegMem, MOVZXReg8Reg4, cg);
   }
// Signed int -> long conversion on AMD64.
TR::Register *OMR::X86::AMD64::TreeEvaluator::i2lEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   TR::Compilation *comp = cg->comp();
   TR::Node *source = node->getFirstChild();

   if (source->getOpCode().isLoadConst())
      {
      // Constant operand: MOV8RegImm4 sign-extends the 32-bit immediate.
      TR::Register *result = cg->allocateRegister();
      generateRegImmInstruction(MOV8RegImm4, node, result, source->getInt(), cg);
      node->setRegister(result);
      cg->decReferenceCount(source);
      return result;
      }

   // In theory, because iRegStore has chosen to disregard needsSignExtension,
   // we must disregard skipSignExtension here for correctness.
   //
   // In practice it is safe to honour skipSignExtension as long as the
   // optimizer only sets it on nodes known to be non-negative at the point
   // the i2l occurs. isNonNegative serves the same purpose, but may be
   // missing when a node proven non-negative inside a block is commoned
   // above the BNDCHK or branch that established the fact. The optimizer
   // sets skipSignExtension with more global knowledge than the tree
   // evaluator has, so we trust it here (and log the transformation).
   bool zeroExtendSuffices =
      node->isNonNegative() ||
      (node->skipSignExtension() &&
       performTransformation(comp, "TREE EVALUATION: skipping sign extension on node %s despite lack of isNonNegative\n", comp->getDebug()->getName(node)));

   TR_X86OpCodes memOpCode;
   TR_X86OpCodes regOpCode;
   if (zeroExtendSuffices)
      {
      // Prefer the plain (zero-extending) forms: the analyser can often
      // eliminate them entirely.
      memOpCode = L4RegMem;
      regOpCode = MOVZXReg8Reg4;
      }
   else
      {
      memOpCode = MOVSXReg8Mem4;
      regOpCode = MOVSXReg8Reg4;
      }
   return TR::TreeEvaluator::conversionAnalyser(node, memOpCode, regOpCode, cg);
   }
// Broadcasts a scalar child value into every lane of a 128-bit vector register.
TR::Register* OMR::X86::TreeEvaluator::SIMDsplatsEvaluator(TR::Node* node, TR::CodeGenerator* cg)
   {
   TR::Node* scalarNode = node->getChild(0);
   TR::Register* scalarReg = cg->evaluate(scalarNode);
   TR::Register* vectorReg = cg->allocateRegister(TR_VRF);

   switch (node->getDataType())
      {
      case TR::VectorInt32:
         // Move the 32-bit value into lane 0, then replicate lane 0 across
         // all four lanes (shuffle control 0x00 == 00 00 00 00: xxxA -> AAAA).
         generateRegRegInstruction(MOVDRegReg4, node, vectorReg, scalarReg, cg);
         generateRegRegImmInstruction(PSHUFDRegRegImm1, node, vectorReg, vectorReg, 0x00, cg);
         break;
      case TR::VectorInt64:
         if (TR::Compiler->target.is32Bit())
            {
            // 32-bit target: the long arrives as a GPR pair, so build the
            // 64-bit value in the vector unit — high half shifted up by 32,
            // OR-ed with the low half.
            TR::Register* highHalfVec = cg->allocateRegister(TR_VRF);
            generateRegRegInstruction(MOVDRegReg4, node, highHalfVec, scalarReg->getHighOrder(), cg);
            generateRegImmInstruction(PSLLQRegImm1, node, highHalfVec, 0x20, cg);
            generateRegRegInstruction(MOVDRegReg4, node, vectorReg, scalarReg->getLowOrder(), cg);
            generateRegRegInstruction(PORRegReg, node, vectorReg, highHalfVec, cg);
            cg->stopUsingRegister(highHalfVec);
            }
         else
            {
            generateRegRegInstruction(MOVQRegReg8, node, vectorReg, scalarReg, cg);
            }
         // Replicate the low quadword into both halves
         // (shuffle control 0x44 == 01 00 01 00: xxBA -> BABA).
         generateRegRegImmInstruction(PSHUFDRegRegImm1, node, vectorReg, vectorReg, 0x44, cg);
         break;
      case TR::VectorFloat:
         // One shuffle replicates the float in lane 0 to all lanes (xxxA -> AAAA).
         generateRegRegImmInstruction(PSHUFDRegRegImm1, node, vectorReg, scalarReg, 0x00, cg);
         break;
      case TR::VectorDouble:
         // Replicate the low double into both 64-bit lanes (xxBA -> BABA).
         generateRegRegImmInstruction(PSHUFDRegRegImm1, node, vectorReg, scalarReg, 0x44, cg);
         break;
      default:
         if (cg->comp()->getOption(TR_TraceCG))
            traceMsg(cg->comp(), "Unsupported data type, Node = %p\n", node);
         TR_ASSERT(false, "Unsupported data type");
         break;
      }

   node->setRegister(vectorReg);
   cg->decReferenceCount(scalarNode);
   return vectorReg;
   }
// Builds a direct (CALL imm32) dispatch to a system-linkage target.
//
// Arguments are pushed on the stack by buildArgs; volatile/return register
// dependencies hang off a label after the call rather than the call itself.
// Returns the register holding the call's return value (from
// buildVolatileAndReturnDependencies), which may be unused by the caller.
//
// NOTE(review): spillFPRegs is part of the linkage interface but is unused
// here. The unconditional diagnostic() fires on every non-helper call —
// confirm it is intended outside of debug builds.
//
// Fix applied: removed dead locals (callOpCodeValue, targetAddressReg,
// targetAddressMem) that were assigned and never read; the emitted
// instruction sequence is unchanged.
TR::Register *TR::IA32SystemLinkage::buildDirectDispatch(TR::Node *callNode, bool spillFPRegs)
   {
   TR::RealRegister *stackPointerReg = machine()->getX86RealRegister(TR::RealRegister::esp);
   TR::SymbolReference *methodSymRef = callNode->getSymbolReference();
   TR::MethodSymbol *methodSymbol = callNode->getSymbol()->castToMethodSymbol();

   if (!methodSymbol->isHelper())
      diagnostic("Building call site for %s\n", methodSymbol->getMethod()->signature(trMemory()));

   // Dependencies for the registers killed by the call plus the return register.
   TR::RegisterDependencyConditions *deps;
   deps = generateRegisterDependencyConditions((uint8_t)0, (uint8_t)6, cg());
   TR::Register *returnReg = buildVolatileAndReturnDependencies(callNode, deps);
   deps->stopAddingConditions();

   // Arguments go on the stack; buildArgs' dependency set is deliberately empty.
   TR::RegisterDependencyConditions *dummy = generateRegisterDependencyConditions((uint8_t)0, (uint8_t)0, cg());
   uint32_t argSize = buildArgs(callNode, dummy);

   // Call-out. With callee cleanup the VFP adjustment is 0 because the callee
   // pops the arguments; otherwise the pushed bytes remain our responsibility.
   int32_t stackAdjustment = cg()->getProperties().getCallerCleanup() ? 0 : -argSize;
   TR::X86ImmInstruction* instr = generateImmSymInstruction(CALLImm4, callNode, (uintptr_t)methodSymbol->getMethodAddress(), methodSymRef, cg());
   instr->setAdjustsFramePointerBy(stackAdjustment);

   if (cg()->getProperties().getCallerCleanup() && argSize > 0)
      {
      // Clean up arguments: short-form ADD when the size fits a signed byte.
      //
      generateRegImmInstruction(
         (argSize <= 127) ? ADD4RegImms : ADD4RegImm4,
         callNode,
         stackPointerReg,
         argSize,
         cg());
      }

   // Label denoting end of dispatch code sequence; dependencies are on
   // this label rather than on the call
   //
   TR::LabelSymbol *endSystemCallSequence = generateLabelSymbol(cg());
   generateLabelInstruction(LABEL, callNode, endSystemCallSequence, deps, cg());

   // Stop using the killed registers that are not going to persist
   //
   if (deps)
      stopUsingKilledRegisters(deps, returnReg);

   // If the method returns a floating point value that is not used, insert a dummy store to
   // eventually pop the value from the floating point stack.
   //
   if ((callNode->getDataType() == TR::Float || callNode->getDataType() == TR::Double) &&
       callNode->getReferenceCount() == 1)
      {
      generateFPSTiST0RegRegInstruction(FSTRegReg, callNode, returnReg, returnReg, cg());
      }

   if (cg()->enableRegisterAssociations())
      associatePreservedRegisters(deps, returnReg);

   return returnReg;
   }
/*
 * users should call the longSubtractAnalyser or longSubtractAnalyserWithExplicitOperands APIs instead of calling this one directly
 */
// Emits IA-32 code for a 64-bit subtract (lsub / lusubb) using 32-bit
// register pairs: SUB/SBB on the low words, then SBB on the high words.
// firstChild/secondChild are reference parameters: secondChild may be
// rewritten to point at its grandchild when a zero-extending conversion
// (iu2l/su2l/bu2l) or a lushr-by-32 is folded away.
TR::Register* TR_X86SubtractAnalyser::longSubtractAnalyserImpl(TR::Node *root, TR::Node *&firstChild, TR::Node *&secondChild)
   {
   TR::Register *firstRegister = firstChild->getRegister();
   TR::Register *secondRegister = secondChild->getRegister();
   TR::Register *targetRegister = NULL;
   bool firstHighZero = false;
   bool secondHighZero = false;
   bool useSecondHighOrder = false;
   TR_X86OpCodes regRegOpCode = SUB4RegReg;
   TR_X86OpCodes regMemOpCode = SUB4RegMem;

   // The high-word-zero shortcuts below replace the high-order subtract with
   // SBB-immediate-0 or XOR; they are suppressed whenever the condition codes
   // of the full 64-bit operation are observed (lusubb or an explicit CC use).
   bool needsEflags = NEED_CC(root) || (root->getOpCodeValue() == TR::lusubb);

   // Can generate better code for long adds when one or more children have a high order zero word
   // can avoid the evaluation when we don't need the result of such nodes for another parent.
   //
   if (firstChild->isHighWordZero() && !needsEflags)
      {
      firstHighZero = true;
      }
   if (secondChild->isHighWordZero() && !needsEflags)
      {
      secondHighZero = true;
      TR::ILOpCodes secondOp = secondChild->getOpCodeValue();
      if (secondChild->getReferenceCount() == 1 && secondRegister == 0)
         {
         // Fold a single-use, unevaluated widening conversion (or a
         // lushr by exactly 32) and subtract the grandchild directly.
         if (secondOp == TR::iu2l || secondOp == TR::su2l ||
             secondOp == TR::bu2l ||
             (secondOp == TR::lushr &&
              secondChild->getSecondChild()->getOpCodeValue() == TR::iconst &&
              (secondChild->getSecondChild()->getInt() & TR::TreeEvaluator::shiftMask(true)) == 32))
            {
            secondChild = secondChild->getFirstChild();
            secondRegister = secondChild->getRegister();
            if (secondOp == TR::lushr)
               {
               // lushr by 32: the wanted value is the operand's high word.
               useSecondHighOrder = true;
               }
            }
         }
      }

   setInputs(firstChild, firstRegister, secondChild, secondRegister);

   // Volatile memory operands must not be read twice (low word then high
   // word), so force them out of the memory-operand forms.
   if (isVolatileMemoryOperand(firstChild))
      resetMem1();

   if (isVolatileMemoryOperand(secondChild))
      resetMem2();

   if (getEvalChild1())
      {
      firstRegister = _cg->evaluate(firstChild);
      }

   if (getEvalChild2())
      {
      secondRegister = _cg->evaluate(secondChild);
      }

   // If the (possibly folded) second operand evaluated to a register pair,
   // pick the single 32-bit register that actually holds the value.
   if (secondHighZero && secondRegister && secondRegister->getRegisterPair())
      {
      if (!useSecondHighOrder)
         {
         secondRegister = secondRegister->getLowOrder();
         }
      else
         {
         secondRegister = secondRegister->getHighOrder();
         }
      }

   // lusubb consumes an incoming borrow from its third child.
   if (root->getOpCodeValue() == TR::lusubb &&
       TR_X86ComputeCC::setCarryBorrow(root->getChild(2), true, _cg))
      {
      // use SBB rather than SUB
      //
      regRegOpCode = SBB4RegReg;
      regMemOpCode = SBB4RegMem;
      }

   if (getCopyReg1())
      {
      // The first operand must survive: copy it into a fresh register pair
      // and subtract into the copy.
      TR::Register *lowThird = _cg->allocateRegister();
      TR::Register *highThird = _cg->allocateRegister();
      TR::RegisterPair *thirdReg = _cg->allocateRegisterPair(lowThird, highThird);
      targetRegister = thirdReg;
      generateRegRegInstruction(MOV4RegReg, root, lowThird, firstRegister->getLowOrder(), _cg);
      if (firstHighZero)
         {
         // High word known zero: materialize it with XOR instead of a copy.
         generateRegRegInstruction(XOR4RegReg, root, highThird, highThird, _cg);
         }
      else
         {
         generateRegRegInstruction(MOV4RegReg, root, highThird, firstRegister->getHighOrder(), _cg);
         }
      if (getSubReg3Reg2())
         {
         if (secondHighZero)
            {
            // Second high word is zero: only the borrow can affect the high word.
            generateRegRegInstruction(regRegOpCode, root, lowThird, secondRegister, _cg);
            generateRegImmInstruction(SBB4RegImms, root, highThird, 0, _cg);
            }
         else
            {
            generateRegRegInstruction(regRegOpCode, root, lowThird, secondRegister->getLowOrder(), _cg);
            generateRegRegInstruction(SBB4RegReg, root, highThird, secondRegister->getHighOrder(), _cg);
            }
         }
      else // assert getSubReg3Mem2() == true
         {
         TR::MemoryReference *lowMR = generateX86MemoryReference(secondChild, _cg);
         /**
          * The below code is needed to ensure correct behaviour when the subtract analyser encounters a lushr bytecode that shifts
          * by 32 bits. This is the only case where the useSecondHighOrder bit is set.
          * When the first child of the lushr is in a register, code above handles the shift. When the first child of the lushr is in
          * memory, the below ensures that the upper part of the first child of the lushr is used as lowMR.
          */
         if (useSecondHighOrder)
            {
            TR_ASSERT(secondHighZero, "useSecondHighOrder should be consistent with secondHighZero. useSecondHighOrder subsumes secondHighZero");
            lowMR = generateX86MemoryReference(*lowMR, 4, _cg);
            }

         generateRegMemInstruction(regMemOpCode, root, lowThird, lowMR, _cg);
         if (secondHighZero)
            {
            generateRegImmInstruction(SBB4RegImms, root, highThird, 0, _cg);
            }
         else
            {
            // High word lives 4 bytes above the low word.
            TR::MemoryReference *highMR = generateX86MemoryReference(*lowMR, 4, _cg);
            generateRegMemInstruction(SBB4RegMem, root, highThird, highMR, _cg);
            }
         lowMR->decNodeReferenceCounts(_cg);
         }
      }
   else if (getSubReg1Reg2())
      {
      // Subtract in place into the first operand's register pair.
      if (secondHighZero)
         {
         generateRegRegInstruction(regRegOpCode, root, firstRegister->getLowOrder(), secondRegister, _cg);
         generateRegImmInstruction(SBB4RegImms, root, firstRegister->getHighOrder(), 0, _cg);
         }
      else
         {
         generateRegRegInstruction(regRegOpCode, root, firstRegister->getLowOrder(), secondRegister->getLowOrder(), _cg);
         generateRegRegInstruction(SBB4RegReg, root, firstRegister->getHighOrder(), secondRegister->getHighOrder(), _cg);
         }
      targetRegister = firstRegister;
      }
   else // assert getSubReg1Mem2() == true
      {
      // First operand in registers, second in memory; subtract in place.
      TR::MemoryReference *lowMR = generateX86MemoryReference(secondChild, _cg);
      /**
       * The below code is needed to ensure correct behaviour when the subtract analyser encounters a lushr bytecode that shifts
       * by 32 bits. This is the only case where the useSecondHighOrder bit is set.
       * When the first child of the lushr is in a register, code above handles the shift. When the first child of the lushr is in
       * memory, the below ensures that the upper part of the first child of the lushr is used as lowMR.
       */
      if (useSecondHighOrder)
         lowMR = generateX86MemoryReference(*lowMR, 4, _cg);

      generateRegMemInstruction(regMemOpCode, root, firstRegister->getLowOrder(), lowMR, _cg);
      if (secondHighZero)
         {
         generateRegImmInstruction(SBB4RegImms, root, firstRegister->getHighOrder(), 0, _cg);
         }
      else
         {
         TR::MemoryReference *highMR = generateX86MemoryReference(*lowMR, 4, _cg);
         generateRegMemInstruction(SBB4RegMem, root, firstRegister->getHighOrder(), highMR, _cg);
         }
      targetRegister = firstRegister;
      lowMR->decNodeReferenceCounts(_cg);
      }

   return targetRegister;
   }
// Pushes an 8-byte double argument onto the IA-32 call stack.
// Tries the cheap encodings first (two immediate pushes for dconst, two
// memory pushes for a single-use load, forwarding for lbits2d); otherwise
// evaluates the child and stores it into freshly reserved stack space.
// The high word is pushed first so the value sits little-endian in memory.
// Returns the register holding the value, or NULL when none was needed.
TR::Register *IA32LinkageUtils::pushDoubleArg(
      TR::Node *child,
      TR::CodeGenerator *cg)
   {
   TR::Register *pushRegister;
   if (child->getRegister() == NULL)
      {
      if (child->getOpCodeValue() == TR::dconst)
         {
         // Constant double: push the raw 64-bit pattern as two 32-bit
         // immediates, choosing the short (signed-byte) form when it fits.
         TR_X86OpCodes pushOp;
         int32_t highValue = child->getLongIntHigh();
         if (highValue >= -128 && highValue <= 127)
            {
            pushOp = PUSHImms;
            }
         else
            {
            pushOp = PUSHImm4;
            }
         generateImmInstruction(pushOp, child, highValue, cg);
         int32_t lowValue = child->getLongIntLow();
         if (lowValue >= -128 && lowValue <= 127)
            {
            pushOp = PUSHImms;
            }
         else
            {
            pushOp = PUSHImm4;
            }
         generateImmInstruction(pushOp, child, lowValue, cg);
         cg->decReferenceCount(child);
         return NULL;
         }
      else if (child->getReferenceCount() == 1)
         {
         if (child->getOpCode().isLoad())
            {
            // Single-use, unevaluated load: push both halves straight from
            // memory (offset +4 is the high word), skipping evaluation.
            TR::MemoryReference *lowMR = generateX86MemoryReference(child, cg);
            generateMemInstruction(PUSHMem, child, generateX86MemoryReference(*lowMR, 4, cg), cg);
            generateMemInstruction(PUSHMem, child, lowMR, cg);
            lowMR->decNodeReferenceCounts(cg);
            cg->decReferenceCount(child);
            return NULL;
            }
         else if (child->getOpCodeValue() == TR::lbits2d)
            {
            // Bit-reinterpreted long: push the long operand directly.
            pushRegister = pushLongArg(child->getFirstChild(), cg);
            cg->decReferenceCount(child);
            return pushRegister;
            }
         }
      }

   // General case: evaluate, carve 8 bytes off the stack, and store the value.
   pushRegister = cg->evaluate(child);
   TR::RealRegister *espReal = cg->machine()->getRealRegister(TR::RealRegister::esp);
   generateRegImmInstruction(SUB4RegImms, child, espReal, 8, cg);

   // NOTE(review): the SSE store is gated on useSSEForSinglePrecision() even
   // though this value is a double — presumably useSSEForDoublePrecision()
   // was intended (compare pushFloatArg, where the single-precision query is
   // the right one). The TR_FPR kind check keeps this correct only while the
   // two settings agree; confirm against the code generator's SSE configuration.
   if (cg->useSSEForSinglePrecision() && pushRegister->getKind() == TR_FPR)
      generateMemRegInstruction(MOVSDMemReg, child, generateX86MemoryReference(espReal, 0, cg), pushRegister, cg);
   else
      generateFPMemRegInstruction(DSTMemReg, child, generateX86MemoryReference(espReal, 0, cg), pushRegister, cg);
   cg->decReferenceCount(child);
   return pushRegister;
   }