TR::Register* OMR::X86::TreeEvaluator::SIMDloadEvaluator(TR::Node* node, TR::CodeGenerator* cg)
   {
   TR::MemoryReference* tempMR = generateX86MemoryReference(node, cg);
   tempMR = ConvertToPatchableMemoryReference(tempMR, node, cg);
   TR::Register* resultReg = cg->allocateRegister(TR_VRF);

   TR_X86OpCodes opCode = BADIA32Op;
   switch (node->getSize())
      {
      case 16:
         opCode = MOVDQURegMem;
         break;
      default:
         if (cg->comp()->getOption(TR_TraceCG))
            traceMsg(cg->comp(), "Unsupported fill size: Node = %p\n", node);
         TR_ASSERT(false, "Unsupported fill size");
         break;
      }

   TR::Instruction* instr = generateRegMemInstruction(opCode, node, resultReg, tempMR, cg);
   if (node->getOpCode().isIndirect())
      cg->setImplicitExceptionPoint(instr);
   node->setRegister(resultReg);
   tempMR->decNodeReferenceCounts(cg);
   return resultReg;
   }
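// Shared helper for moving integer bits into a floating-point target register.
// The caller supplies the reg-mem and reg-reg opcodes to use: when the child is an
// unevaluated, single-referenced variable load, the value is loaded straight from
// memory into the target; otherwise the child is evaluated into a GPR and copied
// into the target register. (Descriptive note; the exact opcodes depend on the call site.)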
static TR::Register *l2fd(TR::Node *node, TR::RealRegister *target, TR_X86OpCodes opRegMem8, TR_X86OpCodes opRegReg8, TR::CodeGenerator *cg)
   {
   TR::Node            *child = node->getFirstChild();
   TR::MemoryReference *tempMR;

   TR_ASSERT(cg->useSSEForSinglePrecision(), "assertion failure");

   if (child->getRegister() == NULL &&
       child->getReferenceCount() == 1 &&
       child->getOpCode().isLoadVar())
      {
      tempMR = generateX86MemoryReference(child, cg);
      generateRegMemInstruction(opRegMem8, node, target, tempMR, cg);
      tempMR->decNodeReferenceCounts(cg);
      }
   else
      {
      TR::Register *intReg = cg->evaluate(child);
      generateRegRegInstruction(opRegReg8, node, target, intReg, cg);
      cg->decReferenceCount(child);
      }

   node->setRegister(target);
   return target;
   }
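// If a memory reference is unresolved, the helper below rewrites it as a simple
// [reg+0] form: the address is computed up front with an LEA so that the load that
// follows stays within the patching window, as the comment inside explains.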
static TR::MemoryReference* ConvertToPatchableMemoryReference(TR::MemoryReference* mr, TR::Node* node, TR::CodeGenerator* cg)
   {
   if (mr->getSymbolReference().isUnresolved())
      {
      // The load instructions may be wider than 8 bytes (our patching window),
      // but we won't know that for sure until after register assignment.
      // Hence, the unresolved memory reference must be evaluated into a register first.
      //
      TR::Register* tempReg = cg->allocateRegister();
      generateRegMemInstruction(LEARegMem(cg), node, tempReg, mr, cg);
      mr = generateX86MemoryReference(tempReg, 0, cg);
      cg->stopUsingRegister(tempReg);
      }
   return mr;
   }
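// The integer subtract analyser below chooses between reg-reg and reg-mem forms of
// the subtract, and decides whether the first operand must be copied first (or an
// iconst rematerialized) so that a commoned child is not clobbered. In outline, the
// common copy-then-subtract case reduces to something like:
//
//    MOV  copyReg, firstReg   ; only when firstChild must be preserved
//    SUB  copyReg, secondReg
//
// (Illustrative sketch only; the actual opcodes are the ones passed in by the caller.)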
/*
 * Users should call the integerSubtractAnalyser or integerSubtractAnalyserWithExplicitOperands APIs instead of calling this one directly
 */
TR::Register* TR_X86SubtractAnalyser::integerSubtractAnalyserImpl(TR::Node *root,
                                                                  TR::Node *firstChild,
                                                                  TR::Node *secondChild,
                                                                  TR_X86OpCodes regRegOpCode,
                                                                  TR_X86OpCodes regMemOpCode,
                                                                  TR_X86OpCodes copyOpCode,
                                                                  bool needsEflags,
                                                                  TR::Node *borrow)
   {
   TR::Register *targetRegister = NULL;
   TR::Register *firstRegister = firstChild->getRegister();
   TR::Register *secondRegister = secondChild->getRegister();
   setInputs(firstChild, firstRegister, secondChild, secondRegister);

   bool loadedConst = false;

   needsEflags = needsEflags || NEED_CC(root);

   if (getEvalChild1())
      {
      // If firstChild and secondChild are the same node, then we should
      // evaluate (take the else path) so that the evaluate for the secondChild
      // below will get the correct/already-allocated register.
      if (firstRegister == 0 && firstChild->getOpCodeValue() == TR::iconst && (firstChild != secondChild))
         {
         // An iconst may have to be generated. The iconst will be generated after the
         // secondChild is evaluated. Set the loadedConst flag to true.
         loadedConst = true;
         }
      else
         {
         firstRegister = _cg->evaluate(firstChild);
         }
      }

   if (getEvalChild2())
      {
      secondRegister = _cg->evaluate(secondChild);
      if (firstChild->getRegister())
         {
         firstRegister = firstChild->getRegister();
         }
      else if (!loadedConst)
         {
         firstRegister = _cg->evaluate(firstChild);
         }
      }

   if (loadedConst)
      {
      if (firstRegister == 0)
         {
         // firstChild is an iconst and it has not been evaluated.
         // Generate the code for an iconst.
         firstRegister = _cg->allocateRegister();
         TR::TreeEvaluator::insertLoadConstant(firstChild, firstRegister, firstChild->getInt(), TR_RematerializableInt, _cg);
         }
      else
         {
         // firstChild was evaluated. The code for an iconst does not need to be generated.
         // Set the loadedConst flag back to false.
         loadedConst = false;
         }
      }

   if (borrow != 0)
      TR_X86ComputeCC::setCarryBorrow(borrow, true, _cg);

   if (getCopyReg1())
      {
      if (firstChild->getReferenceCount() > 1)
         {
         TR::Register *thirdReg;
         if (firstChild->getOpCodeValue() == TR::iconst && loadedConst)
            {
            thirdReg = firstRegister;
            }
         else
            {
            if (secondChild->getReferenceCount() == 1 && secondRegister != 0 && !needsEflags && (borrow == 0))
               {
               // Use one fewer register if we negate the clobberable secondRegister and add.
               // Don't do this though if condition codes are needed. The sequence
               // depends on the carry flag being valid as if a sub was done.
               //
               bool nodeIs64Bit = TR_X86OpCode(regRegOpCode).hasLongSource();
               generateRegInstruction(NEGReg(nodeIs64Bit), secondChild, secondRegister, _cg);
               thirdReg = secondRegister;
               secondRegister = firstRegister;
               regRegOpCode = ADDRegReg(nodeIs64Bit);
               }
            else
               {
               thirdReg = _cg->allocateRegister();
               generateRegRegInstruction(copyOpCode, root, thirdReg, firstRegister, _cg);
               }
            }

         targetRegister = thirdReg;

         if (getSubReg3Reg2())
            {
            generateRegRegInstruction(regRegOpCode, root, thirdReg, secondRegister, _cg);
            }
         else // assert getSubReg3Mem2() == true
            {
            TR::MemoryReference *tempMR = generateX86MemoryReference(secondChild, _cg);
            generateRegMemInstruction(regMemOpCode, root, thirdReg, tempMR, _cg);
            tempMR->decNodeReferenceCounts(_cg);
            }
         }
      else
         {
         if (getSubReg3Reg2())
            {
            generateRegRegInstruction(regRegOpCode, root, firstRegister, secondRegister, _cg);
            }
         else // assert getSubReg3Mem2() == true
            {
            TR::MemoryReference *tempMR = generateX86MemoryReference(secondChild, _cg);
            generateRegMemInstruction(regMemOpCode, root, firstRegister, tempMR, _cg);
            tempMR->decNodeReferenceCounts(_cg);
            }
         targetRegister = firstRegister;
         }
      }
   else if (getSubReg1Reg2())
      {
      generateRegRegInstruction(regRegOpCode, root, firstRegister, secondRegister, _cg);
      targetRegister = firstRegister;
      }
   else // assert getSubReg1Mem2() == true
      {
      TR::MemoryReference *tempMR = generateX86MemoryReference(secondChild, _cg);
      generateRegMemInstruction(regMemOpCode, root, firstRegister, tempMR, _cg);
      targetRegister = firstRegister;
      tempMR->decNodeReferenceCounts(_cg);
      }

   return targetRegister;
   }
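// The long (64-bit) subtract analyser below targets 32-bit x86, where a long value
// lives in a register pair. The generated pattern is, roughly:
//
//    SUB  lowResult,  lowSecond    ; or SBB when evaluating lusubb with an incoming borrow
//    SBB  highResult, highSecond   ; or "SBB highResult, 0" when the second operand's
//                                  ; high word is known to be zero
//
// (Illustrative sketch; the memory-operand forms follow the same low/high shape.)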
/*
 * Users should call the longSubtractAnalyser or longSubtractAnalyserWithExplicitOperands APIs instead of calling this one directly
 */
TR::Register* TR_X86SubtractAnalyser::longSubtractAnalyserImpl(TR::Node *root, TR::Node *&firstChild, TR::Node *&secondChild)
   {
   TR::Register *firstRegister  = firstChild->getRegister();
   TR::Register *secondRegister = secondChild->getRegister();
   TR::Register *targetRegister = NULL;

   bool firstHighZero      = false;
   bool secondHighZero     = false;
   bool useSecondHighOrder = false;

   TR_X86OpCodes regRegOpCode = SUB4RegReg;
   TR_X86OpCodes regMemOpCode = SUB4RegMem;

   bool needsEflags = NEED_CC(root) || (root->getOpCodeValue() == TR::lusubb);

   // We can generate better code for long subtracts when one or more children have a
   // high-order zero word, and we can avoid evaluating such nodes when their result is
   // not needed for another parent.
   //
   if (firstChild->isHighWordZero() && !needsEflags)
      {
      firstHighZero = true;
      }

   if (secondChild->isHighWordZero() && !needsEflags)
      {
      secondHighZero = true;
      TR::ILOpCodes secondOp = secondChild->getOpCodeValue();
      if (secondChild->getReferenceCount() == 1 && secondRegister == 0)
         {
         if (secondOp == TR::iu2l || secondOp == TR::su2l || secondOp == TR::bu2l ||
             (secondOp == TR::lushr &&
              secondChild->getSecondChild()->getOpCodeValue() == TR::iconst &&
              (secondChild->getSecondChild()->getInt() & TR::TreeEvaluator::shiftMask(true)) == 32))
            {
            secondChild = secondChild->getFirstChild();
            secondRegister = secondChild->getRegister();
            if (secondOp == TR::lushr)
               {
               useSecondHighOrder = true;
               }
            }
         }
      }

   setInputs(firstChild, firstRegister, secondChild, secondRegister);

   if (isVolatileMemoryOperand(firstChild))
      resetMem1();

   if (isVolatileMemoryOperand(secondChild))
      resetMem2();

   if (getEvalChild1())
      {
      firstRegister = _cg->evaluate(firstChild);
      }

   if (getEvalChild2())
      {
      secondRegister = _cg->evaluate(secondChild);
      }

   if (secondHighZero && secondRegister && secondRegister->getRegisterPair())
      {
      if (!useSecondHighOrder)
         {
         secondRegister = secondRegister->getLowOrder();
         }
      else
         {
         secondRegister = secondRegister->getHighOrder();
         }
      }

   if (root->getOpCodeValue() == TR::lusubb &&
       TR_X86ComputeCC::setCarryBorrow(root->getChild(2), true, _cg))
      {
      // Use SBB rather than SUB.
      //
      regRegOpCode = SBB4RegReg;
      regMemOpCode = SBB4RegMem;
      }

   if (getCopyReg1())
      {
      TR::Register     *lowThird  = _cg->allocateRegister();
      TR::Register     *highThird = _cg->allocateRegister();
      TR::RegisterPair *thirdReg  = _cg->allocateRegisterPair(lowThird, highThird);
      targetRegister = thirdReg;
      generateRegRegInstruction(MOV4RegReg, root, lowThird, firstRegister->getLowOrder(), _cg);

      if (firstHighZero)
         {
         generateRegRegInstruction(XOR4RegReg, root, highThird, highThird, _cg);
         }
      else
         {
         generateRegRegInstruction(MOV4RegReg, root, highThird, firstRegister->getHighOrder(), _cg);
         }

      if (getSubReg3Reg2())
         {
         if (secondHighZero)
            {
            generateRegRegInstruction(regRegOpCode, root, lowThird, secondRegister, _cg);
            generateRegImmInstruction(SBB4RegImms, root, highThird, 0, _cg);
            }
         else
            {
            generateRegRegInstruction(regRegOpCode, root, lowThird, secondRegister->getLowOrder(), _cg);
            generateRegRegInstruction(SBB4RegReg, root, highThird, secondRegister->getHighOrder(), _cg);
            }
         }
      else // assert getSubReg3Mem2() == true
         {
         TR::MemoryReference *lowMR = generateX86MemoryReference(secondChild, _cg);
         /**
          * The below code is needed to ensure correct behaviour when the subtract analyser encounters a lushr bytecode that shifts
          * by 32 bits. This is the only case where the useSecondHighOrder bit is set.
          * When the first child of the lushr is in a register, code above handles the shift. When the first child of the lushr is in
          * memory, the below ensures that the upper part of the first child of the lushr is used as lowMR.
          */
         if (useSecondHighOrder)
            {
            TR_ASSERT(secondHighZero, "useSecondHighOrder should be consistent with secondHighZero. useSecondHighOrder subsumes secondHighZero");
            lowMR = generateX86MemoryReference(*lowMR, 4, _cg);
            }

         generateRegMemInstruction(regMemOpCode, root, lowThird, lowMR, _cg);
         if (secondHighZero)
            {
            generateRegImmInstruction(SBB4RegImms, root, highThird, 0, _cg);
            }
         else
            {
            TR::MemoryReference *highMR = generateX86MemoryReference(*lowMR, 4, _cg);
            generateRegMemInstruction(SBB4RegMem, root, highThird, highMR, _cg);
            }
         lowMR->decNodeReferenceCounts(_cg);
         }
      }
   else if (getSubReg1Reg2())
      {
      if (secondHighZero)
         {
         generateRegRegInstruction(regRegOpCode, root, firstRegister->getLowOrder(), secondRegister, _cg);
         generateRegImmInstruction(SBB4RegImms, root, firstRegister->getHighOrder(), 0, _cg);
         }
      else
         {
         generateRegRegInstruction(regRegOpCode, root, firstRegister->getLowOrder(), secondRegister->getLowOrder(), _cg);
         generateRegRegInstruction(SBB4RegReg, root, firstRegister->getHighOrder(), secondRegister->getHighOrder(), _cg);
         }
      targetRegister = firstRegister;
      }
   else // assert getSubReg1Mem2() == true
      {
      TR::MemoryReference *lowMR = generateX86MemoryReference(secondChild, _cg);
      /**
       * The below code is needed to ensure correct behaviour when the subtract analyser encounters a lushr bytecode that shifts
       * by 32 bits. This is the only case where the useSecondHighOrder bit is set.
       * When the first child of the lushr is in a register, code above handles the shift. When the first child of the lushr is in
       * memory, the below ensures that the upper part of the first child of the lushr is used as lowMR.
       */
      if (useSecondHighOrder)
         lowMR = generateX86MemoryReference(*lowMR, 4, _cg);

      generateRegMemInstruction(regMemOpCode, root, firstRegister->getLowOrder(), lowMR, _cg);
      if (secondHighZero)
         {
         generateRegImmInstruction(SBB4RegImms, root, firstRegister->getHighOrder(), 0, _cg);
         }
      else
         {
         TR::MemoryReference *highMR = generateX86MemoryReference(*lowMR, 4, _cg);
         generateRegMemInstruction(SBB4RegMem, root, firstRegister->getHighOrder(), highMR, _cg);
         }
      targetRegister = firstRegister;
      lowMR->decNodeReferenceCounts(_cg);
      }

   return targetRegister;
   }
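// dbits2l on AMD64: move the raw bits of a double into a GPR. When the node requests
// NaN normalization, any NaN encoding is collapsed to the canonical DOUBLE_NAN bit
// pattern. The default path detects NaNs with a single compare against a constant
// snippet and outlines the canonicalizing store; setting TR_disableFastNormalizeNaNs
// falls back to a simpler inline compare-and-branch sequence modelled on the IA32 code.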
TR::Register *OMR::X86::AMD64::TreeEvaluator::dbits2lEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   // TODO:AMD64: Peepholing
   TR::Node     *child = node->getFirstChild();
   TR::Register *sreg  = cg->evaluate(child);
   TR::Register *treg  = cg->allocateRegister(TR_GPR);
   generateRegRegInstruction(MOVQReg8Reg, node, treg, sreg, cg);
   if (node->normalizeNanValues())
      {
      static char *disableFastNormalizeNaNs = feGetEnv("TR_disableFastNormalizeNaNs");
      if (disableFastNormalizeNaNs)
         {
         // This one is not clever, but it is simple, and it's based directly
         // on the IA32 version which is known to work, so is safer.
         //
         TR::RegisterDependencyConditions *deps = generateRegisterDependencyConditions((uint8_t)0, (uint8_t)1, cg);
         deps->addPostCondition(treg, TR::RealRegister::NoReg, cg);

         TR::IA32ConstantDataSnippet *nan1Snippet = cg->findOrCreate8ByteConstant(node, DOUBLE_NAN_1_LOW);
         TR::IA32ConstantDataSnippet *nan2Snippet = cg->findOrCreate8ByteConstant(node, DOUBLE_NAN_2_LOW);
         TR::MemoryReference *nan1MR = generateX86MemoryReference(nan1Snippet, cg);
         TR::MemoryReference *nan2MR = generateX86MemoryReference(nan2Snippet, cg);

         TR::LabelSymbol *startLabel     = TR::LabelSymbol::create(cg->trHeapMemory(),cg);
         TR::LabelSymbol *normalizeLabel = TR::LabelSymbol::create(cg->trHeapMemory(),cg);
         TR::LabelSymbol *endLabel       = TR::LabelSymbol::create(cg->trHeapMemory(),cg);
         startLabel->setStartInternalControlFlow();
         endLabel  ->setEndInternalControlFlow();

         generateLabelInstruction(   LABEL,        node, startLabel,     cg);
         generateRegMemInstruction(  CMP8RegMem,   node, treg, nan1MR,   cg);
         generateLabelInstruction(   JGE4,         node, normalizeLabel, cg);
         generateRegMemInstruction(  CMP8RegMem,   node, treg, nan2MR,   cg);
         generateLabelInstruction(   JB4,          node, endLabel,       cg);
         generateLabelInstruction(   LABEL,        node, normalizeLabel, cg);
         generateRegImm64Instruction(MOV8RegImm64, node, treg, DOUBLE_NAN, cg);
         generateLabelInstruction(   LABEL,        node, endLabel, deps, cg);
         }
      else
         {
         // A bunch of bookkeeping
         //
         uint64_t nanDetector = DOUBLE_NAN_2_LOW;

         TR::RegisterDependencyConditions *internalControlFlowDeps = generateRegisterDependencyConditions((uint8_t)0, (uint8_t)1, cg);
         internalControlFlowDeps->addPostCondition(treg, TR::RealRegister::NoReg, cg);

         TR::RegisterDependencyConditions *helperDeps = generateRegisterDependencyConditions((uint8_t)1, (uint8_t)1, cg);
         helperDeps->addPreCondition( treg, TR::RealRegister::eax, cg);
         helperDeps->addPostCondition(treg, TR::RealRegister::eax, cg);

         TR::IA32ConstantDataSnippet *nanDetectorSnippet = cg->findOrCreate8ByteConstant(node, nanDetector);
         TR::MemoryReference *nanDetectorMR = generateX86MemoryReference(nanDetectorSnippet, cg);

         TR::LabelSymbol *startLabel     = TR::LabelSymbol::create(cg->trHeapMemory(),cg);
         TR::LabelSymbol *slowPathLabel  = TR::LabelSymbol::create(cg->trHeapMemory(),cg);
         TR::LabelSymbol *normalizeLabel = TR::LabelSymbol::create(cg->trHeapMemory(),cg);
         TR::LabelSymbol *endLabel       = TR::LabelSymbol::create(cg->trHeapMemory(),cg);
         startLabel->setStartInternalControlFlow();
         endLabel  ->setEndInternalControlFlow();

         // Fast path: if subtracting nanDetector leaves CF=0 or OF=1, then it
         // must be a NaN.
         //
         generateLabelInstruction( LABEL,      node, startLabel,          cg);
         generateRegMemInstruction(CMP8RegMem, node, treg, nanDetectorMR, cg);
         generateLabelInstruction( JAE4,       node, slowPathLabel,       cg);
         generateLabelInstruction( JO4,        node, slowPathLabel,       cg);

         // Slow path
         //
         TR_OutlinedInstructions *slowPath = new (cg->trHeapMemory()) TR_OutlinedInstructions(slowPathLabel, cg);
         cg->getOutlinedInstructionsList().push_front(slowPath);
         slowPath->swapInstructionListsWithCompilation();
         generateLabelInstruction(NULL, LABEL, slowPathLabel, cg)->setNode(node);
         generateRegImm64Instruction(MOV8RegImm64, node, treg, DOUBLE_NAN, cg);
         generateLabelInstruction( JMP4, node, endLabel, cg);
         slowPath->swapInstructionListsWithCompilation();

         // Merge point
         //
         generateLabelInstruction(LABEL, node, endLabel, internalControlFlowDeps, cg);
         }
      }
   node->setRegister(treg);
   cg->decReferenceCount(child);
   return treg;
   }
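// Build a direct call under the AMD64 system linkage: evaluate the outgoing arguments,
// attach the argument registers as pre-conditions and the volatile/return registers as
// post-conditions, then dispatch either through a scratch register holding the 64-bit
// method address (CALLReg) or through an immediate call (CALLImm4).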
TR::Register *TR::AMD64SystemLinkage::buildDirectDispatch(
      TR::Node *callNode,
      bool spillFPRegs)
   {
   TR::SymbolReference *methodSymRef = callNode->getSymbolReference();
   TR::MethodSymbol *methodSymbol = methodSymRef->getSymbol()->castToMethodSymbol();
   TR::Register *returnReg;

   // Allocate adequate register dependencies.
   //
   // pre = number of argument registers
   // post = number of volatile registers + return register
   //
   uint32_t pre = getProperties().getNumIntegerArgumentRegisters() + getProperties().getNumFloatArgumentRegisters();
   uint32_t post = getProperties().getNumVolatileRegisters() + (callNode->getDataType() == TR::NoType ? 0 : 1);

#if defined (PYTHON) && 0
   // Treat all preserved GP regs as volatile until register map support available.
   //
   post += getProperties().getNumberOfPreservedGPRegisters();
#endif

   TR::RegisterDependencyConditions *preDeps = generateRegisterDependencyConditions(pre, 0, cg());
   TR::RegisterDependencyConditions *postDeps = generateRegisterDependencyConditions(0, post, cg());

   // Evaluate outgoing arguments on the system stack and build pre-conditions.
   //
   int32_t memoryArgSize = buildArgs(callNode, preDeps);

   // Build post-conditions.
   //
   returnReg = buildVolatileAndReturnDependencies(callNode, postDeps);
   postDeps->stopAddingPostConditions();

   // Find the second scratch register in the post dependency list.
   //
   TR::Register *scratchReg = NULL;
   TR::RealRegister::RegNum scratchRegIndex = getProperties().getIntegerScratchRegister(1);
   for (int32_t i = 0; i < post; i++)
      {
      if (postDeps->getPostConditions()->getRegisterDependency(i)->getRealRegister() == scratchRegIndex)
         {
         scratchReg = postDeps->getPostConditions()->getRegisterDependency(i)->getRegister();
         break;
         }
      }

#if defined(PYTHON) && 0
   // For Python, store the instruction that contains the GC map at this site into
   // the frame object.
   //
   TR::SymbolReference *frameObjectSymRef =
      comp()->getSymRefTab()->findOrCreateAutoSymbol(comp()->getMethodSymbol(), 0, TR::Address, true, false, true);

   TR::Register *frameObjectRegister = cg()->allocateRegister();
   generateRegMemInstruction(
         L8RegMem,
         callNode,
         frameObjectRegister,
         generateX86MemoryReference(frameObjectSymRef, cg()),
         cg());

   TR::RealRegister *espReal = cg()->machine()->getX86RealRegister(TR::RealRegister::esp);
   TR::Register *gcMapPCRegister = cg()->allocateRegister();

   generateRegMemInstruction(
         LEA8RegMem,
         callNode,
         gcMapPCRegister,
         generateX86MemoryReference(espReal, -8, cg()),
         cg());

   // Use "volatile" registers across the call. Once proper register map support
   // is implemented, r14 and r15 will no longer be volatile and a different pair
   // should be chosen.
   //
   TR::RegisterDependencyConditions *gcMapDeps = generateRegisterDependencyConditions(0, 2, cg());
   gcMapDeps->addPostCondition(frameObjectRegister, TR::RealRegister::r14, cg());
   gcMapDeps->addPostCondition(gcMapPCRegister, TR::RealRegister::r15, cg());
   gcMapDeps->stopAddingPostConditions();

   generateMemRegInstruction(
         S8MemReg,
         callNode,
         generateX86MemoryReference(frameObjectRegister, fe()->getPythonGCMapPCOffsetInFrame(), cg()),
         gcMapPCRegister,
         gcMapDeps,
         cg());

   cg()->stopUsingRegister(frameObjectRegister);
   cg()->stopUsingRegister(gcMapPCRegister);
#endif

   TR::Instruction *instr;
   if (methodSymbol->getMethodAddress())
      {
      TR_ASSERT(scratchReg, "could not find second scratch register");
      auto LoadRegisterInstruction = generateRegImm64SymInstruction(
            MOV8RegImm64,
            callNode,
            scratchReg,
            (uintptr_t)methodSymbol->getMethodAddress(),
            methodSymRef,
            cg());

      if (TR::Options::getCmdLineOptions()->getOption(TR_EmitRelocatableELFFile))
         {
         LoadRegisterInstruction->setReloKind(TR_NativeMethodAbsolute);
         }

      instr = generateRegInstruction(CALLReg, callNode, scratchReg, preDeps, cg());
      }
   else
      {
      instr = generateImmSymInstruction(CALLImm4, callNode, (uintptrj_t)methodSymbol->getMethodAddress(), methodSymRef, preDeps, cg());
      }

   cg()->resetIsLeafMethod();

   instr->setNeedsGCMap(getProperties().getPreservedRegisterMapForGC());

   cg()->stopUsingRegister(scratchReg);

   TR::LabelSymbol *postDepLabel = generateLabelSymbol(cg());
   generateLabelInstruction(LABEL, callNode, postDepLabel, postDeps, cg());

   return returnReg;
   }
// Copies parameters from where they enter the method (either on stack or in a
// linkage register) to their "home location" where the method body will expect
// to find them (either on stack or in a global register).
//
TR::Instruction *
TR::X86SystemLinkage::copyParametersToHomeLocation(TR::Instruction *cursor)
   {
   TR::Machine *machine = cg()->machine();
   TR::RealRegister *framePointer = machine->getX86RealRegister(TR::RealRegister::vfp);

   TR::ResolvedMethodSymbol *bodySymbol = comp()->getJittedMethodSymbol();
   ListIterator<TR::ParameterSymbol> paramIterator(&(bodySymbol->getParameterList()));
   TR::ParameterSymbol *paramCursor;

   const TR::RealRegister::RegNum noReg = TR::RealRegister::NoReg;
   TR_ASSERT(noReg == 0, "noReg must be zero so zero-initializing movStatus will work");

   TR::MovStatus movStatus[TR::RealRegister::NumRegisters] = {{(TR::RealRegister::RegNum)0,(TR::RealRegister::RegNum)0,(TR_MovDataTypes)0}};

   // We must always do the stores first, then the reg-reg copies, then the
   // loads, so that we never clobber a register we will need later. However,
   // the logic is simpler if we do the loads and stores in the same loop.
   // Therefore, we maintain a separate instruction cursor for the loads.
   //
   // We defer the initialization of loadCursor until we generate the first
   // load. Otherwise, if we happen to generate some stores first, then the
   // store cursor would get ahead of the loadCursor, and the instructions
   // would end up in the wrong order despite our efforts.
   //
   TR::Instruction *loadCursor = NULL;

   // Phase 1: generate RegMem and MemReg movs, and collect information about
   // the required RegReg movs.
   //
   for (paramCursor = paramIterator.getFirst();
        paramCursor != NULL;
        paramCursor = paramIterator.getNext())
      {
      int8_t lri = paramCursor->getLinkageRegisterIndex();     // How the parameter enters the method
      TR::RealRegister::RegNum ai                              // Where method body expects to find it
         = (TR::RealRegister::RegNum)paramCursor->getAllocatedIndex();
      int32_t offset = paramCursor->getParameterOffset();      // Location of the parameter's stack slot
      TR_MovDataTypes movDataType = paramMovType(paramCursor); // What sort of MOV instruction does it need?

      // Copy the parameter to wherever it should be
      //
      if (lri == NOT_LINKAGE)           // It's on the stack
         {
         if (ai == NOT_ASSIGNED)        // It only needs to be on the stack
            {
            // Nothing to do
            }
         else                           // Method body expects it to be in the ai register
            {
            if (loadCursor == NULL)
               loadCursor = cursor;

            if (debug("traceCopyParametersToHomeLocation"))
               diagnostic("copyParametersToHomeLocation: Loading %d\n", ai);
            // ai := stack
            loadCursor = generateRegMemInstruction(
               loadCursor,
               TR::Linkage::movOpcodes(RegMem, movDataType),
               machine->getX86RealRegister(ai),
               generateX86MemoryReference(framePointer, offset, cg()),
               cg()
               );
            }
         }
      else                              // It's in a linkage register
         {
         TR::RealRegister::RegNum sourceIndex = getProperties().getArgumentRegister(lri, isFloat(movDataType));

         // Copy to the stack if necessary
         //
         if (ai == NOT_ASSIGNED || hasToBeOnStack(paramCursor))
            {
            if (comp()->getOption(TR_TraceCG))
               traceMsg(comp(), "copyToHomeLocation param %p, linkage reg index %d, allocated index %d, parameter offset %d, hasToBeOnStack %d, parm->isParmHasToBeOnStack() %d.\n", paramCursor, lri, ai, offset, hasToBeOnStack(paramCursor), paramCursor->isParmHasToBeOnStack());
            if (debug("traceCopyParametersToHomeLocation"))
               diagnostic("copyParametersToHomeLocation: Storing %d\n", sourceIndex);
            // stack := lri
            cursor = generateMemRegInstruction(
               cursor,
               TR::Linkage::movOpcodes(MemReg, movDataType),
               generateX86MemoryReference(framePointer, offset, cg()),
               machine->getX86RealRegister(sourceIndex),
               cg()
               );
            }

         // Copy to the ai register if necessary
         //
         if (ai != NOT_ASSIGNED && ai != sourceIndex)
            {
            // This parameter needs a RegReg move. We don't know yet whether
            // we need the value in the target register, so for now we just
            // remember that we need to do this and keep going.
            //
            TR_ASSERT(movStatus[ai         ].sourceReg == noReg, "Each target reg must have only one source");
            TR_ASSERT(movStatus[sourceIndex].targetReg == noReg, "Each source reg must have only one target");
            if (debug("traceCopyParametersToHomeLocation"))
               diagnostic("copyParametersToHomeLocation: Planning to move %d to %d\n", sourceIndex, ai);
            movStatus[ai].sourceReg                  = sourceIndex;
            movStatus[sourceIndex].targetReg         = ai;
            movStatus[sourceIndex].outgoingDataType  = movDataType;
            }

         if (debug("traceCopyParametersToHomeLocation") && ai == sourceIndex)
            {
            diagnostic("copyParametersToHomeLocation: Parameter #%d already in register %d\n", lri, ai);
            }
         }
      }

   // Phase 2: Iterate through the parameters again to insert the RegReg moves.
   //
   for (paramCursor = paramIterator.getFirst();
        paramCursor != NULL;
        paramCursor = paramIterator.getNext())
      {
      if (paramCursor->getLinkageRegisterIndex() == NOT_LINKAGE)
         continue;

      const TR::RealRegister::RegNum paramReg =
         getProperties().getArgumentRegister(paramCursor->getLinkageRegisterIndex(), isFloat(paramMovType(paramCursor)));

      if (movStatus[paramReg].targetReg == 0)
         {
         // This parameter does not need to be copied anywhere
         if (debug("traceCopyParametersToHomeLocation"))
            diagnostic("copyParametersToHomeLocation: Not moving %d\n", paramReg);
         }
      else
         {
         if (debug("traceCopyParametersToHomeLocation"))
            diagnostic("copyParametersToHomeLocation: Preparing to move %d\n", paramReg);

         // If a mov's target register is the source for another mov, we need
         // to do that other mov first. The idea is to find the end point of
         // the chain of movs starting with paramReg and ending with a
         // register whose current value is not needed; then do that chain of
         // movs in reverse order.
         //
         TR_ASSERT(noReg == 0, "noReg must be zero (not %d) for zero-filled initialization to work", noReg);

         TR::RealRegister::RegNum regCursor;

         // Find the last target in the chain
         //
         regCursor = movStatus[paramReg].targetReg;
         while(movStatus[regCursor].targetReg != noReg)
            {
            // Haven't found the end yet
            regCursor = movStatus[regCursor].targetReg;
            TR_ASSERT(regCursor != paramReg, "Can't yet handle cyclic dependencies");

            // TODO:AMD64 Use scratch register to break cycles
            // A properly-written pickRegister should never
            // cause cycles to occur in the first place. However, we may want
            // to consider adding cycle-breaking logic so that (1) pickRegister
            // has more flexibility, and (2) we're more robust against
            // otherwise harmless bugs in pickRegister.
            }

         // Work our way backward along the chain, generating all the necessary movs
         //
         while(movStatus[regCursor].sourceReg != noReg)
            {
            TR::RealRegister::RegNum source = movStatus[regCursor].sourceReg;
            if (debug("traceCopyParametersToHomeLocation"))
               diagnostic("copyParametersToHomeLocation: Moving %d to %d\n", source, regCursor);
            // regCursor := regCursor.sourceReg
            cursor = generateRegRegInstruction(
               cursor,
               TR::Linkage::movOpcodes(RegReg, movStatus[source].outgoingDataType),
               machine->getX86RealRegister(regCursor),
               machine->getX86RealRegister(source),
               cg()
               );
            // Update movStatus as we go so we don't generate redundant movs
            movStatus[regCursor].sourceReg = noReg;
            movStatus[source   ].targetReg = noReg;
            // Continue with the next register in the chain
            regCursor = source;
            }
         }
      }

   // Return the last instruction we inserted, whether or not it was a load.
   //
   return loadCursor ? loadCursor : cursor;
   }
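// XMM floating-point compare analyser. Operand order matters for the compare opcodes,
// so some IL compare operators force a swap of the children, some forbid it, and
// fcmpg/fcmpl/dcmpg/dcmpl are never swapped for correctness. If the children do end up
// reversed, the root's opcode is recreated with its swapped-children equivalent.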
TR::Register *TR_IA32XMMCompareAnalyser::xmmCompareAnalyser(TR::Node *root,
                                                            TR_X86OpCodes cmpRegRegOpCode,
                                                            TR_X86OpCodes cmpRegMemOpCode)
   {
   TR::Node      *firstChild, *secondChild;
   TR::ILOpCodes  cmpOp        = root->getOpCodeValue();
   bool           reverseMemOp = false;
   bool           reverseCmpOp = false;

   // Some operators must have their operands swapped to improve the generated
   // code needed to evaluate the result of the comparison.
   //
   bool mustSwapOperands = (cmpOp == TR::iffcmple  || cmpOp == TR::ifdcmple  ||
                            cmpOp == TR::iffcmpgtu || cmpOp == TR::ifdcmpgtu ||
                            cmpOp == TR::fcmple    || cmpOp == TR::dcmple    ||
                            cmpOp == TR::fcmpgtu   || cmpOp == TR::dcmpgtu   ||
                            cmpOp == TR::iffcmplt  || cmpOp == TR::ifdcmplt  ||
                            cmpOp == TR::iffcmpgeu || cmpOp == TR::ifdcmpgeu ||
                            cmpOp == TR::fcmplt    || cmpOp == TR::dcmplt    ||
                            cmpOp == TR::fcmpgeu   || cmpOp == TR::dcmpgeu) ? true : false;

   // Some operators should not have their operands swapped to improve the generated
   // code needed to evaluate the result of the comparison.
   //
   bool preventOperandSwapping = (cmpOp == TR::iffcmpltu || cmpOp == TR::ifdcmpltu ||
                                  cmpOp == TR::iffcmpge  || cmpOp == TR::ifdcmpge  ||
                                  cmpOp == TR::fcmpltu   || cmpOp == TR::dcmpltu   ||
                                  cmpOp == TR::fcmpge    || cmpOp == TR::dcmpge    ||
                                  cmpOp == TR::iffcmpgt  || cmpOp == TR::ifdcmpgt  ||
                                  cmpOp == TR::iffcmpleu || cmpOp == TR::ifdcmpleu ||
                                  cmpOp == TR::fcmpgt    || cmpOp == TR::dcmpgt    ||
                                  cmpOp == TR::fcmpleu   || cmpOp == TR::dcmpleu) ? true : false;

   // For correctness, don't swap operands of these operators.
   //
   if (cmpOp == TR::fcmpg || cmpOp == TR::fcmpl ||
       cmpOp == TR::dcmpg || cmpOp == TR::dcmpl)
      {
      preventOperandSwapping = true;
      }

   // Initial operand evaluation ordering.
   //
   if (preventOperandSwapping || (!mustSwapOperands && _cg->whichChildToEvaluate(root) == 0))
      {
      firstChild  = root->getFirstChild();
      secondChild = root->getSecondChild();
      setReversedOperands(false);
      }
   else
      {
      firstChild  = root->getSecondChild();
      secondChild = root->getFirstChild();
      setReversedOperands(true);
      }

   TR::Register *firstRegister  = firstChild->getRegister();
   TR::Register *secondRegister = secondChild->getRegister();

   setInputs(firstChild,
             firstRegister,
             secondChild,
             secondRegister,
             false,

             // If either 'preventOperandSwapping' or 'mustSwapOperands' is set then the
             // initial operand ordering set above must be maintained.
             //
             preventOperandSwapping || mustSwapOperands);

   // Make sure any required operand ordering is respected.
   //
   if ((getCmpReg2Reg1() || getCmpReg2Mem1()) &&
       (mustSwapOperands || preventOperandSwapping))
      {
      reverseCmpOp = getCmpReg2Reg1() ? true : false;
      reverseMemOp = getCmpReg2Mem1() ? true : false;
      }

   // Evaluate the children if necessary.
   //
   if (getEvalChild1())
      {
      _cg->evaluate(firstChild);
      }

   if (getEvalChild2())
      {
      _cg->evaluate(secondChild);
      }

   TR::TreeEvaluator::coerceFPOperandsToXMMRs(root, _cg);

   firstRegister  = firstChild->getRegister();
   secondRegister = secondChild->getRegister();

   // Generate the compare instruction.
   //
   if (getCmpReg1Mem2() || reverseMemOp)
      {
      TR::MemoryReference *tempMR = generateX86MemoryReference(secondChild, _cg);
      generateRegMemInstruction(cmpRegMemOpCode, root, firstRegister, tempMR, _cg);
      tempMR->decNodeReferenceCounts(_cg);
      }
   else if (getCmpReg2Mem1())
      {
      TR::MemoryReference *tempMR = generateX86MemoryReference(firstChild, _cg);
      generateRegMemInstruction(cmpRegMemOpCode, root, secondRegister, tempMR, _cg);
      notReversedOperands();
      tempMR->decNodeReferenceCounts(_cg);
      }
   else if (getCmpReg1Reg2() || reverseCmpOp)
      {
      generateRegRegInstruction(cmpRegRegOpCode, root, firstRegister, secondRegister, _cg);
      }
   else if (getCmpReg2Reg1())
      {
      generateRegRegInstruction(cmpRegRegOpCode, root, secondRegister, firstRegister, _cg);
      notReversedOperands();
      }

   _cg->decReferenceCount(firstChild);
   _cg->decReferenceCount(secondChild);

   // Update the opcode on the root node if we have swapped its children.
   // TODO: Reverse the children too, or else this looks wrong in the log file
   //
   if (getReversedOperands())
      {
      cmpOp = TR::ILOpCode(cmpOp).getOpCodeForSwapChildren();
      TR::Node::recreate(root, cmpOp);
      }

   return NULL;
   }
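// Emit the out-of-line helper-call dispatch sequence into the cold instruction stream:
// the main-line instruction list is temporarily swapped out, the call is generated
// (optionally rematerializing the VM thread register around it), the result is copied
// into the requested target register, and control either jumps back to the restart
// label or falls through to the 4-byte data word expected by the throw-helper handshake.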
void TR_OutlinedInstructions::generateOutlinedInstructionsDispatch()
   {
   // Switch to cold helper instruction stream.
   //
   TR::Register *vmThreadReg = _cg->getMethodMetaDataRegister();
   TR::Instruction *savedFirstInstruction = comp()->getFirstInstruction();
   TR::Instruction *savedAppendInstruction = comp()->getAppendInstruction();
   comp()->setFirstInstruction(NULL);
   comp()->setAppendInstruction(NULL);

   new (_cg->trHeapMemory()) TR::X86LabelInstruction(NULL, LABEL, _entryLabel, _cg);

   if (_rematerializeVMThread)
      {
      generateRegInstruction(PUSHReg, _callNode, vmThreadReg, _cg);
      generateRestoreVMThreadInstruction(_callNode, _cg);
      TR::MemoryReference *vmThreadMR = generateX86MemoryReference(vmThreadReg, (TR::Compiler->target.is64Bit()) ? 16 : 8, _cg);
      generateRegMemInstruction(LRegMem(), _callNode, vmThreadReg, vmThreadMR, _cg);
      }

   TR::Register *resultReg = NULL;
   if (_callNode->getOpCode().isCallIndirect())
      resultReg = TR::TreeEvaluator::performCall(_callNode, true, false, _cg);
   else
      resultReg = TR::TreeEvaluator::performCall(_callNode, false, false, _cg);

   if (_rematerializeVMThread)
      {
      generateRegInstruction(POPReg, _callNode, vmThreadReg, _cg);
      }

   if (_targetReg)
      {
      TR_ASSERT(resultReg, "assertion failure");
      TR::RegisterPair *targetRegPair = _targetReg->getRegisterPair();
      TR::RegisterPair *resultRegPair = resultReg->getRegisterPair();
      if (targetRegPair)
         {
         TR_ASSERT(resultRegPair, "OutlinedInstructions: targetReg is a register pair and resultReg is not");
         generateRegRegInstruction(_targetRegMovOpcode, _callNode, targetRegPair->getLowOrder(),  resultRegPair->getLowOrder(),  _cg);
         generateRegRegInstruction(_targetRegMovOpcode, _callNode, targetRegPair->getHighOrder(), resultRegPair->getHighOrder(), _cg);
         }
      else
         {
         TR_ASSERT(!resultRegPair, "OutlinedInstructions: resultReg is a register pair and targetReg is not");
         generateRegRegInstruction(_targetRegMovOpcode, _callNode, _targetReg, resultReg, _cg);
         }
      }

   _cg->decReferenceCount(_callNode);

   if (_restartLabel)
      generateLabelInstruction(JMP4, _callNode, _restartLabel, _cg);
   else
      {
      // Java-specific.
      // No restart label implies we're not coming back from this call,
      // so it's safe to put data after the call. In the case of calling a throw
      // helper, there's an ancient busted handshake that expects to find a 4-byte
      // offset here, so we have to comply...
      //
      // When the handshake is removed, we can delete this zero.
      //
      generateImmInstruction(DDImm4, _callNode, 0, _cg);
      }

   // Dummy label to delimit the end of the helper call dispatch sequence (for exception ranges).
   //
   generateLabelInstruction(LABEL, _callNode, TR::LabelSymbol::create(_cg->trHeapMemory(),_cg), _cg);

   // Switch from cold helper instruction stream.
   //
   _firstInstruction  = comp()->getFirstInstruction();
   _appendInstruction = comp()->getAppendInstruction();
   comp()->setFirstInstruction(savedFirstInstruction);
   comp()->setAppendInstruction(savedAppendInstruction);
   }