TR::Register* OMR::X86::TreeEvaluator::SIMDstoreEvaluator(TR::Node* node, TR::CodeGenerator* cg)
   {
   TR::Node* valueNode = node->getChild(node->getOpCode().isIndirect() ? 1 : 0);
   TR::MemoryReference* tempMR = generateX86MemoryReference(node, cg);
   tempMR = ConvertToPatchableMemoryReference(tempMR, node, cg);
   TR::Register* valueReg = cg->evaluate(valueNode);

   TR_X86OpCodes opCode = BADIA32Op;
   switch (node->getSize())
      {
      case 16:
         opCode = MOVDQUMemReg;
         break;
      default:
         if (cg->comp()->getOption(TR_TraceCG))
            traceMsg(cg->comp(), "Unsupported fill size: Node = %p\n", node);
         TR_ASSERT(false, "Unsupported fill size");
         break;
      }

   TR::Instruction* instr = generateMemRegInstruction(opCode, node, tempMR, valueReg, cg);
   cg->decReferenceCount(valueNode);
   tempMR->decNodeReferenceCounts(cg);
   if (node->getOpCode().isIndirect())
      cg->setImplicitExceptionPoint(instr);
   return NULL;
   }
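// Illustrative sketch (not part of the evaluator): the 16-byte case above emits
// an unaligned 128-bit store, the same operation as the SSE2 intrinsic below.
// The helper name is hypothetical; it only demonstrates the semantics.
#include <emmintrin.h>

static void store16Unaligned(void *dest, __m128i value)
   {
   // MOVDQU tolerates any alignment, matching the evaluator's choice of the
   // unaligned form over the alignment-faulting MOVDQA store.
   _mm_storeu_si128((__m128i *)dest, value);
   }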
TR::Register *IA32LinkageUtils::pushFloatArg(
      TR::Node *child,
      TR::CodeGenerator *cg)
   {
   TR::Register *pushRegister;
   if (child->getRegister() == NULL)
      {
      if (child->getOpCodeValue() == TR::fconst)
         {
         int32_t value = child->getFloatBits();
         TR_X86OpCodes pushOp;
         if (value >= -128 && value <= 127)
            {
            pushOp = PUSHImms;
            }
         else
            {
            pushOp = PUSHImm4;
            }
         generateImmInstruction(pushOp, child, value, cg);
         cg->decReferenceCount(child);
         return NULL;
         }
      else if (child->getReferenceCount() == 1)
         {
         if (child->getOpCode().isLoad())
            {
            TR::MemoryReference *tempMR = generateX86MemoryReference(child, cg);
            generateMemInstruction(PUSHMem, child, tempMR, cg);
            tempMR->decNodeReferenceCounts(cg);
            cg->decReferenceCount(child);
            return NULL;
            }
         else if (child->getOpCodeValue() == TR::ibits2f)
            {
            pushRegister = pushIntegerWordArg(child->getFirstChild(), cg);
            cg->decReferenceCount(child);
            return pushRegister;
            }
         }
      }

   pushRegister = cg->evaluate(child);
   TR::RealRegister *espReal = cg->machine()->getRealRegister(TR::RealRegister::esp);
   generateRegImmInstruction(SUB4RegImms, child, espReal, 4, cg);

   if (cg->useSSEForSinglePrecision() && pushRegister->getKind() == TR_FPR)
      generateMemRegInstruction(MOVSSMemReg, child, generateX86MemoryReference(espReal, 0, cg), pushRegister, cg);
   else
      generateFPMemRegInstruction(FSTMemReg, child, generateX86MemoryReference(espReal, 0, cg), pushRegister, cg);

   cg->decReferenceCount(child);
   return pushRegister;
   }
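// Illustrative sketch (not part of the linkage code): the PUSHImms/PUSHImm4
// choice above hinges on whether the 32-bit constant survives a push with a
// sign-extended 8-bit immediate. A minimal standalone predicate for that test:
static bool fitsInSignedByte(int32_t value)
   {
   // PUSH imm8 sign-extends its operand, so any value in [-128, 127]
   // round-trips exactly and the shorter encoding is safe to pick.
   return value >= -128 && value <= 127;
   }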
TR::Instruction * TR_X86SystemLinkage::savePreservedRegisters(TR::Instruction *cursor)
   {
   // For IA32, usePushForPreservedRegs will be true if shrink wrapping is disabled, otherwise false.
   // For X64, shrink wrapping is always on, so usePushForPreservedRegs is always false.
   // TR_ASSERT(!getProperties().getUsesPushesForPreservedRegs(), "assertion failure");
   TR::ResolvedMethodSymbol *bodySymbol = comp()->getJittedMethodSymbol();
   const int32_t localSize = getProperties().getOffsetToFirstLocal() - bodySymbol->getLocalMappingCursor();
   const int32_t pointerSize = getProperties().getPointerSize();

   int32_t offsetCursor = -localSize + getProperties().getOffsetToFirstLocal() - pointerSize;
   if (_properties.getUsesPushesForPreservedRegs())
      {
      for (int32_t pindex = _properties.getMaxRegistersPreservedInPrologue()-1; pindex >= 0; pindex--)
         {
         TR::RealRegister::RegNum idx = _properties.getPreservedRegister((uint32_t)pindex);
         TR::RealRegister *reg = machine()->getX86RealRegister(idx);
         if (reg->getHasBeenAssignedInMethod() && reg->getState() != TR::RealRegister::Locked)
            {
            cursor = new (trHeapMemory()) TR::X86RegInstruction(cursor, PUSHReg, reg, cg());
            }
         }
      }
   else
      {
      TR_BitVector *p = cg()->getPreservedRegsInPrologue();
      for (int32_t pindex = getProperties().getMaxRegistersPreservedInPrologue()-1; pindex >= 0; pindex--)
         {
         TR::RealRegister::RegNum idx = _properties.getPreservedRegister((uint32_t)pindex);
         TR::RealRegister *reg = machine()->getX86RealRegister(idx);
         if (reg->getHasBeenAssignedInMethod() && reg->getState() != TR::RealRegister::Locked)
            {
            if (!p || p->get(idx))
               {
               cursor = generateMemRegInstruction(
                  cursor,
                  movOpcodes[MemReg][fullRegisterMovType(reg)],
                  generateX86MemoryReference(machine()->getX86RealRegister(TR::RealRegister::vfp), offsetCursor, cg()),
                  reg,
                  cg()
                  );
               }
            offsetCursor -= pointerSize;
            }
         }
      }
   return cursor;
   }
TR::Instruction * TR::X86SystemLinkage::savePreservedRegisters(TR::Instruction *cursor)
   {
   // For IA32, usePushForPreservedRegs will be true;
   // for X64, usePushForPreservedRegs is always false.
   TR::ResolvedMethodSymbol *bodySymbol = comp()->getJittedMethodSymbol();
   const int32_t localSize = getProperties().getOffsetToFirstLocal() - bodySymbol->getLocalMappingCursor();
   const int32_t pointerSize = getProperties().getPointerSize();

   int32_t offsetCursor = -localSize + getProperties().getOffsetToFirstLocal() - pointerSize;
   if (_properties.getUsesPushesForPreservedRegs())
      {
      for (int32_t pindex = _properties.getMaxRegistersPreservedInPrologue()-1; pindex >= 0; pindex--)
         {
         TR::RealRegister::RegNum idx = _properties.getPreservedRegister((uint32_t)pindex);
         TR::RealRegister *reg = machine()->getX86RealRegister(idx);
         if (reg->getHasBeenAssignedInMethod() && reg->getState() != TR::RealRegister::Locked)
            {
            cursor = new (trHeapMemory()) TR::X86RegInstruction(cursor, PUSHReg, reg, cg());
            }
         }
      }
   else
      {
      for (int32_t pindex = getProperties().getMaxRegistersPreservedInPrologue()-1; pindex >= 0; pindex--)
         {
         TR::RealRegister::RegNum idx = _properties.getPreservedRegister((uint32_t)pindex);
         TR::RealRegister *reg = machine()->getX86RealRegister(idx);
         if (reg->getHasBeenAssignedInMethod() && reg->getState() != TR::RealRegister::Locked)
            {
            cursor = generateMemRegInstruction(
               cursor,
               TR::Linkage::movOpcodes(MemReg, fullRegisterMovType(reg)),
               generateX86MemoryReference(machine()->getX86RealRegister(TR::RealRegister::vfp), offsetCursor, cg()),
               reg,
               cg()
               );
            offsetCursor -= pointerSize;
            }
         }
      }
   return cursor;
   }
TR::Register *TR::AMD64SystemLinkage::buildDirectDispatch(
      TR::Node *callNode,
      bool spillFPRegs)
   {
   TR::SymbolReference *methodSymRef = callNode->getSymbolReference();
   TR::MethodSymbol *methodSymbol = methodSymRef->getSymbol()->castToMethodSymbol();

   TR::Register *returnReg;

   // Allocate adequate register dependencies.
   //
   // pre = number of argument registers
   // post = number of volatile + return register
   //
   uint32_t pre = getProperties().getNumIntegerArgumentRegisters() + getProperties().getNumFloatArgumentRegisters();
   uint32_t post = getProperties().getNumVolatileRegisters() + (callNode->getDataType() == TR::NoType ? 0 : 1);

#if defined (PYTHON) && 0
   // Treat all preserved GP regs as volatile until register map support available.
   //
   post += getProperties().getNumberOfPreservedGPRegisters();
#endif

   TR::RegisterDependencyConditions *preDeps = generateRegisterDependencyConditions(pre, 0, cg());
   TR::RegisterDependencyConditions *postDeps = generateRegisterDependencyConditions(0, post, cg());

   // Evaluate outgoing arguments on the system stack and build pre-conditions.
   //
   int32_t memoryArgSize = buildArgs(callNode, preDeps);

   // Build post-conditions.
   //
   returnReg = buildVolatileAndReturnDependencies(callNode, postDeps);
   postDeps->stopAddingPostConditions();

   // Find the second scratch register in the post dependency list.
   //
   TR::Register *scratchReg = NULL;
   TR::RealRegister::RegNum scratchRegIndex = getProperties().getIntegerScratchRegister(1);
   for (int32_t i=0; i<post; i++)
      {
      if (postDeps->getPostConditions()->getRegisterDependency(i)->getRealRegister() == scratchRegIndex)
         {
         scratchReg = postDeps->getPostConditions()->getRegisterDependency(i)->getRegister();
         break;
         }
      }

#if defined(PYTHON) && 0
   // For Python, store the instruction that contains the GC map at this site into
   // the frame object.
   //
   TR::SymbolReference *frameObjectSymRef =
      comp()->getSymRefTab()->findOrCreateAutoSymbol(comp()->getMethodSymbol(), 0, TR::Address, true, false, true);

   TR::Register *frameObjectRegister = cg()->allocateRegister();
   generateRegMemInstruction(
      L8RegMem,
      callNode,
      frameObjectRegister,
      generateX86MemoryReference(frameObjectSymRef, cg()),
      cg());

   TR::RealRegister *espReal = cg()->machine()->getX86RealRegister(TR::RealRegister::esp);
   TR::Register *gcMapPCRegister = cg()->allocateRegister();

   generateRegMemInstruction(
      LEA8RegMem,
      callNode,
      gcMapPCRegister,
      generateX86MemoryReference(espReal, -8, cg()),
      cg());

   // Use "volatile" registers across the call.  Once proper register map support
   // is implemented, r14 and r15 will no longer be volatile and a different pair
   // should be chosen.
   //
   TR::RegisterDependencyConditions *gcMapDeps = generateRegisterDependencyConditions(0, 2, cg());
   gcMapDeps->addPostCondition(frameObjectRegister, TR::RealRegister::r14, cg());
   gcMapDeps->addPostCondition(gcMapPCRegister, TR::RealRegister::r15, cg());
   gcMapDeps->stopAddingPostConditions();

   generateMemRegInstruction(
      S8MemReg,
      callNode,
      generateX86MemoryReference(frameObjectRegister, fe()->getPythonGCMapPCOffsetInFrame(), cg()),
      gcMapPCRegister,
      gcMapDeps,
      cg());

   cg()->stopUsingRegister(frameObjectRegister);
   cg()->stopUsingRegister(gcMapPCRegister);
#endif

   TR::Instruction *instr;
   if (methodSymbol->getMethodAddress())
      {
      TR_ASSERT(scratchReg, "could not find second scratch register");
      auto LoadRegisterInstruction = generateRegImm64SymInstruction(
         MOV8RegImm64,
         callNode,
         scratchReg,
         (uintptr_t)methodSymbol->getMethodAddress(),
         methodSymRef,
         cg());

      if (TR::Options::getCmdLineOptions()->getOption(TR_EmitRelocatableELFFile))
         {
         LoadRegisterInstruction->setReloKind(TR_NativeMethodAbsolute);
         }

      instr = generateRegInstruction(CALLReg, callNode, scratchReg, preDeps, cg());
      }
   else
      {
      instr = generateImmSymInstruction(CALLImm4, callNode, (uintptrj_t)methodSymbol->getMethodAddress(), methodSymRef, preDeps, cg());
      }

   cg()->resetIsLeafMethod();

   instr->setNeedsGCMap(getProperties().getPreservedRegisterMapForGC());

   cg()->stopUsingRegister(scratchReg);

   TR::LabelSymbol *postDepLabel = generateLabelSymbol(cg());
   generateLabelInstruction(LABEL, callNode, postDepLabel, postDeps, cg());

   return returnReg;
   }
// Build arguments for system linkage dispatch.
//
int32_t TR::AMD64SystemLinkage::buildArgs(
      TR::Node *callNode,
      TR::RegisterDependencyConditions *deps)
   {
   TR::SymbolReference *methodSymRef = callNode->getSymbolReference();
   TR::MethodSymbol *methodSymbol = methodSymRef->getSymbol()->castToMethodSymbol();
   TR::RealRegister::RegNum noReg = TR::RealRegister::NoReg;
   TR::RealRegister *espReal = machine()->getX86RealRegister(TR::RealRegister::esp);
   int32_t firstNodeArgument = callNode->getFirstArgumentIndex();
   int32_t lastNodeArgument = callNode->getNumChildren() - 1;
   int32_t offset = 0;
   int32_t sizeOfOutGoingArgs = 0;
   uint16_t numIntArgs = 0,
            numFloatArgs = 0;
   int32_t first, last, direction;
   int32_t numCopiedRegs = 0;
   TR::Register *copiedRegs[TR::X86LinkageProperties::MaxArgumentRegisters];

   if (getProperties().passArgsRightToLeft())
      {
      first = lastNodeArgument;
      last = firstNodeArgument - 1;
      direction = -1;
      }
   else
      {
      first = firstNodeArgument;
      last = lastNodeArgument + 1;
      direction = 1;
      }

   // If the dispatch is indirect we must add the VFT register to the preconditions
   // so that it gets register assigned with the other preconditions to the call.
   //
   if (callNode->getOpCode().isIndirect())
      {
      TR::Node *vftChild = callNode->getFirstChild();
      TR_ASSERT(vftChild->getRegister(), "expecting VFT child to be evaluated");
      TR::RealRegister::RegNum scratchRegIndex = getProperties().getIntegerScratchRegister(1);
      deps->addPreCondition(vftChild->getRegister(), scratchRegIndex, cg());
      }

   int32_t i;
   for (i = first; i != last; i += direction)
      {
      TR::parmLayoutResult layoutResult;
      TR::RealRegister::RegNum rregIndex = noReg;
      TR::Node *child = callNode->getChild(i);

      layoutParm(child, sizeOfOutGoingArgs, numIntArgs, numFloatArgs, layoutResult);

      if (layoutResult.abstract & TR::parmLayoutResult::IN_LINKAGE_REG_PAIR)
         {
         // TODO: the AMD64 SysV ABI might put a struct into a pair of linkage registers.
         TR_ASSERT(false, "linkage register pairs are not supported yet.\n");
         }
      else if (layoutResult.abstract & TR::parmLayoutResult::IN_LINKAGE_REG)
         {
         TR_RegisterKinds regKind = layoutResult.regs[0].regKind;
         uint32_t regIndex = layoutResult.regs[0].regIndex;
         TR_ASSERT(regKind == TR_GPR || regKind == TR_FPR, "linkage registers include only TR_GPR and TR_FPR\n");
         rregIndex = (regKind == TR_FPR) ? getProperties().getFloatArgumentRegister(regIndex) : getProperties().getIntegerArgumentRegister(regIndex);
         }
      else
         {
         offset = layoutResult.offset;
         }

      TR::Register *vreg;
      vreg = cg()->evaluate(child);

      bool needsStackOffsetUpdate = false;
      if (rregIndex != noReg)
         {
         // For NULL JNI reference parameters, it is possible that the NULL value will be evaluated into
         // a different register than the child.  In that case it is not necessary to copy the temporary scratch
         // register across the call.
         //
         if ((child->getReferenceCount() > 1) &&
             (vreg == child->getRegister()))
            {
            TR::Register *argReg = cg()->allocateRegister();
            if (vreg->containsCollectedReference())
               argReg->setContainsCollectedReference();
            generateRegRegInstruction(TR::Linkage::movOpcodes(RegReg, movType(child->getDataType())), child, argReg, vreg, cg());
            vreg = argReg;
            copiedRegs[numCopiedRegs++] = vreg;
            }

         deps->addPreCondition(vreg, rregIndex, cg());
         }
      else
         {
         // Ideally, we would like to push rather than move
         generateMemRegInstruction(TR::Linkage::movOpcodes(MemReg, fullRegisterMovType(vreg)),
                                   child,
                                   generateX86MemoryReference(espReal, offset, cg()),
                                   vreg,
                                   cg());
         }

      cg()->decReferenceCount(child);
      }

   // Now that we're finished making the preconditions, all the interferences
   // are established and we can kill these regs.
   //
   for (i = 0; i < numCopiedRegs; i++)
      cg()->stopUsingRegister(copiedRegs[i]);

   deps->stopAddingPreConditions();

   return sizeOfOutGoingArgs;
   }
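// Illustrative sketch (not part of the linkage code): the first/last/direction
// triple above lets a single loop walk the argument children in either order.
// A minimal standalone model of the same pattern, with hypothetical bounds:
#include <cstdio>

static void visitArgs(int firstArg, int lastArg, bool rightToLeft)
   {
   int first = rightToLeft ? lastArg : firstArg;
   int last = rightToLeft ? firstArg - 1 : lastArg + 1;
   int direction = rightToLeft ? -1 : 1;
   // 'last' is one step past the final child, so '!=' terminates either way;
   // e.g. firstArg=1, lastArg=4, rightToLeft=true visits 4, 3, 2, 1.
   for (int i = first; i != last; i += direction)
      printf("visiting child %d\n", i);
   }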
// Copies parameters from where they enter the method (either on stack or in a
// linkage register) to their "home location" where the method body will expect
// to find them (either on stack or in a global register).
//
TR::Instruction * TR::X86SystemLinkage::copyParametersToHomeLocation(TR::Instruction *cursor)
   {
   TR::Machine *machine = cg()->machine();
   TR::RealRegister *framePointer = machine->getX86RealRegister(TR::RealRegister::vfp);

   TR::ResolvedMethodSymbol *bodySymbol = comp()->getJittedMethodSymbol();
   ListIterator<TR::ParameterSymbol> paramIterator(&(bodySymbol->getParameterList()));
   TR::ParameterSymbol *paramCursor;

   const TR::RealRegister::RegNum noReg = TR::RealRegister::NoReg;
   TR_ASSERT(noReg == 0, "noReg must be zero so zero-initializing movStatus will work");

   TR::MovStatus movStatus[TR::RealRegister::NumRegisters] = {{(TR::RealRegister::RegNum)0,(TR::RealRegister::RegNum)0,(TR_MovDataTypes)0}};

   // We must always do the stores first, then the reg-reg copies, then the
   // loads, so that we never clobber a register we will need later.  However,
   // the logic is simpler if we do the loads and stores in the same loop.
   // Therefore, we maintain a separate instruction cursor for the loads.
   //
   // We defer the initialization of loadCursor until we generate the first
   // load.  Otherwise, if we happen to generate some stores first, then the
   // store cursor would get ahead of the loadCursor, and the instructions
   // would end up in the wrong order despite our efforts.
   //
   TR::Instruction *loadCursor = NULL;

   // Phase 1: generate RegMem and MemReg movs, and collect information about
   // the required RegReg movs.
   //
   for (paramCursor = paramIterator.getFirst();
       paramCursor != NULL;
       paramCursor = paramIterator.getNext())
      {
      int8_t lri = paramCursor->getLinkageRegisterIndex();     // How the parameter enters the method
      TR::RealRegister::RegNum ai                              // Where method body expects to find it
         = (TR::RealRegister::RegNum)paramCursor->getAllocatedIndex();
      int32_t offset = paramCursor->getParameterOffset();      // Location of the parameter's stack slot
      TR_MovDataTypes movDataType = paramMovType(paramCursor); // What sort of MOV instruction does it need?

      // Copy the parameter to wherever it should be
      //
      if (lri == NOT_LINKAGE) // It's on the stack
         {
         if (ai == NOT_ASSIGNED) // It only needs to be on the stack
            {
            // Nothing to do
            }
         else // Method body expects it to be in the ai register
            {
            if (loadCursor == NULL)
               loadCursor = cursor;

            if (debug("traceCopyParametersToHomeLocation"))
               diagnostic("copyParametersToHomeLocation: Loading %d\n", ai);
            // ai := stack
            loadCursor = generateRegMemInstruction(
               loadCursor,
               TR::Linkage::movOpcodes(RegMem, movDataType),
               machine->getX86RealRegister(ai),
               generateX86MemoryReference(framePointer, offset, cg()),
               cg()
               );
            }
         }
      else // It's in a linkage register
         {
         TR::RealRegister::RegNum sourceIndex = getProperties().getArgumentRegister(lri, isFloat(movDataType));

         // Copy to the stack if necessary
         //
         if (ai == NOT_ASSIGNED || hasToBeOnStack(paramCursor))
            {
            if (comp()->getOption(TR_TraceCG))
               traceMsg(comp(), "copyToHomeLocation param %p, linkage reg index %d, allocated index %d, parameter offset %d, hasToBeOnStack %d, parm->isParmHasToBeOnStack() %d.\n", paramCursor, lri, ai, offset, hasToBeOnStack(paramCursor), paramCursor->isParmHasToBeOnStack());
            if (debug("traceCopyParametersToHomeLocation"))
               diagnostic("copyParametersToHomeLocation: Storing %d\n", sourceIndex);
            // stack := lri
            cursor = generateMemRegInstruction(
               cursor,
               TR::Linkage::movOpcodes(MemReg, movDataType),
               generateX86MemoryReference(framePointer, offset, cg()),
               machine->getX86RealRegister(sourceIndex),
               cg()
               );
            }

         // Copy to the ai register if necessary
         //
         if (ai != NOT_ASSIGNED && ai != sourceIndex)
            {
            // This parameter needs a RegReg move.  We don't know yet whether
            // we need the value in the target register, so for now we just
            // remember that we need to do this and keep going.
            //
            TR_ASSERT(movStatus[ai         ].sourceReg == noReg, "Each target reg must have only one source");
            TR_ASSERT(movStatus[sourceIndex].targetReg == noReg, "Each source reg must have only one target");
            if (debug("traceCopyParametersToHomeLocation"))
               diagnostic("copyParametersToHomeLocation: Planning to move %d to %d\n", sourceIndex, ai);
            movStatus[ai].sourceReg = sourceIndex;
            movStatus[sourceIndex].targetReg = ai;
            movStatus[sourceIndex].outgoingDataType = movDataType;
            }

         if (debug("traceCopyParametersToHomeLocation") && ai == sourceIndex)
            {
            diagnostic("copyParametersToHomeLocation: Parameter #%d already in register %d\n", lri, ai);
            }
         }
      }

   // Phase 2: Iterate through the parameters again to insert the RegReg moves.
   //
   for (paramCursor = paramIterator.getFirst();
       paramCursor != NULL;
       paramCursor = paramIterator.getNext())
      {
      if (paramCursor->getLinkageRegisterIndex() == NOT_LINKAGE)
         continue;

      const TR::RealRegister::RegNum paramReg =
         getProperties().getArgumentRegister(paramCursor->getLinkageRegisterIndex(), isFloat(paramMovType(paramCursor)));

      if (movStatus[paramReg].targetReg == 0)
         {
         // This parameter does not need to be copied anywhere
         if (debug("traceCopyParametersToHomeLocation"))
            diagnostic("copyParametersToHomeLocation: Not moving %d\n", paramReg);
         }
      else
         {
         if (debug("traceCopyParametersToHomeLocation"))
            diagnostic("copyParametersToHomeLocation: Preparing to move %d\n", paramReg);

         // If a mov's target register is the source for another mov, we need
         // to do that other mov first.  The idea is to find the end point of
         // the chain of movs starting with paramReg and ending with a
         // register whose current value is not needed; then do that chain of
         // movs in reverse order.
         //
         TR_ASSERT(noReg == 0, "noReg must be zero (not %d) for zero-filled initialization to work", noReg);

         TR::RealRegister::RegNum regCursor;

         // Find the last target in the chain
         //
         regCursor = movStatus[paramReg].targetReg;
         while (movStatus[regCursor].targetReg != noReg)
            {
            // Haven't found the end yet
            regCursor = movStatus[regCursor].targetReg;
            TR_ASSERT(regCursor != paramReg, "Can't yet handle cyclic dependencies");

            // TODO:AMD64 Use scratch register to break cycles
            //
            // A properly-written pickRegister should never
            // cause cycles to occur in the first place.  However, we may want
            // to consider adding cycle-breaking logic so that (1) pickRegister
            // has more flexibility, and (2) we're more robust against
            // otherwise harmless bugs in pickRegister.
            }

         // Work our way backward along the chain, generating all the necessary movs
         //
         while (movStatus[regCursor].sourceReg != noReg)
            {
            TR::RealRegister::RegNum source = movStatus[regCursor].sourceReg;
            if (debug("traceCopyParametersToHomeLocation"))
               diagnostic("copyParametersToHomeLocation: Moving %d to %d\n", source, regCursor);
            // regCursor := regCursor.sourceReg
            cursor = generateRegRegInstruction(
               cursor,
               TR::Linkage::movOpcodes(RegReg, movStatus[source].outgoingDataType),
               machine->getX86RealRegister(regCursor),
               machine->getX86RealRegister(source),
               cg()
               );
            // Update movStatus as we go so we don't generate redundant movs
            movStatus[regCursor].sourceReg = noReg;
            movStatus[source   ].targetReg = noReg;
            // Continue with the next register in the chain
            regCursor = source;
            }
         }
      }

   // Return the last instruction we inserted, whether or not it was a load.
   //
   return loadCursor ? loadCursor : cursor;
   }
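// Illustrative sketch (not part of the linkage code): the phase-2 logic above
// is a parallel-move scheduler over a one-source/one-target register mapping.
// The standalone model below uses plain arrays in place of movStatus and
// assumes, as the real code asserts, that the mapping is acyclic. All names
// are hypothetical; the caller invokes emitChain only when targetOf[startReg]
// is non-zero.
#include <cstdio>

enum { NO_REG = 0, NUM_REGS = 16 }; // register 0 is reserved as the "none" marker

static int sourceOf[NUM_REGS]; // sourceOf[t] = register whose value t must receive
static int targetOf[NUM_REGS]; // targetOf[s] = register that must receive s's value

static void emitChain(int startReg)
   {
   // Walk forward to the end of the chain: the last register whose
   // current value nobody else needs, so it can be overwritten first.
   int cursor = targetOf[startReg];
   while (targetOf[cursor] != NO_REG)
      cursor = targetOf[cursor];

   // Emit the moves in reverse order, so each target is dead when written.
   while (sourceOf[cursor] != NO_REG)
      {
      int src = sourceOf[cursor];
      printf("mov r%d <- r%d\n", cursor, src);
      sourceOf[cursor] = NO_REG; // mark done so later chains skip these moves
      targetOf[src] = NO_REG;
      cursor = src;
      }
   }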
TR::Register *IA32LinkageUtils::pushDoubleArg(
      TR::Node *child,
      TR::CodeGenerator *cg)
   {
   TR::Register *pushRegister;
   if (child->getRegister() == NULL)
      {
      if (child->getOpCodeValue() == TR::dconst)
         {
         TR_X86OpCodes pushOp;
         int32_t highValue = child->getLongIntHigh();
         if (highValue >= -128 && highValue <= 127)
            {
            pushOp = PUSHImms;
            }
         else
            {
            pushOp = PUSHImm4;
            }
         generateImmInstruction(pushOp, child, highValue, cg);

         int32_t lowValue = child->getLongIntLow();
         if (lowValue >= -128 && lowValue <= 127)
            {
            pushOp = PUSHImms;
            }
         else
            {
            pushOp = PUSHImm4;
            }
         generateImmInstruction(pushOp, child, lowValue, cg);

         cg->decReferenceCount(child);
         return NULL;
         }
      else if (child->getReferenceCount() == 1)
         {
         if (child->getOpCode().isLoad())
            {
            TR::MemoryReference *lowMR = generateX86MemoryReference(child, cg);
            generateMemInstruction(PUSHMem, child, generateX86MemoryReference(*lowMR, 4, cg), cg);
            generateMemInstruction(PUSHMem, child, lowMR, cg);
            lowMR->decNodeReferenceCounts(cg);
            cg->decReferenceCount(child);
            return NULL;
            }
         else if (child->getOpCodeValue() == TR::lbits2d)
            {
            pushRegister = pushLongArg(child->getFirstChild(), cg);
            cg->decReferenceCount(child);
            return pushRegister;
            }
         }
      }

   pushRegister = cg->evaluate(child);
   TR::RealRegister *espReal = cg->machine()->getRealRegister(TR::RealRegister::esp);
   generateRegImmInstruction(SUB4RegImms, child, espReal, 8, cg);

   if (cg->useSSEForDoublePrecision() && pushRegister->getKind() == TR_FPR)
      generateMemRegInstruction(MOVSDMemReg, child, generateX86MemoryReference(espReal, 0, cg), pushRegister, cg);
   else
      generateFPMemRegInstruction(DSTMemReg, child, generateX86MemoryReference(espReal, 0, cg), pushRegister, cg);

   cg->decReferenceCount(child);
   return pushRegister;
   }
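// Illustrative sketch (not part of the linkage code): pushDoubleArg pushes the
// high word before the low word because the stack grows downward, which leaves
// the eight bytes of the double in little-endian order in memory. A
// hypothetical standalone helper showing the same high/low split:
#include <cstdint>
#include <cstring>

static void splitDoubleBits(double value, int32_t *high, int32_t *low)
   {
   uint64_t bits;
   memcpy(&bits, &value, sizeof bits);   // type-pun without violating aliasing
   *high = (int32_t)(bits >> 32);        // pushed first, ends up at the higher address
   *low  = (int32_t)(bits & 0xFFFFFFFF); // pushed second, lands at the lower address
   }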