/**
 * Pushes a 64-bit integer argument onto the stack as two 32-bit words, the
 * high-order word first and the low-order word second, and consumes one
 * reference to \p child on every path.
 *
 * If \p child has not been evaluated into a register yet, cheaper forms are
 * tried before evaluating it:
 *   - load constant: both halves are pushed as immediates;
 *   - dbits2l with a single reference and no NaN normalization: the operand
 *     is handed to pushDoubleArg;
 *   - memory reference with a single reference: both halves are pushed
 *     directly from memory.
 *
 * @param child  argument node to push
 * @param cg     code generator used to emit the instructions
 * @return the register the argument was evaluated into, the result of
 *         pushDoubleArg for the dbits2l case, or NULL when the value was
 *         pushed from immediates or from memory
 */
TR::Register *IA32LinkageUtils::pushLongArg(
      TR::Node *child,
      TR::CodeGenerator *cg)
   {
   if (child->getRegister() == NULL)
      {
      if (child->getOpCode().isLoadConst())
         {
         // PUSHImms is selected when a half fits in a signed byte, PUSHImm4 otherwise.
         const int32_t highValue = child->getLongIntHigh();
         const TR_X86OpCodes highPushOp = (highValue >= -128 && highValue <= 127) ? PUSHImms : PUSHImm4;
         generateImmInstruction(highPushOp, child, highValue, cg);

         const int32_t lowValue = child->getLongIntLow();
         const TR_X86OpCodes lowPushOp = (lowValue >= -128 && lowValue <= 127) ? PUSHImms : PUSHImm4;
         generateImmInstruction(lowPushOp, child, lowValue, cg);

         cg->decReferenceCount(child);
         return NULL;
         }

      if (child->getOpCodeValue() == TR::dbits2l &&
          !child->normalizeNanValues() &&
          child->getReferenceCount() == 1)
         {
         TR::Register *pushRegister = pushDoubleArg(child->getFirstChild(), cg);
         cg->decReferenceCount(child);
         return pushRegister;
         }

      if (child->getOpCode().isMemoryReference() && child->getReferenceCount() == 1)
         {
         TR::MemoryReference *lowMR = generateX86MemoryReference(child, cg);
         // High word lives 4 bytes above the low word; push it first.
         generateMemInstruction(PUSHMem, child, generateX86MemoryReference(*lowMR, 4, cg), cg);
         generateMemInstruction(PUSHMem, child, lowMR, cg);
         lowMR->decNodeReferenceCounts(cg);
         // The node itself must be released too: every other path out of this
         // function decrements child exactly once, and previously this path
         // returned with the reference still held.
         cg->decReferenceCount(child);
         return NULL;
         }
      }

   // General case: evaluate the child and push both halves of the result.
   TR::Register *pushRegister = cg->evaluate(child);
   generateRegInstruction(PUSHReg, child, pushRegister->getHighOrder(), cg);
   generateRegInstruction(PUSHReg, child, pushRegister->getLowOrder(), cg);
   cg->decReferenceCount(child);
   return pushRegister;
   }
/**
 * Increments the reference count of \p node.
 *
 * In J9 builds targeting S390, a node whose register is an opaque pseudo
 * register also has the temporary reference count of that register's storage
 * reference incremented. When evaluation tracing is enabled the node is
 * printed with a "++ " marker.
 *
 * @param node  node whose reference count is incremented
 * @return the value returned by node->incReferenceCount()
 */
rcount_t
OMR::CodeGenerator::incReferenceCount(TR::Node *node)
   {
   TR::Register *nodeReg = node->getRegister();

#ifdef J9_PROJECT_SPECIFIC
#if defined(TR_TARGET_S390)
   TR_OpaquePseudoRegister *pseudoReg = nodeReg ? nodeReg->getOpaquePseudoRegister() : NULL;
   if (pseudoReg)
      {
      TR_StorageReference *pseudoRegStorageReference = pseudoReg->getStorageReference();
      TR_ASSERT(pseudoRegStorageReference, "the pseudoReg should have a non-null storage reference\n");
      pseudoRegStorageReference->incrementTemporaryReferenceCount();
      }
#endif
#endif

   const rcount_t updatedCount = node->incReferenceCount();

   if (self()->comp()->getOptions()->getTraceCGOption(TR_TraceCGEvaluation))
      self()->getDebug()->printNodeEvaluation(node, "++ ", nodeReg);

   return updatedCount;
   }
/**
 * Assigns real registers for a memory-destination instruction with one
 * source register.
 *
 * Order of work: post-condition dependencies, then the registers of the
 * memory reference (with the source blocked), then the source register
 * (with the memory reference registers blocked), and finally the
 * pre-condition dependencies against the previous instruction.
 *
 * @param kindToBeAssigned  register kinds handled in this assignment pass
 */
void TR::ARM64MemSrc1Instruction::assignRegisters(TR_RegisterKinds kindToBeAssigned)
   {
   TR::Machine *machine = cg()->machine();
   TR::MemoryReference *memRef = getMemoryReference();
   TR::Register *srcVirtual = getSource1Register();
   TR::RegisterDependencyConditions *deps = getDependencyConditions();

   if (deps)
      deps->assignPostConditionRegisters(this, kindToBeAssigned, cg());

   // Keep the source where it is while the memory reference gets its registers.
   srcVirtual->block();
   memRef->assignRegisters(this, cg());
   srcVirtual->unblock();

   // Now protect the memory reference registers while the source is assigned.
   memRef->blockRegisters();
   TR::RealRegister *srcReal = srcVirtual->getAssignedRealRegister();
   if (srcReal == NULL)
      srcReal = machine->assignOneRegister(this, srcVirtual);
   memRef->unblockRegisters();

   setSource1Register(srcReal);

   if (deps)
      deps->assignPreConditionRegisters(this->getPrev(), kindToBeAssigned, cg());
   }
/**
 * Decrements the future-use count of the virtual register held by each of
 * the first \p numberOfRegisters entries of _dependencies.
 *
 * @param numberOfRegisters  number of dependency entries to visit
 * @param cg                 not used by this method
 */
void decFutureUseCounts(uint32_t numberOfRegisters, TR::CodeGenerator *cg)
   {
   uint32_t index = 0;
   while (index < numberOfRegisters)
      {
      TR::Register *dependencyReg = _dependencies[index].getRegister();
      dependencyReg->decFutureUseCount();
      ++index;
      }
   }
// also handles ilload TR::Register *OMR::X86::AMD64::TreeEvaluator::lloadEvaluator(TR::Node *node, TR::CodeGenerator *cg) { TR::MemoryReference *sourceMR = generateX86MemoryReference(node, cg); TR::Register *reg = TR::TreeEvaluator::loadMemory(node, sourceMR, TR_RematerializableLong, node->getOpCode().isIndirect(), cg); reg->setMemRef(sourceMR); node->setRegister(reg); sourceMR->decNodeReferenceCounts(cg); return reg; }
/**
 * Pushes a single-precision float argument onto the stack and consumes one
 * reference to \p child.
 *
 * An unevaluated child is pushed without a floating-point register when it
 * is an fconst (pushed as an immediate), a singly-referenced load (pushed
 * from memory), or a singly-referenced ibits2f (operand handed to
 * pushIntegerWordArg). Otherwise the child is evaluated, 4 bytes are reserved
 * by subtracting from esp, and the value is stored to [esp] with MOVSS or FST.
 *
 * @param child  argument node to push
 * @param cg     code generator used to emit the instructions
 * @return the register the argument was evaluated into, the result of
 *         pushIntegerWordArg for the ibits2f case, or NULL when the value was
 *         pushed from an immediate or from memory
 */
TR::Register *IA32LinkageUtils::pushFloatArg(
      TR::Node *child,
      TR::CodeGenerator *cg)
   {
   if (child->getRegister() == NULL)
      {
      if (child->getOpCodeValue() == TR::fconst)
         {
         const int32_t floatBits = child->getFloatBits();
         // PUSHImms is selected when the bit pattern fits in a signed byte.
         const TR_X86OpCodes immPushOp = (floatBits >= -128 && floatBits <= 127) ? PUSHImms : PUSHImm4;
         generateImmInstruction(immPushOp, child, floatBits, cg);
         cg->decReferenceCount(child);
         return NULL;
         }

      if (child->getReferenceCount() == 1)
         {
         if (child->getOpCode().isLoad())
            {
            TR::MemoryReference *sourceMR = generateX86MemoryReference(child, cg);
            generateMemInstruction(PUSHMem, child, sourceMR, cg);
            sourceMR->decNodeReferenceCounts(cg);
            cg->decReferenceCount(child);
            return NULL;
            }

         if (child->getOpCodeValue() == TR::ibits2f)
            {
            TR::Register *intRegister = pushIntegerWordArg(child->getFirstChild(), cg);
            cg->decReferenceCount(child);
            return intRegister;
            }
         }
      }

   // General case: evaluate, make room on the stack, then store to [esp].
   TR::Register *valueRegister = cg->evaluate(child);
   TR::RealRegister *stackPointer = cg->machine()->getRealRegister(TR::RealRegister::esp);
   generateRegImmInstruction(SUB4RegImms, child, stackPointer, 4, cg);

   const bool storeWithSSE = cg->useSSEForSinglePrecision() && valueRegister->getKind() == TR_FPR;
   if (storeWithSSE)
      {
      generateMemRegInstruction(MOVSSMemReg, child, generateX86MemoryReference(stackPointer, 0, cg), valueRegister, cg);
      }
   else
      {
      generateFPMemRegInstruction(FSTMemReg, child, generateX86MemoryReference(stackPointer, 0, cg), valueRegister, cg);
      }

   cg->decReferenceCount(child);
   return valueRegister;
   }
/**
 * Evaluates a vector splat: replicates the scalar child across all lanes of
 * a newly allocated TR_VRF register.
 *
 * Integer scalars are first moved into the vector register (on 32-bit
 * targets a 64-bit value is assembled from the high and low halves of the
 * child register) and then shuffled; float and double scalars are shuffled
 * straight from the child register.
 *
 * @param node  splat node; its data type selects the lane layout
 * @param cg    code generator used to emit the instructions
 * @return the vector register holding the replicated value
 */
TR::Register*
OMR::X86::TreeEvaluator::SIMDsplatsEvaluator(TR::Node* node, TR::CodeGenerator* cg)
   {
   TR::Node* scalarNode = node->getChild(0);
   TR::Register* scalarReg = cg->evaluate(scalarNode);
   TR::Register* vectorReg = cg->allocateRegister(TR_VRF);

   switch (node->getDataType())
      {
      case TR::VectorInt32:
         {
         generateRegRegInstruction(MOVDRegReg4, node, vectorReg, scalarReg, cg);
         // 00 00 00 00 shuffle xxxA to AAAA
         generateRegRegImmInstruction(PSHUFDRegRegImm1, node, vectorReg, vectorReg, 0x00, cg);
         break;
         }

      case TR::VectorInt64:
         {
         if (!TR::Compiler->target.is32Bit())
            {
            generateRegRegInstruction(MOVQRegReg8, node, vectorReg, scalarReg, cg);
            }
         else
            {
            // Build the 64-bit lane as (high << 32) | low.
            TR::Register* highHalfReg = cg->allocateRegister(TR_VRF);
            generateRegRegInstruction(MOVDRegReg4, node, highHalfReg, scalarReg->getHighOrder(), cg);
            generateRegImmInstruction(PSLLQRegImm1, node, highHalfReg, 0x20, cg);
            generateRegRegInstruction(MOVDRegReg4, node, vectorReg, scalarReg->getLowOrder(), cg);
            generateRegRegInstruction(PORRegReg, node, vectorReg, highHalfReg, cg);
            cg->stopUsingRegister(highHalfReg);
            }
         // 01 00 01 00 shuffle xxBA to BABA
         generateRegRegImmInstruction(PSHUFDRegRegImm1, node, vectorReg, vectorReg, 0x44, cg);
         break;
         }

      case TR::VectorFloat:
         // 00 00 00 00 shuffle xxxA to AAAA
         generateRegRegImmInstruction(PSHUFDRegRegImm1, node, vectorReg, scalarReg, 0x00, cg);
         break;

      case TR::VectorDouble:
         // 01 00 01 00 shuffle xxBA to BABA
         generateRegRegImmInstruction(PSHUFDRegRegImm1, node, vectorReg, scalarReg, 0x44, cg);
         break;

      default:
         if (cg->comp()->getOption(TR_TraceCG))
            traceMsg(cg->comp(), "Unsupported data type, Node = %p\n", node);
         TR_ASSERT(false, "Unsupported data type");
         break;
      }

   node->setRegister(vectorReg);
   cg->decReferenceCount(scalarNode);
   return vectorReg;
   }
// Create a NoReg dependency for each child of a call that has been evaluated into a register. // Ignore children that do not have a register since their live range should not persist outside of // the helper call stream. // TR::RegisterDependencyConditions *TR_OutlinedInstructions::formEvaluatedArgumentDepList() { int32_t i, c=0; for (i=_callNode->getFirstArgumentIndex(); i<_callNode->getNumChildren(); i++) { TR::Register *reg = _callNode->getChild(i)->getRegister(); if (reg) { TR::RegisterPair *regPair = reg->getRegisterPair(); c += regPair? 2 : 1; } } TR::RegisterDependencyConditions *depConds = NULL; if (c) { TR::Machine *machine = _cg->machine(); depConds = generateRegisterDependencyConditions(0, c, _cg); for (i=_callNode->getFirstArgumentIndex(); i<_callNode->getNumChildren(); i++) { TR::Register *reg = _callNode->getChild(i)->getRegister(); if (reg) { TR::RegisterPair *regPair = reg->getRegisterPair(); if (regPair) { depConds->addPostCondition(regPair->getLowOrder(), TR::RealRegister::NoReg, _cg); depConds->addPostCondition(regPair->getHighOrder(), TR::RealRegister::NoReg, _cg); } else { depConds->addPostCondition(reg, TR::RealRegister::NoReg, _cg); } } } depConds->stopAddingConditions(); } return depConds; }
/**
 * Evaluates an 8-byte node into a register pair that the caller may clobber.
 *
 * When the node is referenced more than once, the evaluated value is copied
 * half by half into a freshly allocated register pair and that pair is
 * returned; otherwise the evaluated register is returned directly.
 *
 * @param node  8-byte node to evaluate
 * @return a register pair holding the node's value
 */
TR::Register *
OMR::X86::I386::CodeGenerator::longClobberEvaluate(TR::Node *node)
   {
   TR_ASSERT(node->getOpCode().is8Byte(), "assertion failure");

   // Sole reference: nobody else reads the register, so it can be handed out as is.
   if (!(node->getReferenceCount() > 1))
      return self()->evaluate(node);

   TR::Register *sharedValue = self()->evaluate(node);
   TR::Register *copyLow = self()->allocateRegister();
   TR::Register *copyHigh = self()->allocateRegister();
   TR::RegisterPair *copyPair = self()->allocateRegisterPair(copyLow, copyHigh);

   generateRegRegInstruction(MOV4RegReg, node, copyLow, sharedValue->getLowOrder(), self());
   generateRegRegInstruction(MOV4RegReg, node, copyHigh, sharedValue->getHighOrder(), self());

   return copyPair;
   }
/**
 * Assigns real registers for an instruction with one target register and a
 * memory-reference source.
 *
 * Order of work: post-condition dependencies, then the target register
 * (with the memory reference registers blocked), then the registers of the
 * memory reference (with the target blocked), and finally the pre-condition
 * dependencies against the previous instruction.
 *
 * @param kindToBeAssigned  register kinds handled in this assignment pass
 */
void TR::ARM64Trg1MemInstruction::assignRegisters(TR_RegisterKinds kindToBeAssigned)
   {
   TR::Machine *machine = cg()->machine();
   TR::MemoryReference *memRef = getMemoryReference();
   TR::Register *trgVirtual = getTargetRegister();
   TR::RegisterDependencyConditions *deps = getDependencyConditions();

   if (deps)
      deps->assignPostConditionRegisters(this, kindToBeAssigned, cg());

   // Protect the memory reference registers while the target is assigned.
   memRef->blockRegisters();
   TR::RealRegister *trgReal = machine->assignOneRegister(this, trgVirtual);
   setTargetRegister(trgReal);
   memRef->unblockRegisters();

   // Keep the target where it is while the memory reference gets its registers.
   trgVirtual->block();
   memRef->assignRegisters(this, cg());
   trgVirtual->unblock();

   if (deps)
      deps->assignPreConditionRegisters(this->getPrev(), kindToBeAssigned, cg());
   }
void OMR::X86::Instruction::clobberRegsForRematerialisation() { // We assume most instructions modify all registers that appear in their // postconditions, with a few exceptions. // if ( self()->cg()->enableRematerialisation() && self()->getDependencyConditions() && (self()->getOpCodeValue() != ASSOCREGS) // reg associations aren't really instructions, so they don't modify anything && (self()->getOpCodeValue() != LABEL) // labels must already be handled properly for a variety of reasons && (!self()->getOpCode().isShiftOp()) && (!self()->getOpCode().isRotateOp()) // shifts and rotates often have a postcondition on ecx but don't clobber it ){ // Check the live discardable register list to see if this is the first // instruction that kills the rematerialisable range of a register. // TR::ClobberingInstruction *clob = NULL; TR_X86RegisterDependencyGroup *post = self()->getDependencyConditions()->getPostConditions(); for (uint32_t i = 0; i < self()->getDependencyConditions()->getNumPostConditions(); i++) { TR::Register *reg = post->getRegisterDependency(i)->getRegister(); if (reg->isDiscardable()) { if (!clob) { clob = new (self()->cg()->trHeapMemory()) TR::ClobberingInstruction(self(), self()->cg()->trMemory()); self()->cg()->addClobberingInstruction(clob); } clob->addClobberedRegister(reg); self()->cg()->removeLiveDiscardableRegister(reg); self()->cg()->clobberLiveDependentDiscardableRegisters(clob, reg); if (debug("dumpRemat")) diagnostic("---> Clobbering %s discardable postcondition register %s at instruction %s\n", self()->cg()->getDebug()->toString(reg->getRematerializationInfo()), self()->cg()->getDebug()->getName(reg), self()->cg()->getDebug()->getName(self())); } } } }
/**
 * Decrements the reference count of \p node.
 *
 * When the count is 1 on entry, the node has a register and live-register
 * tracking exists for that register kind, the node counts in the live
 * register info are decremented (both halves too for a register pair) and
 * the register is reported dead once its node count reaches zero.
 *
 * In J9 builds targeting S390, opaque pseudo registers additionally have the
 * counts on their storage reference decremented. When evaluation tracing is
 * enabled the node is printed with a "-- " marker.
 *
 * @param node  node whose reference count is decremented
 * @return the value returned by node->decReferenceCount()
 */
rcount_t
OMR::CodeGenerator::decReferenceCount(TR::Node * node)
   {
   TR::Register *nodeReg = node->getRegister();

   // restricted registers go dead when ref count==2 because
   // their ref count was inced in prepareNodeForInstructionSelection
   if ((node->getReferenceCount() == 1) &&
       nodeReg &&
       self()->getLiveRegisters(nodeReg->getKind()))
      {
      TR_ASSERT(nodeReg->isLive() ||
                (diagnostic("\n*** Error: Register %s for node "
                            "[%s] died prematurely\n",
                            nodeReg->getRegisterName(self()->comp()),
                            node->getName(self()->comp()->getDebug())),
                 0),
                "Node %s register should be live",self()->getDebug()->getName(node));

      TR_LiveRegisterInfo *liveInfo = nodeReg->getLiveRegisterInfo();

      TR::Register *regPair = nodeReg->getRegisterPair();
      if (regPair)
         {
         regPair->getHighOrder()->getLiveRegisterInfo()->decNodeCount();
         regPair->getLowOrder()->getLiveRegisterInfo()->decNodeCount();
         }

      if (liveInfo && liveInfo->decNodeCount() == 0)
         {
         // The register is now dead
         //
         self()->getLiveRegisters(nodeReg->getKind())->registerIsDead(nodeReg);
         }
      }

#ifdef J9_PROJECT_SPECIFIC
#if defined(TR_TARGET_S390)
   if (nodeReg && nodeReg->getOpaquePseudoRegister())
      {
      TR_OpaquePseudoRegister *pseudoReg = nodeReg->getOpaquePseudoRegister();
      TR_StorageReference *storageRef = pseudoReg->getStorageReference();
      TR_ASSERT(storageRef,"the pseudoReg should have a non-null storage reference\n");
      storageRef->decrementTemporaryReferenceCount();

      if (node->getReferenceCount() == 1)
         {
         storageRef->decOwningRegisterCount();
         if (self()->traceBCDCodeGen())
            {
            traceMsg(self()->comp(),"\tdecrement owningRegisterCount %d->%d on ref #%d (%s) for reg %s as %s (%p) refCount == 1 (going to 0)\n",
                     storageRef->getOwningRegisterCount()+1,
                     storageRef->getOwningRegisterCount(),
                     storageRef->getReferenceNumber(),
                     self()->getDebug()->getName(storageRef->getSymbol()),
                     self()->getDebug()->getName(nodeReg),
                     node->getOpCode().getName(),
                     node);
            }
         }
      }
   else if (node->getOpCode().hasSymbolReference() &&
            node->getSymbolReference() &&
            node->getSymbolReference()->isTempVariableSizeSymRef())
      {
      TR_ASSERT(false,"tempMemSlots should only be attached to pseudoRegisters and not node %p\n",node);
      }
#endif
#endif

   const rcount_t updatedCount = node->decReferenceCount();

   if (self()->comp()->getOptions()->getTraceCGOption(TR_TraceCGEvaluation))
      self()->getDebug()->printNodeEvaluation(node, "-- ", nodeReg);

   return updatedCount;
   }
/**
 * Register-assigns this outlined instruction stream, once.
 *
 * The stream is appended after the current append instruction, its first
 * instruction receives dependencies capturing the live GPR associations, a
 * VFP restore is generated, and backwards register assignment is run over
 * the stream. Subsequent calls return immediately.
 *
 * @param kindsToBeAssigned   register kinds handled in this assignment pass
 * @param vfpSaveInstruction  VFP save instruction passed to generateVFPRestoreInstruction
 */
void TR_OutlinedInstructions::assignRegisters(TR_RegisterKinds kindsToBeAssigned, TR::X86VFPSaveInstruction *vfpSaveInstruction)
   {
   if (hasBeenRegisterAssigned())
      return;

   // nested internal control flow assert:
   _cg->setInternalControlFlowSafeNestingDepth(_cg->internalControlFlowNestingDepth());

   // Create a dependency list on the first instruction in this stream that captures all
   // current real register associations.  This is necessary to get the register assigner
   // back into its original state before the helper stream was processed.
   //
   _firstInstruction->setDependencyConditions(_cg->machine()->createDepCondForLiveGPRs());

#if 0
   // If the outlined section jumps back to a section that's expecting a certain register
   // state then add register dependencies on the exit branch to set that state.
   //
   if (_postDependencyMergeList)
      {
      TR::RegisterDependencyConditions *mergeDeps = _postDependencyMergeList->clone(_cg);

      TR_ASSERT(_appendInstruction->getDependencyConditions() == NULL, "unexpected reg deps on OOL append instruction");

      _appendInstruction->setDependencyConditions(mergeDeps);

      TR_X86RegisterDependencyGroup *depGroup = mergeDeps->getPostConditions();
      for (int32_t i=0; i<mergeDeps->getNumPostConditions(); i++)
         {
         TR::RegisterDependency *dependency = depGroup->getRegisterDependency(i);
         TR::Register *virtReg = dependency->getRegister();
         virtReg->incTotalUseCount();
         virtReg->incFutureUseCount();

#ifdef DEBUG
         // Ensure all register dependencies have been assigned.
         //
         TR_ASSERT(dependency->getRealRegister() != TR::RealRegister::NoReg, "unassigned merge dep register");
         TR_ASSERT(virtReg->getAssignedRealRegister() == _cg->machine()->getX86RealRegister(dependency->getRealRegister()), "unexpected(?) register assignment");
#endif
         }
      }
#endif

   // TODO:AMD64: Fix excessive register assignment exchanges in outlined instruction dispatch.

   // Ensure correct VFP state at the start of the outlined instruction sequence.
   //
   generateVFPRestoreInstruction(comp()->getAppendInstruction(), vfpSaveInstruction, _cg);

   // Link in the helper stream into the mainline code.
   //
   TR::Instruction *mainlineTail = comp()->getAppendInstruction();
   mainlineTail->setNext(_firstInstruction);
   _firstInstruction->setPrev(mainlineTail);
   comp()->setAppendInstruction(_appendInstruction);

   // Register assign the helper dispatch instructions.
   //
   _cg->doBackwardsRegisterAssignment(kindsToBeAssigned, _appendInstruction, mainlineTail);

   // Returning to mainline, reset this counter
   _cg->setInternalControlFlowSafeNestingDepth(0);

   setHasBeenRegisterAssigned(true);
   }
// Build arguments for system linkage dispatch. // int32_t TR::AMD64SystemLinkage::buildArgs( TR::Node *callNode, TR::RegisterDependencyConditions *deps) { TR::SymbolReference *methodSymRef = callNode->getSymbolReference(); TR::MethodSymbol *methodSymbol = methodSymRef->getSymbol()->castToMethodSymbol(); TR::RealRegister::RegNum noReg = TR::RealRegister::NoReg; TR::RealRegister *espReal = machine()->getX86RealRegister(TR::RealRegister::esp); int32_t firstNodeArgument = callNode->getFirstArgumentIndex(); int32_t lastNodeArgument = callNode->getNumChildren() - 1; int32_t offset = 0; int32_t sizeOfOutGoingArgs= 0; uint16_t numIntArgs = 0, numFloatArgs = 0; int32_t first, last, direction; int32_t numCopiedRegs = 0; TR::Register *copiedRegs[TR::X86LinkageProperties::MaxArgumentRegisters]; if (getProperties().passArgsRightToLeft()) { first = lastNodeArgument; last = firstNodeArgument - 1; direction = -1; } else { first = firstNodeArgument; last = lastNodeArgument + 1; direction = 1; } // If the dispatch is indirect we must add the VFT register to the preconditions // so that it gets register assigned with the other preconditions to the call. 
// if (callNode->getOpCode().isIndirect()) { TR::Node *vftChild = callNode->getFirstChild(); TR_ASSERT(vftChild->getRegister(), "expecting VFT child to be evaluated"); TR::RealRegister::RegNum scratchRegIndex = getProperties().getIntegerScratchRegister(1); deps->addPreCondition(vftChild->getRegister(), scratchRegIndex, cg()); } int32_t i; for (i = first; i != last; i += direction) { TR::parmLayoutResult layoutResult; TR::RealRegister::RegNum rregIndex = noReg; TR::Node *child = callNode->getChild(i); layoutParm(child, sizeOfOutGoingArgs, numIntArgs, numFloatArgs, layoutResult); if (layoutResult.abstract & TR::parmLayoutResult::IN_LINKAGE_REG_PAIR) { // TODO: AMD64 SysV ABI might put a struct into a pair of linkage registerr TR_ASSERT(false, "haven't support linkage_reg_pair yet.\n"); } else if (layoutResult.abstract & TR::parmLayoutResult::IN_LINKAGE_REG) { TR_RegisterKinds regKind = layoutResult.regs[0].regKind; uint32_t regIndex = layoutResult.regs[0].regIndex; TR_ASSERT(regKind == TR_GPR || regKind == TR_FPR, "linkage registers includes TR_GPR and TR_FPR\n"); rregIndex = (regKind == TR_FPR) ? getProperties().getFloatArgumentRegister(regIndex): getProperties().getIntegerArgumentRegister(regIndex); } else { offset = layoutResult.offset; } TR::Register *vreg; vreg = cg()->evaluate(child); bool needsStackOffsetUpdate = false; if (rregIndex != noReg) { // For NULL JNI reference parameters, it is possible that the NULL value will be evaluated into // a different register than the child. In that case it is not necessary to copy the temporary scratch // register across the call. 
// if ((child->getReferenceCount() > 1) && (vreg == child->getRegister())) { TR::Register *argReg = cg()->allocateRegister(); if (vreg->containsCollectedReference()) argReg->setContainsCollectedReference(); generateRegRegInstruction(TR::Linkage::movOpcodes(RegReg, movType(child->getDataType())), child, argReg, vreg, cg()); vreg = argReg; copiedRegs[numCopiedRegs++] = vreg; } deps->addPreCondition(vreg, rregIndex, cg()); } else { // Ideally, we would like to push rather than move generateMemRegInstruction(TR::Linkage::movOpcodes(MemReg, fullRegisterMovType(vreg)), child, generateX86MemoryReference(espReal, offset, cg()), vreg, cg()); } cg()->decReferenceCount(child); } // Now that we're finished making the preconditions, all the interferences // are established and we can kill these regs. // for (i = 0; i < numCopiedRegs; i++) cg()->stopUsingRegister(copiedRegs[i]); deps->stopAddingPreConditions(); return sizeOfOutGoingArgs; }
/**
 * Adds the post conditions for a system-linkage call: one dummy register per
 * volatile register other than the return register, the VFT child's register
 * when it survives an indirect call, and finally the return register itself.
 *
 * The dependency list is intentionally left open so that callers can add
 * further conditions before closing it.
 *
 * @param callNode  the call being built
 * @param deps      dependency conditions receiving the post conditions; must not be NULL
 * @return the register that receives the call result, or NULL when the
 *         return register index is NoReg
 */
TR::Register *
TR::AMD64SystemLinkage::buildVolatileAndReturnDependencies(
      TR::Node *callNode,
      TR::RegisterDependencyConditions *deps)
   {
   if (callNode->getOpCode().isIndirect())
      {
      TR::Node *vftChild = callNode->getFirstChild();
      const bool vftSurvivesCall = vftChild->getRegister() && (vftChild->getReferenceCount() > 1);
      if (!vftSurvivesCall)
         {
         // VFT child dies here; decrement it early so it doesn't interfere with dummy regs.
         cg()->recursivelyDecReferenceCount(vftChild);
         }
      }

   TR_ASSERT(deps != NULL, "expected register dependencies");

   // Figure out which is the return register.
   //
   // Both values start out as "no return register" so that they are never read
   // uninitialized when an unrecognized data type reaches the default case in a
   // build where TR_ASSERT does not abort.
   //
   TR::RealRegister::RegNum returnRegIndex = TR::RealRegister::NoReg;
   TR_RegisterKinds returnKind = TR_NoRegister;
   switch (callNode->getDataType())
      {
      case TR::NoType:
         returnRegIndex = TR::RealRegister::NoReg;
         returnKind = TR_NoRegister;
         break;
      case TR::Int8:
      case TR::Int16:
      case TR::Int32:
      case TR::Int64:
      case TR::Address:
         returnRegIndex = getProperties().getIntegerReturnRegister();
         returnKind = TR_GPR;
         break;
      case TR::Float:
      case TR::Double:
         returnRegIndex = getProperties().getFloatReturnRegister();
         returnKind = TR_FPR;
         break;
      case TR::Aggregate:
      default:
         TR_ASSERT(false, "Unrecognized call node data type: #%d", (int)callNode->getDataType());
         break;
      }

   // Kill all non-preserved int and float regs besides the return register.
   //
   int32_t i;
   TR::RealRegister::RegNum scratchIndex = getProperties().getIntegerScratchRegister(1);
   for (i=0; i<getProperties().getNumVolatileRegisters(); i++)
      {
      TR::RealRegister::RegNum regIndex = getProperties()._volatileRegisters[i];

      if (regIndex != returnRegIndex)
         {
         TR_RegisterKinds rk = (i < getProperties()._numberOfVolatileGPRegisters) ? TR_GPR : TR_FPR;
         TR::Register *dummy = cg()->allocateRegister(rk);
         deps->addPostCondition(dummy, regIndex, cg());

         // Note that we don't setPlaceholderReg here.  If this volatile reg is also volatile
         // in the caller's linkage, then that flag doesn't matter much anyway.  If it's preserved
         // in the caller's linkage, then we don't want to set that flag because we want this
         // use of the register to count as a "real" use, thereby motivating the prologue to
         // preserve the register.

         // A scratch register is necessary to call the native without a trampoline.
         //
         if (callNode->getOpCode().isIndirect() || (regIndex != scratchIndex))
            cg()->stopUsingRegister(dummy);
         }
      }

#if defined (PYTHON) && 0
   // Evict the preserved registers across the call
   //
   for (i=0; i<getProperties().getNumberOfPreservedGPRegisters(); i++)
      {
      TR::RealRegister::RegNum regIndex = getProperties()._preservedRegisters[i];

      TR::Register *dummy = cg()->allocateRegister(TR_GPR);
      deps->addPostCondition(dummy, regIndex, cg());

      // Note that we don't setPlaceholderReg here.  If this volatile reg is also volatile
      // in the caller's linkage, then that flag doesn't matter much anyway.  If it's preserved
      // in the caller's linkage, then we don't want to set that flag because we want this
      // use of the register to count as a "real" use, thereby motivating the prologue to
      // preserve the register.

      // A scratch register is necessary to call the native without a trampoline.
      //
      if (callNode->getOpCode().isIndirect() || (regIndex != scratchIndex))
         cg()->stopUsingRegister(dummy);
      }
#endif

   if (callNode->getOpCode().isIndirect())
      {
      TR::Node *vftChild = callNode->getFirstChild();
      if (vftChild->getRegister() && (vftChild->getReferenceCount() > 1))
         {
         // VFT child survives the call, so we must include it in the postconditions.
         deps->addPostCondition(vftChild->getRegister(), TR::RealRegister::NoReg, cg());
         cg()->recursivelyDecReferenceCount(vftChild);
         }
      }

   // Now that everything is dead, we can allocate the return register without
   // interference
   //
   TR::Register *returnRegister = NULL;
   if (returnRegIndex)
      {
      TR_ASSERT(returnKind != TR_NoRegister, "assertion failure");

      if (callNode->getDataType() == TR::Address)
         {
         returnRegister = cg()->allocateCollectedReferenceRegister();
         }
      else
         {
         returnRegister = cg()->allocateRegister(returnKind);
         if (callNode->getDataType() == TR::Float)
            returnRegister->setIsSinglePrecision();
         }

      deps->addPostCondition(returnRegister, returnRegIndex, cg());
      }

   // The reg dependency is left open intentionally, and need to be closed by
   // the caller. The reason is because, child class might call this method, while
   // adding more register dependecies;  if we close the reg dependency here,
   // the child class won't be able to add more register dependencies.

   return returnRegister;
   }
/**
 * Builds the outgoing arguments of a system-linkage call on ARM64.
 *
 * A first pass over the argument children counts how many do not fit in the
 * integer / floating-point argument registers. A second pass evaluates each
 * child and either ties it to its argument register through \p dependencies
 * or records a store into the outgoing argument area. Dependencies with a
 * NULL virtual register are then added for the remaining integer argument
 * registers and for the non-preserved floating-point registers, and finally
 * the recorded stores are emitted.
 *
 * @param callNode      the call whose arguments are built
 * @param dependencies  dependency conditions receiving the argument registers
 * @return the size in bytes reserved for memory arguments (8 bytes per
 *         argument, rounded up to a multiple of 16)
 */
int32_t TR::ARM64SystemLinkage::buildArgs(TR::Node *callNode,
   TR::RegisterDependencyConditions *dependencies)
   {
   const TR::ARM64LinkageProperties &properties = getProperties();
   TR::ARM64MemoryArgument *pushToMemory = NULL;
   TR::Register *argMemReg;
   TR::Register *tempReg;
   int32_t argIndex = 0;
   int32_t numMemArgs = 0;
   int32_t argSize = 0;
   int32_t numIntegerArgs = 0;
   int32_t numFloatArgs = 0;
   int32_t totalSize;
   int32_t i;

   TR::Node *child;
   TR::DataType childType;
   TR::DataType resType = callNode->getType();

   uint32_t firstArgumentChild = callNode->getFirstArgumentIndex();

   /* Step 1 - figure out how many arguments are going to be spilled to memory i.e. not in registers */
   for (i = firstArgumentChild; i < callNode->getNumChildren(); i++)
      {
      child = callNode->getChild(i);
      childType = child->getDataType();

      switch (childType)
         {
         case TR::Int8:
         case TR::Int16:
         case TR::Int32:
         case TR::Int64:
         case TR::Address:
            if (numIntegerArgs >= properties.getNumIntArgRegs())
               numMemArgs++;
            numIntegerArgs++;
            break;

         case TR::Float:
         case TR::Double:
            if (numFloatArgs >= properties.getNumFloatArgRegs())
               numMemArgs++;
            numFloatArgs++;
            break;

         default:
            TR_ASSERT(false, "Argument type %s is not supported\n", childType.toString());
         }
      }

   // From here, down, any new stack allocations will expire / die when the function returns
   TR::StackMemoryRegion stackMemoryRegion(*trMemory());

   /* End result of Step 1 - determined number of memory arguments! */
   if (numMemArgs > 0)
      {
      pushToMemory = new (trStackMemory()) TR::ARM64MemoryArgument[numMemArgs];
      argMemReg = cg()->allocateRegister();
      }

   // 8 bytes per memory argument, aligned to a 16-byte boundary
   totalSize = numMemArgs * 8;
   totalSize = (totalSize + 15) & (~15);

   /* Step 2 - evaluate each argument and place it in a register or in memory */
   numIntegerArgs = 0;
   numFloatArgs = 0;

   for (i = firstArgumentChild; i < callNode->getNumChildren(); i++)
      {
      TR::MemoryReference *mref = NULL;
      TR::Register *argRegister;
      TR::InstOpCode::Mnemonic op;

      child = callNode->getChild(i);
      childType = child->getDataType();

      switch (childType)
         {
         case TR::Int8:
         case TR::Int16:
         case TR::Int32:
         case TR::Int64:
         case TR::Address:
            {
            if (childType == TR::Address)
               argRegister = pushAddressArg(child);
            else if (childType == TR::Int64)
               argRegister = pushLongArg(child);
            else
               argRegister = pushIntegerWordArg(child);

            if (numIntegerArgs < properties.getNumIntArgRegs())
               {
               // Copy the value when the node's own register must not be clobbered.
               if (!cg()->canClobberNodesRegister(child, 0))
                  {
                  tempReg = argRegister->containsCollectedReference()
                     ? cg()->allocateCollectedReferenceRegister()
                     : cg()->allocateRegister();
                  generateMovInstruction(cg(), callNode, tempReg, argRegister);
                  argRegister = tempReg;
                  }

               const bool resultSharesFirstIntReg =
                  numIntegerArgs == 0 &&
                  (resType.isAddress() || resType.isInt32() || resType.isInt64());

               if (resultSharesFirstIntReg)
                  {
                  // x0 carries the first argument in and the result out.
                  TR::Register *resultReg = resType.isAddress()
                     ? cg()->allocateCollectedReferenceRegister()
                     : cg()->allocateRegister();

                  dependencies->addPreCondition(argRegister, TR::RealRegister::x0);
                  dependencies->addPostCondition(resultReg, TR::RealRegister::x0);
                  }
               else
                  {
                  addDependency(dependencies, argRegister, properties.getIntegerArgumentRegister(numIntegerArgs), TR_GPR, cg());
                  }
               }
            else
               {
               // numIntegerArgs >= properties.getNumIntArgRegs()
               if (childType == TR::Address || childType == TR::Int64)
                  op = TR::InstOpCode::strpostx;
               else
                  op = TR::InstOpCode::strpostw;

               mref = getOutgoingArgumentMemRef(argMemReg, argRegister, op, pushToMemory[argIndex++]);
               argSize += 8; // always 8-byte aligned
               }

            numIntegerArgs++;
            break;
            }

         case TR::Float:
         case TR::Double:
            {
            if (childType == TR::Float)
               argRegister = pushFloatArg(child);
            else
               argRegister = pushDoubleArg(child);

            if (numFloatArgs < properties.getNumFloatArgRegs())
               {
               // Copy the value when the node's own register must not be clobbered.
               if (!cg()->canClobberNodesRegister(child, 0))
                  {
                  tempReg = cg()->allocateRegister(TR_FPR);
                  op = (childType == TR::Float) ? TR::InstOpCode::fmovs : TR::InstOpCode::fmovd;
                  generateTrg1Src1Instruction(cg(), op, callNode, tempReg, argRegister);
                  argRegister = tempReg;
                  }

               if ((numFloatArgs == 0 && resType.isFloatingPoint()))
                  {
                  // v0 carries the first argument in and the result out.
                  TR::Register *resultReg;
                  if (resType.getDataType() == TR::Float)
                     resultReg = cg()->allocateSinglePrecisionRegister();
                  else
                     resultReg = cg()->allocateRegister(TR_FPR);

                  dependencies->addPreCondition(argRegister, TR::RealRegister::v0);
                  dependencies->addPostCondition(resultReg, TR::RealRegister::v0);
                  }
               else
                  {
                  addDependency(dependencies, argRegister, properties.getFloatArgumentRegister(numFloatArgs), TR_FPR, cg());
                  }
               }
            else
               {
               // numFloatArgs >= properties.getNumFloatArgRegs()
               if (childType == TR::Double)
                  op = TR::InstOpCode::vstrpostd;
               else
                  op = TR::InstOpCode::vstrposts;

               mref = getOutgoingArgumentMemRef(argMemReg, argRegister, op, pushToMemory[argIndex++]);
               argSize += 8; // always 8-byte aligned
               }

            numFloatArgs++;
            break;
            }
         } // end of switch
      } // end of for

   // NULL deps for non-preserved and non-system regs
   for (; numIntegerArgs < properties.getNumIntArgRegs(); numIntegerArgs++)
      {
      if (numIntegerArgs == 0 && resType.isAddress())
         {
         dependencies->addPreCondition(cg()->allocateRegister(), properties.getIntegerArgumentRegister(0));
         dependencies->addPostCondition(cg()->allocateCollectedReferenceRegister(), properties.getIntegerArgumentRegister(0));
         }
      else
         {
         addDependency(dependencies, NULL, properties.getIntegerArgumentRegister(numIntegerArgs), TR_GPR, cg());
         }
      }

   int32_t floatRegsUsed = numFloatArgs;
   if (numFloatArgs > properties.getNumFloatArgRegs())
      floatRegsUsed = properties.getNumFloatArgRegs();

   for (i = (TR::RealRegister::RegNum)((uint32_t)TR::RealRegister::v0 + floatRegsUsed); i <= TR::RealRegister::LastFPR; i++)
      {
      if (properties.getPreserved((TR::RealRegister::RegNum)i))
         continue;

      // NULL dependency for non-preserved regs
      addDependency(dependencies, NULL, (TR::RealRegister::RegNum)i, TR_FPR, cg());
      }

   /* Step 3 - emit the stores for the arguments passed in memory */
   if (numMemArgs > 0)
      {
      TR::RealRegister *sp = cg()->machine()->getRealRegister(properties.getStackPointerRegister());
      generateTrg1Src1ImmInstruction(cg(), TR::InstOpCode::subimmx, callNode, argMemReg, sp, totalSize);

      for (argIndex = 0; argIndex < numMemArgs; argIndex++)
         {
         TR::ARM64MemoryArgument &memArg = pushToMemory[argIndex];
         TR::Register *aReg = memArg.argRegister;
         generateMemSrc1Instruction(cg(), memArg.opCode, callNode, memArg.argMemory, aReg);
         cg()->stopUsingRegister(aReg);
         }

      cg()->stopUsingRegister(argMemReg);
      }

   return totalSize;
   }
void TR_PPCRegisterDependencyGroup::assignRegisters(TR::Instruction *currentInstruction, TR_RegisterKinds kindToBeAssigned, uint32_t numberOfRegisters, TR::CodeGenerator *cg) { // *this swipeable for debugging purposes TR::Machine *machine = cg->machine(); TR::Register *virtReg; TR::RealRegister::RegNum dependentRegNum; TR::RealRegister *dependentRealReg, *assignedRegister, *realReg; int i, j; TR::Compilation *comp = cg->comp(); int num_gprs = 0; int num_fprs = 0; int num_vrfs = 0; // Use to do lookups using real register numbers TR_PPCRegisterDependencyMap map(_dependencies, numberOfRegisters); if (!comp->getOption(TR_DisableOOL)) { for (i = 0; i< numberOfRegisters; i++) { virtReg = _dependencies[i].getRegister(); dependentRegNum = _dependencies[i].getRealRegister(); if (dependentRegNum == TR::RealRegister::SpilledReg) { TR_ASSERT(virtReg->getBackingStorage(),"should have a backing store if dependentRegNum == spillRegIndex()\n"); if (virtReg->getAssignedRealRegister()) { // this happens when the register was first spilled in main line path then was reverse spilled // and assigned to a real register in OOL path. We protected the backing store when doing // the reverse spill so we could re-spill to the same slot now traceMsg (comp,"\nOOL: Found register spilled in main line and re-assigned inside OOL"); TR::Node *currentNode = currentInstruction->getNode(); TR::RealRegister *assignedReg = toRealRegister(virtReg->getAssignedRegister()); TR::MemoryReference *tempMR = new (cg->trHeapMemory()) TR::MemoryReference(currentNode, (TR::SymbolReference*)virtReg->getBackingStorage()->getSymbolReference(), sizeof(uintptr_t), cg); TR::InstOpCode::Mnemonic opCode; TR_RegisterKinds rk = virtReg->getKind(); switch (rk) { case TR_GPR: opCode =TR::InstOpCode::Op_load; break; case TR_FPR: opCode = virtReg->isSinglePrecision() ? 
TR::InstOpCode::lfs : TR::InstOpCode::lfd; break; default: TR_ASSERT(0, "\nRegister kind not supported in OOL spill\n"); break; } TR::Instruction *inst = generateTrg1MemInstruction(cg, opCode, currentNode, assignedReg, tempMR, currentInstruction); assignedReg->setAssignedRegister(NULL); virtReg->setAssignedRegister(NULL); assignedReg->setState(TR::RealRegister::Free); if (comp->getDebug()) cg->traceRegisterAssignment("Generate reload of virt %s due to spillRegIndex dep at inst %p\n",comp->getDebug()->getName(virtReg),currentInstruction); cg->traceRAInstruction(inst); } if (!(std::find(cg->getSpilledRegisterList()->begin(), cg->getSpilledRegisterList()->end(), virtReg) != cg->getSpilledRegisterList()->end())) cg->getSpilledRegisterList()->push_front(virtReg); } // we also need to free up all locked backing storage if we are exiting the OOL during backwards RA assignment else if (currentInstruction->isLabel() && virtReg->getAssignedRealRegister()) { TR::PPCLabelInstruction *labelInstr = (TR::PPCLabelInstruction *)currentInstruction; TR_BackingStore * location = virtReg->getBackingStorage(); TR_RegisterKinds rk = virtReg->getKind(); int32_t dataSize; if (labelInstr->getLabelSymbol()->isStartOfColdInstructionStream() && location) { traceMsg (comp,"\nOOL: Releasing backing storage (%p)\n", location); if (rk == TR_GPR) dataSize = TR::Compiler->om.sizeofReferenceAddress(); else dataSize = 8; location->setMaxSpillDepth(0); cg->freeSpill(location,dataSize,0); virtReg->setBackingStorage(NULL); } } } } for (i = 0; i < numberOfRegisters; i++) { map.addDependency(_dependencies[i], i); virtReg = _dependencies[i].getRegister(); dependentRegNum = _dependencies[i].getRealRegister(); if (dependentRegNum != TR::RealRegister::SpilledReg) { if (virtReg->getKind() == TR_GPR) num_gprs++; else if (virtReg->getKind() == TR_FPR) num_fprs++; else if (virtReg->getKind() == TR_VRF) num_vrfs++; } } #ifdef DEBUG int locked_gprs = 0; int locked_fprs = 0; int locked_vrfs = 0; // count up how many 
registers are locked for each type for(i = TR::RealRegister::FirstGPR; i <= TR::RealRegister::LastGPR; i++) { realReg = machine->getPPCRealRegister((TR::RealRegister::RegNum)i); if (realReg->getState() == TR::RealRegister::Locked) locked_gprs++; } for(i = TR::RealRegister::FirstFPR; i <= TR::RealRegister::LastFPR; i++) { realReg = machine->getPPCRealRegister((TR::RealRegister::RegNum)i); if (realReg->getState() == TR::RealRegister::Locked) locked_fprs++; } for(i = TR::RealRegister::FirstVRF; i <= TR::RealRegister::LastVRF; i++) { realReg = machine->getPPCRealRegister((TR::RealRegister::RegNum)i); if (realReg->getState() == TR::RealRegister::Locked) locked_vrfs++; } TR_ASSERT( locked_gprs == machine->getNumberOfLockedRegisters(TR_GPR),"Inconsistent number of locked GPRs"); TR_ASSERT( locked_fprs == machine->getNumberOfLockedRegisters(TR_FPR),"Inconsistent number of locked FPRs"); TR_ASSERT( locked_vrfs == machine->getNumberOfLockedRegisters(TR_VRF), "Inconsistent number of locked VRFs"); #endif // To handle circular dependencies, we block a real register if (1) it is already assigned to a correct // virtual register and (2) if it is assigned to one register in the list but is required by another. // However, if all available registers are requested, we do not block in case (2) to avoid all registers // being blocked. 
bool block_gprs = true; bool block_fprs = true; bool block_vrfs = true; TR_ASSERT(num_gprs <= (TR::RealRegister::LastGPR - TR::RealRegister::FirstGPR + 1 - machine->getNumberOfLockedRegisters(TR_GPR)), "Too many GPR dependencies, unable to assign" ); TR_ASSERT(num_fprs <= (TR::RealRegister::LastFPR - TR::RealRegister::FirstFPR + 1 - machine->getNumberOfLockedRegisters(TR_FPR)), "Too many FPR dependencies, unable to assign" ); TR_ASSERT(num_vrfs <= (TR::RealRegister::LastVRF - TR::RealRegister::FirstVRF + 1 - machine->getNumberOfLockedRegisters(TR_VRF)), "Too many VRF dependencies, unable to assign" ); if (num_gprs == (TR::RealRegister::LastGPR - TR::RealRegister::FirstGPR + 1 - machine->getNumberOfLockedRegisters(TR_GPR))) block_gprs = false; if (num_fprs == (TR::RealRegister::LastFPR - TR::RealRegister::FirstFPR + 1 - machine->getNumberOfLockedRegisters(TR_FPR))) block_fprs = false; if (num_vrfs == (TR::RealRegister::LastVRF - TR::RealRegister::FirstVRF + 1 - machine->getNumberOfLockedRegisters(TR_VRF))) block_vrfs = false; for (i = 0; i < numberOfRegisters; i++) { virtReg = _dependencies[i].getRegister(); if (virtReg->getAssignedRealRegister()!=NULL) { if (_dependencies[i].getRealRegister() == TR::RealRegister::NoReg) { virtReg->block(); } else { TR::RealRegister::RegNum assignedRegNum; assignedRegNum = toRealRegister(virtReg->getAssignedRealRegister())->getRegisterNumber(); // always block if required register and assigned register match; // block if assigned register is required by other dependency but only if // any spare registers are left to avoid blocking all existing registers if (_dependencies[i].getRealRegister() == assignedRegNum || (map.getDependencyWithTarget(assignedRegNum) && ((virtReg->getKind() != TR_GPR || block_gprs) && (virtReg->getKind() != TR_FPR || block_fprs) && (virtReg->getKind() != TR_VRF || block_vrfs)))) { virtReg->block(); } } } } // Assign all virtual regs that depend on a specific real reg that is free for (i = 0; i < 
numberOfRegisters; i++) { virtReg = _dependencies[i].getRegister(); dependentRegNum = _dependencies[i].getRealRegister(); dependentRealReg = machine->getPPCRealRegister(dependentRegNum); if (dependentRegNum != TR::RealRegister::NoReg && dependentRegNum != TR::RealRegister::SpilledReg && dependentRealReg->getState() == TR::RealRegister::Free) { assignFreeRegisters(currentInstruction, &_dependencies[i], map, cg); } } // Assign all virtual regs that depend on a specfic real reg that is not free for (i = 0; i < numberOfRegisters; i++) { virtReg = _dependencies[i].getRegister(); assignedRegister = NULL; if (virtReg->getAssignedRealRegister() != NULL) { assignedRegister = toRealRegister(virtReg->getAssignedRealRegister()); } dependentRegNum = _dependencies[i].getRealRegister(); dependentRealReg = machine->getPPCRealRegister(dependentRegNum); if (dependentRegNum != TR::RealRegister::NoReg && dependentRegNum != TR::RealRegister::SpilledReg && dependentRealReg != assignedRegister) { bool depsBlocked = false; switch (_dependencies[i].getRegister()->getKind()) { case TR_GPR: depsBlocked = block_gprs; break; case TR_FPR: depsBlocked = block_fprs; break; case TR_VRF: depsBlocked = block_vrfs; break; } assignContendedRegisters(currentInstruction, &_dependencies[i], map, depsBlocked, cg); } } // Assign all virtual regs that depend on NoReg but exclude gr0 for (i=0; i<numberOfRegisters; i++) { if (_dependencies[i].getRealRegister() == TR::RealRegister::NoReg && _dependencies[i].getExcludeGPR0()) { TR::RealRegister *realOne; virtReg = _dependencies[i].getRegister(); realOne = virtReg->getAssignedRealRegister(); if (realOne!=NULL && toRealRegister(realOne)->getRegisterNumber()==TR::RealRegister::gr0) { if ((assignedRegister = machine->findBestFreeRegister(currentInstruction, virtReg->getKind(), true, false, virtReg)) == NULL) { assignedRegister = machine->freeBestRegister(currentInstruction, virtReg, NULL, true); } machine->coerceRegisterAssignment(currentInstruction, virtReg, 
assignedRegister->getRegisterNumber()); } else if (realOne == NULL) { machine->assignOneRegister(currentInstruction, virtReg, true); } virtReg->block(); } } // Assign all virtual regs that depend on NoReg for (i=0; i<numberOfRegisters; i++) { if (_dependencies[i].getRealRegister() == TR::RealRegister::NoReg && !_dependencies[i].getExcludeGPR0()) { TR::RealRegister *realOne; virtReg = _dependencies[i].getRegister(); realOne = virtReg->getAssignedRealRegister(); if (!realOne) { machine->assignOneRegister(currentInstruction, virtReg, false); } virtReg->block(); } } unblockRegisters(numberOfRegisters); for (i = 0; i < numberOfRegisters; i++) { TR::Register *dependentRegister = getRegisterDependency(i)->getRegister(); // dependentRegister->getAssignedRegister() is NULL if the reg has already been spilled due to a spilledReg dep if (comp->getOption(TR_DisableOOL) || (!(cg->isOutOfLineColdPath()) && !(cg->isOutOfLineHotPath()))) { TR_ASSERT(dependentRegister->getAssignedRegister(), "assignedRegister can not be NULL"); } if (dependentRegister->getAssignedRegister()) { TR::RealRegister *assignedRegister = dependentRegister->getAssignedRegister()->getRealRegister(); if (getRegisterDependency(i)->getRealRegister() == TR::RealRegister::NoReg) getRegisterDependency(i)->setRealRegister(toRealRegister(assignedRegister)->getRegisterNumber()); machine->decFutureUseCountAndUnlatch(dependentRegister); } } }
static void assignContendedRegisters(TR::Instruction *currentInstruction, TR::RegisterDependency *dep, TR_PPCRegisterDependencyMap& map, bool depsBlocked, TR::CodeGenerator *cg) { // *this swipeable for debugging purposes TR::Machine *machine = cg->machine(); dep = findDependencyChainHead(dep, map); TR::Register *virtReg = dep->getRegister(); TR::RealRegister::RegNum targetRegNum = dep->getRealRegister(); TR::RealRegister *targetReg = machine->getPPCRealRegister(targetRegNum); TR::RealRegister *assignedReg = virtReg->getAssignedRealRegister() ? toRealRegister(virtReg->getAssignedRealRegister()) : NULL; // Chain of length 1 if (!assignedReg || !map.getDependencyWithTarget(assignedReg->getRegisterNumber())) { machine->coerceRegisterAssignment(currentInstruction, virtReg, targetRegNum); virtReg->block(); return; } // Chain of length 2, handled here instead of below to get 3*xor exchange on GPRs if (map.getDependencyWithTarget(assignedReg->getRegisterNumber()) == map.getDependencyWithAssigned(targetRegNum)) { TR::Register *targetVirtReg = targetReg->getAssignedRegister(); machine->coerceRegisterAssignment(currentInstruction, virtReg, targetRegNum); virtReg->block(); targetVirtReg->block(); return; } // Grab a spare reg in order to free the target of the first dep // At this point the first dep's target could be blocked, assigned, or NoReg // If it's blocked or assigned we allocate a spare and assign the target's virtual to it // If it's NoReg, the spare reg will be used as the first dep's actual target TR::RealRegister *spareReg = machine->findBestFreeRegister(currentInstruction, virtReg->getKind(), targetRegNum == TR::RealRegister::NoReg ? dep->getExcludeGPR0() : false, false, targetRegNum == TR::RealRegister::NoReg ? 
virtReg : targetReg->getAssignedRegister()); bool haveFreeSpare = spareReg != NULL; if (!spareReg) { // If the regs in this dep group are not blocked we need to make sure we don't spill a reg that's in the middle of the chain if (!depsBlocked) { if (targetRegNum == TR::RealRegister::NoReg) spareReg = machine->freeBestRegister(currentInstruction, map.getDependencyWithTarget(assignedReg->getRegisterNumber())->getRegister(), assignedReg, false); else spareReg = machine->freeBestRegister(currentInstruction, virtReg, targetReg, false); } else { if (targetRegNum == TR::RealRegister::NoReg) spareReg = machine->freeBestRegister(currentInstruction, virtReg, NULL, dep->getExcludeGPR0()); else spareReg = machine->freeBestRegister(currentInstruction, targetReg->getAssignedRegister(), NULL, false); } } if (targetRegNum != TR::RealRegister::NoReg && spareReg != targetReg) { machine->coerceRegisterAssignment(currentInstruction, targetReg->getAssignedRegister(), spareReg->getRegisterNumber()); } TR_ASSERT(targetRegNum == TR::RealRegister::NoReg || targetReg->getState() == TR::RealRegister::Free, "Expecting free target register"); if (depsBlocked || targetRegNum != TR::RealRegister::NoReg || haveFreeSpare) { machine->coerceRegisterAssignment(currentInstruction, virtReg, targetRegNum == TR::RealRegister::NoReg ? spareReg->getRegisterNumber() : targetRegNum); virtReg->block(); } dep = map.getDependencyWithTarget(assignedReg->getRegisterNumber()); while (dep) { virtReg = dep->getRegister(); targetRegNum = dep->getRealRegister(); targetReg = machine->getPPCRealRegister(targetRegNum); assignedReg = virtReg->getAssignedRealRegister() ? toRealRegister(virtReg->getAssignedRealRegister()) : NULL; TR_ASSERT(targetReg->getState() == TR::RealRegister::Free || targetReg == spareReg, "Expecting free target register or target to have been filled to free spare register"); machine->coerceRegisterAssignment(currentInstruction, virtReg, targetRegNum); virtReg->block(); dep = assignedReg ? 
map.getDependencyWithTarget(assignedReg->getRegisterNumber()) : NULL; } }
/**
 * Assigns registers for this instruction's dependency conditions.
 *
 * For any opcode other than ASSOCREGS the pre- and post-condition groups are
 * assigned, in an order that depends on the current assignment direction.
 * For ASSOCREGS (when register associations are enabled and GPRs are among
 * the kinds being assigned) the existing virtual/real associations are
 * cleared and replaced by the ones listed in the post-conditions.
 *
 * @param kindsToBeAssigned mask of register kinds being assigned
 */
void OMR::X86::Instruction::assignRegisters(TR_RegisterKinds kindsToBeAssigned)
   {
   // Fast path when there are no dependency conditions.
   //
   if (!self()->getDependencyConditions())
      return;

   TR::CodeGenerator *codeGen = self()->cg();

   if (self()->getOpCodeValue() != ASSOCREGS)
      {
      self()->aboutToAssignRegDeps();

      if (codeGen->getAssignmentDirection() == codeGen->Backward)
         {
         // Walking backward: post-conditions are encountered first.
         self()->getDependencyConditions()->assignPostConditionRegisters(self(), kindsToBeAssigned, codeGen);
         self()->getDependencyConditions()->assignPreConditionRegisters(self(), kindsToBeAssigned, codeGen);
         }
      else
         {
         self()->getDependencyConditions()->assignPreConditionRegisters(self()->getPrev(), kindsToBeAssigned, codeGen);
         self()->getDependencyConditions()->assignPostConditionRegisters(self(), kindsToBeAssigned, codeGen);
         }
      return;
      }

   // ASSOCREGS from here on.
   if (!codeGen->enableRegisterAssociations())
      return;

   if (!(kindsToBeAssigned & TR_GPR_Mask))
      return;

   TR::Machine *machine = codeGen->machine();

   // First traverse the existing associations and remove them
   // so that they don't interfere with the new ones
   //
   for (int regIdx = TR::RealRegister::FirstGPR; regIdx <= TR::RealRegister::LastAssignableGPR; ++regIdx)
      {
      const TR::RealRegister::RegNum regNum = static_cast<TR::RealRegister::RegNum>(regIdx);

      // Skip non-assignable registers
      //
      if (machine->getX86RealRegister(regNum)->getState() == TR::RealRegister::Locked)
         continue;

      TR::Register *associated = machine->getVirtualAssociatedWithReal(regNum);
      if (associated)
         associated->setAssociation(TR::RealRegister::NoReg);
      }

   // Next loop through and set up the new associations (both on the machine
   // and by associating the virtual registers with their real dependencies)
   //
   TR_X86RegisterDependencyGroup *postConditions = self()->getDependencyConditions()->getPostConditions();
   for (int postIdx = 0; postIdx < self()->getDependencyConditions()->getNumPostConditions(); ++postIdx)
      {
      TR::RegisterDependency *association = postConditions->getRegisterDependency(postIdx);
      machine->setVirtualAssociatedWithReal(association->getRealRegister(), association->getRegister());
      }

   machine->setGPRWeightsFromAssociations();
   }
/**
 * Generates the instruction(s) for a binary node, choosing between
 * register-register and register-memory forms according to the analyser
 * state set up by setInputs().
 *
 * Three outcomes are possible:
 *   - the first operand must be preserved: the result goes to a freshly
 *     allocated register (via a three-operand instruction on z196 and up
 *     for the subtract opcodes handled below, otherwise via a copy followed
 *     by the two-operand operation);
 *   - register-register form operating directly on the first register;
 *   - register-memory form operating directly on the first register.
 *
 * @param root           the binary node being evaluated; receives the result register
 * @param regToRegOpCode opcode for the register-register form
 * @param memToRegOpCode opcode for the register-memory form (may be replaced by
 *                       its halfword variant, see below)
 * @param copyOpCode     opcode used to copy the first operand into a new register
 */
void TR_S390BinaryAnalyser::genericAnalyser(TR::Node *root,
                                            TR::InstOpCode::Mnemonic regToRegOpCode,
                                            TR::InstOpCode::Mnemonic memToRegOpCode,
                                            TR::InstOpCode::Mnemonic copyOpCode)
   {
   TR::Node *firstChild = root->getFirstChild();
   TR::Node *secondChild = root->getSecondChild();
   TR::Register *firstRegister = firstChild->getRegister();
   TR::Register *secondRegister = secondChild->getRegister();
   TR::Compilation *comp = TR::comp();

   TR::SymbolReference *firstReference = NULL;
   if (firstChild->getOpCode().hasSymbolReference())
      firstReference = firstChild->getSymbolReference();

   TR::SymbolReference *secondReference = NULL;
   if (secondChild->getOpCode().hasSymbolReference())
      secondReference = secondChild->getSymbolReference();

   const bool firstIsLockedStatic =
      cg()->isAddressOfStaticSymRefWithLockedReg(firstReference) ||
      cg()->isAddressOfPrivateStaticSymRefWithLockedReg(firstReference);
   const bool secondIsLockedStatic =
      cg()->isAddressOfStaticSymRefWithLockedReg(secondReference) ||
      cg()->isAddressOfPrivateStaticSymRefWithLockedReg(secondReference);

   setInputs(firstChild, firstRegister, secondChild, secondRegister,
             false, false, comp,
             firstIsLockedStatic,
             secondIsLockedStatic);

   /*
    * Check if SH or CH can be used to evaluate this integer subtract/compare node.
    * The second operand of SH/CH is a 16-bit number from memory. And using
    * these directly can save a load instruction.
    */
   bool is16BitMemory2Operand = false;
   if (secondChild->getOpCodeValue() == TR::s2i &&
       secondChild->getFirstChild()->getOpCodeValue() == TR::sloadi &&
       secondChild->isSingleRefUnevaluated() &&
       secondChild->getFirstChild()->isSingleRefUnevaluated())
      {
      const bool isSubtract = (memToRegOpCode == TR::InstOpCode::S);
      const bool isCompare  = (memToRegOpCode == TR::InstOpCode::C);
      if (isSubtract || isCompare)
         {
         memToRegOpCode = isSubtract ? TR::InstOpCode::SH : TR::InstOpCode::CH;
         setMem2();
         is16BitMemory2Operand = true;
         }
      }

   if (getEvalChild1())
      firstRegister = cg()->evaluate(firstChild);

   if (getEvalChild2())
      secondRegister = cg()->evaluate(secondChild);

   remapInputs(firstChild, firstRegister, secondChild, secondRegister);

   if (getCopyReg1())
      {
      // The first operand must survive, so the result needs its own register.
      TR::Register *thirdReg;
      switch (firstRegister->getKind())
         {
         case TR_GPR64:
            thirdReg = cg()->allocate64bitRegister();
            break;
         case TR_VRF:
            TR_ASSERT(false,"VRF: genericAnalyser unimplemented");
            break;
         case TR_FPR:
            thirdReg = cg()->allocateRegister(TR_FPR);
            break;
         default:
            thirdReg = cg()->allocateRegister();
            break;
         }

      bool done = false;

      if (cg()->getS390ProcessorInfo()->supportsArch(TR_S390ProcessorInfo::TR_z196) &&
          (getBinaryReg3Reg2() || secondRegister != NULL))
         {
         // Map the two-operand subtract onto its three-operand counterpart,
         // which makes the separate copy unnecessary.
         bool hasThreeOperandForm = true;
         TR::InstOpCode::Mnemonic threeOperandOp = regToRegOpCode;
         switch (regToRegOpCode)
            {
            case TR::InstOpCode::SR:
               threeOperandOp = TR::InstOpCode::SRK;
               break;
            case TR::InstOpCode::SLR:
               threeOperandOp = TR::InstOpCode::SLRK;
               break;
            case TR::InstOpCode::SGR:
               threeOperandOp = TR::InstOpCode::SGRK;
               break;
            case TR::InstOpCode::SLGR:
               threeOperandOp = TR::InstOpCode::SLGRK;
               break;
            default:
               hasThreeOperandForm = false;
               break;
            }

         if (hasThreeOperandForm)
            {
            generateRRRInstruction(cg(), threeOperandOp, root, thirdReg, firstRegister, secondRegister);
            done = true;
            }
         }

      if (!done)
         {
         generateRRInstruction(cg(), copyOpCode, root, thirdReg, firstRegister);

         if (getBinaryReg3Reg2() || (secondRegister != NULL))
            {
            generateRRInstruction(cg(), regToRegOpCode, root, thirdReg, secondRegister);
            }
         else
            {
            // For the halfword form the memory operand is the load beneath the conversion.
            TR::Node *loadBaseAddr = secondChild;
            if (is16BitMemory2Operand)
               loadBaseAddr = secondChild->getFirstChild();

            TR::MemoryReference *tempMR = generateS390MemoryReference(loadBaseAddr, cg());

            // floating-point arithmetic doesn't have RXY format instructions, so no long displacement
            if (secondChild->getOpCode().isFloatingPoint())
               {
               tempMR->enforce4KDisplacementLimit(secondChild, cg(), NULL);
               }

            generateRXInstruction(cg(), memToRegOpCode, root, thirdReg, tempMR);
            tempMR->stopUsingMemRefRegister(cg());

            if (is16BitMemory2Operand)
               {
               cg()->decReferenceCount(secondChild->getFirstChild());
               }
            }
         }

      root->setRegister(thirdReg);
      }
   else if (getBinaryReg1Reg2())
      {
      generateRRInstruction(cg(), regToRegOpCode, root, firstRegister, secondRegister);
      root->setRegister(firstRegister);
      }
   else // assert getBinaryReg1Mem2() == true
      {
      TR_ASSERT( !getInvalid(), "TR_S390BinaryAnalyser::invalid case\n");

      TR::Node *memoryOperand = secondChild;
      if (is16BitMemory2Operand)
         memoryOperand = secondChild->getFirstChild();

      TR::MemoryReference *tempMR = generateS390MemoryReference(memoryOperand, cg());

      // floating-point arithmetic doesn't have RXY format instructions, so no long displacement
      if (secondChild->getOpCode().isFloatingPoint())
         {
         tempMR->enforce4KDisplacementLimit(secondChild, cg(), NULL);
         }

      generateRXInstruction(cg(), memToRegOpCode, root, firstRegister, tempMR);
      tempMR->stopUsingMemRefRegister(cg());

      if (is16BitMemory2Operand)
         cg()->decReferenceCount(secondChild->getFirstChild());

      root->setRegister(firstRegister);
      }

   cg()->decReferenceCount(firstChild);
   cg()->decReferenceCount(secondChild);
   }
/**
 * Pushes a double-precision argument (8 bytes) onto the stack.
 *
 * When the child has not been evaluated yet, cheaper forms are tried first:
 *   - a dconst is pushed as two immediates (high word, then low word);
 *   - a load with a single reference is pushed straight from memory
 *     (high word at offset 4, then the low word);
 *   - an lbits2d with a single reference is delegated to pushLongArg()
 *     on its child.
 * Otherwise the child is evaluated, 8 bytes are reserved by subtracting
 * from esp, and the value is stored at [esp].
 *
 * The child's reference count is decremented on every path.
 *
 * @param child the argument node
 * @param cg    the code generator
 * @return the register holding the evaluated value, whatever pushLongArg()
 *         returned for the lbits2d case, or NULL when the value was pushed
 *         from immediates or directly from memory
 */
TR::Register *IA32LinkageUtils::pushDoubleArg(
      TR::Node *child,
      TR::CodeGenerator *cg)
   {
   TR::Register *pushRegister;

   if (child->getRegister() == NULL)
      {
      if (child->getOpCodeValue() == TR::dconst)
         {
         // Push the constant's raw bits, high word first. Use the short
         // (sign-extended 8-bit) immediate encoding when the word fits.
         const int32_t highValue = child->getLongIntHigh();
         TR_X86OpCodes pushOp = (highValue >= -128 && highValue <= 127) ? PUSHImms : PUSHImm4;
         generateImmInstruction(pushOp, child, highValue, cg);

         const int32_t lowValue = child->getLongIntLow();
         pushOp = (lowValue >= -128 && lowValue <= 127) ? PUSHImms : PUSHImm4;
         generateImmInstruction(pushOp, child, lowValue, cg);

         cg->decReferenceCount(child);
         return NULL;
         }

      if (child->getReferenceCount() == 1)
         {
         if (child->getOpCode().isLoad())
            {
            // Push both halves straight from memory: high word (offset 4) first.
            TR::MemoryReference *lowMR = generateX86MemoryReference(child, cg);
            generateMemInstruction(PUSHMem, child, generateX86MemoryReference(*lowMR, 4, cg), cg);
            generateMemInstruction(PUSHMem, child, lowMR, cg);
            lowMR->decNodeReferenceCounts(cg);
            cg->decReferenceCount(child);
            return NULL;
            }

         if (child->getOpCodeValue() == TR::lbits2d)
            {
            // Raw bit reinterpretation of a long: push the long itself.
            pushRegister = pushLongArg(child->getFirstChild(), cg);
            cg->decReferenceCount(child);
            return pushRegister;
            }
         }
      }

   pushRegister = cg->evaluate(child);

   // Reserve 8 bytes on the stack and store the value there.
   TR::RealRegister *espReal = cg->machine()->getRealRegister(TR::RealRegister::esp);
   generateRegImmInstruction(SUB4RegImms, child, espReal, 8, cg);

   // The value is a double, so the choice between the SSE store (MOVSD) and
   // the x87 store must be driven by the double-precision SSE setting, not
   // the single-precision one.
   if (cg->useSSEForDoublePrecision() && pushRegister->getKind() == TR_FPR)
      generateMemRegInstruction(MOVSDMemReg, child, generateX86MemoryReference(espReal, 0, cg), pushRegister, cg);
   else
      generateFPMemRegInstruction(DSTMemReg, child, generateX86MemoryReference(espReal, 0, cg), pushRegister, cg);

   cg->decReferenceCount(child);
   return pushRegister;
   }
/**
 * Generates an x87 floating-point comparison for the given compare node.
 *
 * Operand order is chosen first (some opcodes force or forbid a swap), the
 * operands are evaluated as required, and one of FTST, a register-memory
 * compare or a register-register compare is emitted. If the operands end up
 * reversed relative to the node, the node is recreated with the
 * swapped-children opcode.
 *
 * @param root                 the comparison node
 * @param cmpRegRegOpCode      register-register compare opcode (non-FCOMI)
 * @param cmpRegMemOpCode      register-memory compare opcode
 * @param cmpiRegRegOpCode     register-register compare opcode used with FCOMI
 * @param useFCOMIInstructions whether FCOMI-style compares are to be used
 * @return NULL when FCOMI was used without FTST; otherwise the register
 *         (constrained to eax) receiving the FP status word
 */
TR::Register *TR_X86FPCompareAnalyser::fpCompareAnalyser(TR::Node *root,
                                                         TR_X86OpCodes cmpRegRegOpCode,
                                                         TR_X86OpCodes cmpRegMemOpCode,
                                                         TR_X86OpCodes cmpiRegRegOpCode,
                                                         bool useFCOMIInstructions)
   {
   TR::Node *firstChild;
   TR::Node *secondChild;
   TR::ILOpCodes cmpOp = root->getOpCodeValue();
   TR::Compilation *comp = _cg->comp();
   const TR_X86OpCodes cmpInstr = useFCOMIInstructions ? cmpiRegRegOpCode : cmpRegRegOpCode;

   // Some operators must have their operands swapped to improve the generated
   // code needed to evaluate the result of the comparison.
   //
   bool mustSwapOperands;
   switch (cmpOp)
      {
      case TR::iffcmple:
      case TR::ifdcmple:
      case TR::iffcmpgtu:
      case TR::ifdcmpgtu:
      case TR::fcmple:
      case TR::dcmple:
      case TR::fcmpgtu:
      case TR::dcmpgtu:
         mustSwapOperands = true;
         break;

      case TR::iffcmplt:
      case TR::ifdcmplt:
      case TR::iffcmpgeu:
      case TR::ifdcmpgeu:
      case TR::fcmplt:
      case TR::dcmplt:
      case TR::fcmpgeu:
      case TR::dcmpgeu:
         mustSwapOperands = useFCOMIInstructions;
         break;

      default:
         mustSwapOperands = false;
         break;
      }

   // Some operators should not have their operands swapped to improve the generated
   // code needed to evaluate the result of the comparison.
   //
   bool preventOperandSwapping;
   switch (cmpOp)
      {
      case TR::iffcmpltu:
      case TR::ifdcmpltu:
      case TR::iffcmpge:
      case TR::ifdcmpge:
      case TR::fcmpltu:
      case TR::dcmpltu:
      case TR::fcmpge:
      case TR::dcmpge:
         preventOperandSwapping = true;
         break;

      case TR::iffcmpgt:
      case TR::ifdcmpgt:
      case TR::iffcmpleu:
      case TR::ifdcmpleu:
      case TR::fcmpgt:
      case TR::dcmpgt:
      case TR::fcmpleu:
      case TR::dcmpleu:
         preventOperandSwapping = useFCOMIInstructions;
         break;

      // For correctness, don't swap operands of these operators.
      //
      case TR::fcmpg:
      case TR::fcmpl:
      case TR::dcmpg:
      case TR::dcmpl:
         preventOperandSwapping = true;
         break;

      default:
         preventOperandSwapping = false;
         break;
      }

   // If either 'preventOperandSwapping' or 'mustSwapOperands' is set then the
   // initial operand ordering chosen below must be maintained.
   //
   const bool operandOrderIsFixed = preventOperandSwapping || mustSwapOperands;

   // Initial operand evaluation ordering.
   //
   const bool keepNodeOrder =
      preventOperandSwapping || (!mustSwapOperands && _cg->whichChildToEvaluate(root) == 0);

   if (keepNodeOrder)
      {
      firstChild  = root->getFirstChild();
      secondChild = root->getSecondChild();
      }
   else
      {
      firstChild  = root->getSecondChild();
      secondChild = root->getFirstChild();
      }
   setReversedOperands(!keepNodeOrder);

   TR::Register *firstRegister  = firstChild->getRegister();
   TR::Register *secondRegister = secondChild->getRegister();

   setInputs(firstChild,
             firstRegister,
             secondChild,
             secondRegister,
             useFCOMIInstructions,
             operandOrderIsFixed);

   // Make sure any required operand ordering is respected.
   //
   bool reverseCmpOp = false;
   bool reverseMemOp = false;
   if (operandOrderIsFixed)
      {
      reverseCmpOp = getCmpReg2Reg1() ? true : false;
      reverseMemOp = getCmpReg2Mem1() ? true : false;
      }

   // If we are not comparing with a memory operand, one of them evaluates
   // to a zero, and the zero is not already on the stack, then we can use
   // FTST to save a register.
   //
   // (With a memory operand, either the constant zero needs to be loaded
   // to use FCOM, or the memory operand needs to be loaded to use FTST,
   // so there is no gain in using FTST.)
   //
   // If the constant zero is in the target register, using FTST means the
   // comparison will be reversed.  We cannot do this if the initial ordering
   // of the operands must be maintained.
   //
   // Finally, if FTST is used and this is the last use of the target, the
   // target register may need to be explicitly popped.
   //
   TR::Register *targetRegisterForFTST = NULL;
   TR::Node     *targetChildForFTST    = NULL;

   if (getEvalChild1() && isUnevaluatedZero(firstChild))  // do we need getEvalChild1() here?
      {
      const bool canTestSecond =
         ((getCmpReg1Reg2() || reverseCmpOp) && !operandOrderIsFixed) ||
         (getCmpReg2Reg1() && !reverseCmpOp);

      if (canTestSecond)
         {
         if (getEvalChild2())
            {
            secondRegister = _cg->evaluate(secondChild);
            }
         targetRegisterForFTST = secondRegister;
         targetChildForFTST    = secondChild;
         notReversedOperands();
         }
      }
   else if (getEvalChild2() && isUnevaluatedZero(secondChild))  // do we need getEvalChild2() here?
      {
      const bool canTestFirst =
         (getCmpReg1Reg2() || reverseCmpOp) ||
         (getCmpReg2Reg1() && !reverseCmpOp && !operandOrderIsFixed);

      if (canTestFirst)
         {
         if (getEvalChild1())
            {
            firstRegister = _cg->evaluate(firstChild);
            }
         targetRegisterForFTST = firstRegister;
         targetChildForFTST    = firstChild;
         }
      }

   if (!targetRegisterForFTST)
      {
      // If we have a choice, evaluate the target operand last.  By doing so, we
      // help out the register assigner because the target must be TOS.  This
      // avoids an unnecessary FXCH for the target.
      //
      if (getEvalChild1() && getEvalChild2())
         {
         if (getCmpReg1Reg2() || getCmpReg1Mem2())
            {
            secondRegister = _cg->evaluate(secondChild);
            firstRegister  = _cg->evaluate(firstChild);
            }
         else
            {
            firstRegister  = _cg->evaluate(firstChild);
            secondRegister = _cg->evaluate(secondChild);
            }
         }
      else
         {
         if (getEvalChild1())
            {
            firstRegister = _cg->evaluate(firstChild);
            }
         if (getEvalChild2())
            {
            secondRegister = _cg->evaluate(secondChild);
            }
         }
      }

   // Adjust the FP precision of feeding operands.
   //
   if (firstRegister &&
       (firstRegister->needsPrecisionAdjustment() ||
        comp->getOption(TR_StrictFPCompares) ||
        (firstRegister->mayNeedPrecisionAdjustment() && secondChild->getOpCode().isLoadConst()) ||
        (firstRegister->mayNeedPrecisionAdjustment() && !secondRegister)))
      {
      TR::TreeEvaluator::insertPrecisionAdjustment(firstRegister, root, _cg);
      }

   if (secondRegister &&
       (secondRegister->needsPrecisionAdjustment() ||
        comp->getOption(TR_StrictFPCompares) ||
        (secondRegister->mayNeedPrecisionAdjustment() && firstChild->getOpCode().isLoadConst()) ||
        (secondRegister->mayNeedPrecisionAdjustment() && !firstRegister)))
      {
      TR::TreeEvaluator::insertPrecisionAdjustment(secondRegister, root, _cg);
      }

   // Generate the compare instruction.
   //
   if (targetRegisterForFTST)
      {
      generateFPRegInstruction(FTSTReg, root, targetRegisterForFTST, _cg);
      }
   else if (!useFCOMIInstructions && (getCmpReg1Mem2() || reverseMemOp))
      {
      TR::MemoryReference *secondMR = generateX86MemoryReference(secondChild, _cg);
      generateFPRegMemInstruction(cmpRegMemOpCode, root, firstRegister, secondMR, _cg);
      secondMR->decNodeReferenceCounts(_cg);
      }
   else if (!useFCOMIInstructions && getCmpReg2Mem1())
      {
      TR::MemoryReference *firstMR = generateX86MemoryReference(firstChild, _cg);
      generateFPRegMemInstruction(cmpRegMemOpCode, root, secondRegister, firstMR, _cg);
      notReversedOperands();
      firstMR->decNodeReferenceCounts(_cg);
      }
   else if (getCmpReg1Reg2() || reverseCmpOp)
      {
      generateFPCompareRegRegInstruction(cmpInstr, root, firstRegister, secondRegister, _cg);
      }
   else if (getCmpReg2Reg1())
      {
      generateFPCompareRegRegInstruction(cmpInstr, root, secondRegister, firstRegister, _cg);
      notReversedOperands();
      }

   _cg->decReferenceCount(firstChild);
   _cg->decReferenceCount(secondChild);

   // Evaluate the comparison.
   //
   if (getReversedOperands())
      {
      cmpOp = TR::ILOpCode(cmpOp).getOpCodeForSwapChildren();
      TR::Node::recreate(root, cmpOp);
      }

   if (useFCOMIInstructions && !targetRegisterForFTST)
      {
      return NULL;
      }

   // We must manually move the FP condition flags to the EFLAGS register if we don't
   // use the FCOMI instructions.
   //
   TR::Register *accRegister = _cg->allocateRegister();
   TR::RegisterDependencyConditions *accDeps = generateRegisterDependencyConditions((uint8_t)1, 1, _cg);
   accDeps->addPreCondition(accRegister, TR::RealRegister::eax, _cg);
   accDeps->addPostCondition(accRegister, TR::RealRegister::eax, _cg);
   generateRegInstruction(STSWAcc, root, accRegister, accDeps, _cg);

   // Pop the FTST target register if it is not used any more.
   //
   if (targetRegisterForFTST &&
       targetChildForFTST &&
       targetChildForFTST->getReferenceCount() == 0)
      {
      generateFPSTiST0RegRegInstruction(FSTRegReg, root, targetRegisterForFTST, targetRegisterForFTST, _cg);
      }

   return accRegister;
   }
/**
 * Generates code for a 64-bit integer subtract: the first child of \p root minus its
 * second child. The register holding the difference is recorded on \p root and the
 * reference counts of both children are decremented before returning.
 *
 * Two code shapes are produced:
 *   - when the target is 64-bit (or 64-bit registers are usable on a 32-bit target),
 *     a single SGR/SG subtract (SLGR/SLG when the condition code is set or read).
 *     SGH is used instead when the processor supports z14 and the second operand is
 *     an unevaluated, singly-referenced sloadi widened by an unevaluated,
 *     singly-referenced s2l;
 *   - otherwise the value lives in an even/odd register pair, the low and high words
 *     are subtracted separately, and the borrow is carried either by SLBR/SLB or by
 *     an explicit branch around an AHI -1 applied to the high word.
 *
 * For TR::lusubb the borrow-consuming opcodes (SLBGR/SLBG or SLBR/SLB) are selected
 * when TR_S390ComputeCC::setCarryBorrow succeeds for the node's third child.
 *
 * @param root the long subtract node being evaluated
 */
void TR_S390BinaryAnalyser::longSubtractAnalyser(TR::Node * root)
   {
   TR::Node * firstChild;
   TR::Node * secondChild;
   TR::RegisterDependencyConditions * dependencies = NULL;
   bool setsOrReadsCC = NEED_CC(root) || (root->getOpCodeValue() == TR::lusubb);
   TR::InstOpCode::Mnemonic regToRegOpCode;
   TR::InstOpCode::Mnemonic memToRegOpCode;
   TR::Compilation *comp = TR::comp();

   // Default opcode selection: arithmetic subtract when nobody looks at the condition
   // code, logical subtract otherwise; 32-bit codegen always starts from SLR/SL.
   if (TR::Compiler->target.is64Bit() || cg()->use64BitRegsOn32Bit())
      {
      if (!setsOrReadsCC)
         {
         regToRegOpCode = TR::InstOpCode::SGR;
         memToRegOpCode = TR::InstOpCode::SG;
         }
      else
         {
         regToRegOpCode = TR::InstOpCode::SLGR;
         memToRegOpCode = TR::InstOpCode::SLG;
         }
      }
   else
      {
      regToRegOpCode = TR::InstOpCode::SLR;
      memToRegOpCode = TR::InstOpCode::SL;
      }

   firstChild = root->getFirstChild();
   secondChild = root->getSecondChild();
   TR::Register * firstRegister = firstChild->getRegister();
   TR::Register * secondRegister = secondChild->getRegister();

   setInputs(firstChild, firstRegister, secondChild, secondRegister,
             false, false, comp);

   /** Attempt to use SGH to subtract halfword (64 <- 16).
    * The second child is a halfword from memory */
   bool is16BitMemory2Operand = false;
   if (TR::Compiler->target.cpu.getS390SupportsZ14() &&
       secondChild->getOpCodeValue() == TR::s2l &&
       secondChild->getFirstChild()->getOpCodeValue() == TR::sloadi &&
       secondChild->isSingleRefUnevaluated() &&
       secondChild->getFirstChild()->isSingleRefUnevaluated())
      {
      setMem2();
      memToRegOpCode = TR::InstOpCode::SGH;
      is16BitMemory2Operand = true;
      }

   if (getEvalChild1())
      {
      firstRegister = cg()->evaluate(firstChild);
      }

   if (getEvalChild2())
      {
      secondRegister = cg()->evaluate(secondChild);
      }

   remapInputs(firstChild, firstRegister, secondChild, secondRegister);

   if ((root->getOpCodeValue() == TR::lusubb) &&
       TR_S390ComputeCC::setCarryBorrow(root->getChild(2), false, cg()))
      {
      // use SLBGR rather than SLGR/SGR
      //     SLBG rather than SLG/SG
      // or
      // use SLBR rather than SLR
      //     SLB rather than SL
      bool uses64bit = TR::Compiler->target.is64Bit() || cg()->use64BitRegsOn32Bit();
      regToRegOpCode = uses64bit ? TR::InstOpCode::SLBGR : TR::InstOpCode::SLBR;
      memToRegOpCode = uses64bit ? TR::InstOpCode::SLBG : TR::InstOpCode::SLB;
      }

   if (TR::Compiler->target.is64Bit() || cg()->use64BitRegsOn32Bit())
      {
      if (getCopyReg1())
         {
         // The first operand must be preserved: subtract into a fresh copy.
         TR::Register * thirdReg = cg()->allocate64bitRegister();

         root->setRegister(thirdReg);
         generateRRInstruction(cg(), TR::InstOpCode::LGR, root, thirdReg, firstRegister);
         if (getBinaryReg3Reg2())
            {
            generateRRInstruction(cg(), regToRegOpCode, root, thirdReg, secondRegister);
            }
         else // assert getBinaryReg3Mem2() == true
            {
            // When SGH was selected the memory operand is the halfword load underneath
            // the s2l, not the s2l itself; this mirrors the handling in the
            // getBinaryReg1Mem2() branch below.
            TR::Node * baseAddrNode = is16BitMemory2Operand ? secondChild->getFirstChild() : secondChild;
            TR::MemoryReference * longMR = generateS390MemoryReference(baseAddrNode, cg());

            generateRXInstruction(cg(), memToRegOpCode, root, thirdReg, longMR);
            longMR->stopUsingMemRefRegister(cg());
            if (is16BitMemory2Operand)
               {
               // The sloadi was consumed directly without being evaluated through its parent.
               cg()->decReferenceCount(secondChild->getFirstChild());
               }
            }
         }
      else if (getBinaryReg1Reg2())
         {
         generateRRInstruction(cg(), regToRegOpCode, root, firstRegister, secondRegister);
         root->setRegister(firstRegister);
         }
      else // assert getBinaryReg1Mem2() == true
         {
         TR_ASSERT(  !getInvalid(), "TR_S390BinaryAnalyser::invalid case\n");

         TR::Node * baseAddrNode = is16BitMemory2Operand ? secondChild->getFirstChild() : secondChild;
         TR::MemoryReference * longMR = generateS390MemoryReference(baseAddrNode, cg());

         generateRXInstruction(cg(), memToRegOpCode, root, firstRegister, longMR);

         longMR->stopUsingMemRefRegister(cg());
         root->setRegister(firstRegister);

         if (is16BitMemory2Operand)
            {
            cg()->decReferenceCount(secondChild->getFirstChild());
            }
         }
      }
   else // if 32bit codegen...
      {
      // NOTE(review): this path does not consult is16BitMemory2Operand; presumably SGH
      // selection (z14) never coincides with 32-bit register-pair codegen - confirm.
      bool zArchTrexsupported = performTransformation(comp, "O^O Use SL/SLB for long sub.");

      // true  -> low word via regToRegOpCode/memToRegOpCode, high word via SLBR/SLB
      // false -> SR/S on the high word, SLR/SL on the low word, then an explicit
      //          branch-and-decrement to apply the borrow
      const bool useBorrowInstruction = (ENABLE_ZARCH_FOR_32 && zArchTrexsupported) || setsOrReadsCC;

      TR::Register * highDiff = NULL;
      TR::LabelSymbol * doneLSub = TR::LabelSymbol::create(cg()->trHeapMemory(),cg());
      if (getCopyReg1())
         {
         TR::Register * lowThird = cg()->allocateRegister();
         TR::Register * highThird = cg()->allocateRegister();

         TR::RegisterPair * thirdReg = cg()->allocateConsecutiveRegisterPair(lowThird, highThird);

         highDiff = highThird;

         dependencies = new (cg()->trHeapMemory()) TR::RegisterDependencyConditions(0, 9, cg());
         dependencies->addPostCondition(firstRegister, TR::RealRegister::EvenOddPair);
         dependencies->addPostCondition(firstRegister->getHighOrder(), TR::RealRegister::LegalEvenOfPair);
         dependencies->addPostCondition(firstRegister->getLowOrder(), TR::RealRegister::LegalOddOfPair);

         // If 2nd operand has ref count of 1 and can be accessed by a memory reference,
         // then second register will not be used.
         if (secondRegister == firstRegister && !setsOrReadsCC)
            {
            TR_ASSERT(  false, "lsub with identical children - fix Simplifier");
            }
         if (secondRegister != NULL && firstRegister != secondRegister)
            {
            dependencies->addPostCondition(secondRegister, TR::RealRegister::EvenOddPair);
            dependencies->addPostCondition(secondRegister->getHighOrder(), TR::RealRegister::LegalEvenOfPair);
            dependencies->addPostCondition(secondRegister->getLowOrder(), TR::RealRegister::LegalOddOfPair);
            }
         dependencies->addPostCondition(highThird, TR::RealRegister::AssignAny);

         root->setRegister(thirdReg);
         generateRRInstruction(cg(), TR::InstOpCode::LR, root, highThird, firstRegister->getHighOrder());
         generateRRInstruction(cg(), TR::InstOpCode::LR, root, lowThird, firstRegister->getLowOrder());
         if (getBinaryReg3Reg2())
            {
            if (useBorrowInstruction)
               {
               generateRRInstruction(cg(), regToRegOpCode, root, lowThird, secondRegister->getLowOrder());
               generateRRInstruction(cg(), TR::InstOpCode::SLBR, root, highThird, secondRegister->getHighOrder());
               }
            else
               {
               generateRRInstruction(cg(), TR::InstOpCode::SR, root, highThird, secondRegister->getHighOrder());
               generateRRInstruction(cg(), TR::InstOpCode::SLR, root, lowThird, secondRegister->getLowOrder());
               }
            }
         else // assert getBinaryReg3Mem2() == true
            {
            // The high word is at the operand's address, the low word 4 bytes further on.
            TR::MemoryReference * highMR = generateS390MemoryReference(secondChild, cg());
            TR::MemoryReference * lowMR = generateS390MemoryReference(*highMR, 4, cg());
            dependencies->addAssignAnyPostCondOnMemRef(highMR);

            if (useBorrowInstruction)
               {
               generateRXInstruction(cg(), memToRegOpCode, root, lowThird, lowMR);
               generateRXInstruction(cg(), TR::InstOpCode::SLB, root, highThird, highMR);
               }
            else
               {
               generateRXInstruction(cg(), TR::InstOpCode::S, root, highThird, highMR);
               generateRXInstruction(cg(), TR::InstOpCode::SL, root, lowThird, lowMR);
               }
            highMR->stopUsingMemRefRegister(cg());
            lowMR->stopUsingMemRefRegister(cg());
            }
         }
      else if (getBinaryReg1Reg2())
         {
         dependencies = new (cg()->trHeapMemory()) TR::RegisterDependencyConditions(0, 6, cg());
         dependencies->addPostCondition(firstRegister, TR::RealRegister::EvenOddPair);
         dependencies->addPostCondition(firstRegister->getHighOrder(), TR::RealRegister::LegalEvenOfPair);
         dependencies->addPostCondition(firstRegister->getLowOrder(), TR::RealRegister::LegalOddOfPair);

         if (secondRegister == firstRegister)
            {
            TR_ASSERT(  false, "lsub with identical children - fix Simplifier");
            }

         if (secondRegister != firstRegister)
            {
            dependencies->addPostCondition(secondRegister, TR::RealRegister::EvenOddPair);
            dependencies->addPostCondition(secondRegister->getHighOrder(), TR::RealRegister::LegalEvenOfPair);
            dependencies->addPostCondition(secondRegister->getLowOrder(), TR::RealRegister::LegalOddOfPair);
            }

         if (useBorrowInstruction)
            {
            generateRRInstruction(cg(), regToRegOpCode, root, firstRegister->getLowOrder(), secondRegister->getLowOrder());
            generateRRInstruction(cg(), TR::InstOpCode::SLBR, root, firstRegister->getHighOrder(), secondRegister->getHighOrder());
            }
         else
            {
            generateRRInstruction(cg(), TR::InstOpCode::SR, root, firstRegister->getHighOrder(), secondRegister->getHighOrder());
            generateRRInstruction(cg(), TR::InstOpCode::SLR, root, firstRegister->getLowOrder(), secondRegister->getLowOrder());
            }

         highDiff = firstRegister->getHighOrder();
         root->setRegister(firstRegister);
         }
      else // assert getBinaryReg1Mem2() == true
         {
         TR_ASSERT(  !getInvalid(),"TR_S390BinaryAnalyser::invalid case\n");

         dependencies = new (cg()->trHeapMemory()) TR::RegisterDependencyConditions(0, 5, cg());
         dependencies->addPostCondition(firstRegister, TR::RealRegister::EvenOddPair);
         dependencies->addPostCondition(firstRegister->getHighOrder(), TR::RealRegister::LegalEvenOfPair);
         dependencies->addPostCondition(firstRegister->getLowOrder(), TR::RealRegister::LegalOddOfPair);

         TR::MemoryReference * highMR = generateS390MemoryReference(secondChild, cg());
         TR::MemoryReference * lowMR = generateS390MemoryReference(*highMR, 4, cg());
         dependencies->addAssignAnyPostCondOnMemRef(highMR);

         if (useBorrowInstruction)
            {
            generateRXInstruction(cg(), memToRegOpCode, root, firstRegister->getLowOrder(), lowMR);
            generateRXInstruction(cg(), TR::InstOpCode::SLB, root, firstRegister->getHighOrder(), highMR);
            }
         else
            {
            generateRXInstruction(cg(), TR::InstOpCode::S, root, firstRegister->getHighOrder(), highMR);
            generateRXInstruction(cg(), TR::InstOpCode::SL, root, firstRegister->getLowOrder(), lowMR);
            }

         highDiff = firstRegister->getHighOrder();
         root->setRegister(firstRegister);
         highMR->stopUsingMemRefRegister(cg());
         lowMR->stopUsingMemRefRegister(cg());
         }

      if (!useBorrowInstruction)
         {
         // Skip the adjustment when the low-word subtract did not borrow.
         // NOTE(review): COND_MASK3 presumably selects condition codes 2 and 3, the
         // "no borrow" outcomes of SUBTRACT LOGICAL - confirm against the architecture manual.
         generateS390BranchInstruction(cg(), TR::InstOpCode::BRC,TR::InstOpCode::COND_MASK3, root, doneLSub);

         // Otherwise apply the borrow by decrementing the high word.
         generateRIInstruction(cg(), TR::InstOpCode::AHI, root, highDiff, -1);

         generateS390LabelInstruction(cg(), TR::InstOpCode::LABEL, root, doneLSub, dependencies);
         }
      }

   cg()->decReferenceCount(firstChild);
   cg()->decReferenceCount(secondChild);

   return;
   }
OMR::Power::RegisterDependencyConditions::RegisterDependencyConditions( TR::CodeGenerator *cg, TR::Node *node, uint32_t extranum, TR::Instruction **cursorPtr) { List<TR::Register> regList(cg->trMemory()); TR::Instruction *iCursor = (cursorPtr==NULL)?NULL:*cursorPtr; int32_t totalNum = node->getNumChildren() + extranum; int32_t i; cg->comp()->incVisitCount(); int32_t numLongs = 0; // // Pre-compute how many longs are global register candidates // for (i = 0; i < node->getNumChildren(); ++i) { TR::Node *child = node->getChild(i); TR::Register *reg = child->getRegister(); if (reg!=NULL /* && reg->getKind()==TR_GPR */) { if (child->getHighGlobalRegisterNumber() > -1) numLongs++; } } totalNum = totalNum + numLongs; _preConditions = new (totalNum, cg->trMemory()) TR_PPCRegisterDependencyGroup; _postConditions = new (totalNum, cg->trMemory()) TR_PPCRegisterDependencyGroup; _numPreConditions = totalNum; _addCursorForPre = 0; _numPostConditions = totalNum; _addCursorForPost = 0; // First, handle dependencies that match current association for (i=0; i<node->getNumChildren(); i++) { TR::Node *child = node->getChild(i); TR::Register *reg = child->getRegister(); TR::Register *highReg = NULL; TR::RealRegister::RegNum regNum = (TR::RealRegister::RegNum)cg->getGlobalRegister(child->getGlobalRegisterNumber()); TR::RealRegister::RegNum highRegNum; if (child->getHighGlobalRegisterNumber() > -1) { highRegNum = (TR::RealRegister::RegNum)cg->getGlobalRegister(child->getHighGlobalRegisterNumber()); TR::RegisterPair *regPair = reg->getRegisterPair(); TR_ASSERT(regPair, "assertion failure"); highReg = regPair->getHighOrder(); reg = regPair->getLowOrder(); if (highReg->getAssociation() != highRegNum || reg->getAssociation() != regNum) continue; } else if (reg->getAssociation() != regNum) continue; TR_ASSERT(!regList.find(reg) && (!highReg || !regList.find(highReg)), "Should not happen\n"); addPreCondition(reg, regNum); addPostCondition(reg, regNum); regList.add(reg); if (highReg) { 
addPreCondition(highReg, highRegNum); addPostCondition(highReg, highRegNum); regList.add(highReg); } } // Second pass to handle dependencies for which association does not exist // or does not match for (i=0; i<node->getNumChildren(); i++) { TR::Node *child = node->getChild(i); TR::Register *reg = child->getRegister(); TR::Register *highReg = NULL; TR::Register *copyReg = NULL; TR::Register *highCopyReg = NULL; TR::RealRegister::RegNum regNum = (TR::RealRegister::RegNum)cg->getGlobalRegister(child->getGlobalRegisterNumber()); TR::RealRegister::RegNum highRegNum; if (child->getHighGlobalRegisterNumber() > -1) { highRegNum = (TR::RealRegister::RegNum)cg->getGlobalRegister(child->getHighGlobalRegisterNumber()); TR::RegisterPair *regPair = reg->getRegisterPair(); TR_ASSERT(regPair, "assertion failure"); highReg = regPair->getHighOrder(); reg = regPair->getLowOrder(); if (highReg->getAssociation() == highRegNum && reg->getAssociation() == regNum) continue; } else if (reg->getAssociation() == regNum) continue; if (regList.find(reg) || (highReg && regList.find(highReg))) { TR::InstOpCode::Mnemonic opCode; TR_RegisterKinds kind = reg->getKind(); switch (kind) { case TR_GPR: opCode = TR::InstOpCode::mr; break; case TR_FPR: opCode = TR::InstOpCode::fmr; break; case TR_VRF: opCode = TR::InstOpCode::vor; //TR_ASSERT(0, "VMX not fully supported."); break; case TR_VSX_VECTOR: opCode = TR::InstOpCode::xxlor; break; case TR_CCR: opCode = TR::InstOpCode::mcrf; break; default: TR_ASSERT(0, "Invalid register kind."); } if (regList.find(reg)) { bool containsInternalPointer = false; if (reg->getPinningArrayPointer()) containsInternalPointer = true; copyReg = (reg->containsCollectedReference() && !containsInternalPointer) ? 
cg->allocateCollectedReferenceRegister() : cg->allocateRegister(kind); if (containsInternalPointer) { copyReg->setContainsInternalPointer(); copyReg->setPinningArrayPointer(reg->getPinningArrayPointer()); } if (opCode == TR::InstOpCode::vor || opCode == TR::InstOpCode::xxlor) iCursor = generateTrg1Src2Instruction(cg, opCode, node, copyReg, reg, reg, iCursor); else iCursor = generateTrg1Src1Instruction(cg, opCode, node, copyReg, reg, iCursor); reg = copyReg; } if (highReg && regList.find(highReg)) { bool containsInternalPointer = false; if (highReg->getPinningArrayPointer()) containsInternalPointer = true; highCopyReg = (highReg->containsCollectedReference() && !containsInternalPointer) ? cg->allocateCollectedReferenceRegister() : cg->allocateRegister(kind); if (containsInternalPointer) { highCopyReg->setContainsInternalPointer(); highCopyReg->setPinningArrayPointer(highReg->getPinningArrayPointer()); } if (opCode == TR::InstOpCode::vor || opCode == TR::InstOpCode::xxlor) iCursor = generateTrg1Src2Instruction(cg, opCode, node, highCopyReg, highReg, highReg, iCursor); else iCursor = generateTrg1Src1Instruction(cg, opCode, node, highCopyReg, highReg, iCursor); highReg = highCopyReg; } } addPreCondition(reg, regNum); addPostCondition(reg, regNum); if (copyReg != NULL) cg->stopUsingRegister(copyReg); else regList.add(reg); if (highReg) { addPreCondition(highReg, highRegNum); addPostCondition(highReg, highRegNum); if (highCopyReg != NULL) cg->stopUsingRegister(highCopyReg); else regList.add(highReg); } } if (iCursor!=NULL && cursorPtr!=NULL) *cursorPtr = iCursor; }
/*
 * Shared implementation behind longSubtractAnalyser and
 * longSubtractAnalyserWithExplicitOperands; users should call one of those APIs
 * instead of calling this one directly.
 *
 * Emits a 64-bit subtract (firstChild - secondChild) as a 32-bit SUB on the low
 * words followed by an SBB on the high words, and returns the register pair that
 * holds the result. For TR::lusubb the low-word operation becomes SBB when
 * TR_X86ComputeCC::setCarryBorrow succeeds for the node's third child.
 *
 * secondChild is taken by reference and may be replaced by its own first child when
 * the widening/shift above it can be bypassed (see below).
 */
TR::Register* TR_X86SubtractAnalyser::longSubtractAnalyserImpl(TR::Node *root, TR::Node *&firstChild, TR::Node *&secondChild)
   {
   TR::Register *firstRegister  = firstChild->getRegister();
   TR::Register *secondRegister = secondChild->getRegister();

   const bool needsEflags = NEED_CC(root) || (root->getOpCodeValue() == TR::lusubb);

   // A long subtract can be cheaper when an operand's high word is known to be zero,
   // but only when the flags produced by the subtract are not consumed.
   const bool firstHighZero  = firstChild->isHighWordZero() && !needsEflags;
   const bool secondHighZero = secondChild->isHighWordZero() && !needsEflags;
   bool useSecondHighOrder   = false;

   if (secondHighZero && secondChild->getReferenceCount() == 1 && secondRegister == 0)
      {
      // The second operand is unevaluated and used only here, so a zero-extending
      // conversion (or an unsigned shift right by 32) need not be evaluated: operate
      // on its child directly.
      // NOTE(review): for su2l/bu2l the bypassed child is narrower than the 4-byte
      // subtract emitted below - confirm the operand is suitably zero-extended.
      const TR::ILOpCodes secondOp = secondChild->getOpCodeValue();

      bool bypassSecond = (secondOp == TR::iu2l || secondOp == TR::su2l || secondOp == TR::bu2l);
      if (!bypassSecond && secondOp == TR::lushr)
         {
         TR::Node *shiftAmount = secondChild->getSecondChild();
         bypassSecond = shiftAmount->getOpCodeValue() == TR::iconst &&
                        (shiftAmount->getInt() & TR::TreeEvaluator::shiftMask(true)) == 32;
         }

      if (bypassSecond)
         {
         secondChild    = secondChild->getFirstChild();
         secondRegister = secondChild->getRegister();
         if (secondOp == TR::lushr)
            useSecondHighOrder = true;
         }
      }

   setInputs(firstChild, firstRegister, secondChild, secondRegister);

   if (isVolatileMemoryOperand(firstChild))
      resetMem1();

   if (isVolatileMemoryOperand(secondChild))
      resetMem2();

   if (getEvalChild1())
      firstRegister = _cg->evaluate(firstChild);

   if (getEvalChild2())
      secondRegister = _cg->evaluate(secondChild);

   // With a zero high word only one half of a register pair is needed: the low half
   // normally, the high half when a shift right by 32 was bypassed.
   if (secondHighZero && secondRegister && secondRegister->getRegisterPair())
      {
      secondRegister = useSecondHighOrder ? secondRegister->getHighOrder()
                                          : secondRegister->getLowOrder();
      }

   TR_X86OpCodes regRegOpCode = SUB4RegReg;
   TR_X86OpCodes regMemOpCode = SUB4RegMem;
   if (root->getOpCodeValue() == TR::lusubb &&
       TR_X86ComputeCC::setCarryBorrow(root->getChild(2), true, _cg))
      {
      // use SBB rather than SUB
      regRegOpCode = SBB4RegReg;
      regMemOpCode = SBB4RegMem;
      }

   TR::Register *targetRegister = NULL;

   if (getCopyReg1())
      {
      // The first operand must survive: subtract into a freshly allocated pair.
      TR::Register     *lowThird  = _cg->allocateRegister();
      TR::Register     *highThird = _cg->allocateRegister();
      TR::RegisterPair *thirdReg  = _cg->allocateRegisterPair(lowThird, highThird);
      targetRegister = thirdReg;

      generateRegRegInstruction(MOV4RegReg, root, lowThird, firstRegister->getLowOrder(), _cg);
      if (firstHighZero)
         generateRegRegInstruction(XOR4RegReg, root, highThird, highThird, _cg);
      else
         generateRegRegInstruction(MOV4RegReg, root, highThird, firstRegister->getHighOrder(), _cg);

      if (getSubReg3Reg2())
         {
         if (secondHighZero)
            {
            generateRegRegInstruction(regRegOpCode, root, lowThird, secondRegister, _cg);
            generateRegImmInstruction(SBB4RegImms, root, highThird, 0, _cg);
            }
         else
            {
            generateRegRegInstruction(regRegOpCode, root, lowThird, secondRegister->getLowOrder(), _cg);
            generateRegRegInstruction(SBB4RegReg, root, highThird, secondRegister->getHighOrder(), _cg);
            }
         }
      else // assert getSubReg3Mem2() == true
         {
         TR::MemoryReference *lowMR = generateX86MemoryReference(secondChild, _cg);

         // useSecondHighOrder is only set for a bypassed lushr by 32. When that
         // operand is in memory, the word to subtract is the upper half of the long,
         // i.e. the one 4 bytes beyond the operand's address.
         if (useSecondHighOrder)
            {
            TR_ASSERT(secondHighZero, "useSecondHighOrder should be consistent with secondHighZero. useSecondHighOrder subsumes secondHighZero");
            lowMR = generateX86MemoryReference(*lowMR, 4, _cg);
            }

         generateRegMemInstruction(regMemOpCode, root, lowThird, lowMR, _cg);
         if (secondHighZero)
            {
            generateRegImmInstruction(SBB4RegImms, root, highThird, 0, _cg);
            }
         else
            {
            TR::MemoryReference *highMR = generateX86MemoryReference(*lowMR, 4, _cg);
            generateRegMemInstruction(SBB4RegMem, root, highThird, highMR, _cg);
            }
         lowMR->decNodeReferenceCounts(_cg);
         }
      }
   else if (getSubReg1Reg2())
      {
      // Subtract in place into the first operand's registers.
      TR::Register *lowDest  = firstRegister->getLowOrder();
      TR::Register *highDest = firstRegister->getHighOrder();

      if (secondHighZero)
         {
         generateRegRegInstruction(regRegOpCode, root, lowDest, secondRegister, _cg);
         generateRegImmInstruction(SBB4RegImms, root, highDest, 0, _cg);
         }
      else
         {
         generateRegRegInstruction(regRegOpCode, root, lowDest, secondRegister->getLowOrder(), _cg);
         generateRegRegInstruction(SBB4RegReg, root, highDest, secondRegister->getHighOrder(), _cg);
         }
      targetRegister = firstRegister;
      }
   else // assert getSubReg1Mem2() == true
      {
      TR::MemoryReference *lowMR = generateX86MemoryReference(secondChild, _cg);

      // Same adjustment as in the copy path: a bypassed lushr by 32 whose operand is
      // in memory subtracts the upper word of that operand.
      if (useSecondHighOrder)
         lowMR = generateX86MemoryReference(*lowMR, 4, _cg);

      generateRegMemInstruction(regMemOpCode, root, firstRegister->getLowOrder(), lowMR, _cg);
      if (secondHighZero)
         {
         generateRegImmInstruction(SBB4RegImms, root, firstRegister->getHighOrder(), 0, _cg);
         }
      else
         {
         TR::MemoryReference *highMR = generateX86MemoryReference(*lowMR, 4, _cg);
         generateRegMemInstruction(SBB4RegMem, root, firstRegister->getHighOrder(), highMR, _cg);
         }
      targetRegister = firstRegister;
      lowMR->decNodeReferenceCounts(_cg);
      }

   return targetRegister;
   }
/**
 * Builds a TR::RegisterDependencyConditions object from two optional scratch
 * dependency descriptions.
 *
 * The GPR dependencies of \p pre and \p post are added first (marking the entries
 * flagged in _excludeGPR0), followed by their CCR dependencies. When VSX vector
 * registers are live (or no live-register tracking exists for them), placeholder
 * dependencies are added on every non-preserved register from FirstVSR to LastVSR;
 * failing that, when VSX scalar registers are live (or untracked), on every
 * non-preserved register from vsr32 to LastVSR.
 *
 * @param cg    code generator
 * @param pre   scratch pre-conditions, may be NULL
 * @param post  scratch post-conditions, may be NULL
 * @return the newly allocated dependency conditions
 */
TR::RegisterDependencyConditions* TR_PPCScratchRegisterDependencyConditions::createDependencyConditions(TR::CodeGenerator *cg,
      TR_PPCScratchRegisterDependencyConditions *pre,
      TR_PPCScratchRegisterDependencyConditions *post)
   {
   int32_t preCount = (pre != NULL) ? pre->getNumberOfDependencies() : 0;
   int32_t postCount = (post != NULL) ? post->getNumberOfDependencies() : 0;

   // Missing live-register information is treated the same as "something is live".
   TR_LiveRegisters *vectorLiveness = cg->getLiveRegisters(TR_VSX_VECTOR);
   const bool liveVSXVectorReg = (vectorLiveness == NULL) || (vectorLiveness->getNumberOfLiveRegisters() > 0);
   TR_LiveRegisters *scalarLiveness = cg->getLiveRegisters(TR_VSX_SCALAR);
   const bool liveVSXScalarReg = (scalarLiveness == NULL) || (scalarLiveness->getNumberOfLiveRegisters() > 0);

   // Reserve room for the placeholder dependencies added at the end.
   int32_t placeholderSlots = 0;
   if (liveVSXVectorReg)
      placeholderSlots = 64;
   else if (liveVSXScalarReg)
      placeholderSlots = 32;
   preCount += placeholderSlots;
   postCount += placeholderSlots;

   TR::RegisterDependencyConditions *dependencies =
      new (cg->trHeapMemory()) TR::RegisterDependencyConditions(preCount, postCount, cg->trMemory());

   // GPR dependencies go in first, so the i-th GPR dependency is also the i-th entry
   // of its group; that index is used to apply the exclude-GPR0 marking.
   if (pre != NULL)
      {
      for (int i = 0; i < pre->_numGPRDeps; ++i)
         {
         dependencies->addPreCondition(pre->_gprDeps[i].getRegister(), pre->_gprDeps[i].getRealRegister(), pre->_gprDeps[i].getFlags());
         if (pre->_excludeGPR0 & (1 << i))
            dependencies->getPreConditions()->getRegisterDependency(i)->setExcludeGPR0();
         }
      }
   if (post != NULL)
      {
      for (int i = 0; i < post->_numGPRDeps; ++i)
         {
         dependencies->addPostCondition(post->_gprDeps[i].getRegister(), post->_gprDeps[i].getRealRegister(), post->_gprDeps[i].getFlags());
         if (post->_excludeGPR0 & (1 << i))
            dependencies->getPostConditions()->getRegisterDependency(i)->setExcludeGPR0();
         }
      }

   if (pre != NULL)
      {
      for (int i = 0; i < pre->_numCCRDeps; ++i)
         dependencies->addPreCondition(pre->_ccrDeps[i].getRegister(), pre->_ccrDeps[i].getRealRegister(), pre->_ccrDeps[i].getFlags());
      }
   if (post != NULL)
      {
      for (int i = 0; i < post->_numCCRDeps; ++i)
         dependencies->addPostCondition(post->_ccrDeps[i].getRegister(), post->_ccrDeps[i].getRealRegister(), post->_ccrDeps[i].getFlags());
      }

   const TR_PPCLinkageProperties& properties = cg->getLinkage()->getProperties();

   if (liveVSXVectorReg || liveVSXScalarReg)
      {
      // Vector liveness covers the whole VSR range; scalar-only liveness starts at vsr32.
      const int32_t firstPlaceholder = liveVSXVectorReg ? TR::RealRegister::FirstVSR : TR::RealRegister::vsr32;
      for (int32_t regIndex = firstPlaceholder; regIndex <= TR::RealRegister::LastVSR; regIndex++)
         {
         const TR::RealRegister::RegNum realReg = (TR::RealRegister::RegNum)regIndex;
         if (properties.getPreserved(realReg))
            continue;

         TR::Register *vreg = cg->allocateRegister(TR_FPR);
         vreg->setPlaceholderReg();
         dependencies->addPreCondition(vreg, realReg);
         dependencies->addPostCondition(vreg, realReg);
         }
      }

   return dependencies;
   }
/**
 * Allocates the virtual register that will receive the call's return value and adds
 * post-conditions to \p deps on eax, edx and ecx, plus the floating-point return
 * register when the call returns a float or double.
 *
 * Any of eax/edx not taken by the return value is bound to a newly allocated dummy
 * register, as is ecx. The dependency conditions are deliberately left open so that
 * a caller (for instance a derived linkage) can keep adding dependencies; closing
 * them is the caller's responsibility.
 *
 * @param callNode the call node; its data type selects the kind of return register
 * @param deps     dependency conditions to extend, must not be NULL
 * @return the return-value register, or NULL when the call has TR::NoType
 */
TR::Register *
TR::IA32SystemLinkage::buildVolatileAndReturnDependencies(
      TR::Node *callNode,
      TR::RegisterDependencyConditions *deps)
   {
   TR_ASSERT(deps != NULL, "expected register dependencies");

   // returnReg is the value handed back; exactly one of the three classifiers below
   // is set alongside it to record which real register(s) the value arrives in.
   TR::Register *returnReg   = NULL;
   TR::Register *gprResult   = NULL;  // arrives in eax
   TR::Register *pairResult  = NULL;  // arrives in edx:eax
   TR::Register *x87Result   = NULL;  // arrives in the floating-point return register

   switch (callNode->getDataType())
      {
      case TR::NoType:
         break;

      case TR::Int8:
      case TR::Int16:
      case TR::Int32:
         gprResult = cg()->allocateRegister();
         returnReg = gprResult;
         break;

      case TR::Address:
         gprResult = cg()->allocateCollectedReferenceRegister();
         returnReg = gprResult;
         break;

      case TR::Float:
         x87Result = cg()->allocateSinglePrecisionRegister(TR_X87);
         returnReg = x87Result;
         break;

      case TR::Double:
         x87Result = cg()->allocateRegister(TR_X87);
         returnReg = x87Result;
         break;

      case TR::Int64:
         pairResult = (TR::Register*)cg()->allocateRegisterPair(cg()->allocateRegister(), cg()->allocateRegister());
         returnReg = pairResult;
         break;

      case TR::Aggregate:
      default:
         TR_ASSERT(false, "return type still not supported");
      }

   // Deps for volatile regs
   //
   // TODO: This should be less dependent on the real registers, but the way
   // _properties is set up makes that very hard.
   TR_ASSERT(_properties.getIntegerReturnRegister()  == TR::RealRegister::eax, "assertion failure");
   TR_ASSERT(_properties.getLongLowReturnRegister()  == TR::RealRegister::eax, "assertion failure");
   TR_ASSERT(_properties.getLongHighReturnRegister() == TR::RealRegister::edx, "assertion failure");
   TR_ASSERT(_properties.getFloatReturnRegister()    == TR::RealRegister::st0, "assertion failure");

   // eax: low half of a long result, the integer/address result, or a dummy.
   TR::Register *eaxVirtual;
   if (pairResult)
      eaxVirtual = returnReg->getLowOrder();
   else if (gprResult)
      eaxVirtual = returnReg;
   else
      eaxVirtual = cg()->allocateRegister();
   deps->addPostCondition(eaxVirtual, TR::RealRegister::eax, cg());

   // edx: high half of a long result, otherwise a dummy.
   TR::Register *edxVirtual;
   if (pairResult)
      edxVirtual = returnReg->getHighOrder();
   else
      edxVirtual = cg()->allocateRegister();
   deps->addPostCondition(edxVirtual, TR::RealRegister::edx, cg());

   // ecx never carries a return value here, so it always gets a dummy.
   TR::Register *ecxVirtual = cg()->allocateRegister();
   deps->addPostCondition(ecxVirtual, TR::RealRegister::ecx, cg());

   // st0: only needed for a floating-point result. No dummy dependency is added
   // otherwise because the FPREGSPILL instruction takes care of it.
   if (x87Result)
      {
      deps->addPostCondition(returnReg, _properties.getFloatReturnRegister(), cg());
      }

   // The dependency conditions are intentionally not closed here; see the header comment.
   return returnReg;
   }
void TR_ARMRegisterDependencyGroup::assignRegisters(TR::Instruction *currentInstruction, TR_RegisterKinds kindToBeAssigned, uint32_t numberOfRegisters, TR::CodeGenerator *cg) { TR::Compilation *comp = cg->comp(); TR::Machine *machine = cg->machine(); TR::Register *virtReg; TR::RealRegister::RegNum dependentRegNum; TR::RealRegister *dependentRealReg, *assignedRegister; uint32_t i, j; bool changed; if (!comp->getOption(TR_DisableOOL)) { for (i = 0; i< numberOfRegisters; i++) { virtReg = dependencies[i].getRegister(); dependentRegNum = dependencies[i].getRealRegister(); if (dependentRegNum == TR::RealRegister::SpilledReg) { TR_ASSERT(virtReg->getBackingStorage(),"should have a backing store if dependentRegNum == spillRegIndex()\n"); if (virtReg->getAssignedRealRegister()) { // this happens when the register was first spilled in main line path then was reverse spilled // and assigned to a real register in OOL path. We protected the backing store when doing // the reverse spill so we could re-spill to the same slot now traceMsg (comp,"\nOOL: Found register spilled in main line and re-assigned inside OOL"); TR::Node *currentNode = currentInstruction->getNode(); TR::RealRegister *assignedReg = toRealRegister(virtReg->getAssignedRegister()); TR::MemoryReference *tempMR = new (cg->trHeapMemory()) TR::MemoryReference(currentNode, (TR::SymbolReference*)virtReg->getBackingStorage()->getSymbolReference(), sizeof(uintptr_t), cg); TR_ARMOpCodes opCode; TR_RegisterKinds rk = virtReg->getKind(); switch (rk) { case TR_GPR: opCode = ARMOp_ldr; break; case TR_FPR: opCode = virtReg->isSinglePrecision() ? 
ARMOp_ldfs : ARMOp_ldfd; break; default: TR_ASSERT(0, "\nRegister kind not supported in OOL spill\n"); break; } TR::Instruction *inst = generateTrg1MemInstruction(cg, opCode, currentNode, assignedReg, tempMR, currentInstruction); assignedReg->setAssignedRegister(NULL); virtReg->setAssignedRegister(NULL); assignedReg->setState(TR::RealRegister::Free); if (comp->getDebug()) cg->traceRegisterAssignment("Generate reload of virt %s due to spillRegIndex dep at inst %p\n", cg->comp()->getDebug()->getName(virtReg),currentInstruction); cg->traceRAInstruction(inst); } if (!(std::find(cg->getSpilledRegisterList()->begin(), cg->getSpilledRegisterList()->end(), virtReg) != cg->getSpilledRegisterList()->end())) cg->getSpilledRegisterList()->push_front(virtReg); } // we also need to free up all locked backing storage if we are exiting the OOL during backwards RA assignment else if (currentInstruction->isLabel() && virtReg->getAssignedRealRegister()) { TR::ARMLabelInstruction *labelInstr = (TR::ARMLabelInstruction *)currentInstruction; TR_BackingStore *location = virtReg->getBackingStorage(); TR_RegisterKinds rk = virtReg->getKind(); int32_t dataSize; if (labelInstr->getLabelSymbol()->isStartOfColdInstructionStream() && location) { traceMsg (comp,"\nOOL: Releasing backing storage (%p)\n", location); if (rk == TR_GPR) dataSize = TR::Compiler->om.sizeofReferenceAddress(); else dataSize = 8; location->setMaxSpillDepth(0); cg->freeSpill(location,dataSize,0); virtReg->setBackingStorage(NULL); } } } } for (i = 0; i < numberOfRegisters; i++) { virtReg = dependencies[i].getRegister(); if (virtReg->getAssignedRealRegister()!=NULL) { if (dependencies[i].getRealRegister() == TR::RealRegister::NoReg) { virtReg->block(); } else { dependentRegNum = toRealRegister(virtReg->getAssignedRealRegister())->getRegisterNumber(); for (j=0; j<numberOfRegisters; j++) { if (dependentRegNum == dependencies[j].getRealRegister()) { virtReg->block(); break; } } } } } do { changed = false; for (i = 0; i < 
numberOfRegisters; i++) { virtReg = dependencies[i].getRegister(); dependentRegNum = dependencies[i].getRealRegister(); dependentRealReg = machine->getRealRegister(dependentRegNum); if (dependentRegNum != TR::RealRegister::NoReg && dependentRegNum != TR::RealRegister::SpilledReg && dependentRealReg->getState() == TR::RealRegister::Free) { machine->coerceRegisterAssignment(currentInstruction, virtReg, dependentRegNum); virtReg->block(); changed = true; } } } while (changed == true); do { changed = false; for (i = 0; i < numberOfRegisters; i++) { virtReg = dependencies[i].getRegister(); assignedRegister = NULL; if (virtReg->getAssignedRealRegister() != NULL) { assignedRegister = toRealRegister(virtReg->getAssignedRealRegister()); } dependentRegNum = dependencies[i].getRealRegister(); dependentRealReg = machine->getRealRegister(dependentRegNum); if (dependentRegNum != TR::RealRegister::NoReg && dependentRegNum != TR::RealRegister::SpilledReg && dependentRealReg != assignedRegister) { machine->coerceRegisterAssignment(currentInstruction, virtReg, dependentRegNum); virtReg->block(); changed = true; } } } while (changed == true); for (i=0; i<numberOfRegisters; i++) { if (dependencies[i].getRealRegister() == TR::RealRegister::NoReg) { bool excludeGPR0 = dependencies[i].getExcludeGPR0()?true:false; TR::RealRegister *realOne; virtReg = dependencies[i].getRegister(); realOne = virtReg->getAssignedRealRegister(); if (realOne!=NULL && excludeGPR0 && toRealRegister(realOne)->getRegisterNumber()==TR::RealRegister::gr0) { if ((assignedRegister = machine->findBestFreeRegister(virtReg->getKind(), true)) == NULL) { assignedRegister = machine->freeBestRegister(currentInstruction, virtReg->getKind(), NULL, true); } machine->coerceRegisterAssignment(currentInstruction, virtReg, assignedRegister->getRegisterNumber()); } else if (realOne == NULL) { if (virtReg->getTotalUseCount() == virtReg->getFutureUseCount()) { if ((assignedRegister = machine->findBestFreeRegister(virtReg->getKind(), 
excludeGPR0, true)) == NULL) { assignedRegister = machine->freeBestRegister(currentInstruction, virtReg->getKind(), NULL, excludeGPR0); } } else { assignedRegister = machine->reverseSpillState(currentInstruction, virtReg, NULL, excludeGPR0); } virtReg->setAssignedRegister(assignedRegister); assignedRegister->setAssignedRegister(virtReg); assignedRegister->setState(TR::RealRegister::Assigned); virtReg->block(); } } } unblockRegisters(numberOfRegisters); for (i = 0; i < numberOfRegisters; i++) { TR::Register *dependentRegister = getRegisterDependency(i)->getRegister(); if (dependentRegister->getAssignedRegister()) { TR::RealRegister *assignedRegister = dependentRegister->getAssignedRegister()->getRealRegister(); if (getRegisterDependency(i)->getRealRegister() == TR::RealRegister::NoReg) getRegisterDependency(i)->setRealRegister(toRealRegister(assignedRegister)->getRegisterNumber()); if (dependentRegister->decFutureUseCount() == 0) { dependentRegister->setAssignedRegister(NULL); assignedRegister->setAssignedRegister(NULL); assignedRegister->setState(TR::RealRegister::Unlatched); // Was setting to Free } } } }