TR::Instruction *OMR::Power::Linkage::saveArguments(TR::Instruction *cursor, bool fsd, bool saveOnly, List<TR::ParameterSymbol> &parmList) { #define REAL_REGISTER(ri) machine->getRealRegister(ri) #define REGNUM(ri) ((TR::RealRegister::RegNum)(ri)) const TR::PPCLinkageProperties& properties = self()->getProperties(); TR::Machine *machine = self()->machine(); TR::RealRegister *stackPtr = self()->cg()->getStackPointerRegister(); TR::ResolvedMethodSymbol *bodySymbol = self()->comp()->getJittedMethodSymbol(); ListIterator<TR::ParameterSymbol> paramIterator(&parmList); TR::ParameterSymbol *paramCursor; TR::Node *firstNode = self()->comp()->getStartTree()->getNode(); TR_BitVector freeScratchable; int32_t busyMoves[3][64]; int32_t busyIndex = 0, i1; bool all_saved = false; // the freeScratchable structure will not be used when saveOnly == true // no additional conditions were added with the intention of keeping the code easier to read // and not full of if conditions freeScratchable.init(TR::RealRegister::LastFPR + 1, self()->trMemory()); // first, consider all argument registers free for (i1=TR::RealRegister::FirstGPR; i1<=TR::RealRegister::LastFPR; i1++) { if (!properties.getReserved(REGNUM(i1))) { freeScratchable.set(i1); } } // second, go through all parameters and reset registers that are actually used for (paramCursor=paramIterator.getFirst(); paramCursor!=NULL; paramCursor=paramIterator.getNext()) { int32_t lri = paramCursor->getLinkageRegisterIndex(); TR::DataType type = paramCursor->getType(); if (lri >= 0) { TR::RealRegister::RegNum regNum; bool twoRegs = (TR::Compiler->target.is32Bit() && type.isInt64() && lri < properties.getNumIntArgRegs()-1); if (!type.isFloatingPoint()) { regNum = properties.getIntegerArgumentRegister(lri); if (paramCursor->isReferencedParameter()) freeScratchable.reset(regNum); if (twoRegs) if (paramCursor->isReferencedParameter()) freeScratchable.reset(regNum+1); } else { regNum = properties.getFloatArgumentRegister(lri); if (paramCursor->isReferencedParameter()) freeScratchable.reset(regNum); if (twoRegs) if (paramCursor->isReferencedParameter()) freeScratchable.reset(regNum+1); } } } for (paramCursor=paramIterator.getFirst(); paramCursor!=NULL; paramCursor=paramIterator.getNext()) { int32_t lri = paramCursor->getLinkageRegisterIndex(); int32_t ai = paramCursor->getAllocatedIndex(); int32_t offset = self()->calculateParameterRegisterOffset(paramCursor->getParameterOffset(), *paramCursor); TR::DataType type = paramCursor->getType(); int32_t dtype = type.getDataType(); // TODO: Is there an accurate assume to insert here ? if (lri >= 0) { if (!paramCursor->isReferencedParameter() && !paramCursor->isParmHasToBeOnStack()) continue; TR::RealRegister::RegNum regNum; bool twoRegs = (TR::Compiler->target.is32Bit() && type.isInt64() && lri < properties.getNumIntArgRegs()-1); if (type.isFloatingPoint()) regNum = properties.getFloatArgumentRegister(lri); else regNum = properties.getIntegerArgumentRegister(lri); // Do not save arguments to the stack if in Full Speed Debug and saveOnly is not set. // If not in Full Speed Debug, the arguments will be saved. if (((ai<0 || self()->hasToBeOnStack(paramCursor)) && !fsd) || (fsd && saveOnly)) { switch (dtype) { case TR::Int8: case TR::Int16: case TR::Int32: { TR::InstOpCode::Mnemonic op = TR::InstOpCode::stw; if (!all_saved) cursor = generateMemSrc1Instruction(self()->cg(), op, firstNode, new (self()->trHeapMemory()) TR::MemoryReference(stackPtr, offset, 4, self()->cg()), REAL_REGISTER(regNum), cursor); } break; case TR::Address: if (!all_saved) cursor = generateMemSrc1Instruction(self()->cg(),TR::InstOpCode::Op_st, firstNode, new (self()->trHeapMemory()) TR::MemoryReference(stackPtr, offset, TR::Compiler->om.sizeofReferenceAddress(), self()->cg()), REAL_REGISTER(regNum), cursor); break; case TR::Int64: if (!all_saved) cursor = generateMemSrc1Instruction(self()->cg(),TR::InstOpCode::Op_st, firstNode, new (self()->trHeapMemory()) TR::MemoryReference(stackPtr, offset, TR::Compiler->om.sizeofReferenceAddress(), self()->cg()), REAL_REGISTER(regNum), cursor); if (twoRegs) { if (!all_saved) cursor = generateMemSrc1Instruction(self()->cg(), TR::InstOpCode::stw, firstNode, new (self()->trHeapMemory()) TR::MemoryReference(stackPtr, offset+4, 4, self()->cg()), REAL_REGISTER(REGNUM(regNum+1)), cursor); if (ai<0) freeScratchable.set(regNum+1); } break; case TR::Float: cursor = generateMemSrc1Instruction(self()->cg(), TR::InstOpCode::stfs, firstNode, new (self()->trHeapMemory()) TR::MemoryReference(stackPtr, offset, 4, self()->cg()), REAL_REGISTER(regNum), cursor); break; case TR::Double: cursor = generateMemSrc1Instruction(self()->cg(), TR::InstOpCode::stfd, firstNode, new (self()->trHeapMemory()) TR::MemoryReference(stackPtr, offset, 8, self()->cg()), REAL_REGISTER(regNum), cursor); break; default: TR_ASSERT(false, "assertion failure"); break; } if (ai<0) freeScratchable.set(regNum); } // Global register is allocated to this argument. // Don't process if in Full Speed Debug and saveOnly is set if (ai>=0 && (!fsd || !saveOnly)) { if (regNum != ai) // Equal assignment: do nothing { if (freeScratchable.isSet(ai)) { cursor = generateTrg1Src1Instruction(self()->cg(), (type.isFloatingPoint()) ? TR::InstOpCode::fmr:TR::InstOpCode::mr, firstNode, REAL_REGISTER(REGNUM(ai)), REAL_REGISTER(regNum), cursor); freeScratchable.reset(ai); freeScratchable.set(regNum); } else // The status of target global register is unclear (i.e. it is a arg reg) { busyMoves[0][busyIndex] = regNum; busyMoves[1][busyIndex] = ai; busyMoves[2][busyIndex] = 0; busyIndex++; } } if (TR::Compiler->target.is32Bit() && type.isInt64()) { int32_t aiLow = paramCursor->getAllocatedLow(); if (!twoRegs) // Low part needs to come from memory { offset += 4; // We are dealing with the low part if (freeScratchable.isSet(aiLow)) { cursor = generateTrg1MemInstruction(self()->cg(), TR::InstOpCode::lwz, firstNode, REAL_REGISTER(REGNUM(aiLow)), new (self()->trHeapMemory()) TR::MemoryReference(stackPtr, offset, 4, self()->cg()), cursor); freeScratchable.reset(aiLow); } else { busyMoves[0][busyIndex] = offset; busyMoves[1][busyIndex] = aiLow; busyMoves[2][busyIndex] = 1; busyIndex++; } } else if (regNum+1 != aiLow) // Low part needs to be moved { if (freeScratchable.isSet(aiLow)) { cursor = generateTrg1Src1Instruction(self()->cg(), TR::InstOpCode::mr, firstNode, REAL_REGISTER(REGNUM(aiLow)), REAL_REGISTER(REGNUM(regNum+1)), cursor); freeScratchable.reset(aiLow); freeScratchable.set(regNum+1); } else { busyMoves[0][busyIndex] = regNum+1; busyMoves[1][busyIndex] = aiLow; busyMoves[2][busyIndex] = 0; busyIndex++; } } } } } // Don't process if in Full Speed Debug and saveOnly is set else if (ai >= 0 && (!fsd || !saveOnly)) // lri<0: arg needs to come from memory { switch (dtype) { case TR::Int8: case TR::Int16: case TR::Int32: if (freeScratchable.isSet(ai)) { cursor = generateTrg1MemInstruction(self()->cg(), TR::InstOpCode::lwz, firstNode, REAL_REGISTER(REGNUM(ai)), new (self()->trHeapMemory()) TR::MemoryReference(stackPtr, offset, 4, self()->cg()), cursor); freeScratchable.reset(ai); } else { busyMoves[0][busyIndex] = offset; busyMoves[1][busyIndex] = ai; busyMoves[2][busyIndex] = 1; busyIndex++; } break; case TR::Address: if (freeScratchable.isSet(ai)) { cursor = generateTrg1MemInstruction(self()->cg(),TR::InstOpCode::Op_load, firstNode, REAL_REGISTER(REGNUM(ai)), new (self()->trHeapMemory()) TR::MemoryReference(stackPtr, offset, TR::Compiler->om.sizeofReferenceAddress(), self()->cg()), cursor); freeScratchable.reset(ai); } else { busyMoves[0][busyIndex] = offset; busyMoves[1][busyIndex] = ai; if (TR::Compiler->target.is64Bit()) busyMoves[2][busyIndex] = 2; else busyMoves[2][busyIndex] = 1; busyIndex++; } break; case TR::Int64: if (TR::Compiler->target.is64Bit()) { if (freeScratchable.isSet(ai)) { cursor = generateTrg1MemInstruction(self()->cg(), TR::InstOpCode::ld, firstNode, REAL_REGISTER(REGNUM(ai)), new (self()->trHeapMemory()) TR::MemoryReference(stackPtr, offset, 8, self()->cg()), cursor); freeScratchable.reset(ai); } else { busyMoves[0][busyIndex] = offset; busyMoves[1][busyIndex] = ai; busyMoves[2][busyIndex] = 2; busyIndex++; } } else // 32-bit { if (freeScratchable.isSet(ai)) { cursor = generateTrg1MemInstruction(self()->cg(), TR::InstOpCode::lwz, firstNode, REAL_REGISTER(REGNUM(ai)), new (self()->trHeapMemory()) TR::MemoryReference(stackPtr, offset, 4, self()->cg()), cursor); freeScratchable.reset(ai); } else { busyMoves[0][busyIndex] = offset; busyMoves[1][busyIndex] = ai; busyMoves[2][busyIndex] = 1; busyIndex++; } ai = paramCursor->getAllocatedLow(); if (freeScratchable.isSet(ai)) { cursor = generateTrg1MemInstruction(self()->cg(), TR::InstOpCode::lwz, firstNode, REAL_REGISTER(REGNUM(ai)), new (self()->trHeapMemory()) TR::MemoryReference(stackPtr, offset+4, 4, self()->cg()), cursor); freeScratchable.reset(ai); } else { busyMoves[0][busyIndex] = offset+4; busyMoves[1][busyIndex] = ai; busyMoves[2][busyIndex] = 1; busyIndex++; } } break; case TR::Float: if (freeScratchable.isSet(ai)) { cursor = generateTrg1MemInstruction(self()->cg(), TR::InstOpCode::lfs, firstNode, REAL_REGISTER(REGNUM(ai)), new (self()->trHeapMemory()) TR::MemoryReference(stackPtr, offset, 4, self()->cg()), cursor); freeScratchable.reset(ai); } else { busyMoves[0][busyIndex] = offset; busyMoves[1][busyIndex] = ai; busyMoves[2][busyIndex] = 3; busyIndex++; } break; case TR::Double: if (freeScratchable.isSet(ai)) { cursor = generateTrg1MemInstruction(self()->cg(), TR::InstOpCode::lfd, firstNode, REAL_REGISTER(REGNUM(ai)), new (self()->trHeapMemory()) TR::MemoryReference(stackPtr, offset, 8, self()->cg()), cursor); freeScratchable.reset(ai); } else { busyMoves[0][busyIndex] = offset; busyMoves[1][busyIndex] = ai; busyMoves[2][busyIndex] = 4; busyIndex++; } break; default: break; } } } if (!fsd || !saveOnly) { bool freeMore = true; int32_t numMoves = busyIndex; while (freeMore && numMoves>0) { freeMore = false; for (i1=0; i1<busyIndex; i1++) { int32_t source = busyMoves[0][i1]; int32_t target = busyMoves[1][i1]; if (!(target<0) && freeScratchable.isSet(target)) { switch(busyMoves[2][i1]) { case 0: cursor = generateTrg1Src1Instruction(self()->cg(), (source<=TR::RealRegister::LastGPR)?TR::InstOpCode::mr:TR::InstOpCode::fmr, firstNode, REAL_REGISTER(REGNUM(target)), REAL_REGISTER(REGNUM(source)), cursor); freeScratchable.set(source); break; case 1: cursor = generateTrg1MemInstruction(self()->cg(), TR::InstOpCode::lwz, firstNode, REAL_REGISTER(REGNUM(target)), new (self()->trHeapMemory()) TR::MemoryReference(stackPtr, source, 4, self()->cg()), cursor); break; case 2: cursor = generateTrg1MemInstruction(self()->cg(), TR::InstOpCode::ld, firstNode, REAL_REGISTER(REGNUM(target)), new (self()->trHeapMemory()) TR::MemoryReference(stackPtr, source, 8, self()->cg()), cursor); break; case 3: cursor = generateTrg1MemInstruction(self()->cg(), TR::InstOpCode::lfs, firstNode, REAL_REGISTER(REGNUM(target)), new (self()->trHeapMemory()) TR::MemoryReference(stackPtr, source, 4, self()->cg()), cursor); break; case 4: cursor = generateTrg1MemInstruction(self()->cg(), TR::InstOpCode::lfd, firstNode, REAL_REGISTER(REGNUM(target)), new (self()->trHeapMemory()) TR::MemoryReference(stackPtr, source, 8, self()->cg()), cursor); break; } freeScratchable.reset(target); freeMore = true; busyMoves[0][i1] = busyMoves[1][i1] = -1; numMoves--; } } } TR_ASSERT(numMoves<=0, "Circular argument register dependency can and should be avoided."); } return(cursor); }
void OMR::CodeGenPhase::performSetupForInstructionSelectionPhase(TR::CodeGenerator * cg, TR::CodeGenPhase * phase) { TR::Compilation *comp = cg->comp(); if (TR::Compiler->target.cpu.isZ() && TR::Compiler->om.shouldGenerateReadBarriersForFieldLoads()) { // TODO (GuardedStorage): We need to come up with a better solution than anchoring aloadi's // to enforce certain evaluation order traceMsg(comp, "GuardedStorage: in performSetupForInstructionSelectionPhase\n"); auto mapAllocator = getTypedAllocator<std::pair<TR::TreeTop*, TR::TreeTop*> >(comp->allocator()); std::map<TR::TreeTop*, TR::TreeTop*, std::less<TR::TreeTop*>, TR::typed_allocator<std::pair<TR::TreeTop* const, TR::TreeTop*>, TR::Allocator> > currentTreeTopToappendTreeTop(std::less<TR::TreeTop*> (), mapAllocator); TR_BitVector *unAnchorableAloadiNodes = comp->getBitVectorPool().get(); for (TR::PreorderNodeIterator iter(comp->getStartTree(), comp); iter != NULL; ++iter) { TR::Node *node = iter.currentNode(); traceMsg(comp, "GuardedStorage: Examining node = %p\n", node); // isNullCheck handles both TR::NULLCHK and TR::ResolveAndNULLCHK // both of which do not operate on their child but their // grandchild (or greatgrandchild). if (node->getOpCode().isNullCheck()) { // An aloadi cannot be anchored if there is a Null Check on // its child. There are two situations where this occurs. // The first is when doing an aloadi off some node that is // being NULLCHK'd (see Ex1). The second is when doing an // icalli in which case the aloadi loads the VFT of an // object that must be NULLCHK'd (see Ex2). // // Ex1: // n1n NULLCHK on n3n // n2n aloadi f <-- First Child And Parent of Null Chk'd Node // n3n aload O // // Ex2: // n1n NULLCHK on n4n // n2n icall foo <-- First Child // n3n aloadi <vft> <-- Parent of Null Chk'd Node // n4n aload O // n4n ==> aload O TR::Node *nodeBeingNullChkd = node->getNullCheckReference(); if (nodeBeingNullChkd) { TR::Node *firstChild = node->getFirstChild(); TR::Node *parentOfNullChkdNode = NULL; if (firstChild->getOpCode().isCall() && firstChild->getOpCode().isIndirect()) { parentOfNullChkdNode = firstChild->getFirstChild(); } else { parentOfNullChkdNode = firstChild; } if (parentOfNullChkdNode && parentOfNullChkdNode->getOpCodeValue() == TR::aloadi && parentOfNullChkdNode->getNumChildren() > 0 && parentOfNullChkdNode->getFirstChild() == nodeBeingNullChkd) { unAnchorableAloadiNodes->set(parentOfNullChkdNode->getGlobalIndex()); traceMsg(comp, "GuardedStorage: Cannot anchor %p\n", firstChild); } } } else { bool shouldAnchorNode = false; if (node->getOpCodeValue() == TR::aloadi && !unAnchorableAloadiNodes->isSet(node->getGlobalIndex())) { shouldAnchorNode = true; } else if (node->getOpCodeValue() == TR::aload && node->getSymbol()->isStatic() && node->getSymbol()->isCollectedReference()) { shouldAnchorNode = true; } if (shouldAnchorNode) { TR::TreeTop* anchorTreeTop = TR::TreeTop::create(comp, TR::Node::create(TR::treetop, 1, node)); TR::TreeTop* appendTreeTop = iter.currentTree(); if (currentTreeTopToappendTreeTop.count(appendTreeTop) > 0) { appendTreeTop = currentTreeTopToappendTreeTop[appendTreeTop]; } // Anchor the aload/aloadi before the current treetop appendTreeTop->insertBefore(anchorTreeTop); currentTreeTopToappendTreeTop[iter.currentTree()] = anchorTreeTop; traceMsg(comp, "GuardedStorage: Anchored %p to treetop = %p\n", node, anchorTreeTop); } } } comp->getBitVectorPool().release(unAnchorableAloadiNodes); } if (cg->shouldBuildStructure() && (comp->getFlowGraph()->getStructure() != NULL)) { TR_Structure *rootStructure = TR_RegionAnalysis::getRegions(comp); comp->getFlowGraph()->setStructure(rootStructure); } phase->reportPhase(SetupForInstructionSelectionPhase); // Dump preIR if (comp->getOption(TR_TraceRegisterPressureDetails) && !comp->getOption(TR_DisableRegisterPressureSimulation)) { traceMsg(comp, " { Post optimization register pressure simulation\n"); TR_BitVector emptyBitVector; vcount_t vc = comp->incVisitCount(); cg->initializeRegisterPressureSimulator(); for (TR::Block *block = comp->getStartBlock(); block; block = block->getNextExtendedBlock()) { TR_LinkHead<TR_RegisterCandidate> emptyCandidateList; TR::CodeGenerator::TR_RegisterPressureState state(NULL, 0, emptyBitVector, emptyBitVector, &emptyCandidateList, cg->getNumberOfGlobalGPRs(), cg->getNumberOfGlobalFPRs(), cg->getNumberOfGlobalVRFs(), vc); TR::CodeGenerator::TR_RegisterPressureSummary summary(state._gprPressure, state._fprPressure, state._vrfPressure); cg->simulateBlockEvaluation(block, &state, &summary); } traceMsg(comp, " }\n"); } TR::LexicalMemProfiler mp(phase->getName(), comp->phaseMemProfiler()); LexicalTimer pt(phase->getName(), comp->phaseTimer()); cg->setUpForInstructionSelection(); }