/**
 * Emits the method epilogue for the 32-bit ARM system linkage.
 *
 * Instructions are appended after \p cursor in this order:
 *   - an ldr of gr14 from [sp + local mapping cursor]
 *   - an ldr for each of gr4 .. gr11 (ascending); register r uses the slot at
 *     [sp + local mapping cursor + (gr11 - r + 1) * 4]
 *   - an add of the frame size to the stack pointer
 *   - a mov of gr14 into gr15
 *
 * @param cursor instruction the epilogue is appended after; its node is
 *               attached to every instruction generated here
 */
void TR::ARMSystemLinkage::createEpilogue(TR::Instruction *cursor)
   {
   TR::CodeGenerator *codeGen = cg();
   TR::Machine *machine = codeGen->machine();
   TR::Node *node = cursor->getNode();
   TR::RealRegister *sp = machine->getRealRegister(getProperties().getStackPointerRegister());
   TR::RealRegister *linkReg = machine->getRealRegister(TR::RealRegister::gr14);
   TR::RealRegister *pcReg = machine->getRealRegister(TR::RealRegister::gr15);

   // Every save slot is addressed relative to the local mapping cursor.
   const auto localsCursor = comp()->getJittedMethodSymbol()->getLocalMappingCursor();

   // restore link register (r14)
   TR::MemoryReference *linkSlot = new (trHeapMemory()) TR::MemoryReference(sp, localsCursor, codeGen);
   cursor = generateMemSrc1Instruction(codeGen, ARMOp_ldr, node, linkSlot, linkReg, cursor);

   // restore all preserved registers (gr4 .. gr11)
   for (int regNum = TR::RealRegister::gr4; regNum <= TR::RealRegister::gr11; ++regNum)
      {
      // gr11 sits 4 bytes above the link register slot, gr4 is the furthest away.
      const int slotDistance = (TR::RealRegister::gr11 - regNum + 1) * 4;
      TR::MemoryReference *saveSlot = new (trHeapMemory()) TR::MemoryReference(sp, slotDistance + localsCursor, codeGen);
      TR::RealRegister *preserved = machine->getRealRegister(static_cast<TR::RealRegister::RegNum>(regNum));
      cursor = generateMemSrc1Instruction(codeGen, ARMOp_ldr, node, saveSlot, preserved, cursor);
      }

   // remove space for preserved registers
   auto frameBytes = codeGen->getFrameSizeInBytes();
   cursor = generateTrg1Src1ImmInstruction(codeGen, ARMOp_add, node, sp, sp, frameBytes, 0, cursor);

   // return using `mov r15, r14`
   cursor = generateTrg1Src1Instruction(codeGen, ARMOp_mov, node, pcReg, linkReg, cursor);
   }
/**
 * Emits the method epilogue for the AArch64 system linkage.
 *
 * Instructions are appended after \p cursor in this order:
 *   - an ldrimmx for every register in x19 .. x28 that reports
 *     getHasBeenAssignedInMethod(), from consecutive 8-byte slots starting at
 *     the local mapping cursor
 *   - a vldrimmd for every register in v8 .. v15 that reports
 *     getHasBeenAssignedInMethod(), continuing from the same running offset
 *   - an ldrimmx of the link register from [sp + 0], only when the machine
 *     reports the link register as killed
 *   - an addimmx of the frame size to sp
 *   - a ret through the link register
 *
 * A frame size that fails constantIsUnsignedImm12() hits TR_UNIMPLEMENTED().
 *
 * @param cursor instruction the epilogue is appended after; its node is
 *               attached to every instruction generated here
 */
void TR::ARM64SystemLinkage::createEpilogue(TR::Instruction *cursor)
   {
   TR::CodeGenerator *codeGen = cg();
   TR::Machine *machine = codeGen->machine();
   TR::Node *node = cursor->getNode();
   TR::RealRegister *stackPtr = machine->getRealRegister(getProperties().getStackPointerRegister());

   // restore callee-saved registers
   uint32_t slotOffset = comp()->getJittedMethodSymbol()->getLocalMappingCursor();

   for (int regIdx = TR::RealRegister::x19; regIdx <= TR::RealRegister::x28; ++regIdx)
      {
      TR::RealRegister *gpr = machine->getRealRegister(static_cast<TR::RealRegister::RegNum>(regIdx));
      if (!gpr->getHasBeenAssignedInMethod())
         continue; // never assigned, so no slot is consumed for it

      TR::MemoryReference *saveSlot = new (trHeapMemory()) TR::MemoryReference(stackPtr, slotOffset, codeGen);
      cursor = generateTrg1MemInstruction(codeGen, TR::InstOpCode::ldrimmx, node, gpr, saveSlot, cursor);
      slotOffset += 8;
      }

   for (int regIdx = TR::RealRegister::v8; regIdx <= TR::RealRegister::v15; ++regIdx)
      {
      TR::RealRegister *fpr = machine->getRealRegister(static_cast<TR::RealRegister::RegNum>(regIdx));
      if (!fpr->getHasBeenAssignedInMethod())
         continue;

      TR::MemoryReference *saveSlot = new (trHeapMemory()) TR::MemoryReference(stackPtr, slotOffset, codeGen);
      cursor = generateTrg1MemInstruction(codeGen, TR::InstOpCode::vldrimmd, node, fpr, saveSlot, cursor);
      slotOffset += 8;
      }

   // restore link register (x30)
   TR::RealRegister *linkReg = machine->getRealRegister(TR::RealRegister::lr);
   if (machine->getLinkRegisterKilled())
      {
      TR::MemoryReference *linkSlot = new (trHeapMemory()) TR::MemoryReference(stackPtr, 0, codeGen);
      cursor = generateTrg1MemInstruction(codeGen, TR::InstOpCode::ldrimmx, node, linkReg, linkSlot, cursor);
      }

   // remove space for preserved registers
   uint32_t frameBytes = codeGen->getFrameSizeInBytes();
   if (!constantIsUnsignedImm12(frameBytes))
      {
      TR_UNIMPLEMENTED();
      }
   else
      {
      cursor = generateTrg1Src1ImmInstruction(codeGen, TR::InstOpCode::addimmx, node, stackPtr, stackPtr, frameBytes, cursor);
      }

   // return
   cursor = generateRegBranchInstruction(codeGen, TR::InstOpCode::ret, node, linkReg, cursor);
   }
uint32_t FrontEnd::calculateSizeOfStackAtlas( bool encodeFourByteOffsets, uint32_t numberOfSlotsMapped, uint32_t bytesPerStackMap, TR::Compilation *comp) { TR::CodeGenerator *cg = comp->cg(); TR::GCStackAtlas * stackAtlas = cg->getStackAtlas(); // Calculate the size of each individual map in the atlas. The fixed // portion of the map contains: // // Low Code Offset (2 or 4) // Stack map (depends on # of mapped parms/locals) // uint32_t sizeOfEncodedCodeOffsetInBytes = encodeFourByteOffsets ? 4 : 2; uint32_t sizeOfSingleEncodedMapInBytes = sizeOfEncodedCodeOffsetInBytes; sizeOfSingleEncodedMapInBytes += bytesPerStackMap; // Calculate the atlas size // uint32_t atlasSize = sizeof(OMR::StackAtlasPOD); ListIterator<TR_GCStackMap> mapIterator(&stackAtlas->getStackMapList()); TR_GCStackMap *mapCursor = mapIterator.getFirst(); while (mapCursor != NULL) { TR_GCStackMap *nextMapCursor = mapIterator.getNext(); if (!mapsAreIdentical(mapCursor, nextMapCursor, stackAtlas, comp)) { atlasSize += sizeOfSingleEncodedMapInBytes; } mapCursor = nextMapCursor; } return atlasSize; }
void TR::ARM64SystemLinkage::createPrologue(TR::Instruction *cursor, List<TR::ParameterSymbol> &parmList) { TR::CodeGenerator *codeGen = cg(); TR::Machine *machine = codeGen->machine(); TR::ResolvedMethodSymbol *bodySymbol = comp()->getJittedMethodSymbol(); const TR::ARM64LinkageProperties& properties = getProperties(); TR::RealRegister *sp = machine->getRealRegister(properties.getStackPointerRegister()); TR::Node *firstNode = comp()->getStartTree()->getNode(); // allocate stack space uint32_t frameSize = (uint32_t)codeGen->getFrameSizeInBytes(); if (constantIsUnsignedImm12(frameSize)) { cursor = generateTrg1Src1ImmInstruction(codeGen, TR::InstOpCode::subimmx, firstNode, sp, sp, frameSize, cursor); } else { TR_UNIMPLEMENTED(); } // save link register (x30) if (machine->getLinkRegisterKilled()) { TR::MemoryReference *stackSlot = new (trHeapMemory()) TR::MemoryReference(sp, 0, codeGen); cursor = generateMemSrc1Instruction(cg(), TR::InstOpCode::strimmx, firstNode, stackSlot, machine->getRealRegister(TR::RealRegister::x30), cursor); } // spill argument registers int32_t nextIntArgReg = 0; int32_t nextFltArgReg = 0; ListIterator<TR::ParameterSymbol> parameterIterator(&parmList); for (TR::ParameterSymbol *parameter = parameterIterator.getFirst(); parameter != NULL && (nextIntArgReg < getProperties().getNumIntArgRegs() || nextFltArgReg < getProperties().getNumFloatArgRegs()); parameter = parameterIterator.getNext()) { TR::MemoryReference *stackSlot = new (trHeapMemory()) TR::MemoryReference(sp, parameter->getParameterOffset(), codeGen); TR::InstOpCode::Mnemonic op; switch (parameter->getDataType()) { case TR::Int8: case TR::Int16: case TR::Int32: case TR::Int64: case TR::Address: if (nextIntArgReg < getProperties().getNumIntArgRegs()) { op = (parameter->getSize() == 8) ? 
TR::InstOpCode::strimmx : TR::InstOpCode::strimmw; cursor = generateMemSrc1Instruction(cg(), op, firstNode, stackSlot, machine->getRealRegister((TR::RealRegister::RegNum)(TR::RealRegister::x0 + nextIntArgReg)), cursor); nextIntArgReg++; } else { nextIntArgReg = getProperties().getNumIntArgRegs() + 1; } break; case TR::Float: case TR::Double: if (nextFltArgReg < getProperties().getNumFloatArgRegs()) { op = (parameter->getSize() == 8) ? TR::InstOpCode::vstrimmd : TR::InstOpCode::vstrimms; cursor = generateMemSrc1Instruction(cg(), op, firstNode, stackSlot, machine->getRealRegister((TR::RealRegister::RegNum)(TR::RealRegister::v0 + nextFltArgReg)), cursor); nextFltArgReg++; } else { nextFltArgReg = getProperties().getNumFloatArgRegs() + 1; } break; case TR::Aggregate: TR_ASSERT(false, "Function parameters of aggregate types are not currently supported on AArch64."); break; default: TR_ASSERT(false, "Unknown parameter type."); } } // save callee-saved registers uint32_t offset = bodySymbol->getLocalMappingCursor(); for (int r = TR::RealRegister::x19; r <= TR::RealRegister::x28; r++) { TR::RealRegister *rr = machine->getRealRegister((TR::RealRegister::RegNum)r); if (rr->getHasBeenAssignedInMethod()) { TR::MemoryReference *stackSlot = new (trHeapMemory()) TR::MemoryReference(sp, offset, codeGen); cursor = generateMemSrc1Instruction(cg(), TR::InstOpCode::strimmx, firstNode, stackSlot, rr, cursor); offset += 8; } } for (int r = TR::RealRegister::v8; r <= TR::RealRegister::v15; r++) { TR::RealRegister *rr = machine->getRealRegister((TR::RealRegister::RegNum)r); if (rr->getHasBeenAssignedInMethod()) { TR::MemoryReference *stackSlot = new (trHeapMemory()) TR::MemoryReference(sp, offset, codeGen); cursor = generateMemSrc1Instruction(cg(), TR::InstOpCode::vstrimmd, firstNode, stackSlot, rr, cursor); offset += 8; } } }
/**
 * Emits the method prologue for the 32-bit ARM system linkage.
 *
 * Instructions are appended after \p cursor in this order:
 *   - an ARMOp_bad instruction, only when TR_EntryBreakPoints is set
 *   - a sub of the frame size from the stack pointer
 *   - a store of each register-passed parameter to its parameter offset
 *   - a str for each of gr4 .. gr11 (ascending); register r uses the slot at
 *     [sp + local mapping cursor + (gr11 - r + 1) * 4]
 *   - a str of gr14 to [sp + local mapping cursor]
 *
 * A TR::Float parameter fails the compilation with UnsupportedParameterType.
 *
 * @param cursor instruction the prologue is appended after
 */
void TR::ARMSystemLinkage::createPrologue(TR::Instruction *cursor)
   {
   TR::CodeGenerator *codeGen = cg();
   const TR::ARMLinkageProperties &properties = getProperties();
   TR::Machine *machine = codeGen->machine();
   TR::ResolvedMethodSymbol *methodSymbol = comp()->getJittedMethodSymbol();
   TR::Node *node = comp()->getStartTree()->getNode();
   TR::RealRegister *sp = machine->getRealRegister(properties.getStackPointerRegister());

   // Entry breakpoint
   //
   if (comp()->getOption(TR_EntryBreakPoints))
      {
      cursor = new (trHeapMemory()) TR::Instruction(cursor, ARMOp_bad, node, codeGen);
      }

   // allocate stack space
   auto frameBytes = codeGen->getFrameSizeInBytes();
   cursor = generateTrg1Src1ImmInstruction(codeGen, ARMOp_sub, node, sp, sp, frameBytes, 0, cursor);

   // spill argument registers
   //
   // The walk stops once both the integer and the floating point argument
   // registers are used up. When a parameter finds its register class
   // exhausted, the counter is set past the limit.
   int nextIntArgReg = 0;
   int nextFltArgReg = 0;
   ListIterator<TR::ParameterSymbol> parmIt(&methodSymbol->getParameterList());
   TR::ParameterSymbol *parm = parmIt.getFirst();
   while (parm != NULL &&
          (nextIntArgReg < properties.getNumIntArgRegs() ||
           nextFltArgReg < properties.getNumFloatArgRegs()))
      {
      TR::MemoryReference *parmSlot = new (trHeapMemory()) TR::MemoryReference(sp, parm->getParameterOffset(), codeGen);

      switch (parm->getDataType())
         {
         case TR::Int8:
         case TR::Int16:
         case TR::Int32:
         case TR::Address:
            if (nextIntArgReg >= properties.getNumIntArgRegs())
               {
               nextIntArgReg = properties.getNumIntArgRegs() + 1;
               }
            else
               {
               TR::RealRegister *argReg = machine->getRealRegister((TR::RealRegister::RegNum)(TR::RealRegister::gr0 + nextIntArgReg));
               cursor = generateMemSrc1Instruction(codeGen, ARMOp_str, node, parmSlot, argReg, cursor);
               nextIntArgReg++;
               }
            break;

         case TR::Int64:
            // A 64-bit value occupies an even/odd register pair.
            nextIntArgReg += nextIntArgReg & 0x1; // round to next even number
            if (nextIntArgReg + 1 < properties.getNumIntArgRegs())
               {
               // First register of the pair goes to the parameter offset ...
               TR::RealRegister *firstReg = machine->getRealRegister((TR::RealRegister::RegNum)(TR::RealRegister::gr0 + nextIntArgReg));
               cursor = generateMemSrc1Instruction(codeGen, ARMOp_str, node, parmSlot, firstReg, cursor);

               // ... and the second register to the word 4 bytes above it.
               TR::MemoryReference *secondSlot = new (trHeapMemory()) TR::MemoryReference(sp, parm->getParameterOffset() + 4, codeGen);
               TR::RealRegister *secondReg = machine->getRealRegister((TR::RealRegister::RegNum)(TR::RealRegister::gr0 + nextIntArgReg + 1));
               cursor = generateMemSrc1Instruction(codeGen, ARMOp_str, node, secondSlot, secondReg, cursor);

               nextIntArgReg += 2;
               }
            else
               {
               nextIntArgReg = properties.getNumIntArgRegs() + 1;
               }
            break;

         case TR::Float:
            comp()->failCompilation<UnsupportedParameterType>("Compiling methods with a single precision floating point parameter is not supported");
            break;

         case TR::Double:
            if (nextFltArgReg >= properties.getNumFloatArgRegs())
               {
               nextFltArgReg = properties.getNumFloatArgRegs() + 1;
               }
            else
               {
               TR::RealRegister *argReg = machine->getRealRegister((TR::RealRegister::RegNum)(TR::RealRegister::fp0 + nextFltArgReg));
               cursor = generateMemSrc1Instruction(codeGen, ARMOp_fstd, node, parmSlot, argReg, cursor);
               nextFltArgReg += 1;
               }
            break;

         case TR::Aggregate:
            TR_ASSERT(false, "Function parameters of aggregate types are not currently supported on ARM.");
         }

      parm = parmIt.getNext();
      }

   // Every save slot is addressed relative to the local mapping cursor.
   const auto localsCursor = methodSymbol->getLocalMappingCursor();

   // save all preserved registers (gr4 .. gr11)
   for (int regNum = TR::RealRegister::gr4; regNum <= TR::RealRegister::gr11; ++regNum)
      {
      // gr11 sits 4 bytes above the link register slot, gr4 is the furthest away.
      const int slotDistance = (TR::RealRegister::gr11 - regNum + 1) * 4;
      TR::MemoryReference *saveSlot = new (trHeapMemory()) TR::MemoryReference(sp, slotDistance + localsCursor, codeGen);
      TR::RealRegister *preserved = machine->getRealRegister((TR::RealRegister::RegNum)regNum);
      cursor = generateMemSrc1Instruction(codeGen, ARMOp_str, node, saveSlot, preserved, cursor);
      }

   // save link register (r14)
   TR::MemoryReference *linkSlot = new (trHeapMemory()) TR::MemoryReference(sp, localsCursor, codeGen);
   cursor = generateMemSrc1Instruction(codeGen, ARMOp_str, node, linkSlot, machine->getRealRegister(TR::RealRegister::gr14), cursor);
   }
/**
 * Tries to turn a constant-length arraycopy node into a single scalar store
 * of a scalar load.
 *
 * \p node is returned untouched when any of the following holds:
 *   - the opt level is noOpt, or TR_ScalarizeSSOps is not set
 *   - \p node is not a TR::arraycopy with exactly three children
 *   - the compilation requires spine checks
 *   - the third child (the length) is not a constant load
 *   - the code generator reports the optimization phase as complete
 *   - the length is negative or larger than TR_MAX_OTYPE_SIZE
 *   - \p useElementType is set and the length differs from the element size
 *   - on 64-bit Power, for an Int64 copy, either address child has a constant
 *     (lconst) offset that is not a multiple of 4
 *   - performTransformation() declines the transformation
 *
 * A zero-length copy is removed rather than scalarized, but only when \p tt
 * is supplied: children that are not safe to decrement recursively are
 * anchored under their own treetops, \p tt is unlinked from the tree list and
 * the reference count of its node is decremented.
 *
 * Otherwise \p node is recreated in place with the opcode and symbol
 * reference of the store produced by scalarizeAddressParameter(), with the
 * produced load as the stored value.
 *
 * @param comp                      current compilation
 * @param node                      candidate arraycopy node; rewritten in place on success
 * @param tt                        treetop holding \p node, or NULL
 * @param useElementType            when true, only a copy of exactly one element is scalarized
 * @param didTransformArrayCopyNode [out] set to true when the node was removed
 *                                  or rewritten, false otherwise
 * @param sourceRef                 incoming value is ignored; overwritten with the
 *                                  generic int shadow symbol reference
 * @param targetRef                 incoming value is ignored; overwritten with the
 *                                  generic int shadow symbol reference
 * @param castToIntegral            not read by this function
 * @return always \p node
 */
TR::Node *
OMR::TransformUtil::scalarizeArrayCopy(
      TR::Compilation *comp,
      TR::Node *node,
      TR::TreeTop *tt,
      bool useElementType,
      bool &didTransformArrayCopyNode,
      TR::SymbolReference *sourceRef,
      TR::SymbolReference *targetRef,
      bool castToIntegral)
   {
   TR::CodeGenerator *cg = comp->cg();

   didTransformArrayCopyNode = false;

   // The order of these checks matters: the third child is only inspected
   // once the node is known to have exactly three children.
   if ((comp->getOptLevel() == noOpt) ||
       !comp->getOption(TR_ScalarizeSSOps) ||
       node->getOpCodeValue() != TR::arraycopy ||
       node->getNumChildren() != 3 ||
       comp->requiresSpineChecks() ||
       !node->getChild(2)->getOpCode().isLoadConst() ||
       cg->getOptimizationPhaseIsComplete())
      return node;

   int64_t byteLen = node->getChild(2)->get64bitIntegralValue();
   if (byteLen == 0)
      {
      if (tt)
         {
         // Anchor the first two children
         if (!node->getFirstChild()->safeToDoRecursiveDecrement())
            TR::TreeTop::create(comp, tt->getPrevTreeTop(), TR::Node::create(TR::treetop, 1, node->getFirstChild()));
         if (!node->getSecondChild()->safeToDoRecursiveDecrement())
            TR::TreeTop::create(comp, tt->getPrevTreeTop(), TR::Node::create(TR::treetop, 1, node->getSecondChild()));

         // Unlink the treetop that held the (now pointless) copy.
         tt->getPrevTreeTop()->join(tt->getNextTreeTop());
         tt->getNode()->recursivelyDecReferenceCount();
         didTransformArrayCopyNode = true;
         }
      return node;
      }
   else if (byteLen < 0)
      {
      return node;
      }
   else if (byteLen > TR_MAX_OTYPE_SIZE)
      {
      return node;
      }

   TR::DataType dataType = TR::Aggregate;

   // Get the element datatype from the (hidden) 4th child
   TR::DataType elementType = node->getArrayCopyElementType();
   int32_t elementSize = TR::Symbol::convertTypeToSize(elementType);

   if (byteLen == elementSize)
      {
      dataType = elementType;
      }
   else if (!useElementType)
      {
      // Any other length keeps the TR::Aggregate default.
      switch (byteLen)
         {
         case 1: dataType = TR::Int8; break;
         case 2: dataType = TR::Int16; break;
         case 4: dataType = TR::Int32; break;
         case 8: dataType = TR::Int64; break;
         }
      }
   else
      {
      return node;
      }

   // load/store double on 64-bit PPC requires offset to be word aligned
   // abort if this requirement is not met.
   // TODO: also need to check if the first two children are aload nodes
   if (TR::Compiler->target.cpu.isPower() &&
       dataType == TR::Int64 &&
       TR::Compiler->target.is64Bit())
      {
      bool cannot_use_load_store_long = false;

      // Both address children are always inspected, so the assert below is
      // evaluated for the second child even when the first one is misaligned.
      TR::Node *addressChildren[2] = { node->getFirstChild(), node->getSecondChild() };
      for (int32_t i = 0; i < 2; i++)
         {
         TR::Node *addressChild = addressChildren[i];
         if (addressChild->getNumChildren() != 2)
            continue;

         TR::Node *offsetChild = addressChild->getSecondChild();
         TR_ASSERT(offsetChild->getOpCodeValue() != TR::iconst, "iconst shouldn't be used for 64-bit array indexing");
         if (offsetChild->getOpCodeValue() == TR::lconst &&
             (offsetChild->getLongInt() & 0x3) != 0)
            cannot_use_load_store_long = true;
         }

      if (cannot_use_load_store_long)
         return node;
      }

   // Both sides of the copy use the generic int shadow.
   targetRef = comp->getSymRefTab()->findOrCreateGenericIntShadowSymbolReference(0);
   sourceRef = targetRef;

   bool trace = comp->getOption(TR_TraceScalarizeSSOps);
   if (trace)
      traceMsg(comp,"scalarizeArrayCopy: node %p got targetRef (#%d) and sourceRef (#%d)\n",
         node,targetRef?targetRef->getReferenceNumber():-1,sourceRef?sourceRef->getReferenceNumber():-1);

   if (targetRef == NULL || sourceRef == NULL)
      {
      if (trace)
         traceMsg(comp,"do not scalarizeArrayCopy node %p : targetRef is NULL (%s) or sourceRef is NULL (%s)\n",node,targetRef?"no":"yes",sourceRef?"no":"yes");
      return node;
      }

#ifdef J9_PROJECT_SPECIFIC
   if (targetRef->getSymbol()->getDataType().isBCD() ||
       sourceRef->getSymbol()->getDataType().isBCD())
      {
      return node;
      }
#endif

   if (performTransformation(comp, "%sScalarize arraycopy 0x%p\n", OPT_DETAILS, node))
      {
      // The second child is the store address, the first child the load address.
      TR::Node *store = TR::TransformUtil::scalarizeAddressParameter(comp, node->getSecondChild(), byteLen, dataType, targetRef, true);
      TR::Node *load = TR::TransformUtil::scalarizeAddressParameter(comp, node->getFirstChild(), byteLen, dataType, sourceRef, false);

      if (tt)
         {
         // Transforming
         //    treetop
         //      arrayCopy   <-- node
         // into
         //    *store
         //
         node->recursivelyDecReferenceCount();
         tt->setNode(node);
         }
      else
         {
         for (int16_t c = node->getNumChildren() - 1; c >= 0; c--)
            cg->recursivelyDecReferenceCount(node->getChild(c));
         }

      // Reuse the arraycopy node itself as the store.
      TR::Node::recreate(node, store->getOpCodeValue());
      node->setSymbolReference(store->getSymbolReference());

      if (store->getOpCode().isStoreIndirect())
         {
         node->setChild(0, store->getFirstChild());
         node->setAndIncChild(1, load);
         node->setNumChildren(2);
         }
      else
         {
         node->setAndIncChild(0, load);
         node->setNumChildren(1);
         }

      didTransformArrayCopyNode = true;
      }

   return node;
   }
uint8_t * FrontEnd::createStackAtlas( bool encodeFourByteOffsets, uint32_t numberOfSlotsMapped, uint32_t bytesPerStackMap, uint8_t *encodedAtlasBaseAddress, uint32_t atlasSizeInBytes, TR::Compilation *comp) { TR::CodeGenerator *cg = comp->cg(); TR::GCStackAtlas *stackAtlas = cg->getStackAtlas(); stackAtlas->setAtlasBits(encodedAtlasBaseAddress); // Calculate the size of each individual map in the atlas. The fixed // portion of the map contains: // // Low Code Offset (2 or 4) // Stack map (depends on # of mapped parms/locals) // uint32_t sizeOfEncodedCodeOffsetInBytes = encodeFourByteOffsets ? 4 : 2; uint32_t sizeOfSingleEncodedMapInBytes = sizeOfEncodedCodeOffsetInBytes; sizeOfSingleEncodedMapInBytes += bytesPerStackMap; // Encode the atlas // OMR::StackAtlasPOD *pyAtlas = (OMR::StackAtlasPOD *)encodedAtlasBaseAddress; pyAtlas->numberOfMaps = stackAtlas->getNumberOfMaps(); pyAtlas->bytesPerStackMap = bytesPerStackMap; // Offset to the MAPPED pyFrameObject parameter // pyAtlas->frameObjectParmOffset = 0; // Lowest stack offset where MAPPED locals begin. // pyAtlas->localBaseOffset = stackAtlas->getLocalBaseOffset(); // Abort if we have overflowed the fields in pyAtlas. // if (bytesPerStackMap > USHRT_MAX || stackAtlas->getNumberOfMaps() > USHRT_MAX || stackAtlas->getNumberOfParmSlotsMapped() > USHRT_MAX || stackAtlas->getParmBaseOffset() < SHRT_MIN || stackAtlas->getParmBaseOffset() > SHRT_MAX || stackAtlas->getLocalBaseOffset() < SHRT_MIN || stackAtlas->getLocalBaseOffset() > SHRT_MAX) { comp->failCompilation<TR::CompilationException>("Overflowed the fields in pyAtlas"); } // Maps are in reverse order in list from what we want in the atlas // so advance to the address where the last map should go and start // building the maps moving back toward the beginning of the atlas. 
// uint8_t *cursorInEncodedAtlas = encodedAtlasBaseAddress + atlasSizeInBytes; ListIterator<TR_GCStackMap> mapIterator(&stackAtlas->getStackMapList()); TR_GCStackMap *mapCursor = mapIterator.getFirst(); while (mapCursor != NULL) { // Move back from the end of the atlas till the current map can be fit in, // then pass the cursor to the routine that actually creates and fills in // the stack map // TR_GCStackMap *nextMapCursor = mapIterator.getNext(); if (!mapsAreIdentical(mapCursor, nextMapCursor, stackAtlas, comp)) { cursorInEncodedAtlas -= sizeOfSingleEncodedMapInBytes; encodeStackMap(mapCursor, cursorInEncodedAtlas, encodeFourByteOffsets, bytesPerStackMap, comp); } mapCursor = nextMapCursor; } return encodedAtlasBaseAddress; }