/** * @return the total instruction length in bytes for setting up arguments */ int32_t TR::S390CallSnippet::instructionCountForArguments(TR::Node * callNode, TR::CodeGenerator * cg) { int32_t intArgNum = 0, floatArgNum = 0, count = 0; TR::Linkage* linkage = cg->getLinkage(callNode->getSymbol()->castToMethodSymbol()->getLinkageConvention()); int32_t argStart = callNode->getFirstArgumentIndex(); for (int32_t i = argStart; i < callNode->getNumChildren(); i++) { TR::Node * child = callNode->getChild(i); switch (child->getDataType()) { case TR::Int8: case TR::Int16: case TR::Int32: if (intArgNum < linkage->getNumIntegerArgumentRegisters()) { count += TR::InstOpCode::getInstructionLength(TR::InstOpCode::ST); } intArgNum++; break; case TR::Address: if (intArgNum < linkage->getNumIntegerArgumentRegisters()) { count += TR::InstOpCode::getInstructionLength(TR::InstOpCode::getLoadOpCode()); } intArgNum++; break; case TR::Int64: if (intArgNum < linkage->getNumIntegerArgumentRegisters()) { count += TR::InstOpCode::getInstructionLength(TR::InstOpCode::getLoadOpCode()); if ((TR::Compiler->target.is32Bit()) && intArgNum < linkage->getNumIntegerArgumentRegisters() - 1) { count += TR::InstOpCode::getInstructionLength(TR::InstOpCode::getLoadOpCode()); } } intArgNum += TR::Compiler->target.is64Bit() ? 1 : 2; break; case TR::Float: if (floatArgNum < linkage->getNumFloatArgumentRegisters()) { count += TR::InstOpCode::getInstructionLength(TR::InstOpCode::LE); } floatArgNum++; break; case TR::Double: if (floatArgNum < linkage->getNumFloatArgumentRegisters()) { count += TR::InstOpCode::getInstructionLength(TR::InstOpCode::LD); } floatArgNum++; break; } } return count; }
// Pushes the arguments of a system-linkage call and reports how many bytes
// were pushed.
//
// The argument children of callNode are visited from the last one down to the
// first argument index, so they are pushed in reverse order. Integer-word,
// address and float arguments account for 4 bytes each; double and long
// arguments account for 8 bytes each. Any other type trips an assertion and
// adds nothing to the total. The deps parameter is not used here.
//
int32_t TR::IA32SystemLinkage::buildArgs(
      TR::Node *callNode,
      TR::RegisterDependencyConditions *deps)
   {
   int32_t bytesPushed = 0;
   int32_t firstArgIndex = callNode->getFirstArgumentIndex();

   // Start one past the last child and step backwards.
   int childIndex = callNode->getNumChildren();
   while (--childIndex >= firstArgIndex)
      {
      TR::Node *arg = callNode->getChild(childIndex);
      int32_t slotBytes = 0;

      switch (arg->getDataType())
         {
         case TR::Int8:
         case TR::Int16:
         case TR::Address:
         case TR::Int32:
            TR::IA32LinkageUtils::pushIntegerWordArg(arg, cg());
            slotBytes = 4;
            break;

         case TR::Float:
            TR::IA32LinkageUtils::pushFloatArg(arg,cg());
            slotBytes = 4;
            break;

         case TR::Double:
            TR::IA32LinkageUtils::pushDoubleArg(arg, cg());
            slotBytes = 8;
            break;

         case TR::Int64:
            TR::IA32LinkageUtils::pushLongArg(arg, cg());
            slotBytes = 8;
            break;

         case TR::Aggregate:
         default:
            TR_ASSERT(0, "Attempted to push unknown type");
            break;
         }

      bytesPushed += slotBytes;
      }

   return bytesPushed;
   }
// Build arguments for system linkage dispatch. // int32_t TR::AMD64SystemLinkage::buildArgs( TR::Node *callNode, TR::RegisterDependencyConditions *deps) { TR::SymbolReference *methodSymRef = callNode->getSymbolReference(); TR::MethodSymbol *methodSymbol = methodSymRef->getSymbol()->castToMethodSymbol(); TR::RealRegister::RegNum noReg = TR::RealRegister::NoReg; TR::RealRegister *espReal = machine()->getX86RealRegister(TR::RealRegister::esp); int32_t firstNodeArgument = callNode->getFirstArgumentIndex(); int32_t lastNodeArgument = callNode->getNumChildren() - 1; int32_t offset = 0; int32_t sizeOfOutGoingArgs= 0; uint16_t numIntArgs = 0, numFloatArgs = 0; int32_t first, last, direction; int32_t numCopiedRegs = 0; TR::Register *copiedRegs[TR::X86LinkageProperties::MaxArgumentRegisters]; if (getProperties().passArgsRightToLeft()) { first = lastNodeArgument; last = firstNodeArgument - 1; direction = -1; } else { first = firstNodeArgument; last = lastNodeArgument + 1; direction = 1; } // If the dispatch is indirect we must add the VFT register to the preconditions // so that it gets register assigned with the other preconditions to the call. 
// if (callNode->getOpCode().isIndirect()) { TR::Node *vftChild = callNode->getFirstChild(); TR_ASSERT(vftChild->getRegister(), "expecting VFT child to be evaluated"); TR::RealRegister::RegNum scratchRegIndex = getProperties().getIntegerScratchRegister(1); deps->addPreCondition(vftChild->getRegister(), scratchRegIndex, cg()); } int32_t i; for (i = first; i != last; i += direction) { TR::parmLayoutResult layoutResult; TR::RealRegister::RegNum rregIndex = noReg; TR::Node *child = callNode->getChild(i); layoutParm(child, sizeOfOutGoingArgs, numIntArgs, numFloatArgs, layoutResult); if (layoutResult.abstract & TR::parmLayoutResult::IN_LINKAGE_REG_PAIR) { // TODO: AMD64 SysV ABI might put a struct into a pair of linkage registerr TR_ASSERT(false, "haven't support linkage_reg_pair yet.\n"); } else if (layoutResult.abstract & TR::parmLayoutResult::IN_LINKAGE_REG) { TR_RegisterKinds regKind = layoutResult.regs[0].regKind; uint32_t regIndex = layoutResult.regs[0].regIndex; TR_ASSERT(regKind == TR_GPR || regKind == TR_FPR, "linkage registers includes TR_GPR and TR_FPR\n"); rregIndex = (regKind == TR_FPR) ? getProperties().getFloatArgumentRegister(regIndex): getProperties().getIntegerArgumentRegister(regIndex); } else { offset = layoutResult.offset; } TR::Register *vreg; vreg = cg()->evaluate(child); bool needsStackOffsetUpdate = false; if (rregIndex != noReg) { // For NULL JNI reference parameters, it is possible that the NULL value will be evaluated into // a different register than the child. In that case it is not necessary to copy the temporary scratch // register across the call. 
// if ((child->getReferenceCount() > 1) && (vreg == child->getRegister())) { TR::Register *argReg = cg()->allocateRegister(); if (vreg->containsCollectedReference()) argReg->setContainsCollectedReference(); generateRegRegInstruction(TR::Linkage::movOpcodes(RegReg, movType(child->getDataType())), child, argReg, vreg, cg()); vreg = argReg; copiedRegs[numCopiedRegs++] = vreg; } deps->addPreCondition(vreg, rregIndex, cg()); } else { // Ideally, we would like to push rather than move generateMemRegInstruction(TR::Linkage::movOpcodes(MemReg, fullRegisterMovType(vreg)), child, generateX86MemoryReference(espReal, offset, cg()), vreg, cg()); } cg()->decReferenceCount(child); } // Now that we're finished making the preconditions, all the interferences // are established and we can kill these regs. // for (i = 0; i < numCopiedRegs; i++) cg()->stopUsingRegister(copiedRegs[i]); deps->stopAddingPreConditions(); return sizeOfOutGoingArgs; }
/**
 * Builds the outgoing arguments of a system-linkage call.
 *
 * The work is done in these phases:
 *   1. Count how many arguments do not fit in the integer / float argument
 *      registers and therefore have to be passed in memory.
 *   2. Evaluate every argument. Register arguments are attached to their
 *      linkage register through the dependency conditions; memory arguments
 *      are recorded in a temporary table (pushToMemory).
 *   3. Add dependencies without a virtual register for the integer argument
 *      registers that were not used and for the non-preserved FP registers
 *      beyond the float argument registers that were used.
 *   4. If there are memory arguments, compute the base of the outgoing
 *      argument area below the stack pointer and emit the recorded stores.
 *
 * When the first integer (or float) argument shares x0 (or v0) with a result
 * of matching kind, separate pre- and post-conditions are added for that
 * register, with a freshly allocated register on the post side.
 *
 * @param callNode     the call node whose argument children are processed
 * @param dependencies the register dependency conditions to be filled in
 * @return the size of the memory argument area: 8 bytes per memory argument,
 *         rounded up to a multiple of 16
 */
int32_t TR::ARM64SystemLinkage::buildArgs(TR::Node *callNode,
   TR::RegisterDependencyConditions *dependencies)
   {
   const TR::ARM64LinkageProperties &properties = getProperties();
   TR::ARM64MemoryArgument *pushToMemory = NULL;
   // Only allocated when at least one argument is passed in memory.
   TR::Register *argMemReg = NULL;
   TR::Register *tempReg;
   int32_t argIndex = 0;
   int32_t numMemArgs = 0;
   int32_t numIntegerArgs = 0;
   int32_t numFloatArgs = 0;
   int32_t totalSize;
   int32_t i;

   TR::Node *child;
   TR::DataType childType;
   TR::DataType resType = callNode->getType();

   uint32_t firstArgumentChild = callNode->getFirstArgumentIndex();

   /* Step 1 - figure out how many arguments are going to be spilled to memory i.e. not in registers */
   for (i = firstArgumentChild; i < callNode->getNumChildren(); i++)
      {
      child = callNode->getChild(i);
      childType = child->getDataType();

      switch (childType)
         {
         case TR::Int8:
         case TR::Int16:
         case TR::Int32:
         case TR::Int64:
         case TR::Address:
            if (numIntegerArgs >= properties.getNumIntArgRegs())
               numMemArgs++;
            numIntegerArgs++;
            break;

         case TR::Float:
         case TR::Double:
            if (numFloatArgs >= properties.getNumFloatArgRegs())
               numMemArgs++;
            numFloatArgs++;
            break;

         default:
            TR_ASSERT(false, "Argument type %s is not supported\n", childType.toString());
         }
      }

   // From here, down, any new stack allocations will expire / die when the function returns
   TR::StackMemoryRegion stackMemoryRegion(*trMemory());
   /* End result of Step 1 - determined number of memory arguments! */
   if (numMemArgs > 0)
      {
      pushToMemory = new (trStackMemory()) TR::ARM64MemoryArgument[numMemArgs];

      argMemReg = cg()->allocateRegister();
      }

   // Every memory argument occupies 8 bytes in the outgoing area.
   totalSize = numMemArgs * 8;
   // align to 16-byte boundary
   totalSize = (totalSize + 15) & (~15);

   // Restart the counters: the second pass assigns registers in the same order.
   numIntegerArgs = 0;
   numFloatArgs = 0;

   for (i = firstArgumentChild; i < callNode->getNumChildren(); i++)
      {
      TR::Register *argRegister;
      TR::InstOpCode::Mnemonic op;

      child = callNode->getChild(i);
      childType = child->getDataType();

      switch (childType)
         {
         case TR::Int8:
         case TR::Int16:
         case TR::Int32:
         case TR::Int64:
         case TR::Address:
            if (childType == TR::Address)
               argRegister = pushAddressArg(child);
            else if (childType == TR::Int64)
               argRegister = pushLongArg(child);
            else
               argRegister = pushIntegerWordArg(child);

            if (numIntegerArgs < properties.getNumIntArgRegs())
               {
               // The child's register cannot be clobbered, so pass a copy instead.
               if (!cg()->canClobberNodesRegister(child, 0))
                  {
                  if (argRegister->containsCollectedReference())
                     tempReg = cg()->allocateCollectedReferenceRegister();
                  else
                     tempReg = cg()->allocateRegister();
                  generateMovInstruction(cg(), callNode, tempReg, argRegister);
                  argRegister = tempReg;
                  }
               if (numIntegerArgs == 0 &&
                  (resType.isAddress() || resType.isInt32() || resType.isInt64()))
                  {
                  // x0 carries both the first integer argument and the result.
                  TR::Register *resultReg;
                  if (resType.isAddress())
                     resultReg = cg()->allocateCollectedReferenceRegister();
                  else
                     resultReg = cg()->allocateRegister();

                  dependencies->addPreCondition(argRegister, TR::RealRegister::x0);
                  dependencies->addPostCondition(resultReg, TR::RealRegister::x0);
                  }
               else
                  {
                  addDependency(dependencies, argRegister, properties.getIntegerArgumentRegister(numIntegerArgs), TR_GPR, cg());
                  }
               }
            else
               {
               // numIntegerArgs >= properties.getNumIntArgRegs()
               if (childType == TR::Address || childType == TR::Int64)
                  {
                  op = TR::InstOpCode::strpostx;
                  }
               else
                  {
                  op = TR::InstOpCode::strpostw;
                  }
               // Record the store; it is emitted after all arguments are evaluated.
               getOutgoingArgumentMemRef(argMemReg, argRegister, op, pushToMemory[argIndex++]);
               }
            numIntegerArgs++;
            break;

         case TR::Float:
         case TR::Double:
            if (childType == TR::Float)
               argRegister = pushFloatArg(child);
            else
               argRegister = pushDoubleArg(child);

            if (numFloatArgs < properties.getNumFloatArgRegs())
               {
               // The child's register cannot be clobbered, so pass a copy instead.
               if (!cg()->canClobberNodesRegister(child, 0))
                  {
                  tempReg = cg()->allocateRegister(TR_FPR);
                  op = (childType == TR::Float) ? TR::InstOpCode::fmovs : TR::InstOpCode::fmovd;
                  generateTrg1Src1Instruction(cg(), op, callNode, tempReg, argRegister);
                  argRegister = tempReg;
                  }
               if ((numFloatArgs == 0 && resType.isFloatingPoint()))
                  {
                  // v0 carries both the first float argument and the result.
                  TR::Register *resultReg;
                  if (resType.getDataType() == TR::Float)
                     resultReg = cg()->allocateSinglePrecisionRegister();
                  else
                     resultReg = cg()->allocateRegister(TR_FPR);

                  dependencies->addPreCondition(argRegister, TR::RealRegister::v0);
                  dependencies->addPostCondition(resultReg, TR::RealRegister::v0);
                  }
               else
                  {
                  addDependency(dependencies, argRegister, properties.getFloatArgumentRegister(numFloatArgs), TR_FPR, cg());
                  }
               }
            else
               {
               // numFloatArgs >= properties.getNumFloatArgRegs()
               if (childType == TR::Double)
                  {
                  op = TR::InstOpCode::vstrpostd;
                  }
               else
                  {
                  op = TR::InstOpCode::vstrposts;
                  }
               // Record the store; it is emitted after all arguments are evaluated.
               getOutgoingArgumentMemRef(argMemReg, argRegister, op, pushToMemory[argIndex++]);
               }
            numFloatArgs++;
            break;
         } // end of switch
      } // end of for

   // NULL deps for non-preserved and non-system regs
   while (numIntegerArgs < properties.getNumIntArgRegs())
      {
      if (numIntegerArgs == 0 && resType.isAddress())
         {
         dependencies->addPreCondition(cg()->allocateRegister(), properties.getIntegerArgumentRegister(0));
         dependencies->addPostCondition(cg()->allocateCollectedReferenceRegister(), properties.getIntegerArgumentRegister(0));
         }
      else
         {
         addDependency(dependencies, NULL, properties.getIntegerArgumentRegister(numIntegerArgs), TR_GPR, cg());
         }
      numIntegerArgs++;
      }

   // Float argument registers actually holding arguments; capped at the register count.
   int32_t floatRegsUsed = (numFloatArgs > properties.getNumFloatArgRegs()) ? properties.getNumFloatArgRegs() : numFloatArgs;
   for (i = (TR::RealRegister::RegNum)((uint32_t)TR::RealRegister::v0 + floatRegsUsed); i <= TR::RealRegister::LastFPR; i++)
      {
      if (!properties.getPreserved((TR::RealRegister::RegNum)i))
         {
         // NULL dependency for non-preserved regs
         addDependency(dependencies, NULL, (TR::RealRegister::RegNum)i, TR_FPR, cg());
         }
      }

   if (numMemArgs > 0)
      {
      // argMemReg = sp - totalSize, then emit the stores recorded above in order.
      TR::RealRegister *sp = cg()->machine()->getRealRegister(properties.getStackPointerRegister());
      generateTrg1Src1ImmInstruction(cg(), TR::InstOpCode::subimmx, callNode, argMemReg, sp, totalSize);

      for (argIndex = 0; argIndex < numMemArgs; argIndex++)
         {
         TR::Register *aReg = pushToMemory[argIndex].argRegister;
         generateMemSrc1Instruction(cg(), pushToMemory[argIndex].opCode, callNode, pushToMemory[argIndex].argMemory, aReg);
         cg()->stopUsingRegister(aReg);
         }

      cg()->stopUsingRegister(argMemReg);
      }

   return totalSize;
   }
uint8_t * TR::S390CallSnippet::S390flushArgumentsToStack(uint8_t * buffer, TR::Node * callNode, int32_t argSize, TR::CodeGenerator * cg) { int32_t intArgNum = 0, floatArgNum = 0, offset; TR::Machine *machine = cg->machine(); TR::Linkage * linkage = cg->getLinkage(callNode->getSymbol()->castToMethodSymbol()->getLinkageConvention()); int32_t argStart = callNode->getFirstArgumentIndex(); bool rightToLeft = linkage->getRightToLeft() && //we want the arguments for induceOSR to be passed from left to right as in any other non-helper call !callNode->getSymbolReference()->isOSRInductionHelper(); if (rightToLeft) { offset = linkage->getOffsetToFirstParm(); } else { offset = argSize + linkage->getOffsetToFirstParm(); } for (int32_t i = argStart; i < callNode->getNumChildren(); i++) { TR::Node * child = callNode->getChild(i); switch (child->getDataType()) { case TR::Int8: case TR::Int16: case TR::Int32: if (!rightToLeft) { offset -= TR::Compiler->target.is64Bit() ? 8 : 4; } if (intArgNum < linkage->getNumIntegerArgumentRegisters()) { buffer = storeArgumentItem(TR::InstOpCode::ST, buffer, machine->getRealRegister(linkage->getIntegerArgumentRegister(intArgNum)), offset, cg); } intArgNum++; if (rightToLeft) { offset += TR::Compiler->target.is64Bit() ? 8 : 4; } break; case TR::Address: if (!rightToLeft) { offset -= TR::Compiler->target.is64Bit() ? 8 : 4; } if (intArgNum < linkage->getNumIntegerArgumentRegisters()) { buffer = storeArgumentItem(TR::InstOpCode::getStoreOpCode(), buffer, machine->getRealRegister(linkage->getIntegerArgumentRegister(intArgNum)), offset, cg); } intArgNum++; if (rightToLeft) { offset += TR::Compiler->target.is64Bit() ? 8 : 4; } break; case TR::Int64: if (!rightToLeft) { offset -= (TR::Compiler->target.is64Bit() ? 
16 : 8); } if (intArgNum < linkage->getNumIntegerArgumentRegisters()) { if (TR::Compiler->target.is64Bit()) { buffer = storeArgumentItem(TR::InstOpCode::STG, buffer, machine->getRealRegister(linkage->getIntegerArgumentRegister(intArgNum)), offset, cg); } else { buffer = storeArgumentItem(TR::InstOpCode::ST, buffer, machine->getRealRegister(linkage->getIntegerArgumentRegister(intArgNum)), offset, cg); if (intArgNum < linkage->getNumIntegerArgumentRegisters() - 1) { buffer = storeArgumentItem(TR::InstOpCode::ST, buffer, machine->getRealRegister(linkage->getIntegerArgumentRegister(intArgNum + 1)), offset + 4, cg); } } } intArgNum += TR::Compiler->target.is64Bit() ? 1 : 2; if (rightToLeft) { offset += TR::Compiler->target.is64Bit() ? 16 : 8; } break; case TR::Float: if (!rightToLeft) { offset -= TR::Compiler->target.is64Bit() ? 8 : 4; } if (floatArgNum < linkage->getNumFloatArgumentRegisters()) { buffer = storeArgumentItem(TR::InstOpCode::STE, buffer, machine->getRealRegister(linkage->getFloatArgumentRegister(floatArgNum)), offset, cg); } floatArgNum++; if (rightToLeft) { offset += TR::Compiler->target.is64Bit() ? 8 : 4; } break; case TR::Double: if (!rightToLeft) { offset -= TR::Compiler->target.is64Bit() ? 16 : 8; } if (floatArgNum < linkage->getNumFloatArgumentRegisters()) { buffer = storeArgumentItem(TR::InstOpCode::STD, buffer, machine->getRealRegister(linkage->getFloatArgumentRegister(floatArgNum)), offset, cg); } floatArgNum++; if (rightToLeft) { offset += TR::Compiler->target.is64Bit() ? 16 : 8; } break; } } return buffer; }
void TR_Debug::print(TR::FILE *pOutFile, TR::S390CallSnippet * snippet) { uint8_t * bufferPos = snippet->getSnippetLabel()->getCodeLocation(); TR::Node * callNode = snippet->getNode(); TR::SymbolReference * methodSymRef = snippet->getRealMethodSymbolReference(); if(!methodSymRef) methodSymRef = callNode->getSymbolReference(); TR::MethodSymbol * methodSymbol = methodSymRef->getSymbol()->castToMethodSymbol(); TR::SymbolReference * glueRef; int8_t padbytes = snippet->getPadBytes(); printSnippetLabel(pOutFile, snippet->getSnippetLabel(), bufferPos, methodSymRef->isUnresolved() ? "Unresolved Call Snippet" : "Call Snippet"); bufferPos = printS390ArgumentsFlush(pOutFile, callNode, bufferPos, snippet->getSizeOfArguments()); if (methodSymRef->isUnresolved() || _comp->compileRelocatableCode()) { if (methodSymbol->isSpecial()) { glueRef = _cg->getSymRef(TR_S390interpreterUnresolvedSpecialGlue); } else if (methodSymbol->isStatic()) { glueRef = _cg->getSymRef(TR_S390interpreterUnresolvedStaticGlue); } else { glueRef = _cg->getSymRef(TR_S390interpreterUnresolvedDirectVirtualGlue); } } else { bool synchronised = methodSymbol->isSynchronised(); if ((methodSymbol->isVMInternalNative() || methodSymbol->isJITInternalNative())) { glueRef = _cg->getSymRef(TR_S390nativeStaticHelper); } else { switch (callNode->getDataType()) { case TR::NoType: if (synchronised) { glueRef = _cg->getSymRef(TR_S390interpreterSyncVoidStaticGlue); } else { glueRef = _cg->getSymRef(TR_S390interpreterVoidStaticGlue); } break; case TR::Int8: case TR::Int16: case TR::Int32: if (synchronised) { glueRef = _cg->getSymRef(TR_S390interpreterSyncIntStaticGlue); } else { glueRef = _cg->getSymRef(TR_S390interpreterIntStaticGlue); } break; case TR::Address: if (TR::Compiler->target.is64Bit()) { if (synchronised) { glueRef = _cg->getSymRef(TR_S390interpreterSyncLongStaticGlue); } else { glueRef = _cg->getSymRef(TR_S390interpreterLongStaticGlue); } } else { if (synchronised) { glueRef = 
_cg->getSymRef(TR_S390interpreterSyncIntStaticGlue); } else { glueRef = _cg->getSymRef(TR_S390interpreterIntStaticGlue); } } break; case TR::Int64: if (synchronised) { glueRef = _cg->getSymRef(TR_S390interpreterSyncLongStaticGlue); } else { glueRef = _cg->getSymRef(TR_S390interpreterLongStaticGlue); } break; case TR::Float: if (synchronised) { glueRef = _cg->getSymRef(TR_S390interpreterSyncFloatStaticGlue); } else { glueRef = _cg->getSymRef(TR_S390interpreterFloatStaticGlue); } break; case TR::Double: if (synchronised) { glueRef = _cg->getSymRef(TR_S390interpreterSyncDoubleStaticGlue); } else { glueRef = _cg->getSymRef(TR_S390interpreterDoubleStaticGlue); } break; default: TR_ASSERT(0, "Bad return data type for a call node. DataType was %s\n", getName(callNode->getDataType())); } } } bufferPos = printRuntimeInstrumentationOnOffInstruction(pOutFile, bufferPos, false); // RIOFF if (snippet->getKind() == TR::Snippet::IsUnresolvedCall) { int lengthOfLoad = (TR::Compiler->target.is64Bit())?6:4; printPrefix(pOutFile, NULL, bufferPos, 6); trfprintf(pOutFile, "LARL \tGPR14, *+%d <%p>\t# Start of Data Const.", 8 + lengthOfLoad + padbytes, bufferPos + 8 + lengthOfLoad + padbytes); bufferPos += 6; if (TR::Compiler->target.is64Bit()) { printPrefix(pOutFile, NULL, bufferPos, 6); trfprintf(pOutFile, "LG \tGPR_EP, 0(,GPR14)"); bufferPos += 6; } else { printPrefix(pOutFile, NULL, bufferPos, 4); trfprintf(pOutFile, "L \tGPR_EP, 0(,GPR14)"); bufferPos += 4; } printPrefix(pOutFile, NULL, bufferPos, 2); trfprintf(pOutFile, "BCR \tGPR_EP"); bufferPos += 2; } else { printPrefix(pOutFile, NULL, bufferPos, 6); trfprintf(pOutFile, "BRASL \tGPR14, <%p>\t# Branch to Helper Method %s", snippet->getSnippetDestAddr(), snippet->usedTrampoline()?"- Trampoline Used.":""); bufferPos += 6; } if (padbytes == 2) { printPrefix(pOutFile, NULL, bufferPos, 2); trfprintf(pOutFile, "DC \t0x0000 \t\t\t# 2-bytes padding for alignment"); bufferPos += 2; } else if (padbytes == 4) { printPrefix(pOutFile, NULL, 
bufferPos, 4) ; trfprintf(pOutFile, "DC \t0x00000000 \t\t# 4-bytes padding for alignment"); bufferPos += 4; } else if (padbytes == 6) { printPrefix(pOutFile, NULL, bufferPos, 6) ; trfprintf(pOutFile, "DC \t0x000000000000 \t\t# 6-bytes padding for alignment"); bufferPos += 6; } printPrefix(pOutFile, NULL, bufferPos, sizeof(intptrj_t)); trfprintf(pOutFile, "DC \t%p \t\t# Method Address", glueRef->getMethodAddress()); bufferPos += sizeof(intptrj_t); printPrefix(pOutFile, NULL, bufferPos, sizeof(intptrj_t)); trfprintf(pOutFile, "DC \t%p \t\t# Call Site RA", snippet->getCallRA()); bufferPos += sizeof(intptrj_t); if (methodSymRef->isUnresolved()) { printPrefix(pOutFile, NULL, bufferPos, 0); } else { printPrefix(pOutFile, NULL, bufferPos, sizeof(intptrj_t)); } trfprintf(pOutFile, "DC \t%p \t\t# Method Pointer", methodSymRef->isUnresolved() ? 0 : methodSymbol->getMethodAddress()); }
// Checks for syntactic equivalence and returns the side-table index
// of the syntactically equivalent node if it found one; else it returns
// -1 signifying that this is the first time any node similar syntactically
// to this node has been seen. Adds the node to the hash table if seen for the
// first time.
//
// Note that 0 is returned early, without any lookup, when the relevant
// subtree contains a call-like sub-expression and either the node is not a
// store or the call is on the LHS of the store.
//
// node                 - the node to look up (and possibly record)
// recalcContainsCall   - when false and the node is not a NULLCHK, the value
//                        of node->containsCall() and the passed-in
//                        storeLhsContainsCall are used as they are; otherwise
//                        both are recomputed through containsCall()
// storeLhsContainsCall - whether a call-like sub-expression is present in the
//                        LHS of the store; only meaningful as an input when
//                        nothing is recomputed
//
// Global stores (node and hash table entries alike) are temporarily turned
// into loads while they are compared, and are restored before returning.
//
int TR_LocalAnalysisInfo::hasOldExpressionOnRhs(TR::Node *node, bool recalcContainsCall, bool storeLhsContainsCall)
   {
   //
   // Get the relevant portion of the subtree
   // for this node; this is different for a null check
   // as its null check reference is the only
   // sub-expression that matters
   //
   TR::Node *relevantSubtree = NULL;
   if (node->getOpCodeValue() == TR::NULLCHK)
      relevantSubtree = node->getNullCheckReference();
   else
      relevantSubtree = node;

   // containsCall checks whether the relevant node has some
   // sub-expression that cannot be commoned, e.g. call or a new
   //
   bool nodeContainsCall;
   if (!recalcContainsCall && (relevantSubtree == node))
      {
      // can use pre-calculated value of containsCall and storeLhsContainsCall, to avoid visitCount overflow
      nodeContainsCall = node->containsCall();
      }
   else
      {
      storeLhsContainsCall = false;
      nodeContainsCall = containsCall(relevantSubtree, storeLhsContainsCall);
      }

   if (nodeContainsCall)
      {
      //
      // If the node is not a store, a call-like sub-expression is inadmissable;
      // if the node is a store, a call-like sub-expression is allowed on the RHS
      // of the store as this does not inhibit privatization in any way as
      // the private temp store's RHS simply points at original RHS. But if a call-like
      // sub-expression is present in the LHS of the store, that is inadmissable
      //
      if (!node->getOpCode().isStore() ||
          storeLhsContainsCall)
         return 0;
      }

   // State needed to undo the temporary store -> load conversion of 'node'.
   // seenIndirectStore is set for every converted global store, whether it
   // is direct or indirect.
   bool seenIndirectStore = false;
#ifdef J9_PROJECT_SPECIFIC
   bool seenIndirectBCDStore = false;
#endif
   bool seenWriteBarrier = false;
   int32_t storeNumChildren = node->getNumChildren();

   // If node is a null check, compare the
   // null check reference only to establish syntactic equivalence
   //
   if (node->getOpCodeValue() == TR::NULLCHK)
   /////if (node->getOpCode().isNullCheck())
      {
      // Null checks are kept in their own array and searched linearly.
      int32_t k;
      for (k=0;k<_numNullChecks;k++)
         {
         if (!(_nullCheckNodesAsArray[k] == NULL))
            {
            if (areSyntacticallyEquivalent(_nullCheckNodesAsArray[k]->getNullCheckReference(), node->getNullCheckReference()))
               return _nullCheckNodesAsArray[k]->getLocalIndex();
            }
         }

      // Not seen before: remember it for later lookups.
      _nullCheckNodesAsArray[_numNullChecks++] = node;
      }
   else
      {
      //
      // If this node is a global store, then equivalence check is different.
      // We try to give a store to field (or static) o.f the same index as
      // a load of o.f. This is so that privatization happens for fields/statics.
      // So the store's opcode value is changed temporarily to be a load before
      // syntactic equivalence is checked; this enables matching stores/loads to
      // same global symbol.
      //
      if (node->getOpCode().isStore() &&
          !node->getSymbolReference()->getSymbol()->isAutoOrParm())
         {
         if (node->getOpCode().isWrtBar())
            seenWriteBarrier = true;
#ifdef J9_PROJECT_SPECIFIC
         seenIndirectBCDStore = node->getType().isBCD();
#endif
         if (node->getOpCode().isStoreIndirect())
            {
            if (seenWriteBarrier)
               {
               TR::Node::recreate(node, _compilation->il.opCodeForIndirectArrayLoad(node->getDataType()));
               }
            else
               {
               // NOTE(review): despite its name this helper is applied to a store
               // opcode here and presumably yields the matching load -- confirm.
               TR::Node::recreate(node, _compilation->il.opCodeForCorrespondingIndirectStore(node->getOpCodeValue()));
               }
            // Only the first child is kept visible while the node poses as an indirect load.
            node->setNumChildren(1);
            }
         else
            {
            TR::Node::recreate(node, _compilation->il.opCodeForDirectLoad(node->getDataType()));
            // No children are visible while the node poses as a direct load.
            node->setNumChildren(0);
            }
#ifdef J9_PROJECT_SPECIFIC
         if (seenIndirectBCDStore)
            node->setBCDStoreIsTemporarilyALoad(true);
#endif
         seenIndirectStore = true;
         }

      // The hash is computed on the (possibly converted) node.
      int32_t hashValue = _hashTable->hash(node);
      HashTable::Cursor cursor(_hashTable, hashValue);
      TR::Node *other;
      for (other = cursor.firstNode(); other; other = cursor.nextNode())
         {
         // Convert other node's opcode to be a load temporarily
         // (only for syntactic equivalence check; see explanation above)
         // to enable matching global stores/loads.
         //
         bool seenOtherIndirectStore = false;
#ifdef J9_PROJECT_SPECIFIC
         bool seenOtherIndirectBCDStore = false;
#endif
         bool seenOtherWriteBarrier = false;
         int32_t otherStoreNumChildren = other->getNumChildren();
         if (other->getOpCode().isStore() &&
             !other->getSymbolReference()->getSymbol()->isAutoOrParm())
            {
            if (other->getOpCode().isWrtBar())
               seenOtherWriteBarrier = true;
#ifdef J9_PROJECT_SPECIFIC
            seenOtherIndirectBCDStore = other->getType().isBCD();
#endif
            if (other->getOpCode().isStoreIndirect())
               {
               if (seenOtherWriteBarrier)
                  {
                  TR::Node::recreate(other, _compilation->il.opCodeForIndirectArrayLoad(other->getDataType()));
                  }
               else
                  {
                  TR::Node::recreate(other, _compilation->il.opCodeForCorrespondingIndirectStore(other->getOpCodeValue()));
                  }
               other->setNumChildren(1);
               }
            else
               {
               TR::Node::recreate(other, _compilation->il.opCodeForDirectLoad(other->getDataType()));
               other->setNumChildren(0);
               }
#ifdef J9_PROJECT_SPECIFIC
            if (seenOtherIndirectBCDStore)
               other->setBCDStoreIsTemporarilyALoad(true);
#endif
            seenOtherIndirectStore = true;
            }

         bool areSame = areSyntacticallyEquivalent(node, other);

         // Restore the other node's state to what it was originally
         // (if it was a global store)
         //
         if (seenOtherWriteBarrier)
            {
            other->setNumChildren(otherStoreNumChildren);

            // The original child count selects which write barrier opcode to restore.
            if (otherStoreNumChildren == 3)
               TR::Node::recreate(other, TR::awrtbari);
            else
               TR::Node::recreate(other, TR::awrtbar);
            }
         else if (seenOtherIndirectStore)
            {
            other->setNumChildren(otherStoreNumChildren);

#ifdef J9_PROJECT_SPECIFIC
            if (seenOtherIndirectBCDStore)
               other->setBCDStoreIsTemporarilyALoad(false);
#endif

            // 'other' still carries its temporary load opcode at this point.
            if (other->getOpCode().isIndirect())
               TR::Node::recreate(other, _compilation->il.opCodeForCorrespondingIndirectLoad(other->getOpCodeValue()));
            else
               TR::Node::recreate(other, _compilation->il.opCodeForDirectStore(other->getDataType()));
            }

         if (areSame)
            {
            // Match found: restore 'node' here as well, because this path
            // returns before the restore code at the end of the function.
            if (seenWriteBarrier)
               {
               node->setNumChildren(storeNumChildren);

               if (storeNumChildren == 3)
                  TR::Node::recreate(node, TR::awrtbari);
               else
                  TR::Node::recreate(node, TR::awrtbar);
               }
            else if (seenIndirectStore)
               {
               node->setNumChildren(storeNumChildren);

#ifdef J9_PROJECT_SPECIFIC
               if (seenIndirectBCDStore)
                  node->setBCDStoreIsTemporarilyALoad(false);
#endif

               if (node->getOpCode().isIndirect())
                  TR::Node::recreate(node, _compilation->il.opCodeForCorrespondingIndirectLoad(node->getOpCodeValue()));
               else
                  TR::Node::recreate(node, _compilation->il.opCodeForDirectStore(node->getDataType()));
               }

            return other->getLocalIndex();
            }
         }

      // No match from existing nodes in the hash table;
      // add this node to the hash table.
      //
      _hashTable->add(node, hashValue);
      }

   // Restore this node's state to what it was before
   // (if it was a global store)
   //
   if (seenWriteBarrier)
      {
      node->setNumChildren(storeNumChildren);

      if (storeNumChildren == 3)
         TR::Node::recreate(node, TR::awrtbari);
      else
         TR::Node::recreate(node, TR::awrtbar);
      }
   else if (seenIndirectStore)
      {
      node->setNumChildren(storeNumChildren);

#ifdef J9_PROJECT_SPECIFIC
      if (seenIndirectBCDStore)
         node->setBCDStoreIsTemporarilyALoad(false);
#endif

      if (node->getOpCode().isIndirect())
         TR::Node::recreate(node, _compilation->il.opCodeForCorrespondingIndirectLoad(node->getOpCodeValue()));
      else
         TR::Node::recreate(node, _compilation->il.opCodeForDirectStore(node->getDataType()));
      }

   // First time a node like this has been seen.
   return -1;
   }
/**
 * Evaluates a "get vector element" node: reads one element out of the vector
 * produced by the first child, at the constant index given by the second
 * child, and returns it in a newly allocated result register.
 *
 * Supported vector types are VectorInt32 and VectorFloat (four elements) and
 * VectorInt64 and VectorDouble (two elements). Int8 / Int16 vectors and a
 * non-constant index only trip an assertion. For integer vectors the element
 * is first brought into the low bits of a temporary vector register and then
 * moved to the general purpose result register(s); for float / double vectors
 * the result register is written directly.
 *
 * @param node the node being evaluated; the result register is set on it
 * @param cg   the code generator
 * @return the register (or register pair for a 64-bit integer element on a
 *         32-bit target) holding the element
 */
TR::Register*
OMR::X86::TreeEvaluator::SIMDgetvelemEvaluator(TR::Node* node, TR::CodeGenerator* cg)
   {
   TR::Node* vectorChild = node->getChild(0);
   TR::Node* indexChild = node->getChild(1);

   TR::Register* vectorReg = cg->evaluate(vectorChild);

   TR::Register* resultReg = 0;
   TR::Register* resultLowReg = 0;
   TR::Register* resultHighReg = 0;
   int32_t numElements = -1;

   // Pick the element count and allocate the result register for the vector type.
   switch (vectorChild->getDataType())
      {
      case TR::VectorInt8:
      case TR::VectorInt16:
         TR_ASSERT(false, "unsupported vector type %s in SIMDgetvelemEvaluator.\n", vectorChild->getDataType().toString());
         break;

      case TR::VectorInt32:
         numElements = 4;
         resultReg = cg->allocateRegister();
         break;

      case TR::VectorInt64:
         numElements = 2;
         if (!TR::Compiler->target.is32Bit())
            {
            resultReg = cg->allocateRegister();
            }
         else
            {
            // A 64-bit element needs a register pair on a 32-bit target.
            resultLowReg = cg->allocateRegister();
            resultHighReg = cg->allocateRegister();
            resultReg = cg->allocateRegisterPair(resultLowReg, resultHighReg);
            }
         break;

      case TR::VectorFloat:
         numElements = 4;
         resultReg = cg->allocateSinglePrecisionRegister(TR_FPR);
         break;

      case TR::VectorDouble:
         numElements = 2;
         resultReg = cg->allocateRegister(TR_FPR);
         break;

      default:
         TR_ASSERT(false, "unrecognized vector type %s in SIMDgetvelemEvaluator.\n", vectorChild->getDataType().toString());
      }

   if (!indexChild->getOpCode().isLoadConst())
      {
      //TODO: handle non-constant second child case
      TR_ASSERT(false, "non-const second child not currently supported in SIMDgetvelemEvaluator.\n");
      }
   else
      {
      int32_t elementIndex = indexChild->getInt();

      TR_ASSERT(elementIndex >= 0 && elementIndex < numElements, "Element can only be 0 to %u\n", numElements - 1);

      if (4 == numElements)
         {
         /*
          * Element 0 is the most significant 32 bits and element 3 the least
          * significant, so the shuffle source lane is (3 - index):
          *    index 0 -> 0x03, index 1 -> 0x02, index 2 -> 0x01, index 3 -> 0x00
          */
         uint8_t shuffleLane = (uint8_t)((3 - elementIndex) & 0x03);

         /*
          * The shuffle splats the selected lane into all four slots of the
          * target, which puts the wanted value in the least significant bits;
          * the other bits should never be read.
          * Float: the result register is the shuffle target itself.
          * Int32: a temporary vector register is the target and the value is
          *        moved to the general purpose result register afterwards.
          */
         const bool viaTempVector = (TR::VectorInt32 == vectorChild->getDataType());
         TR::Register* shuffleTarget = viaTempVector ? cg->allocateRegister(TR_VRF) : resultReg;

         // Index 3 already sits in the least significant 32 bits: a plain move suffices.
         if (3 != elementIndex)
            {
            generateRegRegImmInstruction(PSHUFDRegRegImm1, node, shuffleTarget, vectorReg, shuffleLane, cg);
            }
         else
            {
            generateRegRegInstruction(MOVDQURegReg, node, shuffleTarget, vectorReg, cg);
            }

         if (viaTempVector)
            {
            generateRegRegInstruction(MOVDReg4Reg, node, resultReg, shuffleTarget, cg);
            cg->stopUsingRegister(shuffleTarget);
            }
         }
      else //2 == numElements
         {
         /*
          * Double: the result register is the shuffle target itself.
          * Int64:  a temporary vector register is the target and the value is
          *         moved to the general purpose result register(s) afterwards.
          */
         const bool viaTempVector = (TR::VectorInt64 == vectorChild->getDataType());
         TR::Register* shuffleTarget = viaTempVector ? cg->allocateRegister(TR_VRF) : resultReg;

         /*
          * The wanted value has to end up in the least significant 64 bits.
          * Index 1 is already there, so a plain move suffices; index 0 lives
          * in the most significant 64 bits and is shuffled down (the other
          * bits touched by the shuffle are never read).
          */
         if (1 != elementIndex)
            {
            generateRegRegImmInstruction(PSHUFDRegRegImm1, node, shuffleTarget, vectorReg, 0x0e, cg);
            }
         else
            {
            generateRegRegInstruction(MOVDQURegReg, node, shuffleTarget, vectorReg, cg);
            }

         if (viaTempVector)
            {
            if (!TR::Compiler->target.is32Bit())
               {
               generateRegRegInstruction(MOVQReg8Reg, node, resultReg, shuffleTarget, cg);
               }
            else
               {
               // Low word first, then shuffle the other 32-bit half of the
               // element into the low lane and read it as the high word.
               generateRegRegInstruction(MOVDReg4Reg, node, resultLowReg, shuffleTarget, cg);
               generateRegRegImmInstruction(PSHUFDRegRegImm1, node, shuffleTarget, vectorReg, (0 == elementIndex) ? 0x03 : 0x01, cg);
               generateRegRegInstruction(MOVDReg4Reg, node, resultHighReg, shuffleTarget, cg);
               }
            cg->stopUsingRegister(shuffleTarget);
            }
         }
      }

   node->setRegister(resultReg);
   cg->decReferenceCount(vectorChild);
   cg->decReferenceCount(indexChild);

   return resultReg;
   }