void CodeGen::genFloatMath(GenTree* tree, RegSet::RegisterPreference* pref)
{
    assert(tree->OperGet() == GT_INTRINSIC);

    GenTreePtr op1 = tree->gtOp.gtOp1;

    // get tree into a register
    genCodeForTreeFloat(op1, pref);

    instruction ins;

    switch (tree->gtIntrinsic.gtIntrinsicId)
    {
        case CORINFO_INTRINSIC_Sin:
            ins = INS_invalid;
            break;
        case CORINFO_INTRINSIC_Cos:
            ins = INS_invalid;
            break;
        case CORINFO_INTRINSIC_Sqrt:
            ins = INS_vsqrt;
            break;
        case CORINFO_INTRINSIC_Abs:
            ins = INS_vabs;
            break;
        case CORINFO_INTRINSIC_Round:
        {
            regNumber reg = regSet.PickRegFloat(tree->TypeGet(), pref);
            genMarkTreeInReg(tree, reg);
            // convert it to a long and back
            inst_RV_RV(ins_FloatConv(TYP_LONG, tree->TypeGet()), reg, op1->gtRegNum, tree->TypeGet());
            inst_RV_RV(ins_FloatConv(tree->TypeGet(), TYP_LONG), reg, reg);
            genCodeForTreeFloat_DONE(tree, op1->gtRegNum);
            return;
        }
        break;
        default:
            unreached();
    }

    if (ins != INS_invalid)
    {
        regNumber reg = regSet.PickRegFloat(tree->TypeGet(), pref);
        genMarkTreeInReg(tree, reg);
        inst_RV_RV(ins, reg, op1->gtRegNum, tree->TypeGet());
        // mark register that holds tree
        genCodeForTreeFloat_DONE(tree, reg);
    }
    else
    {
        unreached();
        // If unreached is removed, mark register that holds tree
        // genCodeForTreeFloat_DONE(tree, op1->gtRegNum);
    }

    return;
}
void Rationalizer::RewriteAssignment(LIR::Use& use)
{
    assert(use.IsInitialized());

    GenTreeOp* assignment = use.Def()->AsOp();
    assert(assignment->OperGet() == GT_ASG);

    GenTree* location = assignment->gtGetOp1();
    GenTree* value    = assignment->gtGetOp2();

    genTreeOps locationOp = location->OperGet();
    switch (locationOp)
    {
        case GT_LCL_VAR:
        case GT_LCL_FLD:
        case GT_REG_VAR:
        case GT_PHI_ARG:
            RewriteAssignmentIntoStoreLclCore(assignment, location, value, locationOp);
            BlockRange().Remove(location);
            break;

        case GT_IND:
        {
            GenTreeStoreInd* store =
                new (comp, GT_STOREIND) GenTreeStoreInd(location->TypeGet(), location->gtGetOp1(), value);

            copyFlags(store, assignment, GTF_ALL_EFFECT);
            copyFlags(store, location, GTF_IND_FLAGS);

            if (assignment->IsReverseOp())
            {
                store->gtFlags |= GTF_REVERSE_OPS;
            }

            // TODO: JIT dump

            // Remove the GT_IND node and replace the assignment node with the store
            BlockRange().Remove(location);
            BlockRange().InsertBefore(assignment, store);
            use.ReplaceWith(comp, store);
            BlockRange().Remove(assignment);
        }
        break;

        case GT_CLS_VAR:
        {
            location->SetOper(GT_CLS_VAR_ADDR);
            location->gtType = TYP_BYREF;

            assignment->SetOper(GT_STOREIND);

            // TODO: JIT dump
        }
        break;

        default:
            unreached();
            break;
    }
}
// return op that is the store equivalent of the given load opcode
genTreeOps storeForm(genTreeOps loadForm)
{
    switch (loadForm)
    {
        case GT_LCL_VAR:
            return GT_STORE_LCL_VAR;
        case GT_LCL_FLD:
            return GT_STORE_LCL_FLD;
        case GT_REG_VAR:
            noway_assert(!"reg vars only supported in classic backend\n");
            unreached();
        default:
            noway_assert(!"not a data load opcode\n");
            unreached();
    }
}
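// Minimal usage sketch (not from the original source; the caller shown is
// hypothetical): a rewriter that turns a local load into the matching store
// would use storeForm() like this:
//
//     // assuming 'location' is the GT_LCL_VAR / GT_LCL_FLD destination of an assignment
//     location->SetOper(storeForm(location->OperGet())); // e.g. GT_LCL_VAR -> GT_STORE_LCL_VAR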
void LegacyPolicy::SetFailure(InlineObservation obs)
{
    // Expect a valid observation
    assert(InlIsValidObservation(obs));

    switch (m_Decision)
    {
        case InlineDecision::FAILURE:
            // Repeated failure only ok if evaluating a prejit root
            // (since we can't fail fast because we're not inlining)
            // or if inlining and the observation is CALLSITE_TOO_MANY_LOCALS
            // (since we can't fail fast from lvaGrabTemp).
            assert(m_IsPrejitRoot || (obs == InlineObservation::CALLSITE_TOO_MANY_LOCALS));
            break;
        case InlineDecision::UNDECIDED:
        case InlineDecision::CANDIDATE:
            m_Decision    = InlineDecision::FAILURE;
            m_Observation = obs;
            break;
        default:
            // SUCCESS, NEVER, or ??
            assert(!"Unexpected m_Decision");
            unreached();
    }
}
// return op that is the load equivalent of the given addr opcode
genTreeOps loadForm(genTreeOps addrForm)
{
    switch (addrForm)
    {
        case GT_LCL_VAR_ADDR:
            return GT_LCL_VAR;
        case GT_LCL_FLD_ADDR:
            return GT_LCL_FLD;
        default:
            noway_assert(!"not a local address opcode\n");
            unreached();
    }
}
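// Illustrative note (not part of the original source): RewriteObj below uses this
// helper to turn a local-address node back into the matching load form, e.g.:
//
//     srcAddr->SetOper(loadForm(srcAddr->OperGet())); // GT_LCL_VAR_ADDR -> GT_LCL_VAR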
//--------------------------------------------------------------------------------------------------
// ToGenTree - Convert an "expression" into a gentree node.
//
// Arguments:
//      comp    Compiler instance to allocate trees
//
// Return Values:
//      Returns the gen tree representation for either a constant or a variable or an arrLen operation
//      defined by the "type" member
//
GenTreePtr LC_Expr::ToGenTree(Compiler* comp)
{
    // Convert to GenTree nodes.
    switch (type)
    {
        case Ident:
            return ident.ToGenTree(comp);
        default:
            assert(!"Could not convert LC_Expr to GenTree");
            unreached();
            break;
    }
}
// LIR helpers
void BasicBlock::MakeLIR(GenTree* firstNode, GenTree* lastNode)
{
#ifdef LEGACY_BACKEND
    unreached();
#else  // !LEGACY_BACKEND
    assert(!IsLIR());
    assert((firstNode == nullptr) == (lastNode == nullptr));
    assert((firstNode == lastNode) || firstNode->Precedes(lastNode));

    m_firstNode = firstNode;
    m_lastNode  = lastNode;
    bbFlags |= BBF_IS_LIR;
#endif // LEGACY_BACKEND
}
CorInfoInline InlGetCorInfoInlineDecision(InlineDecision d)
{
    switch (d)
    {
        case InlineDecision::SUCCESS:
            return INLINE_PASS;
        case InlineDecision::FAILURE:
            return INLINE_FAIL;
        case InlineDecision::NEVER:
            return INLINE_NEVER;
        default:
            assert(!"Unexpected InlineDecision");
            unreached();
    }
}
bool InlDecisionIsDecided(InlineDecision d)
{
    switch (d)
    {
        case InlineDecision::NEVER:
        case InlineDecision::FAILURE:
        case InlineDecision::SUCCESS:
            return true;
        case InlineDecision::UNDECIDED:
        case InlineDecision::CANDIDATE:
            return false;
        default:
            assert(!"Unexpected InlineDecision");
            unreached();
    }
}
//------------------------------------------------------------------------
// genAVX2Intrinsic: Generates the code for an AVX2 hardware intrinsic node
//
// Arguments:
//    node - The hardware intrinsic node
//
void CodeGen::genAVX2Intrinsic(GenTreeHWIntrinsic* node)
{
    NamedIntrinsic intrinsicID = node->gtHWIntrinsicId;
    var_types      baseType    = node->gtSIMDBaseType;
    instruction    ins         = INS_invalid;

    genConsumeOperands(node);

    switch (intrinsicID)
    {
        default:
            unreached();
            break;
    }
    genProduceReg(node);
}
const char* InlGetDecisionString(InlineDecision d)
{
    switch (d)
    {
        case InlineDecision::SUCCESS:
            return "success";
        case InlineDecision::FAILURE:
            return "failed this call site";
        case InlineDecision::NEVER:
            return "failed this callee";
        case InlineDecision::CANDIDATE:
            return "candidate";
        case InlineDecision::UNDECIDED:
            return "undecided";
        default:
            assert(!"Unexpected InlineDecision");
            unreached();
    }
}
// Rewrite GT_OBJ of SIMD Vector as GT_IND(GT_LEA(obj.op1)) of a SIMD type.
//
// Arguments:
//    use - the LIR::Use whose def is the GT_OBJ node
//
// Return Value:
//    None.
//
// TODO-Cleanup: Once SIMD types are plumbed through the frontend, this will no longer
// be required.
//
void Rationalizer::RewriteObj(LIR::Use& use)
{
#ifdef FEATURE_SIMD
    GenTreeObj* obj = use.Def()->AsObj();

// For UNIX struct passing, we can have Obj nodes for arguments.
// For other cases, we should never see a non-SIMD type here.
#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
    if (!varTypeIsSIMD(obj))
    {
        return;
    }
#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING

    // Should come here only if featureSIMD is enabled
    noway_assert(comp->featureSIMD);

    // We should only call this with a SIMD type.
    noway_assert(varTypeIsSIMD(obj));
    var_types simdType = obj->TypeGet();

    // If the operand of obj is a GT_ADDR(GT_LCL_VAR) and LclVar is known to be a SIMD type,
    // replace obj by GT_LCL_VAR.
    GenTree* srcAddr = obj->gtGetOp1();
    if (srcAddr->OperIsLocalAddr() && comp->isAddrOfSIMDType(srcAddr))
    {
        BlockRange().Remove(obj);

        srcAddr->SetOper(loadForm(srcAddr->OperGet()));
        srcAddr->gtType = simdType;
        use.ReplaceWith(comp, srcAddr);
    }
    else
    {
        obj->SetOper(GT_IND);
        obj->gtType = simdType;
    }
#else
    // we should never reach here without feature SIMD
    assert(!"Unexpected obj during rationalization\n");
    unreached();
#endif
}
/*-----------------------------------------------------------------------------
 * ppu_finish_dt
 *
 * Finishes processing for a PPU data transfer command after the SPU command
 * that handles the other end has completed.
 *---------------------------------------------------------------------------*/
void ppu_finish_dt(PPU_DT_PARAMS *cmd)
{
    switch (cmd->type)
    {
        // case PPU_CMD_DT_IN_FRONT:
        case PPU_CMD_DT_IN_BACK:
            ppu_finish_dt_in_back(cmd);
            break;
        case PPU_CMD_DT_OUT_FRONT:
            ppu_finish_dt_out_front(cmd);
            break;
        // case PPU_CMD_DT_OUT_BACK:
        default:
            unreached();
    }
}
//--------------------------------------------------------------------------------------------------
// ToGenTree - Convert an "identifier" into a gentree node.
//
// Arguments:
//      comp    Compiler instance to allocate trees
//
// Return Values:
//      Returns the gen tree representation for either a constant or a variable or an arrLen operation
//      defined by the "type" member
//
GenTreePtr LC_Ident::ToGenTree(Compiler* comp)
{
    // Convert to GenTree nodes.
    switch (type)
    {
        case Const:
            assert(constant <= INT32_MAX);
            return comp->gtNewIconNode(constant);
        case Var:
            return comp->gtNewLclvNode(constant, comp->lvaTable[constant].lvType);
        case ArrLen:
            return arrLen.ToGenTree(comp);
        case Null:
            return comp->gtNewIconNode(0, TYP_REF);
        default:
            assert(!"Could not convert LC_Ident to GenTree");
            unreached();
            break;
    }
}
//------------------------------------------------------------------------
// genSSE42Intrinsic: Generates the code for an SSE4.2 hardware intrinsic node
//
// Arguments:
//    node - The hardware intrinsic node
//
void CodeGen::genSSE42Intrinsic(GenTreeHWIntrinsic* node)
{
    NamedIntrinsic intrinsicID = node->gtHWIntrinsicId;
    GenTree*       op1         = node->gtGetOp1();
    GenTree*       op2         = node->gtGetOp2();
    regNumber      targetReg   = node->gtRegNum;
    assert(targetReg != REG_NA);
    var_types targetType = node->TypeGet();
    var_types baseType   = node->gtSIMDBaseType;

    regNumber op1Reg = op1->gtRegNum;
    regNumber op2Reg = op2->gtRegNum;
    genConsumeOperands(node);

    switch (intrinsicID)
    {
        case NI_SSE42_Crc32:
            if (op1Reg != targetReg)
            {
                inst_RV_RV(INS_mov, targetReg, op1Reg, targetType, emitTypeSize(targetType));
            }

            if (baseType == TYP_UBYTE || baseType == TYP_USHORT) // baseType is the type of the second argument
            {
                assert(targetType == TYP_INT);
                inst_RV_RV(INS_crc32, targetReg, op2Reg, baseType, emitTypeSize(baseType));
            }
            else
            {
                assert(op1->TypeGet() == op2->TypeGet());
                assert(targetType == TYP_INT || targetType == TYP_LONG);
                inst_RV_RV(INS_crc32, targetReg, op2Reg, targetType, emitTypeSize(targetType));
            }
            break;
        default:
            unreached();
            break;
    }
    genProduceReg(node);
}
void LegacyPolicy::SetNever(InlineObservation obs)
{
    // Expect a valid observation
    assert(InlIsValidObservation(obs));

    switch (m_Decision)
    {
        case InlineDecision::NEVER:
            // Repeated never only ok if evaluating a prejit root
            assert(m_IsPrejitRoot);
            break;
        case InlineDecision::UNDECIDED:
        case InlineDecision::CANDIDATE:
            m_Decision    = InlineDecision::NEVER;
            m_Observation = obs;
            break;
        default:
            // SUCCESS, FAILURE or ??
            assert(!"Unexpected m_Decision");
            unreached();
    }
}
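// Illustrative summary (not part of the original source) of the transitions the
// two setters above permit:
//
//     UNDECIDED / CANDIDATE --SetFailure--> FAILURE   ("failed this call site")
//     UNDECIDED / CANDIDATE --SetNever----> NEVER     ("failed this callee")
//
// Repeating a verdict is tolerated only for prejit roots (plus the
// CALLSITE_TOO_MANY_LOCALS special case in SetFailure); any other transition asserts.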
//------------------------------------------------------------------------
// genAVXIntrinsic: Generates the code for an AVX hardware intrinsic node
//
// Arguments:
//    node - The hardware intrinsic node
//
void CodeGen::genAVXIntrinsic(GenTreeHWIntrinsic* node)
{
    NamedIntrinsic intrinsicID = node->gtHWIntrinsicId;
    var_types      baseType    = node->gtSIMDBaseType;
    emitAttr       attr        = EA_ATTR(node->gtSIMDSize);
    var_types      targetType  = node->TypeGet();
    instruction    ins         = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
    GenTree*       op1         = node->gtGetOp1();
    GenTree*       op2         = node->gtGetOp2();
    regNumber      targetReg   = node->gtRegNum;
    emitter*       emit        = getEmitter();

    genConsumeOperands(node);

    switch (intrinsicID)
    {
        case NI_AVX_SetZeroVector256:
        {
            assert(op1 == nullptr);
            assert(op2 == nullptr);
            // SetZeroVector256 will generate pxor with an integral base type, but pxor is an AVX2 instruction,
            // so we generate xorps on AVX machines.
            if (!compiler->compSupports(InstructionSet_AVX2) && varTypeIsIntegral(baseType))
            {
                emit->emitIns_SIMD_R_R_R(INS_xorps, attr, targetReg, targetReg, targetReg);
            }
            else
            {
                emit->emitIns_SIMD_R_R_R(ins, attr, targetReg, targetReg, targetReg);
            }
            break;
        }

        case NI_AVX_TestC:
        {
            emit->emitIns_R_R(INS_xor, EA_4BYTE, targetReg, targetReg);
            emit->emitIns_R_R(ins, attr, op1->gtRegNum, op2->gtRegNum);
            emit->emitIns_R(INS_setb, EA_1BYTE, targetReg);
            break;
        }

        case NI_AVX_TestNotZAndNotC:
        {
            emit->emitIns_R_R(INS_xor, EA_4BYTE, targetReg, targetReg);
            emit->emitIns_R_R(ins, attr, op1->gtRegNum, op2->gtRegNum);
            emit->emitIns_R(INS_seta, EA_1BYTE, targetReg);
            break;
        }

        case NI_AVX_TestZ:
        {
            emit->emitIns_R_R(INS_xor, EA_4BYTE, targetReg, targetReg);
            emit->emitIns_R_R(ins, attr, op1->gtRegNum, op2->gtRegNum);
            emit->emitIns_R(INS_sete, EA_1BYTE, targetReg);
            break;
        }

        default:
            unreached();
            break;
    }
    genProduceReg(node);
}
void Rationalizer::RewriteAssignment(LIR::Use& use)
{
    assert(use.IsInitialized());

    GenTreeOp* assignment = use.Def()->AsOp();
    assert(assignment->OperGet() == GT_ASG);

    GenTree* location = assignment->gtGetOp1();
    GenTree* value    = assignment->gtGetOp2();

    genTreeOps locationOp = location->OperGet();

    if (assignment->OperIsBlkOp())
    {
#ifdef FEATURE_SIMD
        if (varTypeIsSIMD(location) && assignment->OperIsInitBlkOp())
        {
            if (location->OperGet() == GT_LCL_VAR)
            {
                var_types simdType = location->TypeGet();
                GenTree*  initVal  = assignment->gtOp.gtOp2;
                var_types baseType = comp->getBaseTypeOfSIMDLocal(location);
                if (baseType != TYP_UNKNOWN)
                {
                    GenTreeSIMD* simdTree = new (comp, GT_SIMD)
                        GenTreeSIMD(simdType, initVal, SIMDIntrinsicInit, baseType, genTypeSize(simdType));
                    assignment->gtOp.gtOp2 = simdTree;
                    value                  = simdTree;
                    initVal->gtNext        = simdTree;
                    simdTree->gtPrev       = initVal;

                    simdTree->gtNext = location;
                    location->gtPrev = simdTree;
                }
            }
        }
#endif // FEATURE_SIMD
        if ((location->TypeGet() == TYP_STRUCT) && !assignment->IsPhiDefn() && !value->IsMultiRegCall())
        {
            if ((location->OperGet() == GT_LCL_VAR))
            {
                // We need to construct a block node for the location.
                // Modify lcl to be the address form.
                location->SetOper(addrForm(locationOp));
                LclVarDsc* varDsc     = &(comp->lvaTable[location->AsLclVarCommon()->gtLclNum]);
                location->gtType      = TYP_BYREF;
                GenTreeBlk*  storeBlk = nullptr;
                unsigned int size     = varDsc->lvExactSize;

                if (varDsc->lvStructGcCount != 0)
                {
                    CORINFO_CLASS_HANDLE structHnd = varDsc->lvVerTypeInfo.GetClassHandle();
                    GenTreeObj*          objNode   = comp->gtNewObjNode(structHnd, location)->AsObj();
                    unsigned int         slots = (unsigned)(roundUp(size, TARGET_POINTER_SIZE) / TARGET_POINTER_SIZE);

                    objNode->SetGCInfo(varDsc->lvGcLayout, varDsc->lvStructGcCount, slots);
                    objNode->ChangeOper(GT_STORE_OBJ);
                    objNode->SetData(value);
                    comp->fgMorphUnsafeBlk(objNode);
                    storeBlk = objNode;
                }
                else
                {
                    storeBlk = new (comp, GT_STORE_BLK) GenTreeBlk(GT_STORE_BLK, TYP_STRUCT, location, value, size);
                }
                storeBlk->gtFlags |= (GTF_REVERSE_OPS | GTF_ASG);
                storeBlk->gtFlags |= ((location->gtFlags | value->gtFlags) & GTF_ALL_EFFECT);

                GenTree* insertionPoint = location->gtNext;
                BlockRange().InsertBefore(insertionPoint, storeBlk);
                use.ReplaceWith(comp, storeBlk);
                BlockRange().Remove(assignment);
                JITDUMP("After transforming local struct assignment into a block op:\n");
                DISPTREERANGE(BlockRange(), use.Def());
                JITDUMP("\n");
                return;
            }
            else
            {
                assert(location->OperIsBlk());
            }
        }
    }

    switch (locationOp)
    {
        case GT_LCL_VAR:
        case GT_LCL_FLD:
        case GT_REG_VAR:
        case GT_PHI_ARG:
            RewriteAssignmentIntoStoreLclCore(assignment, location, value, locationOp);
            BlockRange().Remove(location);
            break;

        case GT_IND:
        {
            GenTreeStoreInd* store =
                new (comp, GT_STOREIND) GenTreeStoreInd(location->TypeGet(), location->gtGetOp1(), value);

            copyFlags(store, assignment, GTF_ALL_EFFECT);
            copyFlags(store, location, GTF_IND_FLAGS);

            if (assignment->IsReverseOp())
            {
                store->gtFlags |= GTF_REVERSE_OPS;
            }

            // TODO: JIT dump

            // Remove the GT_IND node and replace the assignment node with the store
            BlockRange().Remove(location);
            BlockRange().InsertBefore(assignment, store);
            use.ReplaceWith(comp, store);
            BlockRange().Remove(assignment);
        }
        break;

        case GT_CLS_VAR:
        {
            location->SetOper(GT_CLS_VAR_ADDR);
            location->gtType = TYP_BYREF;

            assignment->SetOper(GT_STOREIND);

            // TODO: JIT dump
        }
        break;

        case GT_BLK:
        case GT_OBJ:
        case GT_DYN_BLK:
        {
            assert(varTypeIsStruct(location));
            GenTreeBlk* storeBlk = location->AsBlk();
            genTreeOps  storeOper;
            switch (location->gtOper)
            {
                case GT_BLK:
                    storeOper = GT_STORE_BLK;
                    break;
                case GT_OBJ:
                    storeOper = GT_STORE_OBJ;
                    break;
                case GT_DYN_BLK:
                    storeOper = GT_STORE_DYN_BLK;
                    break;
                default:
                    unreached();
            }
            JITDUMP("Rewriting GT_ASG(%s(X), Y) to %s(X,Y):\n", GenTree::NodeName(location->gtOper),
                    GenTree::NodeName(storeOper));
            storeBlk->SetOperRaw(storeOper);
            storeBlk->gtFlags &= ~GTF_DONT_CSE;
            storeBlk->gtFlags |=
                (assignment->gtFlags & (GTF_ALL_EFFECT | GTF_REVERSE_OPS | GTF_BLK_VOLATILE | GTF_BLK_UNALIGNED |
                                        GTF_DONT_CSE));
            storeBlk->gtBlk.Data() = value;

            // Replace the assignment node with the store
            use.ReplaceWith(comp, storeBlk);
            BlockRange().Remove(assignment);
            DISPTREERANGE(BlockRange(), use.Def());
            JITDUMP("\n");
        }
        break;

        default:
            unreached();
            break;
    }
}
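// Illustrative before/after (not from the original source) for the GT_IND arm of
// the switch above:
//
//     GT_ASG(GT_IND(addr), value)   ==>   GT_STOREIND(addr, value)
//
// The GT_IND is removed from the block range, the new store inherits the
// assignment's effect and indirection flags, and the store replaces the
// assignment at its use.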
//------------------------------------------------------------------------
// BuildNode: Build the RefPositions for a node
//
// Arguments:
//    tree - the node of interest
//
// Return Value:
//    The number of sources consumed by this node.
//
// Notes:
// Preconditions:
//    LSRA has been initialized.
//
// Postconditions:
//    RefPositions have been built for all the register defs and uses required
//    for this node.
//
int LinearScan::BuildNode(GenTree* tree)
{
    assert(!tree->isContained());
    Interval* prefSrcInterval = nullptr;
    int       srcCount;
    int       dstCount      = 0;
    regMaskTP dstCandidates = RBM_NONE;
    regMaskTP killMask      = RBM_NONE;
    bool      isLocalDefUse = false;

    // Reset the build-related members of LinearScan.
    clearBuildState();

    RegisterType registerType = TypeGet(tree);

    // Set the default dstCount. This may be modified below.
    if (tree->IsValue())
    {
        dstCount = 1;
        if (tree->IsUnusedValue())
        {
            isLocalDefUse = true;
        }
    }
    else
    {
        dstCount = 0;
    }

    switch (tree->OperGet())
    {
        default:
            srcCount = BuildSimple(tree);
            break;

        case GT_LCL_VAR:
        case GT_LCL_FLD:
        {
            // We handle tracked variables differently from non-tracked ones. If it is tracked,
            // we will simply add a use of the tracked variable at its parent/consumer.
            // Otherwise, for a use we need to actually add the appropriate references for loading
            // or storing the variable.
            //
            // A tracked variable won't actually get used until the appropriate ancestor tree node
            // is processed, unless this is marked "isLocalDefUse" because it is a stack-based argument
            // to a call or an orphaned dead node.
            //
            LclVarDsc* const varDsc = &compiler->lvaTable[tree->AsLclVarCommon()->gtLclNum];
            if (isCandidateVar(varDsc))
            {
                INDEBUG(dumpNodeInfo(tree, dstCandidates, 0, 1));
                return 0;
            }
            srcCount = 0;
#ifdef FEATURE_SIMD
            // Need an additional register to read upper 4 bytes of Vector3.
            if (tree->TypeGet() == TYP_SIMD12)
            {
                // We need an internal register different from targetReg in which 'tree' produces its result
                // because both targetReg and internal reg will be in use at the same time.
                buildInternalFloatRegisterDefForNode(tree, allSIMDRegs());
                setInternalRegsDelayFree = true;
                buildInternalRegisterUses();
            }
#endif
            BuildDef(tree);
        }
        break;

        case GT_STORE_LCL_FLD:
        case GT_STORE_LCL_VAR:
            srcCount = 1;
            assert(dstCount == 0);
            srcCount = BuildStoreLoc(tree->AsLclVarCommon());
            break;

        case GT_FIELD_LIST:
            // These should always be contained. We don't correctly allocate or
            // generate code for a non-contained GT_FIELD_LIST.
            noway_assert(!"Non-contained GT_FIELD_LIST");
            srcCount = 0;
            break;

        case GT_LIST:
        case GT_ARGPLACE:
        case GT_NO_OP:
        case GT_START_NONGC:
        case GT_PROF_HOOK:
            srcCount = 0;
            assert(dstCount == 0);
            break;

        case GT_START_PREEMPTGC:
            // This kills GC refs in callee save regs
            srcCount = 0;
            assert(dstCount == 0);
            BuildDefsWithKills(tree, 0, RBM_NONE, RBM_NONE);
            break;

        case GT_CNS_DBL:
        {
            GenTreeDblCon* dblConst   = tree->AsDblCon();
            double         constValue = dblConst->gtDblCon.gtDconVal;

            if (emitter::emitIns_valid_imm_for_fmov(constValue))
            {
                // Directly encode constant to instructions.
            }
            else
            {
                // Reserve int to load constant from memory (IF_LARGELDC)
                buildInternalIntRegisterDefForNode(tree);
                buildInternalRegisterUses();
            }
        }
            __fallthrough;

        case GT_CNS_INT:
        {
            srcCount = 0;
            assert(dstCount == 1);
            RefPosition* def               = BuildDef(tree);
            def->getInterval()->isConstant = true;
        }
        break;

        case GT_BOX:
        case GT_COMMA:
        case GT_QMARK:
        case GT_COLON:
            srcCount = 0;
            assert(dstCount == 0);
            unreached();
            break;

        case GT_RETURN:
            srcCount = BuildReturn(tree);
            break;

        case GT_RETFILT:
            assert(dstCount == 0);
            if (tree->TypeGet() == TYP_VOID)
            {
                srcCount = 0;
            }
            else
            {
                assert(tree->TypeGet() == TYP_INT);
                srcCount = 1;
                BuildUse(tree->gtGetOp1(), RBM_INTRET);
            }
            break;

        case GT_NOP:
            // A GT_NOP is a passthrough if it is void or has a child, but must be
            // considered to produce a dummy value if it has a type but no child.
            srcCount = 0;
            if (tree->TypeGet() != TYP_VOID && tree->gtGetOp1() == nullptr)
            {
                assert(dstCount == 1);
                BuildDef(tree);
            }
            else
            {
                assert(dstCount == 0);
            }
            break;

        case GT_JTRUE:
            srcCount = 0;
            assert(dstCount == 0);
            break;

        case GT_JMP:
            srcCount = 0;
            assert(dstCount == 0);
            break;

        case GT_SWITCH:
            // This should never occur since switch nodes must not be visible at this
            // point in the JIT.
            srcCount = 0;
            noway_assert(!"Switch must be lowered at this point");
            break;

        case GT_JMPTABLE:
            srcCount = 0;
            assert(dstCount == 1);
            BuildDef(tree);
            break;

        case GT_SWITCH_TABLE:
            buildInternalIntRegisterDefForNode(tree);
            srcCount = BuildBinaryUses(tree->AsOp());
            assert(dstCount == 0);
            break;

        case GT_ASG:
            noway_assert(!"We should never hit any assignment operator in lowering");
            srcCount = 0;
            break;

        case GT_ADD:
        case GT_SUB:
            if (varTypeIsFloating(tree->TypeGet()))
            {
                // overflow operations aren't supported on float/double types.
                assert(!tree->gtOverflow());

                // No implicit conversions at this stage as the expectation is that
                // everything is made explicit by adding casts.
                assert(tree->gtGetOp1()->TypeGet() == tree->gtGetOp2()->TypeGet());
            }
            __fallthrough;

        case GT_AND:
        case GT_OR:
        case GT_XOR:
        case GT_LSH:
        case GT_RSH:
        case GT_RSZ:
        case GT_ROR:
            srcCount = BuildBinaryUses(tree->AsOp());
            assert(dstCount == 1);
            BuildDef(tree);
            break;

        case GT_RETURNTRAP:
            // this just turns into a compare of its child with an int
            // + a conditional call
            BuildUse(tree->gtGetOp1());
            srcCount = 1;
            assert(dstCount == 0);
            killMask = compiler->compHelperCallKillSet(CORINFO_HELP_STOP_FOR_GC);
            BuildDefsWithKills(tree, 0, RBM_NONE, killMask);
            break;

        case GT_MOD:
        case GT_UMOD:
            NYI_IF(varTypeIsFloating(tree->TypeGet()), "FP Remainder in ARM64");
            assert(!"Shouldn't see an integer typed GT_MOD node in ARM64");
            srcCount = 0;
            break;

        case GT_MUL:
            if (tree->gtOverflow())
            {
                // Need a register different from target reg to check for overflow.
                buildInternalIntRegisterDefForNode(tree);
                setInternalRegsDelayFree = true;
            }
            __fallthrough;

        case GT_DIV:
        case GT_MULHI:
        case GT_UDIV:
        {
            srcCount = BuildBinaryUses(tree->AsOp());
            buildInternalRegisterUses();
            assert(dstCount == 1);
            BuildDef(tree);
        }
        break;

        case GT_INTRINSIC:
        {
            noway_assert((tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Abs) ||
                         (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Ceiling) ||
                         (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Floor) ||
                         (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Round) ||
                         (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Sqrt));

            // Both operand and its result must be of the same floating point type.
            GenTree* op1 = tree->gtGetOp1();
            assert(varTypeIsFloating(op1));
            assert(op1->TypeGet() == tree->TypeGet());

            BuildUse(op1);
            srcCount = 1;
            assert(dstCount == 1);
            BuildDef(tree);
        }
        break;

#ifdef FEATURE_SIMD
        case GT_SIMD:
            srcCount = BuildSIMD(tree->AsSIMD());
            break;
#endif // FEATURE_SIMD

#ifdef FEATURE_HW_INTRINSICS
        case GT_HWIntrinsic:
            srcCount = BuildHWIntrinsic(tree->AsHWIntrinsic());
            break;
#endif // FEATURE_HW_INTRINSICS

        case GT_CAST:
            assert(dstCount == 1);
            srcCount = BuildCast(tree->AsCast());
            break;

        case GT_NEG:
        case GT_NOT:
            BuildUse(tree->gtGetOp1());
            srcCount = 1;
            assert(dstCount == 1);
            BuildDef(tree);
            break;

        case GT_EQ:
        case GT_NE:
        case GT_LT:
        case GT_LE:
        case GT_GE:
        case GT_GT:
        case GT_TEST_EQ:
        case GT_TEST_NE:
        case GT_JCMP:
            srcCount = BuildCmp(tree);
            break;

        case GT_CKFINITE:
            srcCount = 1;
            assert(dstCount == 1);
            buildInternalIntRegisterDefForNode(tree);
            BuildUse(tree->gtGetOp1());
            BuildDef(tree);
            buildInternalRegisterUses();
            break;

        case GT_CMPXCHG:
        {
            GenTreeCmpXchg* cmpXchgNode = tree->AsCmpXchg();
            srcCount                    = cmpXchgNode->gtOpComparand->isContained() ? 2 : 3;
            assert(dstCount == 1);

            if (!compiler->compSupports(InstructionSet_Atomics))
            {
                // ARMv8 exclusives require a single internal register
                buildInternalIntRegisterDefForNode(tree);
            }

            // For ARMv8 exclusives the lifetime of the addr and data must be extended because
            // they may be used multiple times during retries.
            // For ARMv8.1 atomic cas the lifetime of the addr and data must be extended to prevent
            // them being reused as the target register which must be destroyed early
            RefPosition* locationUse = BuildUse(tree->gtCmpXchg.gtOpLocation);
            setDelayFree(locationUse);
            RefPosition* valueUse = BuildUse(tree->gtCmpXchg.gtOpValue);
            setDelayFree(valueUse);
            if (!cmpXchgNode->gtOpComparand->isContained())
            {
                RefPosition* comparandUse = BuildUse(tree->gtCmpXchg.gtOpComparand);

                // For ARMv8 exclusives the lifetime of the comparand must be extended because
                // it may be used multiple times during retries.
                if (!compiler->compSupports(InstructionSet_Atomics))
                {
                    setDelayFree(comparandUse);
                }
            }

            // Internals may not collide with target
            setInternalRegsDelayFree = true;
            buildInternalRegisterUses();
            BuildDef(tree);
        }
        break;

        case GT_LOCKADD:
        case GT_XADD:
        case GT_XCHG:
        {
            assert(dstCount == ((tree->TypeGet() == TYP_VOID) ? 0 : 1));
            srcCount = tree->gtGetOp2()->isContained() ? 1 : 2;

            if (!compiler->compSupports(InstructionSet_Atomics))
            {
                // GT_XCHG requires a single internal register; the others require two.
                buildInternalIntRegisterDefForNode(tree);
                if (tree->OperGet() != GT_XCHG)
                {
                    buildInternalIntRegisterDefForNode(tree);
                }
            }

            assert(!tree->gtGetOp1()->isContained());
            RefPosition* op1Use = BuildUse(tree->gtGetOp1());
            RefPosition* op2Use = nullptr;
            if (!tree->gtGetOp2()->isContained())
            {
                op2Use = BuildUse(tree->gtGetOp2());
            }

            // For ARMv8 exclusives the lifetime of the addr and data must be extended because
            // they may be used multiple times during retries.
            if (!compiler->compSupports(InstructionSet_Atomics))
            {
                // Internals may not collide with target
                if (dstCount == 1)
                {
                    setDelayFree(op1Use);
                    if (op2Use != nullptr)
                    {
                        setDelayFree(op2Use);
                    }
                    setInternalRegsDelayFree = true;
                }
                buildInternalRegisterUses();
            }
            if (dstCount == 1)
            {
                BuildDef(tree);
            }
        }
        break;

#if FEATURE_ARG_SPLIT
        case GT_PUTARG_SPLIT:
            srcCount = BuildPutArgSplit(tree->AsPutArgSplit());
            dstCount = tree->AsPutArgSplit()->gtNumRegs;
            break;
#endif // FEATURE_ARG_SPLIT

        case GT_PUTARG_STK:
            srcCount = BuildPutArgStk(tree->AsPutArgStk());
            break;

        case GT_PUTARG_REG:
            srcCount = BuildPutArgReg(tree->AsUnOp());
            break;

        case GT_CALL:
            srcCount = BuildCall(tree->AsCall());
            if (tree->AsCall()->HasMultiRegRetVal())
            {
                dstCount = tree->AsCall()->GetReturnTypeDesc()->GetReturnRegCount();
            }
            break;

        case GT_ADDR:
        {
            // For a GT_ADDR, the child node should not be evaluated into a register
            GenTree* child = tree->gtGetOp1();
            assert(!isCandidateLocalRef(child));
            assert(child->isContained());
            assert(dstCount == 1);
            srcCount = 0;
            BuildDef(tree);
        }
        break;

        case GT_BLK:
        case GT_DYN_BLK:
            // These should all be eliminated prior to Lowering.
            assert(!"Non-store block node in Lowering");
            srcCount = 0;
            break;

        case GT_STORE_BLK:
        case GT_STORE_OBJ:
        case GT_STORE_DYN_BLK:
            srcCount = BuildBlockStore(tree->AsBlk());
            break;

        case GT_INIT_VAL:
            // Always a passthrough of its child's value.
            assert(!"INIT_VAL should always be contained");
            srcCount = 0;
            break;

        case GT_LCLHEAP:
        {
            assert(dstCount == 1);

            // Need a variable number of temp regs (see genLclHeap() in codegenamd64.cpp):
            // Here '-' means don't care.
            //
            //  Size?                   Init Memory?    # temp regs
            //   0                          -               0
            //   const and <=6 ptr words    -               0
            //   const and <PageSize        No              0
            //   >6 ptr words               Yes             0
            //   Non-const                  Yes             0
            //   Non-const                  No              2
            //
            GenTree* size = tree->gtGetOp1();
            if (size->IsCnsIntOrI())
            {
                assert(size->isContained());
                srcCount = 0;

                size_t sizeVal = size->gtIntCon.gtIconVal;
                if (sizeVal != 0)
                {
                    // Compute the amount of memory to properly STACK_ALIGN.
                    // Note: The Gentree node is not updated here as it is cheap to recompute stack aligned size.
                    // This should also help in debugging as we can examine the original size specified with
                    // localloc.
                    sizeVal         = AlignUp(sizeVal, STACK_ALIGN);
                    size_t stpCount = sizeVal / (REGSIZE_BYTES * 2);

                    // For small allocations up to 4 'stp' instructions (i.e. 16 to 64 bytes of localloc)
                    //
                    if (stpCount <= 4)
                    {
                        // Need no internal registers
                    }
                    else if (!compiler->info.compInitMem)
                    {
                        // No need to initialize allocated stack space.
                        if (sizeVal < compiler->eeGetPageSize())
                        {
                            // Need no internal registers
                        }
                        else
                        {
                            // We need two registers: regCnt and RegTmp
                            buildInternalIntRegisterDefForNode(tree);
                            buildInternalIntRegisterDefForNode(tree);
                        }
                    }
                }
            }
            else
            {
                srcCount = 1;
                if (!compiler->info.compInitMem)
                {
                    buildInternalIntRegisterDefForNode(tree);
                    buildInternalIntRegisterDefForNode(tree);
                }
            }

            if (!size->isContained())
            {
                BuildUse(size);
            }
            buildInternalRegisterUses();
            BuildDef(tree);
        }
        break;

        case GT_ARR_BOUNDS_CHECK:
#ifdef FEATURE_SIMD
        case GT_SIMD_CHK:
#endif // FEATURE_SIMD
        {
            GenTreeBoundsChk* node = tree->AsBoundsChk();
            // Consumes arrLen & index - has no result
            assert(dstCount == 0);
            GenTree* intCns = nullptr;
            GenTree* other  = nullptr;
            srcCount        = BuildOperandUses(tree->AsBoundsChk()->gtIndex);
            srcCount += BuildOperandUses(tree->AsBoundsChk()->gtArrLen);
        }
        break;

        case GT_ARR_ELEM:
            // These must have been lowered to GT_ARR_INDEX
            noway_assert(!"We should never see a GT_ARR_ELEM in lowering");
            srcCount = 0;
            assert(dstCount == 0);
            break;

        case GT_ARR_INDEX:
        {
            srcCount = 2;
            assert(dstCount == 1);
            buildInternalIntRegisterDefForNode(tree);
            setInternalRegsDelayFree = true;

            // For GT_ARR_INDEX, the lifetime of the arrObj must be extended because it is actually used multiple
            // times while the result is being computed.
            RefPosition* arrObjUse = BuildUse(tree->AsArrIndex()->ArrObj());
            setDelayFree(arrObjUse);
            BuildUse(tree->AsArrIndex()->IndexExpr());
            buildInternalRegisterUses();
            BuildDef(tree);
        }
        break;

        case GT_ARR_OFFSET:
            // This consumes the offset, if any, the arrObj and the effective index,
            // and produces the flattened offset for this dimension.
            srcCount = 2;
            if (!tree->gtArrOffs.gtOffset->isContained())
            {
                BuildUse(tree->AsArrOffs()->gtOffset);
                srcCount++;
            }
            BuildUse(tree->AsArrOffs()->gtIndex);
            BuildUse(tree->AsArrOffs()->gtArrObj);
            assert(dstCount == 1);
            buildInternalIntRegisterDefForNode(tree);
            buildInternalRegisterUses();
            BuildDef(tree);
            break;

        case GT_LEA:
        {
            GenTreeAddrMode* lea = tree->AsAddrMode();

            GenTree* base  = lea->Base();
            GenTree* index = lea->Index();
            int      cns   = lea->Offset();

            // This LEA is instantiating an address, so we set up the srcCount here.
            srcCount = 0;
            if (base != nullptr)
            {
                srcCount++;
                BuildUse(base);
            }
            if (index != nullptr)
            {
                srcCount++;
                BuildUse(index);
            }
            assert(dstCount == 1);

            // On ARM64 we may need a single internal register
            // (when both conditions are true then we still only need a single internal register)
            if ((index != nullptr) && (cns != 0))
            {
                // ARM64 does not support both Index and offset so we need an internal register
                buildInternalIntRegisterDefForNode(tree);
            }
            else if (!emitter::emitIns_valid_imm_for_add(cns, EA_8BYTE))
            {
                // This offset can't be contained in the add instruction, so we need an internal register
                buildInternalIntRegisterDefForNode(tree);
            }
            buildInternalRegisterUses();
            BuildDef(tree);
        }
        break;

        case GT_STOREIND:
        {
            assert(dstCount == 0);

            if (compiler->codeGen->gcInfo.gcIsWriteBarrierStoreIndNode(tree))
            {
                srcCount = BuildGCWriteBarrier(tree);
                break;
            }

            srcCount = BuildIndir(tree->AsIndir());
            if (!tree->gtGetOp2()->isContained())
            {
                BuildUse(tree->gtGetOp2());
                srcCount++;
            }
        }
        break;

        case GT_NULLCHECK:
            // Unlike ARM, ARM64 implements NULLCHECK as a load to REG_ZR, so no internal register
            // is required, and it is not a localDefUse.
            assert(dstCount == 0);
            assert(!tree->gtGetOp1()->isContained());
            BuildUse(tree->gtGetOp1());
            srcCount = 1;
            break;

        case GT_IND:
            assert(dstCount == 1);
            srcCount = BuildIndir(tree->AsIndir());
            break;

        case GT_CATCH_ARG:
            srcCount = 0;
            assert(dstCount == 1);
            BuildDef(tree, RBM_EXCEPTION_OBJECT);
            break;

        case GT_CLS_VAR:
            srcCount = 0;
            // GT_CLS_VAR, by the time we reach the backend, must always
            // be a pure use.
            // It will produce a result of the type of the
            // node, and use an internal register for the address.
            assert(dstCount == 1);
            assert((tree->gtFlags & (GTF_VAR_DEF | GTF_VAR_USEASG)) == 0);
            buildInternalIntRegisterDefForNode(tree);
            buildInternalRegisterUses();
            BuildDef(tree);
            break;

        case GT_INDEX_ADDR:
            assert(dstCount == 1);
            srcCount = BuildBinaryUses(tree->AsOp());
            buildInternalIntRegisterDefForNode(tree);
            buildInternalRegisterUses();
            BuildDef(tree);
            break;

    } // end switch (tree->OperGet())

    if (tree->IsUnusedValue() && (dstCount != 0))
    {
        isLocalDefUse = true;
    }
    // We need to be sure that we've set srcCount and dstCount appropriately
    assert((dstCount < 2) || tree->IsMultiRegCall());
    assert(isLocalDefUse == (tree->IsValue() && tree->IsUnusedValue()));
    assert(!tree->IsUnusedValue() || (dstCount != 0));
    assert(dstCount == tree->GetRegisterDstCount());
    INDEBUG(dumpNodeInfo(tree, dstCandidates, srcCount, dstCount));
    return srcCount;
}
void Rationalizer::RewriteAssignment(LIR::Use& use)
{
    assert(use.IsInitialized());

    GenTreeOp* assignment = use.Def()->AsOp();
    assert(assignment->OperGet() == GT_ASG);

    GenTree* location = assignment->gtGetOp1();
    GenTree* value    = assignment->gtGetOp2();

    genTreeOps locationOp = location->OperGet();

#ifdef FEATURE_SIMD
    if (varTypeIsSIMD(location) && assignment->OperIsInitBlkOp())
    {
        if (location->OperGet() == GT_LCL_VAR)
        {
            var_types simdType = location->TypeGet();
            GenTree*  initVal  = assignment->gtOp.gtOp2;
            var_types baseType = comp->getBaseTypeOfSIMDLocal(location);
            if (baseType != TYP_UNKNOWN)
            {
                GenTreeSIMD* simdTree = new (comp, GT_SIMD)
                    GenTreeSIMD(simdType, initVal, SIMDIntrinsicInit, baseType, genTypeSize(simdType));
                assignment->gtOp.gtOp2 = simdTree;
                value                  = simdTree;
                initVal->gtNext        = simdTree;
                simdTree->gtPrev       = initVal;

                simdTree->gtNext = location;
                location->gtPrev = simdTree;
            }
        }
        else
        {
            assert(location->OperIsBlk());
        }
    }
#endif // FEATURE_SIMD

    switch (locationOp)
    {
        case GT_LCL_VAR:
        case GT_LCL_FLD:
        case GT_REG_VAR:
        case GT_PHI_ARG:
            RewriteAssignmentIntoStoreLclCore(assignment, location, value, locationOp);
            BlockRange().Remove(location);
            break;

        case GT_IND:
        {
            GenTreeStoreInd* store =
                new (comp, GT_STOREIND) GenTreeStoreInd(location->TypeGet(), location->gtGetOp1(), value);

            copyFlags(store, assignment, GTF_ALL_EFFECT);
            copyFlags(store, location, GTF_IND_FLAGS);

            if (assignment->IsReverseOp())
            {
                store->gtFlags |= GTF_REVERSE_OPS;
            }

            // TODO: JIT dump

            // Remove the GT_IND node and replace the assignment node with the store
            BlockRange().Remove(location);
            BlockRange().InsertBefore(assignment, store);
            use.ReplaceWith(comp, store);
            BlockRange().Remove(assignment);
        }
        break;

        case GT_CLS_VAR:
        {
            location->SetOper(GT_CLS_VAR_ADDR);
            location->gtType = TYP_BYREF;

            assignment->SetOper(GT_STOREIND);

            // TODO: JIT dump
        }
        break;

        case GT_BLK:
        case GT_OBJ:
        case GT_DYN_BLK:
        {
            assert(varTypeIsStruct(location));
            GenTreeBlk* storeBlk = location->AsBlk();
            genTreeOps  storeOper;
            switch (location->gtOper)
            {
                case GT_BLK:
                    storeOper = GT_STORE_BLK;
                    break;
                case GT_OBJ:
                    storeOper = GT_STORE_OBJ;
                    break;
                case GT_DYN_BLK:
                    storeOper = GT_STORE_DYN_BLK;
                    break;
                default:
                    unreached();
            }
            JITDUMP("Rewriting GT_ASG(%s(X), Y) to %s(X,Y):\n", GenTree::NodeName(location->gtOper),
                    GenTree::NodeName(storeOper));
            storeBlk->SetOperRaw(storeOper);
            storeBlk->gtFlags &= ~GTF_DONT_CSE;
            storeBlk->gtFlags |=
                (assignment->gtFlags & (GTF_ALL_EFFECT | GTF_REVERSE_OPS | GTF_BLK_VOLATILE | GTF_BLK_UNALIGNED |
                                        GTF_BLK_INIT | GTF_DONT_CSE));
            storeBlk->gtBlk.Data() = value;

            // Replace the assignment node with the store
            use.ReplaceWith(comp, storeBlk);
            BlockRange().Remove(assignment);
            DISPTREERANGE(BlockRange(), use.Def());
            JITDUMP("\n");
        }
        break;

        default:
            unreached();
            break;
    }
}
//------------------------------------------------------------------------
// DecomposeShift: Decompose GT_LSH, GT_RSH, GT_RSZ. For shift nodes, we need to use
// the shift helper functions, so we here convert the shift into a helper call by
// pulling its arguments out of linear order and making them the args to a call, then
// replacing the original node with the new call.
//
// Arguments:
//    use - the LIR::Use object for the def that needs to be decomposed.
//
// Return Value:
//    The next node to process.
//
GenTree* DecomposeLongs::DecomposeShift(LIR::Use& use)
{
    assert(use.IsInitialized());

    GenTree*   tree   = use.Def();
    GenTree*   gtLong = tree->gtGetOp1();
    genTreeOps oper   = tree->OperGet();

    assert((oper == GT_LSH) || (oper == GT_RSH) || (oper == GT_RSZ));

    LIR::Use loOp1Use(Range(), &gtLong->gtOp.gtOp1, gtLong);
    loOp1Use.ReplaceWithLclVar(m_compiler, m_blockWeight);

    LIR::Use hiOp1Use(Range(), &gtLong->gtOp.gtOp2, gtLong);
    hiOp1Use.ReplaceWithLclVar(m_compiler, m_blockWeight);

    LIR::Use shiftWidthUse(Range(), &tree->gtOp.gtOp2, tree);
    shiftWidthUse.ReplaceWithLclVar(m_compiler, m_blockWeight);

    GenTree* loOp1        = gtLong->gtGetOp1();
    GenTree* hiOp1        = gtLong->gtGetOp2();
    GenTree* shiftWidthOp = tree->gtGetOp2();

    Range().Remove(gtLong);
    Range().Remove(loOp1);
    Range().Remove(hiOp1);
    Range().Remove(shiftWidthOp);

    // TODO-X86-CQ: If the shift operand is a GT_CNS_INT, we should pipe the instructions through to codegen
    // and generate the shift instructions ourselves there, rather than replacing it with a helper call.

    unsigned helper;

    switch (oper)
    {
        case GT_LSH:
            helper = CORINFO_HELP_LLSH;
            break;
        case GT_RSH:
            helper = CORINFO_HELP_LRSH;
            break;
        case GT_RSZ:
            helper = CORINFO_HELP_LRSZ;
            break;
        default:
            unreached();
    }

    GenTreeArgList* argList = m_compiler->gtNewArgList(loOp1, hiOp1, shiftWidthOp);

    GenTree* call = m_compiler->gtNewHelperCallNode(helper, TYP_LONG, 0, argList);

    GenTreeCall*    callNode    = call->AsCall();
    ReturnTypeDesc* retTypeDesc = callNode->GetReturnTypeDesc();
    retTypeDesc->InitializeLongReturnType(m_compiler);

    call = m_compiler->fgMorphArgs(callNode);
    Range().InsertAfter(tree, LIR::SeqTree(m_compiler, call));

    Range().Remove(tree);
    use.ReplaceWith(m_compiler, call);
    return call;
}
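// Illustrative before/after (not from the original source): a 64-bit left shift
//
//     GT_LSH(GT_LONG(lo, hi), shiftWidth)
//
// becomes a call to the CORINFO_HELP_LLSH helper taking (lo, hi, shiftWidth),
// after each argument has been spilled to a local; GT_RSH and GT_RSZ map to
// CORINFO_HELP_LRSH and CORINFO_HELP_LRSZ the same way.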
// Display a compact representation of the bbJumpKind, that is, where this block branches.
// This is similar to code in Compiler::fgTableDispBasicBlock(), but doesn't have that code's
// requirements to align things strictly.
void BasicBlock::dspJumpKind()
{
    switch (bbJumpKind)
    {
        case BBJ_EHFINALLYRET:
            printf(" (finret)");
            break;

        case BBJ_EHFILTERRET:
            printf(" (fltret)");
            break;

        case BBJ_EHCATCHRET:
            printf(" -> BB%02u (cret)", bbJumpDest->bbNum);
            break;

        case BBJ_THROW:
            printf(" (throw)");
            break;

        case BBJ_RETURN:
            printf(" (return)");
            break;

        case BBJ_NONE:
            // For fall-through blocks, print nothing.
            break;

        case BBJ_ALWAYS:
            if (bbFlags & BBF_KEEP_BBJ_ALWAYS)
            {
                printf(" -> BB%02u (ALWAYS)", bbJumpDest->bbNum);
            }
            else
            {
                printf(" -> BB%02u (always)", bbJumpDest->bbNum);
            }
            break;

        case BBJ_LEAVE:
            printf(" -> BB%02u (leave)", bbJumpDest->bbNum);
            break;

        case BBJ_CALLFINALLY:
            printf(" -> BB%02u (callf)", bbJumpDest->bbNum);
            break;

        case BBJ_COND:
            printf(" -> BB%02u (cond)", bbJumpDest->bbNum);
            break;

        case BBJ_SWITCH:
            printf(" ->");

            unsigned jumpCnt;
            jumpCnt = bbJumpSwt->bbsCount;
            BasicBlock** jumpTab;
            jumpTab = bbJumpSwt->bbsDstTab;
            do
            {
                printf("%cBB%02u", (jumpTab == bbJumpSwt->bbsDstTab) ? ' ' : ',', (*jumpTab)->bbNum);
            } while (++jumpTab, --jumpCnt);

            printf(" (switch)");
            break;

        default:
            unreached();
            break;
    }
}
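// Illustrative output (not from the original source), derived from the printf
// formats above:
//
//     " -> BB03 (cond)"               for a BBJ_COND block targeting BB03
//     " -> BB05,BB07,BB09 (switch)"   for a BBJ_SWITCH with three targets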
//------------------------------------------------------------------------
// BuildSIMD: Set the NodeInfo for a GT_SIMD tree.
//
// Arguments:
//    tree - The GT_SIMD node of interest
//
// Return Value:
//    The number of sources consumed by this node.
//
int LinearScan::BuildSIMD(GenTreeSIMD* simdTree)
{
    int srcCount = 0;
    // Only SIMDIntrinsicInit can be contained
    if (simdTree->isContained())
    {
        assert(simdTree->gtSIMDIntrinsicID == SIMDIntrinsicInit);
    }
    int dstCount = simdTree->IsValue() ? 1 : 0;
    assert(dstCount == 1);

    bool buildUses = true;

    GenTree* op1 = simdTree->gtGetOp1();
    GenTree* op2 = simdTree->gtGetOp2();

    switch (simdTree->gtSIMDIntrinsicID)
    {
        case SIMDIntrinsicInit:
        case SIMDIntrinsicCast:
        case SIMDIntrinsicSqrt:
        case SIMDIntrinsicAbs:
        case SIMDIntrinsicConvertToSingle:
        case SIMDIntrinsicConvertToInt32:
        case SIMDIntrinsicConvertToDouble:
        case SIMDIntrinsicConvertToInt64:
        case SIMDIntrinsicWidenLo:
        case SIMDIntrinsicWidenHi:
            // No special handling required.
            break;

        case SIMDIntrinsicGetItem:
        {
            op1 = simdTree->gtGetOp1();
            op2 = simdTree->gtGetOp2();

            // We have an object and an index, either of which may be contained.
            bool setOp2DelayFree = false;
            if (!op2->IsCnsIntOrI() && (!op1->isContained() || op1->OperIsLocal()))
            {
                // If the index is not a constant and the object is not contained or is a local
                // we will need a general purpose register to calculate the address.
                // internal register must not clobber input index
                // TODO-Cleanup: An internal register will never clobber a source; this code actually
                // ensures that the index (op2) doesn't interfere with the target.
                buildInternalIntRegisterDefForNode(simdTree);
                setOp2DelayFree = true;
            }
            srcCount += BuildOperandUses(op1);
            if (!op2->isContained())
            {
                RefPosition* op2Use = BuildUse(op2);
                if (setOp2DelayFree)
                {
                    setDelayFree(op2Use);
                }
                srcCount++;
            }

            if (!op2->IsCnsIntOrI() && (!op1->isContained()))
            {
                // If vector is not already in memory (contained) and the index is not a constant,
                // we will use the SIMD temp location to store the vector.
                compiler->getSIMDInitTempVarNum();
            }
            buildUses = false;
        }
        break;

        case SIMDIntrinsicAdd:
        case SIMDIntrinsicSub:
        case SIMDIntrinsicMul:
        case SIMDIntrinsicDiv:
        case SIMDIntrinsicBitwiseAnd:
        case SIMDIntrinsicBitwiseAndNot:
        case SIMDIntrinsicBitwiseOr:
        case SIMDIntrinsicBitwiseXor:
        case SIMDIntrinsicMin:
        case SIMDIntrinsicMax:
        case SIMDIntrinsicEqual:
        case SIMDIntrinsicLessThan:
        case SIMDIntrinsicGreaterThan:
        case SIMDIntrinsicLessThanOrEqual:
        case SIMDIntrinsicGreaterThanOrEqual:
            // No special handling required.
            break;

        case SIMDIntrinsicSetX:
        case SIMDIntrinsicSetY:
        case SIMDIntrinsicSetZ:
        case SIMDIntrinsicSetW:
        case SIMDIntrinsicNarrow:
        {
            // Op1 will write to dst before Op2 is free
            BuildUse(op1);
            RefPosition* op2Use = BuildUse(op2);
            setDelayFree(op2Use);
            srcCount  = 2;
            buildUses = false;
            break;
        }

        case SIMDIntrinsicInitN:
        {
            var_types baseType = simdTree->gtSIMDBaseType;
            srcCount           = (short)(simdTree->gtSIMDSize / genTypeSize(baseType));
            if (varTypeIsFloating(simdTree->gtSIMDBaseType))
            {
                // Need an internal register to stitch together all the values into a single vector in a SIMD reg.
                buildInternalFloatRegisterDefForNode(simdTree);
            }

            int initCount = 0;
            for (GenTree* list = op1; list != nullptr; list = list->gtGetOp2())
            {
                assert(list->OperGet() == GT_LIST);
                GenTree* listItem = list->gtGetOp1();
                assert(listItem->TypeGet() == baseType);
                assert(!listItem->isContained());
                BuildUse(listItem);
                initCount++;
            }
            assert(initCount == srcCount);
            buildUses = false;

            break;
        }

        case SIMDIntrinsicInitArray:
            // We have an array and an index, which may be contained.
            break;

        case SIMDIntrinsicOpEquality:
        case SIMDIntrinsicOpInEquality:
            buildInternalFloatRegisterDefForNode(simdTree);
            break;

        case SIMDIntrinsicDotProduct:
            buildInternalFloatRegisterDefForNode(simdTree);
            break;

        case SIMDIntrinsicSelect:
            // TODO-ARM64-CQ Allow lowering to see SIMDIntrinsicSelect so we can generate BSL VC, VA, VB
            // bsl target register must be VC.  Reserve a temp in case we need to shuffle things.
            // This will require a different approach, as GenTreeSIMD has only two operands.
            assert(!"SIMDIntrinsicSelect not yet supported");
            buildInternalFloatRegisterDefForNode(simdTree);
            break;

        case SIMDIntrinsicInitArrayX:
        case SIMDIntrinsicInitFixed:
        case SIMDIntrinsicCopyToArray:
        case SIMDIntrinsicCopyToArrayX:
        case SIMDIntrinsicNone:
        case SIMDIntrinsicGetCount:
        case SIMDIntrinsicGetOne:
        case SIMDIntrinsicGetZero:
        case SIMDIntrinsicGetAllOnes:
        case SIMDIntrinsicGetX:
        case SIMDIntrinsicGetY:
        case SIMDIntrinsicGetZ:
        case SIMDIntrinsicGetW:
        case SIMDIntrinsicInstEquals:
        case SIMDIntrinsicHWAccel:
        case SIMDIntrinsicWiden:
        case SIMDIntrinsicInvalid:
            assert(!"These intrinsics should not be seen during register allocation");
            __fallthrough;

        default:
            noway_assert(!"Unimplemented SIMD node type.");
            unreached();
    }
    if (buildUses)
    {
        assert(!op1->OperIs(GT_LIST));
        assert(srcCount == 0);
        srcCount = BuildOperandUses(op1);
        if ((op2 != nullptr) && !op2->isContained())
        {
            srcCount += BuildOperandUses(op2);
        }
    }
    assert(internalCount <= MaxInternalCount);
    buildInternalRegisterUses();
    if (dstCount == 1)
    {
        BuildDef(simdTree);
    }
    else
    {
        assert(dstCount == 0);
    }
    return srcCount;
}
//------------------------------------------------------------------------
// genHWIntrinsic: Generates the code for a given hardware intrinsic node.
//
// Arguments:
//    node - The hardware intrinsic node
//
void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
{
    NamedIntrinsic      intrinsicID = node->gtHWIntrinsicId;
    InstructionSet      isa         = Compiler::isaOfHWIntrinsic(intrinsicID);
    HWIntrinsicCategory category    = Compiler::categoryOfHWIntrinsic(intrinsicID);
    HWIntrinsicFlag     flags       = Compiler::flagsOfHWIntrinsic(intrinsicID);
    int                 ival        = Compiler::ivalOfHWIntrinsic(intrinsicID);
    int                 numArgs     = Compiler::numArgsOfHWIntrinsic(node);

    assert((flags & HW_Flag_NoCodeGen) == 0);

    if (genIsTableDrivenHWIntrinsic(category, flags))
    {
        GenTree*  op1        = node->gtGetOp1();
        GenTree*  op2        = node->gtGetOp2();
        regNumber targetReg  = node->gtRegNum;
        var_types targetType = node->TypeGet();
        var_types baseType   = node->gtSIMDBaseType;

        regNumber op1Reg = REG_NA;
        regNumber op2Reg = REG_NA;
        emitter*  emit   = getEmitter();

        assert(numArgs >= 0);
        instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
        assert(ins != INS_invalid);
        emitAttr simdSize = EA_ATTR(node->gtSIMDSize);
        assert(simdSize != 0);

        switch (numArgs)
        {
            case 1:
                genConsumeOperands(node);
                op1Reg = op1->gtRegNum;
                if (category == HW_Category_MemoryLoad)
                {
                    emit->emitIns_R_AR(ins, simdSize, targetReg, op1Reg, 0);
                }
                else if (category == HW_Category_SIMDScalar && (flags & HW_Flag_CopyUpperBits) != 0)
                {
                    emit->emitIns_SIMD_R_R_R(ins, simdSize, targetReg, op1Reg, op1Reg);
                }
                else if ((ival != -1) && varTypeIsFloating(baseType))
                {
                    emit->emitIns_R_R_I(ins, simdSize, targetReg, op1Reg, ival);
                }
                else
                {
                    emit->emitIns_R_R(ins, simdSize, targetReg, op1Reg);
                }
                break;

            case 2:
                genConsumeOperands(node);
                op1Reg = op1->gtRegNum;
                op2Reg = op2->gtRegNum;
                if (category == HW_Category_MemoryStore)
                {
                    emit->emitIns_AR_R(ins, simdSize, op2Reg, op1Reg, 0);
                }
                else if ((ival != -1) && varTypeIsFloating(baseType))
                {
                    genHWIntrinsic_R_R_RM_I(node, ins);
                }
                else if (category == HW_Category_MemoryLoad)
                {
                    emit->emitIns_SIMD_R_R_AR(ins, simdSize, targetReg, op1Reg, op2Reg);
                }
                else if (Compiler::isImmHWIntrinsic(intrinsicID, op2))
                {
                    if (intrinsicID == NI_SSE2_Extract)
                    {
                        // extract instructions return to GP-registers, so it needs int size as the emitsize
                        simdSize = emitTypeSize(TYP_INT);
                    }
                    auto emitSwCase = [&](unsigned i) {
                        emit->emitIns_SIMD_R_R_I(ins, simdSize, targetReg, op1Reg, (int)i);
                    };

                    if (op2->IsCnsIntOrI())
                    {
                        ssize_t ival = op2->AsIntCon()->IconValue();
                        emitSwCase((unsigned)ival);
                    }
                    else
                    {
                        // We emit a fallback case for the scenario when the imm-op is not a constant. This should
                        // normally happen when the intrinsic is called indirectly, such as via Reflection. However, it
                        // can also occur if the consumer calls it directly and just doesn't pass a constant value.
                        regNumber baseReg = node->ExtractTempReg();
                        regNumber offsReg = node->GetSingleTempReg();
                        genHWIntrinsicJumpTableFallback(intrinsicID, op2Reg, baseReg, offsReg, emitSwCase);
                    }
                }
                else
                {
                    genHWIntrinsic_R_R_RM(node, ins);
                }
                break;

            case 3:
            {
                assert(op1->OperIsList());
                assert(op1->gtGetOp2()->OperIsList());
                assert(op1->gtGetOp2()->gtGetOp2()->OperIsList());

                GenTreeArgList* argList = op1->AsArgList();
                op1                     = argList->Current();
                genConsumeRegs(op1);
                op1Reg = op1->gtRegNum;

                argList = argList->Rest();
                op2     = argList->Current();
                genConsumeRegs(op2);
                op2Reg = op2->gtRegNum;

                argList      = argList->Rest();
                GenTree* op3 = argList->Current();
                genConsumeRegs(op3);
                regNumber op3Reg = op3->gtRegNum;

                if (Compiler::isImmHWIntrinsic(intrinsicID, op3))
                {
                    auto emitSwCase = [&](unsigned i) {
                        emit->emitIns_SIMD_R_R_R_I(ins, simdSize, targetReg, op1Reg, op2Reg, (int)i);
                    };
                    if (op3->IsCnsIntOrI())
                    {
                        ssize_t ival = op3->AsIntCon()->IconValue();
                        emitSwCase((unsigned)ival);
                    }
                    else
                    {
                        // We emit a fallback case for the scenario when the imm-op is not a constant. This should
                        // normally happen when the intrinsic is called indirectly, such as via Reflection. However, it
                        // can also occur if the consumer calls it directly and just doesn't pass a constant value.
                        regNumber baseReg = node->ExtractTempReg();
                        regNumber offsReg = node->GetSingleTempReg();
                        genHWIntrinsicJumpTableFallback(intrinsicID, op3Reg, baseReg, offsReg, emitSwCase);
                    }
                }
                else if (category == HW_Category_MemoryStore)
                {
                    assert(intrinsicID == NI_SSE2_MaskMove);
                    assert(targetReg == REG_NA);

                    // SSE2 MaskMove hardcodes the destination (op3) in DI/EDI/RDI
                    if (op3Reg != REG_EDI)
                    {
                        emit->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_EDI, op3Reg);
                    }
                    emit->emitIns_R_R(ins, simdSize, op1Reg, op2Reg);
                }
                else
                {
                    emit->emitIns_SIMD_R_R_R_R(ins, simdSize, targetReg, op1Reg, op2Reg, op3Reg);
                }
                break;
            }

            default:
                unreached();
                break;
        }
        genProduceReg(node);
        return;
    }

    switch (isa)
    {
        case InstructionSet_SSE:
            genSSEIntrinsic(node);
            break;
        case InstructionSet_SSE2:
            genSSE2Intrinsic(node);
            break;
        case InstructionSet_SSE41:
            genSSE41Intrinsic(node);
            break;
        case InstructionSet_SSE42:
            genSSE42Intrinsic(node);
            break;
        case InstructionSet_AVX:
            genAVXIntrinsic(node);
            break;
        case InstructionSet_AVX2:
            genAVX2Intrinsic(node);
            break;
        case InstructionSet_AES:
            genAESIntrinsic(node);
            break;
        case InstructionSet_BMI1:
            genBMI1Intrinsic(node);
            break;
        case InstructionSet_BMI2:
            genBMI2Intrinsic(node);
            break;
        case InstructionSet_FMA:
            genFMAIntrinsic(node);
            break;
        case InstructionSet_LZCNT:
            genLZCNTIntrinsic(node);
            break;
        case InstructionSet_PCLMULQDQ:
            genPCLMULQDQIntrinsic(node);
            break;
        case InstructionSet_POPCNT:
            genPOPCNTIntrinsic(node);
            break;
        default:
            unreached();
            break;
    }
}
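// Illustrative note (not from the original source): emitSwCase captures the
// emitter state by reference, so the same lambda serves both paths above. With a
// constant immediate it runs once; otherwise genHWIntrinsicJumpTableFallback is
// expected to invoke it once per possible immediate value, building the jump
// table that dispatches on the run-time value in the immediate operand's register.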
//------------------------------------------------------------------------
// genSSE41Intrinsic: Generates the code for an SSE4.1 hardware intrinsic node
//
// Arguments:
//    node - The hardware intrinsic node
//
void CodeGen::genSSE41Intrinsic(GenTreeHWIntrinsic* node)
{
    NamedIntrinsic intrinsicID = node->gtHWIntrinsicId;
    GenTree*       op1         = node->gtGetOp1();
    GenTree*       op2         = node->gtGetOp2();
    GenTree*       op3         = nullptr;
    GenTree*       op4         = nullptr;
    regNumber      targetReg   = node->gtRegNum;
    var_types      targetType  = node->TypeGet();
    var_types      baseType    = node->gtSIMDBaseType;

    regNumber op1Reg = REG_NA;
    regNumber op2Reg = REG_NA;
    regNumber op3Reg = REG_NA;
    regNumber op4Reg = REG_NA;
    emitter*  emit   = getEmitter();

    if ((op1 != nullptr) && !op1->OperIsList())
    {
        op1Reg = op1->gtRegNum;
        genConsumeOperands(node);
    }

    switch (intrinsicID)
    {
        case NI_SSE41_TestAllOnes:
        {
            regNumber tmpReg = node->GetSingleTempReg();
            assert(Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType) == INS_ptest);
            emit->emitIns_SIMD_R_R_R(INS_pcmpeqd, emitTypeSize(TYP_SIMD16), tmpReg, tmpReg, tmpReg);
            emit->emitIns_R_R(INS_xor, EA_4BYTE, targetReg, targetReg);
            emit->emitIns_R_R(INS_ptest, emitTypeSize(TYP_SIMD16), op1Reg, tmpReg);
            emit->emitIns_R(INS_setb, EA_1BYTE, targetReg);
            break;
        }

        case NI_SSE41_TestAllZeros:
        case NI_SSE41_TestZ:
        {
            assert(Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType) == INS_ptest);
            emit->emitIns_R_R(INS_xor, EA_4BYTE, targetReg, targetReg);
            emit->emitIns_R_R(INS_ptest, emitTypeSize(TYP_SIMD16), op1Reg, op2->gtRegNum);
            emit->emitIns_R(INS_sete, EA_1BYTE, targetReg);
            break;
        }

        case NI_SSE41_TestC:
        {
            assert(Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType) == INS_ptest);
            emit->emitIns_R_R(INS_xor, EA_4BYTE, targetReg, targetReg);
            emit->emitIns_R_R(INS_ptest, emitTypeSize(TYP_SIMD16), op1Reg, op2->gtRegNum);
            emit->emitIns_R(INS_setb, EA_1BYTE, targetReg);
            break;
        }

        case NI_SSE41_TestMixOnesZeros:
        case NI_SSE41_TestNotZAndNotC:
        {
            assert(Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType) == INS_ptest);
            emit->emitIns_R_R(INS_xor, EA_4BYTE, targetReg, targetReg);
            emit->emitIns_R_R(INS_ptest, emitTypeSize(TYP_SIMD16), op1Reg, op2->gtRegNum);
            emit->emitIns_R(INS_seta, EA_1BYTE, targetReg);
            break;
        }

        case NI_SSE41_Extract:
        {
            regNumber   tmpTargetReg = REG_NA;
            instruction ins          = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
            if (baseType == TYP_FLOAT)
            {
                tmpTargetReg = node->ExtractTempReg();
            }
            auto emitSwCase = [&](unsigned i) {
                if (baseType == TYP_FLOAT)
                {
                    // extract instructions return to GP-registers, so it needs int size as the emitsize
                    emit->emitIns_SIMD_R_R_I(ins, emitTypeSize(TYP_INT), op1Reg, tmpTargetReg, (int)i);
                    emit->emitIns_R_R(INS_mov_i2xmm, EA_4BYTE, targetReg, tmpTargetReg);
                }
                else
                {
                    emit->emitIns_SIMD_R_R_I(ins, emitTypeSize(TYP_INT), targetReg, op1Reg, (int)i);
                }
            };

            if (op2->IsCnsIntOrI())
            {
                ssize_t ival = op2->AsIntCon()->IconValue();
                emitSwCase((unsigned)ival);
            }
            else
            {
                // We emit a fallback case for the scenario when the imm-op is not a constant. This should
                // normally happen when the intrinsic is called indirectly, such as via Reflection. However, it
                // can also occur if the consumer calls it directly and just doesn't pass a constant value.
                regNumber baseReg = node->ExtractTempReg();
                regNumber offsReg = node->GetSingleTempReg();
                genHWIntrinsicJumpTableFallback(intrinsicID, op2->gtRegNum, baseReg, offsReg, emitSwCase);
            }
            break;
        }

        default:
            unreached();
            break;
    }
    genProduceReg(node);
}
//------------------------------------------------------------------------
// genSSEIntrinsic: Generates the code for an SSE hardware intrinsic node
//
// Arguments:
//    node - The hardware intrinsic node
//
void CodeGen::genSSEIntrinsic(GenTreeHWIntrinsic* node)
{
    NamedIntrinsic intrinsicID = node->gtHWIntrinsicId;
    GenTree*       op1         = node->gtGetOp1();
    GenTree*       op2         = node->gtGetOp2();
    GenTree*       op3         = nullptr;
    GenTree*       op4         = nullptr;
    regNumber      targetReg   = node->gtRegNum;
    var_types      targetType  = node->TypeGet();
    var_types      baseType    = node->gtSIMDBaseType;

    regNumber op1Reg = REG_NA;
    regNumber op2Reg = REG_NA;
    regNumber op3Reg = REG_NA;
    regNumber op4Reg = REG_NA;
    emitter*  emit   = getEmitter();

    if ((op1 != nullptr) && !op1->OperIsList())
    {
        op1Reg = op1->gtRegNum;
        genConsumeOperands(node);
    }

    switch (intrinsicID)
    {
        case NI_SSE_CompareEqualOrderedScalar:
        case NI_SSE_CompareEqualUnorderedScalar:
        {
            assert(baseType == TYP_FLOAT);
            op2Reg             = op2->gtRegNum;
            regNumber   tmpReg = node->GetSingleTempReg();
            instruction ins    = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);

            // Ensure we aren't overwriting targetReg
            assert(tmpReg != targetReg);

            emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg);
            emit->emitIns_R(INS_setpo, EA_1BYTE, targetReg);
            emit->emitIns_R(INS_sete, EA_1BYTE, tmpReg);
            emit->emitIns_R_R(INS_and, EA_1BYTE, tmpReg, targetReg);
            emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, tmpReg);
            break;
        }

        case NI_SSE_CompareGreaterThanOrderedScalar:
        case NI_SSE_CompareGreaterThanUnorderedScalar:
        {
            assert(baseType == TYP_FLOAT);
            op2Reg          = op2->gtRegNum;
            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
            emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg);
            emit->emitIns_R(INS_seta, EA_1BYTE, targetReg);
            emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);
            break;
        }

        case NI_SSE_CompareGreaterThanOrEqualOrderedScalar:
        case NI_SSE_CompareGreaterThanOrEqualUnorderedScalar:
        {
            assert(baseType == TYP_FLOAT);
            op2Reg          = op2->gtRegNum;
            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
            emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg);
            emit->emitIns_R(INS_setae, EA_1BYTE, targetReg);
            emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);
            break;
        }

        case NI_SSE_CompareLessThanOrderedScalar:
        case NI_SSE_CompareLessThanUnorderedScalar:
        {
            assert(baseType == TYP_FLOAT);
            op2Reg          = op2->gtRegNum;
            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
            emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op2Reg, op1Reg);
            emit->emitIns_R(INS_seta, EA_1BYTE, targetReg);
            emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);
            break;
        }

        case NI_SSE_CompareLessThanOrEqualOrderedScalar:
        case NI_SSE_CompareLessThanOrEqualUnorderedScalar:
        {
            assert(baseType == TYP_FLOAT);
            op2Reg          = op2->gtRegNum;
            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
            emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op2Reg, op1Reg);
            emit->emitIns_R(INS_setae, EA_1BYTE, targetReg);
            emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);
            break;
        }

        case NI_SSE_CompareNotEqualOrderedScalar:
        case NI_SSE_CompareNotEqualUnorderedScalar:
        {
            assert(baseType == TYP_FLOAT);
            op2Reg             = op2->gtRegNum;
            regNumber   tmpReg = node->GetSingleTempReg();
            instruction ins    = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);

            // Ensure we aren't overwriting targetReg
            assert(tmpReg != targetReg);

            emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg);
            emit->emitIns_R(INS_setpe, EA_1BYTE, targetReg);
            emit->emitIns_R(INS_setne, EA_1BYTE, tmpReg);
            emit->emitIns_R_R(INS_or, EA_1BYTE, tmpReg, targetReg);
            emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, tmpReg);
            break;
        }

        case NI_SSE_ConvertToSingle:
        case NI_SSE_StaticCast:
        {
            assert(op2 == nullptr);
            if (op1Reg != targetReg)
            {
                instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
                emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), targetReg, op1Reg);
            }
            break;
        }

        case NI_SSE_MoveMask:
        {
            assert(baseType == TYP_FLOAT);
            assert(op2 == nullptr);

            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
            emit->emitIns_R_R(ins, emitTypeSize(TYP_INT), targetReg, op1Reg);
            break;
        }

        case NI_SSE_Prefetch0:
        case NI_SSE_Prefetch1:
        case NI_SSE_Prefetch2:
        case NI_SSE_PrefetchNonTemporal:
        {
            assert(baseType == TYP_UBYTE);
            assert(op2 == nullptr);

            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
            emit->emitIns_AR(ins, emitTypeSize(baseType), op1Reg, 0);
            break;
        }

        case NI_SSE_SetScalarVector128:
        {
            assert(baseType == TYP_FLOAT);
            assert(op2 == nullptr);

            if (op1Reg == targetReg)
            {
                regNumber tmpReg = node->GetSingleTempReg();

                // Ensure we aren't overwriting targetReg
                assert(tmpReg != targetReg);

                emit->emitIns_R_R(INS_movaps, emitTypeSize(TYP_SIMD16), tmpReg, op1Reg);
                op1Reg = tmpReg;
            }

            emit->emitIns_SIMD_R_R_R(INS_xorps, emitTypeSize(TYP_SIMD16), targetReg, targetReg, targetReg);
            emit->emitIns_SIMD_R_R_R(INS_movss, emitTypeSize(TYP_SIMD16), targetReg, targetReg, op1Reg);
            break;
        }

        case NI_SSE_SetZeroVector128:
        {
            assert(baseType == TYP_FLOAT);
            assert(op1 == nullptr);
            assert(op2 == nullptr);
            emit->emitIns_SIMD_R_R_R(INS_xorps, emitTypeSize(TYP_SIMD16), targetReg, targetReg, targetReg);
            break;
        }

        case NI_SSE_StoreFence:
        {
            assert(baseType == TYP_VOID);
            assert(op1 == nullptr);
            assert(op2 == nullptr);
            emit->emitIns(INS_sfence);
            break;
        }

        default:
            unreached();
            break;
    }

    genProduceReg(node);
}
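// NOTE: a standalone scalar model (not JIT code) of why the CompareEqual*Scalar
// lowering above needs the parity check. After `ucomiss`/`comiss`, an unordered
// (NaN) comparison sets ZF = PF = CF = 1, so ZF alone would make NaN == NaN
// report true. The sequence therefore ANDs `sete` (ZF set) with `setpo` (PF
// clear, i.e. ordered). The helper name `compareEqualOrderedScalar` is
// illustrative only.
//
// #include <cmath>
// #include <cstdio>
//
// static int compareEqualOrderedScalar(float a, float b)
// {
//     bool unordered = std::isnan(a) || std::isnan(b); // PF after ucomiss
//     bool zf        = unordered || (a == b);          // ZF after ucomiss
//     return (zf && !unordered) ? 1 : 0;               // sete AND setpo
// }
//
// int main()
// {
//     std::printf("%d\n", compareEqualOrderedScalar(1.0f, 1.0f)); // prints 1
//     std::printf("%d\n", compareEqualOrderedScalar(NAN, NAN));   // prints 0: unordered
//     return 0;
// }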
//------------------------------------------------------------------------
// genHWIntrinsic_R_R_RM_I: Generates the code for a hardware intrinsic node that takes a register operand, a
//                          register/memory operand, an immediate operand, and that returns a value in register
//
// Arguments:
//    node - The hardware intrinsic node
//    ins  - The instruction being generated
//
void CodeGen::genHWIntrinsic_R_R_RM_I(GenTreeHWIntrinsic* node, instruction ins)
{
    var_types targetType = node->TypeGet();
    regNumber targetReg  = node->gtRegNum;
    GenTree*  op1        = node->gtGetOp1();
    GenTree*  op2        = node->gtGetOp2();
    emitAttr  simdSize   = EA_ATTR(node->gtSIMDSize);
    int       ival       = Compiler::ivalOfHWIntrinsic(node->gtHWIntrinsicId);
    emitter*  emit       = getEmitter();

    // TODO-XArch-CQ: Commutative operations can have op1 be contained
    // TODO-XArch-CQ: Non-VEX encoded instructions can have both ops contained

    regNumber op1Reg = op1->gtRegNum;

    assert(targetReg != REG_NA);
    assert(op1Reg != REG_NA);

    if (op2->isContained() || op2->isUsedFromSpillTemp())
    {
        assert((Compiler::flagsOfHWIntrinsic(node->gtHWIntrinsicId) & HW_Flag_NoContainment) == 0);
        assert(compiler->m_pLowering->IsContainableHWIntrinsicOp(node, op2) || op2->IsRegOptional());

        TempDsc* tmpDsc = nullptr;
        unsigned varNum = BAD_VAR_NUM;
        unsigned offset = (unsigned)-1;

        if (op2->isUsedFromSpillTemp())
        {
            assert(op2->IsRegOptional());

            tmpDsc = getSpillTempDsc(op2);
            varNum = tmpDsc->tdTempNum();
            offset = 0;

            compiler->tmpRlsTemp(tmpDsc);
        }
        else if (op2->OperIsHWIntrinsic())
        {
            emit->emitIns_SIMD_R_R_AR_I(ins, simdSize, targetReg, op1Reg, op2->gtGetOp1()->gtRegNum, ival);
            return;
        }
        else if (op2->isIndir())
        {
            GenTreeIndir* memIndir = op2->AsIndir();
            GenTree*      memBase  = memIndir->gtOp1;

            switch (memBase->OperGet())
            {
                case GT_LCL_VAR_ADDR:
                {
                    varNum = memBase->AsLclVarCommon()->GetLclNum();
                    offset = 0;

                    // Ensure that all the GenTreeIndir values are set to their defaults.
                    assert(!memIndir->HasIndex());
                    assert(memIndir->Scale() == 1);
                    assert(memIndir->Offset() == 0);
                    break;
                }

                case GT_CLS_VAR_ADDR:
                {
                    emit->emitIns_SIMD_R_R_C_I(ins, simdSize, targetReg, op1Reg, memBase->gtClsVar.gtClsVarHnd, 0,
                                               ival);
                    return;
                }

                default:
                {
                    emit->emitIns_SIMD_R_R_A_I(ins, simdSize, targetReg, op1Reg, memIndir, ival);
                    return;
                }
            }
        }
        else
        {
            switch (op2->OperGet())
            {
                case GT_LCL_FLD:
                {
                    GenTreeLclFld* lclField = op2->AsLclFld();

                    varNum = lclField->GetLclNum();
                    offset = lclField->gtLclFld.gtLclOffs;
                    break;
                }

                case GT_LCL_VAR:
                {
                    assert(op2->IsRegOptional() || !compiler->lvaTable[op2->gtLclVar.gtLclNum].lvIsRegCandidate());
                    varNum = op2->AsLclVar()->GetLclNum();
                    offset = 0;
                    break;
                }

                default:
                    unreached();
                    break;
            }
        }

        // Ensure we got a good varNum and offset.
        // We also need to check for `tmpDsc != nullptr` since spill temp numbers
        // are negative and start with -1, which also happens to be BAD_VAR_NUM.
        assert((varNum != BAD_VAR_NUM) || (tmpDsc != nullptr));
        assert(offset != (unsigned)-1);

        emit->emitIns_SIMD_R_R_S_I(ins, simdSize, targetReg, op1Reg, varNum, offset, ival);
    }
    else
    {
        emit->emitIns_SIMD_R_R_R_I(ins, simdSize, targetReg, op1Reg, op2->gtRegNum, ival);
    }
}
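// NOTE: a simplified standalone model (the enum and helper names are
// illustrative, not JIT APIs) of the operand-form dispatch in
// genHWIntrinsic_R_R_RM_I above: a contained op2 selects one of the
// memory-form emitter entry points, and the register form is used otherwise.
//
// #include <cstdio>
//
// enum class OperandForm { Register, Indirection, StaticField, StackLocal, SpillTemp };
//
// static const char* emitterEntryFor(OperandForm form)
// {
//     switch (form)
//     {
//         case OperandForm::Register:    return "emitIns_SIMD_R_R_R_I"; // reg, reg, reg, imm
//         case OperandForm::Indirection: return "emitIns_SIMD_R_R_A_I"; // [base + index*scale + disp]
//         case OperandForm::StaticField: return "emitIns_SIMD_R_R_C_I"; // static field handle
//         case OperandForm::StackLocal:                                 // frame local...
//         case OperandForm::SpillTemp:   return "emitIns_SIMD_R_R_S_I"; // ...or spill temp slot
//     }
//     return "unreached";
// }
//
// int main()
// {
//     std::printf("%s\n", emitterEntryFor(OperandForm::StaticField)); // emitIns_SIMD_R_R_C_I
//     return 0;
// }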
//------------------------------------------------------------------------
// genSSE2Intrinsic: Generates the code for an SSE2 hardware intrinsic node
//
// Arguments:
//    node - The hardware intrinsic node
//
void CodeGen::genSSE2Intrinsic(GenTreeHWIntrinsic* node)
{
    NamedIntrinsic intrinsicID = node->gtHWIntrinsicId;
    GenTree*       op1         = node->gtGetOp1();
    GenTree*       op2         = node->gtGetOp2();
    regNumber      targetReg   = node->gtRegNum;
    var_types      targetType  = node->TypeGet();
    var_types      baseType    = node->gtSIMDBaseType;
    regNumber      op1Reg      = REG_NA;
    regNumber      op2Reg      = REG_NA;
    emitter*       emit        = getEmitter();
    int            ival        = -1;

    if ((op1 != nullptr) && !op1->OperIsList())
    {
        op1Reg = op1->gtRegNum;
        genConsumeOperands(node);
    }

    switch (intrinsicID)
    {
        // All integer overloads are handled by table codegen
        case NI_SSE2_CompareLessThan:
        {
            assert(op1 != nullptr);
            assert(op2 != nullptr);
            assert(baseType == TYP_DOUBLE);

            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
            op2Reg          = op2->gtRegNum;
            ival            = Compiler::ivalOfHWIntrinsic(intrinsicID);
            assert(ival != -1);
            emit->emitIns_SIMD_R_R_R_I(ins, emitTypeSize(TYP_SIMD16), targetReg, op1Reg, op2Reg, ival);
            break;
        }

        case NI_SSE2_CompareEqualOrderedScalar:
        case NI_SSE2_CompareEqualUnorderedScalar:
        {
            assert(baseType == TYP_DOUBLE);
            op2Reg             = op2->gtRegNum;
            regNumber   tmpReg = node->GetSingleTempReg();
            instruction ins    = Compiler::insOfHWIntrinsic(intrinsicID, baseType);

            // Ensure we aren't overwriting targetReg
            assert(tmpReg != targetReg);

            emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg);
            emit->emitIns_R(INS_setpo, EA_1BYTE, targetReg);
            emit->emitIns_R(INS_sete, EA_1BYTE, tmpReg);
            emit->emitIns_R_R(INS_and, EA_1BYTE, tmpReg, targetReg);
            emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, tmpReg);
            break;
        }

        case NI_SSE2_CompareGreaterThanOrderedScalar:
        case NI_SSE2_CompareGreaterThanUnorderedScalar:
        {
            assert(baseType == TYP_DOUBLE);
            op2Reg          = op2->gtRegNum;
            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
            emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg);
            emit->emitIns_R(INS_seta, EA_1BYTE, targetReg);
            emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);
            break;
        }

        case NI_SSE2_CompareGreaterThanOrEqualOrderedScalar:
        case NI_SSE2_CompareGreaterThanOrEqualUnorderedScalar:
        {
            assert(baseType == TYP_DOUBLE);
            op2Reg          = op2->gtRegNum;
            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
            emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg);
            emit->emitIns_R(INS_setae, EA_1BYTE, targetReg);
            emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);
            break;
        }

        case NI_SSE2_CompareLessThanOrderedScalar:
        case NI_SSE2_CompareLessThanUnorderedScalar:
        {
            assert(baseType == TYP_DOUBLE);
            op2Reg          = op2->gtRegNum;
            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
            emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op2Reg, op1Reg);
            emit->emitIns_R(INS_seta, EA_1BYTE, targetReg);
            emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);
            break;
        }

        case NI_SSE2_CompareLessThanOrEqualOrderedScalar:
        case NI_SSE2_CompareLessThanOrEqualUnorderedScalar:
        {
            assert(baseType == TYP_DOUBLE);
            op2Reg          = op2->gtRegNum;
            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
            emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op2Reg, op1Reg);
            emit->emitIns_R(INS_setae, EA_1BYTE, targetReg);
            emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);
            break;
        }

        case NI_SSE2_CompareNotEqualOrderedScalar:
        case NI_SSE2_CompareNotEqualUnorderedScalar:
        {
            assert(baseType == TYP_DOUBLE);
            op2Reg             = op2->gtRegNum;
            instruction ins    = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
            regNumber   tmpReg = node->GetSingleTempReg();

            // Ensure we aren't overwriting targetReg
            assert(tmpReg != targetReg);

            emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg);
            emit->emitIns_R(INS_setpe, EA_1BYTE, targetReg);
            emit->emitIns_R(INS_setne, EA_1BYTE, tmpReg);
            emit->emitIns_R_R(INS_or, EA_1BYTE, tmpReg, targetReg);
            emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, tmpReg);
            break;
        }

        case NI_SSE2_ConvertScalarToVector128Double:
        case NI_SSE2_ConvertScalarToVector128Single:
        {
            assert(baseType == TYP_INT || baseType == TYP_LONG || baseType == TYP_FLOAT || baseType == TYP_DOUBLE);
            assert(op1 != nullptr);
            assert(op2 != nullptr);
            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
            genHWIntrinsic_R_R_RM(node, ins);
            break;
        }

        case NI_SSE2_ConvertScalarToVector128Int64:
        case NI_SSE2_ConvertScalarToVector128UInt64:
        {
            assert(baseType == TYP_LONG || baseType == TYP_ULONG);
            assert(op1 != nullptr);
            assert(op2 == nullptr);
            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
            // TODO-XArch-CQ -> use of type size of TYP_SIMD16 leads to
            // instruction register encoding errors for SSE legacy encoding
            emit->emitIns_R_R(ins, emitTypeSize(baseType), targetReg, op1Reg);
            break;
        }

        case NI_SSE2_ConvertToDouble:
        {
            assert(op2 == nullptr);
            if (op1Reg != targetReg)
            {
                instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
                emit->emitIns_R_R(ins, emitTypeSize(targetType), targetReg, op1Reg);
            }
            break;
        }

        case NI_SSE2_ConvertToInt32:
        case NI_SSE2_ConvertToInt64:
        case NI_SSE2_ConvertToUInt32:
        case NI_SSE2_ConvertToUInt64:
        {
            assert(op2 == nullptr);
            assert(baseType == TYP_DOUBLE || baseType == TYP_FLOAT || baseType == TYP_INT || baseType == TYP_UINT ||
                   baseType == TYP_LONG || baseType == TYP_ULONG);
            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
            if (baseType == TYP_DOUBLE || baseType == TYP_FLOAT)
            {
                emit->emitIns_R_R(ins, emitTypeSize(targetType), targetReg, op1Reg);
            }
            else
            {
                emit->emitIns_R_R(ins, emitActualTypeSize(baseType), op1Reg, targetReg);
            }
            break;
        }

        case NI_SSE2_LoadFence:
        {
            assert(baseType == TYP_VOID);
            assert(op1 == nullptr);
            assert(op2 == nullptr);
            emit->emitIns(INS_lfence);
            break;
        }

        case NI_SSE2_MemoryFence:
        {
            assert(baseType == TYP_VOID);
            assert(op1 == nullptr);
            assert(op2 == nullptr);
            emit->emitIns(INS_mfence);
            break;
        }

        case NI_SSE2_MoveMask:
        {
            assert(op2 == nullptr);
            assert(baseType == TYP_BYTE || baseType == TYP_UBYTE || baseType == TYP_DOUBLE);

            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
            emit->emitIns_R_R(ins, emitTypeSize(TYP_INT), targetReg, op1Reg);
            break;
        }

        case NI_SSE2_SetScalarVector128:
        {
            assert(baseType == TYP_DOUBLE);
            assert(op2 == nullptr);

            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);

            if (op1Reg == targetReg)
            {
                regNumber tmpReg = node->GetSingleTempReg();

                // Ensure we aren't overwriting targetReg
                assert(tmpReg != targetReg);

                emit->emitIns_R_R(INS_movapd, emitTypeSize(TYP_SIMD16), tmpReg, op1Reg);
                op1Reg = tmpReg;
            }

            emit->emitIns_SIMD_R_R_R(INS_xorpd, emitTypeSize(TYP_SIMD16), targetReg, targetReg, targetReg);
            emit->emitIns_SIMD_R_R_R(ins, emitTypeSize(TYP_SIMD16), targetReg, targetReg, op1Reg);
            break;
        }

        case NI_SSE2_SetZeroVector128:
        {
            assert(baseType != TYP_FLOAT);
            assert(baseType >= TYP_BYTE && baseType <= TYP_DOUBLE);
            assert(op1 == nullptr);
            assert(op2 == nullptr);
            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
            emit->emitIns_SIMD_R_R_R(ins, emitTypeSize(TYP_SIMD16), targetReg, targetReg, targetReg);
            break;
        }

        default:
            unreached();
            break;
    }

    genProduceReg(node);
}
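// NOTE: a standalone sketch (not JIT code) of the zeroing idiom that
// SetZeroVector128 relies on above: XORing a register with itself yields zero
// regardless of its prior contents, so no constant load is needed, and x86
// cores recognize the pattern as a dependency-breaking idiom.
//
// #include <emmintrin.h> // SSE2
// #include <cstdio>
//
// int main()
// {
//     __m128i v    = _mm_set1_epi32(42);
//     __m128i zero = _mm_xor_si128(v, v); // compiles to the pxor reg, reg idiom
//
//     alignas(16) int lanes[4];
//     _mm_store_si128(reinterpret_cast<__m128i*>(lanes), zero);
//     std::printf("%d %d %d %d\n", lanes[0], lanes[1], lanes[2], lanes[3]); // 0 0 0 0
//     return 0;
// }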