void CodeGen::genHWIntrinsicJumpTableFallback(NamedIntrinsic            intrinsic,
                                              regNumber                 nonConstImmReg,
                                              regNumber                 baseReg,
                                              regNumber                 offsReg,
                                              HWIntrinsicSwitchCaseBody emitSwCase)
{
    assert(nonConstImmReg != REG_NA);

    emitter* emit = getEmitter();

    const unsigned maxByte = (unsigned)Compiler::immUpperBoundOfHWIntrinsic(intrinsic) + 1;
    assert(maxByte <= 256);
    BasicBlock* jmpTable[256];

    unsigned jmpTableBase = emit->emitBBTableDataGenBeg(maxByte, true);
    unsigned jmpTableOffs = 0;

    // Emit the jump table
    for (unsigned i = 0; i < maxByte; i++)
    {
        jmpTable[i] = genCreateTempLabel();
        emit->emitDataGenData(i, jmpTable[i]);
    }

    emit->emitDataGenEnd();

    // Compute and jump to the appropriate offset in the switch table
    emit->emitIns_R_C(INS_lea, emitTypeSize(TYP_I_IMPL), offsReg, compiler->eeFindJitDataOffs(jmpTableBase), 0);

    emit->emitIns_R_ARX(INS_mov, EA_4BYTE, offsReg, offsReg, nonConstImmReg, 4, 0);
    emit->emitIns_R_L(INS_lea, EA_PTR_DSP_RELOC, compiler->fgFirstBB, baseReg);
    emit->emitIns_R_R(INS_add, EA_PTRSIZE, offsReg, baseReg);
    emit->emitIns_R(INS_i_jmp, emitTypeSize(TYP_I_IMPL), offsReg);

    // Emit the switch table entries
    BasicBlock* switchTableBeg = genCreateTempLabel();
    BasicBlock* switchTableEnd = genCreateTempLabel();

    genDefineTempLabel(switchTableBeg);

    for (unsigned i = 0; i < maxByte; i++)
    {
        genDefineTempLabel(jmpTable[i]);
        emitSwCase(i);
        emit->emitIns_J(INS_jmp, switchTableEnd);
    }

    genDefineTempLabel(switchTableEnd);
}
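// A rough sketch of the code generated by the fallback above (register names,
// labels, and the data-section layout are illustrative only):
//
//      lea   offsReg, [jumpTable]                  ; address of the offset table emitted above
//      mov   offsReg, dword ptr [offsReg + 4*imm]  ; load the selected case's offset
//      lea   baseReg, [fgFirstBB]                  ; relocatable address of the method start
//      add   offsReg, baseReg                      ; compute the absolute address of the case
//      jmp   offsReg                               ; dispatch
//    case_0:   <emitSwCase(0)>   jmp done
//      ...
//    case_N:   <emitSwCase(N)>   jmp done
//    done: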
//------------------------------------------------------------------------
// genSSE42Intrinsic: Generates the code for an SSE4.2 hardware intrinsic node
//
// Arguments:
//    node - The hardware intrinsic node
//
void CodeGen::genSSE42Intrinsic(GenTreeHWIntrinsic* node)
{
    NamedIntrinsic intrinsicID = node->gtHWIntrinsicId;
    GenTree*       op1         = node->gtGetOp1();
    GenTree*       op2         = node->gtGetOp2();
    regNumber      targetReg   = node->gtRegNum;
    assert(targetReg != REG_NA);
    var_types targetType = node->TypeGet();
    var_types baseType   = node->gtSIMDBaseType;

    regNumber op1Reg = op1->gtRegNum;
    regNumber op2Reg = op2->gtRegNum;
    genConsumeOperands(node);

    switch (intrinsicID)
    {
        case NI_SSE42_Crc32:
            if (op1Reg != targetReg)
            {
                inst_RV_RV(INS_mov, targetReg, op1Reg, targetType, emitTypeSize(targetType));
            }

            if (baseType == TYP_UBYTE || baseType == TYP_USHORT) // baseType is the type of the second argument
            {
                assert(targetType == TYP_INT);
                inst_RV_RV(INS_crc32, targetReg, op2Reg, baseType, emitTypeSize(baseType));
            }
            else
            {
                assert(op1->TypeGet() == op2->TypeGet());
                assert(targetType == TYP_INT || targetType == TYP_LONG);
                inst_RV_RV(INS_crc32, targetReg, op2Reg, targetType, emitTypeSize(targetType));
            }

            break;

        default:
            unreached();
            break;
    }

    genProduceReg(node);
}
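// Note on the Crc32 cases above: the operand size is taken from the second
// argument, so byte/ushort data selects the "crc32 r32, r/m8" / "crc32 r32, r/m16"
// instruction forms (with a TYP_INT result), while uint/ulong data selects the
// "crc32 r32, r/m32" / "crc32 r64, r/m64" forms.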
//------------------------------------------------------------------------
// genPOPCNTIntrinsic: Generates the code for a POPCNT hardware intrinsic node
//
// Arguments:
//    node - The hardware intrinsic node
//
void CodeGen::genPOPCNTIntrinsic(GenTreeHWIntrinsic* node)
{
    NamedIntrinsic intrinsicID = node->gtHWIntrinsicId;
    GenTree*       op1         = node->gtGetOp1();
    regNumber      targetReg   = node->gtRegNum;
    assert(targetReg != REG_NA);
    var_types targetType = node->TypeGet();
    regNumber op1Reg     = op1->gtRegNum;
    genConsumeOperands(node);

    assert(intrinsicID == NI_POPCNT_PopCount);
    inst_RV_RV(INS_popcnt, targetReg, op1Reg, targetType, emitTypeSize(targetType));

    genProduceReg(node);
}
//------------------------------------------------------------------------
// TreeNodeInfoInitIndir: Specify register requirements for address expression
//                        of an indirection operation.
//
// Arguments:
//    indirTree - GT_IND, GT_STOREIND, block node or GT_NULLCHECK gentree node
//
void Lowering::TreeNodeInfoInitIndir(GenTreeIndir* indirTree)
{
    ContainCheckIndir(indirTree);

    // If this is the rhs of a block copy (i.e. non-enregisterable struct),
    // it has no register requirements.
    if (indirTree->TypeGet() == TYP_STRUCT)
    {
        return;
    }

    TreeNodeInfo* info    = &(indirTree->gtLsraInfo);
    bool          isStore = (indirTree->gtOper == GT_STOREIND);
    info->srcCount        = GetIndirSourceCount(indirTree);

    GenTree* addr  = indirTree->Addr();
    GenTree* index = nullptr;
    unsigned cns   = 0;

#ifdef _TARGET_ARM_
    // Unaligned loads/stores for floating point values must first be loaded into integer register(s)
    if (indirTree->gtFlags & GTF_IND_UNALIGNED)
    {
        var_types type = TYP_UNDEF;
        if (indirTree->OperGet() == GT_STOREIND)
        {
            type = indirTree->AsStoreInd()->Data()->TypeGet();
        }
        else if (indirTree->OperGet() == GT_IND)
        {
            type = indirTree->TypeGet();
        }

        if (type == TYP_FLOAT)
        {
            info->internalIntCount = 1;
        }
        else if (type == TYP_DOUBLE)
        {
            info->internalIntCount = 2;
        }
    }
#endif

    if (addr->isContained())
    {
        assert(addr->OperGet() == GT_LEA);
        GenTreeAddrMode* lea = addr->AsAddrMode();
        index                = lea->Index();
        cns                  = lea->gtOffset;

        // On ARM we may need a single internal register
        // (when both conditions are true then we still only need a single internal register)
        if ((index != nullptr) && (cns != 0))
        {
            // ARM does not support both Index and offset so we need an internal register
            info->internalIntCount++;
        }
        else if (!emitter::emitIns_valid_imm_for_ldst_offset(cns, emitTypeSize(indirTree)))
        {
            // This offset can't be contained in the ldr/str instruction, so we need an internal register
            info->internalIntCount++;
        }
    }
}
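// For example, an address such as [rBase + rIndex + 4] cannot be encoded by a
// single ARM ldr/str: the instruction accepts either a register offset
// ([rBase, rIndex]) or an immediate offset ([rBase, #imm]), but not both, so
// an internal register is reserved to materialize part of the address first.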
//------------------------------------------------------------------------
// TreeNodeInfoInitIndir: Specify register requirements for address expression
//                        of an indirection operation.
//
// Arguments:
//    indirTree - GT_IND, GT_STOREIND, block node or GT_NULLCHECK gentree node
//
void Lowering::TreeNodeInfoInitIndir(GenTreePtr indirTree)
{
    assert(indirTree->OperIsIndir());

    // If this is the rhs of a block copy (i.e. non-enregisterable struct),
    // it has no register requirements.
    if (indirTree->TypeGet() == TYP_STRUCT)
    {
        return;
    }

    GenTreePtr    addr = indirTree->gtGetOp1();
    TreeNodeInfo* info = &(indirTree->gtLsraInfo);

    GenTreePtr base  = nullptr;
    GenTreePtr index = nullptr;
    unsigned   cns   = 0;
    unsigned   mul;
    bool       rev;
    bool       modifiedSources = false;
    bool       makeContained   = true;

    if ((addr->OperGet() == GT_LEA) && IsSafeToContainMem(indirTree, addr))
    {
        GenTreeAddrMode* lea = addr->AsAddrMode();
        base                 = lea->Base();
        index                = lea->Index();
        cns                  = lea->gtOffset;

#ifdef _TARGET_ARM_
        // ARM floating-point load/store doesn't support a form similar to integer
        // ldr Rdst, [Rbase + Roffset] with offset in a register. The only supported
        // form is vldr Rdst, [Rbase + imm] with a more limited constraint on the imm.
        if (lea->HasIndex() || !emitter::emitIns_valid_imm_for_vldst_offset(cns))
        {
            if (indirTree->OperGet() == GT_STOREIND)
            {
                if (varTypeIsFloating(indirTree->AsStoreInd()->Data()))
                {
                    makeContained = false;
                }
            }
            else if (indirTree->OperGet() == GT_IND)
            {
                if (varTypeIsFloating(indirTree))
                {
                    makeContained = false;
                }
            }
        }
#endif

        if (makeContained)
        {
            m_lsra->clearOperandCounts(addr);
            addr->SetContained();

            // The srcCount is decremented because addr is now "contained",
            // then we account for the base and index below, if they are non-null.
            info->srcCount--;
        }
    }
    else if (comp->codeGen->genCreateAddrMode(addr, -1, true, 0, &rev, &base, &index, &mul, &cns, true /*nogen*/) &&
             !(modifiedSources = AreSourcesPossiblyModifiedLocals(indirTree, base, index)))
    {
        // An addressing mode will be constructed that may cause some
        // nodes to not need a register, and cause others' lifetimes to be extended
        // to the GT_IND or even its parent if it's an assignment

        assert(base != addr);
        m_lsra->clearOperandCounts(addr);
        addr->SetContained();

        // Traverse the computation below GT_IND to find the operands
        // for the addressing mode, marking the various constants and
        // intermediate results as not consuming/producing.
        // If the traversal were more complex, we might consider using
        // a traversal function, but the addressing mode is only made
        // up of simple arithmetic operators, and the code generator
        // only traverses one leg of each node.

        bool       foundBase  = (base == nullptr);
        bool       foundIndex = (index == nullptr);
        GenTreePtr nextChild  = nullptr;
        for (GenTreePtr child = addr; child != nullptr && !child->OperIsLeaf(); child = nextChild)
        {
            nextChild      = nullptr;
            GenTreePtr op1 = child->gtOp.gtOp1;
            GenTreePtr op2 = (child->OperIsBinary()) ? child->gtOp.gtOp2 : nullptr;

            if (op1 == base)
            {
                foundBase = true;
            }
            else if (op1 == index)
            {
                foundIndex = true;
            }
            else
            {
                m_lsra->clearOperandCounts(op1);
                op1->SetContained();
                if (!op1->OperIsLeaf())
                {
                    nextChild = op1;
                }
            }

            if (op2 != nullptr)
            {
                if (op2 == base)
                {
                    foundBase = true;
                }
                else if (op2 == index)
                {
                    foundIndex = true;
                }
                else
                {
                    m_lsra->clearOperandCounts(op2);
                    op2->SetContained();
                    if (!op2->OperIsLeaf())
                    {
                        assert(nextChild == nullptr);
                        nextChild = op2;
                    }
                }
            }
        }
        assert(foundBase && foundIndex);
        info->srcCount--; // it gets incremented below.
    }
    else if (addr->gtOper == GT_ARR_ELEM)
    {
        // The GT_ARR_ELEM consumes all the indices and produces the offset.
        // The array object lives until the mem access.
        // We also consume the target register to which the address is
        // computed

        info->srcCount++;
        assert(addr->gtLsraInfo.srcCount >= 2);
        addr->gtLsraInfo.srcCount -= 1;
    }
    else
    {
        // it is nothing but a plain indir
        info->srcCount--; // base gets added in below
        base = addr;
    }

    if (!makeContained)
    {
        return;
    }

    if (base != nullptr)
    {
        info->srcCount++;
    }

    if (index != nullptr && !modifiedSources)
    {
        info->srcCount++;
    }

    // On ARM we may need a single internal register
    // (when both conditions are true then we still only need a single internal register)
    if ((index != nullptr) && (cns != 0))
    {
        // ARM does not support both Index and offset so we need an internal register
        info->internalIntCount = 1;
    }
    else if (!emitter::emitIns_valid_imm_for_ldst_offset(cns, emitTypeSize(indirTree)))
    {
        // This offset can't be contained in the ldr/str instruction, so we need an internal register
        info->internalIntCount = 1;
    }
}
void CodeGen::genLoadFloat(GenTreePtr tree, regNumber reg)
{
    if (tree->IsRegVar())
    {
        // if it has been spilled, unspill it.
        LclVarDsc* varDsc = &compiler->lvaTable[tree->gtLclVarCommon.gtLclNum];
        if (varDsc->lvSpilled)
        {
            UnspillFloat(varDsc);
        }

        inst_RV_RV(ins_FloatCopy(tree->TypeGet()), reg, tree->gtRegNum, tree->TypeGet());
    }
    else
    {
        bool unalignedLoad = false;
        switch (tree->OperGet())
        {
            case GT_IND:
            case GT_CLS_VAR:
                if (tree->gtFlags & GTF_IND_UNALIGNED)
                    unalignedLoad = true;
                break;
            case GT_LCL_FLD:
                // Check for a misalignment on a Floating Point field
                //
                if (varTypeIsFloating(tree->TypeGet()))
                {
                    if ((tree->gtLclFld.gtLclOffs % emitTypeSize(tree->TypeGet())) != 0)
                    {
                        unalignedLoad = true;
                    }
                }
                break;
            default:
                break;
        }

        if (unalignedLoad)
        {
            // Make the target addressable
            //
            regMaskTP addrReg = genMakeAddressable(tree, 0, RegSet::KEEP_REG, true);
            regSet.rsLockUsedReg(addrReg); // Must prevent regSet.rsGrabReg from choosing an addrReg

            var_types loadType = tree->TypeGet();
            assert(loadType == TYP_DOUBLE || loadType == TYP_FLOAT);

            // Unaligned Floating-Point Loads must be loaded into integer register(s)
            // and then moved over to the Floating-Point register
            regNumber intRegLo    = regSet.rsGrabReg(RBM_ALLINT);
            regNumber intRegHi    = REG_NA;
            regMaskTP tmpLockMask = genRegMask(intRegLo);

            if (loadType == TYP_DOUBLE)
            {
                intRegHi = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(intRegLo));
                tmpLockMask |= genRegMask(intRegHi);
            }

            regSet.rsLockReg(tmpLockMask); // Temporarily lock the intRegs
            tree->gtType = TYP_INT;        // Temporarily change the type to TYP_INT

            inst_RV_TT(ins_Load(TYP_INT), intRegLo, tree);
            regTracker.rsTrackRegTrash(intRegLo);

            if (loadType == TYP_DOUBLE)
            {
                inst_RV_TT(ins_Load(TYP_INT), intRegHi, tree, 4);
                regTracker.rsTrackRegTrash(intRegHi);
            }

            tree->gtType = loadType;         // Change the type back to the floating point type
            regSet.rsUnlockReg(tmpLockMask); // Unlock the intRegs

            // move the integer register(s) over to the FP register
            //
            if (loadType == TYP_DOUBLE)
                getEmitter()->emitIns_R_R_R(INS_vmov_i2d, EA_8BYTE, reg, intRegLo, intRegHi);
            else
                getEmitter()->emitIns_R_R(INS_vmov_i2f, EA_4BYTE, reg, intRegLo);

            // Free up anything that was tied up by genMakeAddressable
            //
            regSet.rsUnlockUsedReg(addrReg);
            genDoneAddressable(tree, addrReg, RegSet::KEEP_REG);
        }
        else
        {
            inst_RV_TT(ins_FloatLoad(tree->TypeGet()), reg, tree);
        }
        if (((tree->OperGet() == GT_CLS_VAR) || (tree->OperGet() == GT_IND)) && (tree->gtFlags & GTF_IND_VOLATILE))
        {
            // Emit a memory barrier instruction after the load
            instGen_MemoryBarrier();
        }
    }
}
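// For an unaligned TYP_DOUBLE load, the sequence emitted above is roughly
// (register names illustrative):
//
//      ldr   rLo, [addr]      ; integer loads tolerate the misalignment
//      ldr   rHi, [addr+4]
//      vmov  dN, rLo, rHi     ; assemble the double in the FP register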
void CodeGen::genFloatAssign(GenTree* tree)
{
    var_types  type = tree->TypeGet();
    GenTreePtr op1  = tree->gtGetOp1();
    GenTreePtr op2  = tree->gtGetOp2();

    regMaskTP needRegOp1 = RBM_ALLINT;
    regMaskTP addrReg    = RBM_NONE;
    bool      volat      = false; // Is this a volatile store
    bool      unaligned  = false; // Is this an unaligned store
    regNumber op2reg     = REG_NA;

#ifdef DEBUGGING_SUPPORT
    unsigned lclVarNum = compiler->lvaCount;
    unsigned lclILoffs = DUMMY_INIT(0);
#endif

    noway_assert(tree->OperGet() == GT_ASG);

    // Is the target a floating-point local variable?
    // possibly even an enregistered floating-point local variable?
    //
    switch (op1->gtOper)
    {
        unsigned   varNum;
        LclVarDsc* varDsc;

        case GT_LCL_FLD:
            // Check for a misalignment on a Floating Point field
            //
            if (varTypeIsFloating(op1->TypeGet()))
            {
                if ((op1->gtLclFld.gtLclOffs % emitTypeSize(op1->TypeGet())) != 0)
                {
                    unaligned = true;
                }
            }
            break;

        case GT_LCL_VAR:
            varNum = op1->gtLclVarCommon.gtLclNum;
            noway_assert(varNum < compiler->lvaCount);
            varDsc = compiler->lvaTable + varNum;

#ifdef DEBUGGING_SUPPORT
            // For non-debuggable code, every definition of a lcl-var has
            // to be checked to see if we need to open a new scope for it.
            // Remember the local var info to call siCheckVarScope
            // AFTER code generation of the assignment.
            //
            if (compiler->opts.compScopeInfo && !compiler->opts.compDbgCode && (compiler->info.compVarScopesCount > 0))
            {
                lclVarNum = varNum;
                lclILoffs = op1->gtLclVar.gtLclILoffs;
            }
#endif

            // Dead Store assert (with min opts we may have dead stores)
            //
            noway_assert(!varDsc->lvTracked || compiler->opts.MinOpts() || !(op1->gtFlags & GTF_VAR_DEATH));

            // Does this variable live in a register?
            //
            if (genMarkLclVar(op1))
            {
                noway_assert(!compiler->opts.compDbgCode); // We don't enregister any floats with debug codegen

                // Get hold of the target register
                //
                regNumber op1Reg = op1->gtRegVar.gtRegNum;

                // the variable being assigned should be dead in op2
                assert(!varDsc->lvTracked ||
                       !VarSetOps::IsMember(compiler, genUpdateLiveSetForward(op2), varDsc->lvVarIndex));

                // Setup register preferencing, so that we try to target the op1 enregistered variable
                //
                regMaskTP bestMask = genRegMask(op1Reg);
                if (type == TYP_DOUBLE)
                {
                    assert((bestMask & RBM_DBL_REGS) != 0);
                    bestMask |= genRegMask(REG_NEXT(op1Reg));
                }
                RegSet::RegisterPreference pref(RBM_ALLFLOAT, bestMask);

                // Evaluate op2 into a floating point register
                //
                genCodeForTreeFloat(op2, &pref);

                noway_assert(op2->gtFlags & GTF_REG_VAL);

                // Make sure the value ends up in the right place ...
                // For example if op2 is a call that returns a result
                // in REG_F0, we will need to do a move instruction here
                //
                if ((op2->gtRegNum != op1Reg) || (op2->TypeGet() != type))
                {
                    regMaskTP spillRegs = regSet.rsMaskUsed & genRegMaskFloat(op1Reg, op1->TypeGet());
                    if (spillRegs != 0)
                        regSet.rsSpillRegs(spillRegs);

                    assert(type == op1->TypeGet());

                    inst_RV_RV(ins_FloatConv(type, op2->TypeGet()), op1Reg, op2->gtRegNum, type);
                }

                genUpdateLife(op1);
                goto DONE_ASG;
            }
            break;

        case GT_CLS_VAR:
        case GT_IND:
            // Check for a volatile/unaligned store
            //
            assert((op1->OperGet() == GT_CLS_VAR) ||
                   (op1->OperGet() == GT_IND)); // Required for GTF_IND_VOLATILE flag to be valid
            if (op1->gtFlags & GTF_IND_VOLATILE)
                volat = true;
            if (op1->gtFlags & GTF_IND_UNALIGNED)
                unaligned = true;
            break;

        default:
            break;
    }

    // Is the value being assigned an enregistered floating-point local variable?
    //
    switch (op2->gtOper)
    {
        case GT_LCL_VAR:

            if (!genMarkLclVar(op2))
                break;

            __fallthrough;

        case GT_REG_VAR:

            // We must honor the order of evaluation in case op1 reassigns our op2 register
            //
            if (tree->gtFlags & GTF_REVERSE_OPS)
                break;

            // Is there an implicit conversion that we have to insert?
            // Handle this case with the normal cases below.
            //
            if (type != op2->TypeGet())
                break;

            // Make the target addressable
            //
            addrReg = genMakeAddressable(op1, needRegOp1, RegSet::KEEP_REG, true);

            noway_assert(op2->gtFlags & GTF_REG_VAL);
            noway_assert(op2->IsRegVar());

            op2reg = op2->gtRegVar.gtRegNum;
            genUpdateLife(op2);

            goto CHK_VOLAT_UNALIGN;

        default:
            break;
    }

    // Is the op2 (RHS) more complex than op1 (LHS)?
    //
    if (tree->gtFlags & GTF_REVERSE_OPS)
    {
        regMaskTP bestRegs = regSet.rsNarrowHint(RBM_ALLFLOAT, ~op1->gtRsvdRegs);
        RegSet::RegisterPreference pref(RBM_ALLFLOAT, bestRegs);

        // Generate op2 (RHS) into a floating point register
        //
        genCodeForTreeFloat(op2, &pref);
        regSet.SetUsedRegFloat(op2, true);

        // Make the target addressable
        //
        addrReg = genMakeAddressable(op1, needRegOp1, RegSet::KEEP_REG, true);

        genRecoverReg(op2, RBM_ALLFLOAT, RegSet::KEEP_REG);
        noway_assert(op2->gtFlags & GTF_REG_VAL);
        regSet.SetUsedRegFloat(op2, false);
    }
    else
    {
        needRegOp1 = regSet.rsNarrowHint(needRegOp1, ~op2->gtRsvdRegs);

        // Make the target addressable
        //
        addrReg = genMakeAddressable(op1, needRegOp1, RegSet::KEEP_REG, true);

        // Generate the RHS into any floating point register
        genCodeForTreeFloat(op2);
    }
    noway_assert(op2->gtFlags & GTF_REG_VAL);

    op2reg = op2->gtRegNum;

    // Is there an implicit conversion that we have to insert?
    //
    if (type != op2->TypeGet())
    {
        regMaskTP bestMask = genRegMask(op2reg);
        if (type == TYP_DOUBLE)
        {
            if (bestMask & RBM_DBL_REGS)
            {
                bestMask |= genRegMask(REG_NEXT(op2reg));
            }
            else
            {
                bestMask |= genRegMask(REG_PREV(op2reg));
            }
        }
        RegSet::RegisterPreference op2Pref(RBM_ALLFLOAT, bestMask);
        op2reg = regSet.PickRegFloat(type, &op2Pref);

        inst_RV_RV(ins_FloatConv(type, op2->TypeGet()), op2reg, op2->gtRegNum, type);
    }

    // Make sure the LHS is still addressable
    //
    addrReg = genKeepAddressable(op1, addrReg);

CHK_VOLAT_UNALIGN:

    regSet.rsLockUsedReg(addrReg); // Must prevent unaligned regSet.rsGrabReg from choosing an addrReg

    if (volat)
    {
        // Emit a memory barrier instruction before the store
        instGen_MemoryBarrier();
    }
    if (unaligned)
    {
        var_types storeType = op1->TypeGet();
        assert(storeType == TYP_DOUBLE || storeType == TYP_FLOAT);

        // Unaligned Floating-Point Stores must be done using the integer register(s)
        regNumber intRegLo    = regSet.rsGrabReg(RBM_ALLINT);
        regNumber intRegHi    = REG_NA;
        regMaskTP tmpLockMask = genRegMask(intRegLo);

        if (storeType == TYP_DOUBLE)
        {
            intRegHi = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(intRegLo));
            tmpLockMask |= genRegMask(intRegHi);
        }

        // move the FP register over to the integer register(s)
        //
        if (storeType == TYP_DOUBLE)
        {
            getEmitter()->emitIns_R_R_R(INS_vmov_d2i, EA_8BYTE, intRegLo, intRegHi, op2reg);
            regTracker.rsTrackRegTrash(intRegHi);
        }
        else
        {
            getEmitter()->emitIns_R_R(INS_vmov_f2i, EA_4BYTE, intRegLo, op2reg);
        }
        regTracker.rsTrackRegTrash(intRegLo);

        regSet.rsLockReg(tmpLockMask); // Temporarily lock the intRegs
        op1->gtType = TYP_INT;         // Temporarily change the type to TYP_INT

        inst_TT_RV(ins_Store(TYP_INT), op1, intRegLo);
        if (storeType == TYP_DOUBLE)
        {
            inst_TT_RV(ins_Store(TYP_INT), op1, intRegHi, 4);
        }

        op1->gtType = storeType;         // Change the type back to the floating point type
        regSet.rsUnlockReg(tmpLockMask); // Unlock the intRegs
    }
    else
    {
        // Move the value into the target
        //
        inst_TT_RV(ins_Store(op1->TypeGet()), op1, op2reg);
    }

    // Free up anything that was tied up by the LHS
    //
    regSet.rsUnlockUsedReg(addrReg);
    genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);

DONE_ASG:

    genUpdateLife(tree);

#ifdef DEBUGGING_SUPPORT
    /* For non-debuggable code, every definition of a lcl-var has
     * to be checked to see if we need to open a new scope for it.
     */
    if (lclVarNum < compiler->lvaCount)
        siCheckVarScope(lclVarNum, lclILoffs);
#endif
}
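// For an unaligned TYP_DOUBLE store, genFloatAssign emits the inverse of the
// genLoadFloat sequence, roughly (register names illustrative):
//
//      vmov  rLo, rHi, dN     ; split the double into two integer registers
//      str   rLo, [addr]
//      str   rHi, [addr+4]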
//------------------------------------------------------------------------
// genSSE2Intrinsic: Generates the code for an SSE2 hardware intrinsic node
//
// Arguments:
//    node - The hardware intrinsic node
//
void CodeGen::genSSE2Intrinsic(GenTreeHWIntrinsic* node)
{
    NamedIntrinsic intrinsicID = node->gtHWIntrinsicId;
    GenTree*       op1         = node->gtGetOp1();
    GenTree*       op2         = node->gtGetOp2();
    regNumber      targetReg   = node->gtRegNum;
    var_types      targetType  = node->TypeGet();
    var_types      baseType    = node->gtSIMDBaseType;
    regNumber      op1Reg      = REG_NA;
    regNumber      op2Reg      = REG_NA;
    emitter*       emit        = getEmitter();
    int            ival        = -1;

    if ((op1 != nullptr) && !op1->OperIsList())
    {
        op1Reg = op1->gtRegNum;
        genConsumeOperands(node);
    }

    switch (intrinsicID)
    {
        // All integer overloads are handled by table codegen
        case NI_SSE2_CompareLessThan:
        {
            assert(op1 != nullptr);
            assert(op2 != nullptr);
            assert(baseType == TYP_DOUBLE);

            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
            op2Reg          = op2->gtRegNum;
            ival            = Compiler::ivalOfHWIntrinsic(intrinsicID);
            assert(ival != -1);
            emit->emitIns_SIMD_R_R_R_I(ins, emitTypeSize(TYP_SIMD16), targetReg, op1Reg, op2Reg, ival);

            break;
        }

        case NI_SSE2_CompareEqualOrderedScalar:
        case NI_SSE2_CompareEqualUnorderedScalar:
        {
            assert(baseType == TYP_DOUBLE);
            op2Reg             = op2->gtRegNum;
            regNumber   tmpReg = node->GetSingleTempReg();
            instruction ins    = Compiler::insOfHWIntrinsic(intrinsicID, baseType);

            // Ensure we aren't overwriting targetReg
            assert(tmpReg != targetReg);

            emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg);
            emit->emitIns_R(INS_setpo, EA_1BYTE, targetReg);
            emit->emitIns_R(INS_sete, EA_1BYTE, tmpReg);
            emit->emitIns_R_R(INS_and, EA_1BYTE, tmpReg, targetReg);
            emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, tmpReg);
            break;
        }

        case NI_SSE2_CompareGreaterThanOrderedScalar:
        case NI_SSE2_CompareGreaterThanUnorderedScalar:
        {
            assert(baseType == TYP_DOUBLE);
            op2Reg          = op2->gtRegNum;
            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);

            emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg);
            emit->emitIns_R(INS_seta, EA_1BYTE, targetReg);
            emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);
            break;
        }

        case NI_SSE2_CompareGreaterThanOrEqualOrderedScalar:
        case NI_SSE2_CompareGreaterThanOrEqualUnorderedScalar:
        {
            assert(baseType == TYP_DOUBLE);
            op2Reg          = op2->gtRegNum;
            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);

            emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg);
            emit->emitIns_R(INS_setae, EA_1BYTE, targetReg);
            emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);
            break;
        }

        case NI_SSE2_CompareLessThanOrderedScalar:
        case NI_SSE2_CompareLessThanUnorderedScalar:
        {
            assert(baseType == TYP_DOUBLE);
            op2Reg          = op2->gtRegNum;
            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);

            emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op2Reg, op1Reg);
            emit->emitIns_R(INS_seta, EA_1BYTE, targetReg);
            emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);
            break;
        }

        case NI_SSE2_CompareLessThanOrEqualOrderedScalar:
        case NI_SSE2_CompareLessThanOrEqualUnorderedScalar:
        {
            assert(baseType == TYP_DOUBLE);
            op2Reg          = op2->gtRegNum;
            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);

            emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op2Reg, op1Reg);
            emit->emitIns_R(INS_setae, EA_1BYTE, targetReg);
            emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);
            break;
        }

        case NI_SSE2_CompareNotEqualOrderedScalar:
        case NI_SSE2_CompareNotEqualUnorderedScalar:
        {
            assert(baseType == TYP_DOUBLE);
            op2Reg             = op2->gtRegNum;
            instruction ins    = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
            regNumber   tmpReg = node->GetSingleTempReg();

            // Ensure we aren't overwriting targetReg
            assert(tmpReg != targetReg);

            emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg);
            emit->emitIns_R(INS_setpe, EA_1BYTE, targetReg);
            emit->emitIns_R(INS_setne, EA_1BYTE, tmpReg);
            emit->emitIns_R_R(INS_or, EA_1BYTE, tmpReg, targetReg);
            emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, tmpReg);
            break;
        }

        case NI_SSE2_ConvertScalarToVector128Double:
        case NI_SSE2_ConvertScalarToVector128Single:
        {
            assert(baseType == TYP_INT || baseType == TYP_LONG || baseType == TYP_FLOAT || baseType == TYP_DOUBLE);
            assert(op1 != nullptr);
            assert(op2 != nullptr);
            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
            genHWIntrinsic_R_R_RM(node, ins);
            break;
        }

        case NI_SSE2_ConvertScalarToVector128Int64:
        case NI_SSE2_ConvertScalarToVector128UInt64:
        {
            assert(baseType == TYP_LONG || baseType == TYP_ULONG);
            assert(op1 != nullptr);
            assert(op2 == nullptr);
            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
            // TODO-XArch-CQ -> use of type size of TYP_SIMD16 leads to
            // instruction register encoding errors for SSE legacy encoding
            emit->emitIns_R_R(ins, emitTypeSize(baseType), targetReg, op1Reg);
            break;
        }

        case NI_SSE2_ConvertToDouble:
        {
            assert(op2 == nullptr);
            if (op1Reg != targetReg)
            {
                instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
                emit->emitIns_R_R(ins, emitTypeSize(targetType), targetReg, op1Reg);
            }
            break;
        }

        case NI_SSE2_ConvertToInt32:
        case NI_SSE2_ConvertToInt64:
        case NI_SSE2_ConvertToUInt32:
        case NI_SSE2_ConvertToUInt64:
        {
            assert(op2 == nullptr);
            assert(baseType == TYP_DOUBLE || baseType == TYP_FLOAT || baseType == TYP_INT || baseType == TYP_UINT ||
                   baseType == TYP_LONG || baseType == TYP_ULONG);
            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
            if (baseType == TYP_DOUBLE || baseType == TYP_FLOAT)
            {
                emit->emitIns_R_R(ins, emitTypeSize(targetType), targetReg, op1Reg);
            }
            else
            {
                emit->emitIns_R_R(ins, emitActualTypeSize(baseType), op1Reg, targetReg);
            }
            break;
        }

        case NI_SSE2_LoadFence:
        {
            assert(baseType == TYP_VOID);
            assert(op1 == nullptr);
            assert(op2 == nullptr);
            emit->emitIns(INS_lfence);
            break;
        }

        case NI_SSE2_MemoryFence:
        {
            assert(baseType == TYP_VOID);
            assert(op1 == nullptr);
            assert(op2 == nullptr);
            emit->emitIns(INS_mfence);
            break;
        }

        case NI_SSE2_MoveMask:
        {
            assert(op2 == nullptr);
            assert(baseType == TYP_BYTE || baseType == TYP_UBYTE || baseType == TYP_DOUBLE);

            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
            emit->emitIns_R_R(ins, emitTypeSize(TYP_INT), targetReg, op1Reg);
            break;
        }

        case NI_SSE2_SetScalarVector128:
        {
            assert(baseType == TYP_DOUBLE);
            assert(op2 == nullptr);

            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);

            if (op1Reg == targetReg)
            {
                regNumber tmpReg = node->GetSingleTempReg();

                // Ensure we aren't overwriting targetReg
                assert(tmpReg != targetReg);

                emit->emitIns_R_R(INS_movapd, emitTypeSize(TYP_SIMD16), tmpReg, op1Reg);
                op1Reg = tmpReg;
            }

            emit->emitIns_SIMD_R_R_R(INS_xorpd, emitTypeSize(TYP_SIMD16), targetReg, targetReg, targetReg);
            emit->emitIns_SIMD_R_R_R(ins, emitTypeSize(TYP_SIMD16), targetReg, targetReg, op1Reg);
            break;
        }

        case NI_SSE2_SetZeroVector128:
        {
            assert(baseType != TYP_FLOAT);
            assert(baseType >= TYP_BYTE && baseType <= TYP_DOUBLE);
            assert(op1 == nullptr);
            assert(op2 == nullptr);

            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
            emit->emitIns_SIMD_R_R_R(ins, emitTypeSize(TYP_SIMD16), targetReg, targetReg, targetReg);
            break;
        }

        default:
            unreached();
            break;
    }

    genProduceReg(node);
}
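// The scalar compare cases above follow from the flag behavior of comisd/ucomisd:
// an unordered result (NaN operand) sets ZF=PF=CF=1, equality sets only ZF,
// greater-than clears ZF/PF/CF, and less-than sets only CF. Hence:
//
//      CompareEqualScalar:     sete (ZF set) AND setpo (not unordered)
//      CompareNotEqualScalar:  setne (ZF clear) OR setpe (unordered)
//      GreaterThan[OrEqual]:   seta / setae
//      LessThan[OrEqual]:      operands swapped, then seta / setae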
//------------------------------------------------------------------------
// genSSEIntrinsic: Generates the code for an SSE hardware intrinsic node
//
// Arguments:
//    node - The hardware intrinsic node
//
void CodeGen::genSSEIntrinsic(GenTreeHWIntrinsic* node)
{
    NamedIntrinsic intrinsicID = node->gtHWIntrinsicId;
    GenTree*       op1         = node->gtGetOp1();
    GenTree*       op2         = node->gtGetOp2();
    GenTree*       op3         = nullptr;
    GenTree*       op4         = nullptr;
    regNumber      targetReg   = node->gtRegNum;
    var_types      targetType  = node->TypeGet();
    var_types      baseType    = node->gtSIMDBaseType;

    regNumber op1Reg = REG_NA;
    regNumber op2Reg = REG_NA;
    regNumber op3Reg = REG_NA;
    regNumber op4Reg = REG_NA;
    emitter*  emit   = getEmitter();

    if ((op1 != nullptr) && !op1->OperIsList())
    {
        op1Reg = op1->gtRegNum;
        genConsumeOperands(node);
    }

    switch (intrinsicID)
    {
        case NI_SSE_CompareEqualOrderedScalar:
        case NI_SSE_CompareEqualUnorderedScalar:
        {
            assert(baseType == TYP_FLOAT);
            op2Reg             = op2->gtRegNum;
            regNumber   tmpReg = node->GetSingleTempReg();
            instruction ins    = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);

            // Ensure we aren't overwriting targetReg
            assert(tmpReg != targetReg);

            emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg);
            emit->emitIns_R(INS_setpo, EA_1BYTE, targetReg);
            emit->emitIns_R(INS_sete, EA_1BYTE, tmpReg);
            emit->emitIns_R_R(INS_and, EA_1BYTE, tmpReg, targetReg);
            emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, tmpReg);
            break;
        }

        case NI_SSE_CompareGreaterThanOrderedScalar:
        case NI_SSE_CompareGreaterThanUnorderedScalar:
        {
            assert(baseType == TYP_FLOAT);
            op2Reg          = op2->gtRegNum;
            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);

            emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg);
            emit->emitIns_R(INS_seta, EA_1BYTE, targetReg);
            emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);
            break;
        }

        case NI_SSE_CompareGreaterThanOrEqualOrderedScalar:
        case NI_SSE_CompareGreaterThanOrEqualUnorderedScalar:
        {
            assert(baseType == TYP_FLOAT);
            op2Reg          = op2->gtRegNum;
            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);

            emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg);
            emit->emitIns_R(INS_setae, EA_1BYTE, targetReg);
            emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);
            break;
        }

        case NI_SSE_CompareLessThanOrderedScalar:
        case NI_SSE_CompareLessThanUnorderedScalar:
        {
            assert(baseType == TYP_FLOAT);
            op2Reg          = op2->gtRegNum;
            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);

            emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op2Reg, op1Reg);
            emit->emitIns_R(INS_seta, EA_1BYTE, targetReg);
            emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);
            break;
        }

        case NI_SSE_CompareLessThanOrEqualOrderedScalar:
        case NI_SSE_CompareLessThanOrEqualUnorderedScalar:
        {
            assert(baseType == TYP_FLOAT);
            op2Reg          = op2->gtRegNum;
            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);

            emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op2Reg, op1Reg);
            emit->emitIns_R(INS_setae, EA_1BYTE, targetReg);
            emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);
            break;
        }

        case NI_SSE_CompareNotEqualOrderedScalar:
        case NI_SSE_CompareNotEqualUnorderedScalar:
        {
            assert(baseType == TYP_FLOAT);
            op2Reg             = op2->gtRegNum;
            regNumber   tmpReg = node->GetSingleTempReg();
            instruction ins    = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);

            // Ensure we aren't overwriting targetReg
            assert(tmpReg != targetReg);

            emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg);
            emit->emitIns_R(INS_setpe, EA_1BYTE, targetReg);
            emit->emitIns_R(INS_setne, EA_1BYTE, tmpReg);
            emit->emitIns_R_R(INS_or, EA_1BYTE, tmpReg, targetReg);
            emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, tmpReg);
            break;
        }

        case NI_SSE_ConvertToSingle:
        case NI_SSE_StaticCast:
        {
            assert(op2 == nullptr);
            if (op1Reg != targetReg)
            {
                instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
                emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), targetReg, op1Reg);
            }
            break;
        }

        case NI_SSE_MoveMask:
        {
            assert(baseType == TYP_FLOAT);
            assert(op2 == nullptr);

            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
            emit->emitIns_R_R(ins, emitTypeSize(TYP_INT), targetReg, op1Reg);
            break;
        }

        case NI_SSE_Prefetch0:
        case NI_SSE_Prefetch1:
        case NI_SSE_Prefetch2:
        case NI_SSE_PrefetchNonTemporal:
        {
            assert(baseType == TYP_UBYTE);
            assert(op2 == nullptr);

            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
            emit->emitIns_AR(ins, emitTypeSize(baseType), op1Reg, 0);
            break;
        }

        case NI_SSE_SetScalarVector128:
        {
            assert(baseType == TYP_FLOAT);
            assert(op2 == nullptr);

            if (op1Reg == targetReg)
            {
                regNumber tmpReg = node->GetSingleTempReg();

                // Ensure we aren't overwriting targetReg
                assert(tmpReg != targetReg);

                emit->emitIns_R_R(INS_movaps, emitTypeSize(TYP_SIMD16), tmpReg, op1Reg);
                op1Reg = tmpReg;
            }

            emit->emitIns_SIMD_R_R_R(INS_xorps, emitTypeSize(TYP_SIMD16), targetReg, targetReg, targetReg);
            emit->emitIns_SIMD_R_R_R(INS_movss, emitTypeSize(TYP_SIMD16), targetReg, targetReg, op1Reg);
            break;
        }

        case NI_SSE_SetZeroVector128:
        {
            assert(baseType == TYP_FLOAT);
            assert(op1 == nullptr);
            assert(op2 == nullptr);

            emit->emitIns_SIMD_R_R_R(INS_xorps, emitTypeSize(TYP_SIMD16), targetReg, targetReg, targetReg);
            break;
        }

        case NI_SSE_StoreFence:
        {
            assert(baseType == TYP_VOID);
            assert(op1 == nullptr);
            assert(op2 == nullptr);
            emit->emitIns(INS_sfence);
            break;
        }

        default:
            unreached();
            break;
    }

    genProduceReg(node);
}
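// SetScalarVector128 above first zeroes the destination and then merges in the
// scalar, e.g. for the float case:
//
//      xorps  xmmTgt, xmmTgt    ; clear all 128 bits
//      movss  xmmTgt, xmmSrc    ; copy element 0; the zeroed upper bits survive
//
// The movaps copy through tmpReg is only needed when op1 and the target share a
// register, since the xorps would otherwise destroy the source value.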
//------------------------------------------------------------------------
// genHWIntrinsic: Generates the code for a given hardware intrinsic node.
//
// Arguments:
//    node - The hardware intrinsic node
//
void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
{
    NamedIntrinsic      intrinsicID = node->gtHWIntrinsicId;
    InstructionSet      isa         = Compiler::isaOfHWIntrinsic(intrinsicID);
    HWIntrinsicCategory category    = Compiler::categoryOfHWIntrinsic(intrinsicID);
    HWIntrinsicFlag     flags       = Compiler::flagsOfHWIntrinsic(intrinsicID);
    int                 ival        = Compiler::ivalOfHWIntrinsic(intrinsicID);
    int                 numArgs     = Compiler::numArgsOfHWIntrinsic(node);

    assert((flags & HW_Flag_NoCodeGen) == 0);

    if (genIsTableDrivenHWIntrinsic(category, flags))
    {
        GenTree*  op1        = node->gtGetOp1();
        GenTree*  op2        = node->gtGetOp2();
        regNumber targetReg  = node->gtRegNum;
        var_types targetType = node->TypeGet();
        var_types baseType   = node->gtSIMDBaseType;

        regNumber op1Reg = REG_NA;
        regNumber op2Reg = REG_NA;
        emitter*  emit   = getEmitter();

        assert(numArgs >= 0);
        instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
        assert(ins != INS_invalid);
        emitAttr simdSize = EA_ATTR(node->gtSIMDSize);
        assert(simdSize != 0);

        switch (numArgs)
        {
            case 1:
                genConsumeOperands(node);
                op1Reg = op1->gtRegNum;
                if (category == HW_Category_MemoryLoad)
                {
                    emit->emitIns_R_AR(ins, simdSize, targetReg, op1Reg, 0);
                }
                else if (category == HW_Category_SIMDScalar && (flags & HW_Flag_CopyUpperBits) != 0)
                {
                    emit->emitIns_SIMD_R_R_R(ins, simdSize, targetReg, op1Reg, op1Reg);
                }
                else if ((ival != -1) && varTypeIsFloating(baseType))
                {
                    emit->emitIns_R_R_I(ins, simdSize, targetReg, op1Reg, ival);
                }
                else
                {
                    emit->emitIns_R_R(ins, simdSize, targetReg, op1Reg);
                }
                break;

            case 2:
                genConsumeOperands(node);
                op1Reg = op1->gtRegNum;
                op2Reg = op2->gtRegNum;
                if (category == HW_Category_MemoryStore)
                {
                    emit->emitIns_AR_R(ins, simdSize, op2Reg, op1Reg, 0);
                }
                else if ((ival != -1) && varTypeIsFloating(baseType))
                {
                    genHWIntrinsic_R_R_RM_I(node, ins);
                }
                else if (category == HW_Category_MemoryLoad)
                {
                    emit->emitIns_SIMD_R_R_AR(ins, simdSize, targetReg, op1Reg, op2Reg);
                }
                else if (Compiler::isImmHWIntrinsic(intrinsicID, op2))
                {
                    if (intrinsicID == NI_SSE2_Extract)
                    {
                        // extract instructions return to GP-registers, so it needs int size as the emitsize
                        simdSize = emitTypeSize(TYP_INT);
                    }
                    auto emitSwCase = [&](unsigned i) {
                        emit->emitIns_SIMD_R_R_I(ins, simdSize, targetReg, op1Reg, (int)i);
                    };

                    if (op2->IsCnsIntOrI())
                    {
                        ssize_t ival = op2->AsIntCon()->IconValue();
                        emitSwCase((unsigned)ival);
                    }
                    else
                    {
                        // We emit a fallback case for the scenario when the imm-op is not a constant. This should
                        // normally happen when the intrinsic is called indirectly, such as via Reflection. However, it
                        // can also occur if the consumer calls it directly and just doesn't pass a constant value.
                        regNumber baseReg = node->ExtractTempReg();
                        regNumber offsReg = node->GetSingleTempReg();
                        genHWIntrinsicJumpTableFallback(intrinsicID, op2Reg, baseReg, offsReg, emitSwCase);
                    }
                }
                else
                {
                    genHWIntrinsic_R_R_RM(node, ins);
                }
                break;

            case 3:
            {
                assert(op1->OperIsList());
                assert(op1->gtGetOp2()->OperIsList());
                assert(op1->gtGetOp2()->gtGetOp2()->OperIsList());

                GenTreeArgList* argList = op1->AsArgList();
                op1                     = argList->Current();
                genConsumeRegs(op1);
                op1Reg = op1->gtRegNum;

                argList = argList->Rest();
                op2     = argList->Current();
                genConsumeRegs(op2);
                op2Reg = op2->gtRegNum;

                argList      = argList->Rest();
                GenTree* op3 = argList->Current();
                genConsumeRegs(op3);
                regNumber op3Reg = op3->gtRegNum;

                if (Compiler::isImmHWIntrinsic(intrinsicID, op3))
                {
                    auto emitSwCase = [&](unsigned i) {
                        emit->emitIns_SIMD_R_R_R_I(ins, simdSize, targetReg, op1Reg, op2Reg, (int)i);
                    };
                    if (op3->IsCnsIntOrI())
                    {
                        ssize_t ival = op3->AsIntCon()->IconValue();
                        emitSwCase((unsigned)ival);
                    }
                    else
                    {
                        // We emit a fallback case for the scenario when the imm-op is not a constant. This should
                        // normally happen when the intrinsic is called indirectly, such as via Reflection. However, it
                        // can also occur if the consumer calls it directly and just doesn't pass a constant value.
                        regNumber baseReg = node->ExtractTempReg();
                        regNumber offsReg = node->GetSingleTempReg();
                        genHWIntrinsicJumpTableFallback(intrinsicID, op3Reg, baseReg, offsReg, emitSwCase);
                    }
                }
                else if (category == HW_Category_MemoryStore)
                {
                    assert(intrinsicID == NI_SSE2_MaskMove);
                    assert(targetReg == REG_NA);

                    // SSE2 MaskMove hardcodes the destination (op3) in DI/EDI/RDI
                    if (op3Reg != REG_EDI)
                    {
                        emit->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_EDI, op3Reg);
                    }
                    emit->emitIns_R_R(ins, simdSize, op1Reg, op2Reg);
                }
                else
                {
                    emit->emitIns_SIMD_R_R_R_R(ins, simdSize, targetReg, op1Reg, op2Reg, op3Reg);
                }
                break;
            }

            default:
                unreached();
                break;
        }
        genProduceReg(node);
        return;
    }

    switch (isa)
    {
        case InstructionSet_SSE:
            genSSEIntrinsic(node);
            break;
        case InstructionSet_SSE2:
            genSSE2Intrinsic(node);
            break;
        case InstructionSet_SSE41:
            genSSE41Intrinsic(node);
            break;
        case InstructionSet_SSE42:
            genSSE42Intrinsic(node);
            break;
        case InstructionSet_AVX:
            genAVXIntrinsic(node);
            break;
        case InstructionSet_AVX2:
            genAVX2Intrinsic(node);
            break;
        case InstructionSet_AES:
            genAESIntrinsic(node);
            break;
        case InstructionSet_BMI1:
            genBMI1Intrinsic(node);
            break;
        case InstructionSet_BMI2:
            genBMI2Intrinsic(node);
            break;
        case InstructionSet_FMA:
            genFMAIntrinsic(node);
            break;
        case InstructionSet_LZCNT:
            genLZCNTIntrinsic(node);
            break;
        case InstructionSet_PCLMULQDQ:
            genPCLMULQDQIntrinsic(node);
            break;
        case InstructionSet_POPCNT:
            genPOPCNTIntrinsic(node);
            break;
        default:
            unreached();
            break;
    }
}
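// As an illustration of the non-constant imm-op fallback above: a call such as
// Sse2.Extract(value, index) where 'index' is not a JIT-time constant (for
// example, when the intrinsic is invoked via Reflection) cannot be encoded as
// an instruction immediate, so genHWIntrinsicJumpTableFallback selects the
// matching case at run time instead.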
//------------------------------------------------------------------------
// genSSE41Intrinsic: Generates the code for an SSE4.1 hardware intrinsic node
//
// Arguments:
//    node - The hardware intrinsic node
//
void CodeGen::genSSE41Intrinsic(GenTreeHWIntrinsic* node)
{
    NamedIntrinsic intrinsicID = node->gtHWIntrinsicId;
    GenTree*       op1         = node->gtGetOp1();
    GenTree*       op2         = node->gtGetOp2();
    GenTree*       op3         = nullptr;
    GenTree*       op4         = nullptr;
    regNumber      targetReg   = node->gtRegNum;
    var_types      targetType  = node->TypeGet();
    var_types      baseType    = node->gtSIMDBaseType;

    regNumber op1Reg = REG_NA;
    regNumber op2Reg = REG_NA;
    regNumber op3Reg = REG_NA;
    regNumber op4Reg = REG_NA;
    emitter*  emit   = getEmitter();

    if ((op1 != nullptr) && !op1->OperIsList())
    {
        op1Reg = op1->gtRegNum;
        genConsumeOperands(node);
    }

    switch (intrinsicID)
    {
        case NI_SSE41_TestAllOnes:
        {
            regNumber tmpReg = node->GetSingleTempReg();
            assert(Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType) == INS_ptest);
            emit->emitIns_SIMD_R_R_R(INS_pcmpeqd, emitTypeSize(TYP_SIMD16), tmpReg, tmpReg, tmpReg);
            emit->emitIns_R_R(INS_xor, EA_4BYTE, targetReg, targetReg);
            emit->emitIns_R_R(INS_ptest, emitTypeSize(TYP_SIMD16), op1Reg, tmpReg);
            emit->emitIns_R(INS_setb, EA_1BYTE, targetReg);
            break;
        }

        case NI_SSE41_TestAllZeros:
        case NI_SSE41_TestZ:
        {
            assert(Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType) == INS_ptest);
            emit->emitIns_R_R(INS_xor, EA_4BYTE, targetReg, targetReg);
            emit->emitIns_R_R(INS_ptest, emitTypeSize(TYP_SIMD16), op1Reg, op2->gtRegNum);
            emit->emitIns_R(INS_sete, EA_1BYTE, targetReg);
            break;
        }

        case NI_SSE41_TestC:
        {
            assert(Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType) == INS_ptest);
            emit->emitIns_R_R(INS_xor, EA_4BYTE, targetReg, targetReg);
            emit->emitIns_R_R(INS_ptest, emitTypeSize(TYP_SIMD16), op1Reg, op2->gtRegNum);
            emit->emitIns_R(INS_setb, EA_1BYTE, targetReg);
            break;
        }

        case NI_SSE41_TestMixOnesZeros:
        case NI_SSE41_TestNotZAndNotC:
        {
            assert(Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType) == INS_ptest);
            emit->emitIns_R_R(INS_xor, EA_4BYTE, targetReg, targetReg);
            emit->emitIns_R_R(INS_ptest, emitTypeSize(TYP_SIMD16), op1Reg, op2->gtRegNum);
            emit->emitIns_R(INS_seta, EA_1BYTE, targetReg);
            break;
        }

        case NI_SSE41_Extract:
        {
            regNumber   tmpTargetReg = REG_NA;
            instruction ins          = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
            if (baseType == TYP_FLOAT)
            {
                tmpTargetReg = node->ExtractTempReg();
            }
            auto emitSwCase = [&](unsigned i) {
                if (baseType == TYP_FLOAT)
                {
                    // extract instructions return to GP-registers, so it needs int size as the emitsize
                    emit->emitIns_SIMD_R_R_I(ins, emitTypeSize(TYP_INT), op1Reg, tmpTargetReg, (int)i);
                    emit->emitIns_R_R(INS_mov_i2xmm, EA_4BYTE, targetReg, tmpTargetReg);
                }
                else
                {
                    emit->emitIns_SIMD_R_R_I(ins, emitTypeSize(TYP_INT), targetReg, op1Reg, (int)i);
                }
            };

            if (op2->IsCnsIntOrI())
            {
                ssize_t ival = op2->AsIntCon()->IconValue();
                emitSwCase((unsigned)ival);
            }
            else
            {
                // We emit a fallback case for the scenario when the imm-op is not a constant. This should
                // normally happen when the intrinsic is called indirectly, such as via Reflection. However, it
                // can also occur if the consumer calls it directly and just doesn't pass a constant value.
                regNumber baseReg = node->ExtractTempReg();
                regNumber offsReg = node->GetSingleTempReg();
                genHWIntrinsicJumpTableFallback(intrinsicID, op2->gtRegNum, baseReg, offsReg, emitSwCase);
            }
            break;
        }

        default:
            unreached();
            break;
    }

    genProduceReg(node);
}
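// The ptest-based cases above rely on its flag behavior: ZF is set when
// (dest AND src) == 0, and CF is set when ((NOT dest) AND src) == 0. So with
// src being all-ones (via pcmpeqd), setb (CF) tests "dest is all ones" for
// TestAllOnes; TestZ/TestAllZeros use sete (ZF), TestC uses setb (CF), and
// TestNotZAndNotC/TestMixOnesZeros use seta (CF == 0 and ZF == 0).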