//----------------------------------------------------------------------------------------------
// ContainCheckHWIntrinsic: Perform containment analysis for a hardware intrinsic node.
//
//  Arguments:
//     node - The hardware intrinsic node.
//
void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
{
    NamedIntrinsic  intrinsicID = node->gtHWIntrinsicId;
    GenTreeArgList* argList     = nullptr;
    GenTree*        op1         = node->gtOp.gtOp1;
    GenTree*        op2         = node->gtOp.gtOp2;

    if (op1->OperIs(GT_LIST))
    {
        argList = op1->AsArgList();
        op1     = argList->Current();
        op2     = argList->Rest()->Current();
    }

    switch (HWIntrinsicInfo::lookup(node->gtHWIntrinsicId).form)
    {
        case HWIntrinsicInfo::SimdExtractOp:
            if (op2->IsCnsIntOrI())
            {
                MakeSrcContained(node, op2);
            }
            break;

        case HWIntrinsicInfo::SimdInsertOp:
            if (op2->IsCnsIntOrI())
            {
                MakeSrcContained(node, op2);

                GenTree* op3 = argList->Rest()->Rest()->Current();

                // In the HW intrinsics C# API there is no direct way to specify a vector element to element mov
                //   VX[a] = VY[b]
                // In C# this would naturally be expressed by
                //   Insert(VX, a, Extract(VY, b))
                // If both a & b are immediate constants contain the extract/getItem so that we can emit
                //   the single instruction mov Vx[a], Vy[b]
                if (op3->OperIs(GT_HWIntrinsic) && (op3->AsHWIntrinsic()->gtHWIntrinsicId == NI_ARM64_SIMD_GetItem))
                {
                    ContainCheckHWIntrinsic(op3->AsHWIntrinsic());

                    if (op3->gtOp.gtOp2->isContained())
                    {
                        MakeSrcContained(node, op3);
                    }
                }
            }
            break;

        default:
            break;
    }
}
//------------------------------------------------------------------------
// lookupNumArgs: gets the number of arguments for the hardware intrinsic.
// This attempts to do a table-based lookup but will fall back to the number
// of operands in 'node' if the table entry is -1.
//
// Arguments:
//    node -- GenTreeHWIntrinsic* node with nullptr default value
//
// Return Value:
//     number of arguments
//
int HWIntrinsicInfo::lookupNumArgs(const GenTreeHWIntrinsic* node)
{
    NamedIntrinsic intrinsic = node->gtHWIntrinsicId;

    assert(intrinsic != NI_Illegal);
    assert(intrinsic > NI_HW_INTRINSIC_START && intrinsic < NI_HW_INTRINSIC_END);

    GenTree* op1     = node->gtGetOp1();
    GenTree* op2     = node->gtGetOp2();
    int      numArgs = 0;

    if (op1 == nullptr)
    {
        return 0;
    }

    if (op1->OperIsList())
    {
        numArgs              = 0;
        GenTreeArgList* list = op1->AsArgList();

        while (list != nullptr)
        {
            numArgs++;
            list = list->Rest();
        }

        // We should only use a list if we have 3 or more operands.
        assert(numArgs >= 3);
        return numArgs;
    }

    if (op2 == nullptr)
    {
        return 1;
    }

    return 2;
}
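// Minimal standalone sketch (not JIT code): it models the operand-count convention that
// lookupNumArgs relies on. ToyNode and ToyArgList are hypothetical stand-ins for
// GenTreeHWIntrinsic and GenTreeArgList; one or two operands live directly in op1/op2,
// while three or more are chained through op1 as a list, which is why the list branch
// asserts numArgs >= 3.
#include <cassert>
#include <cstdio>

struct ToyArgList
{
    ToyArgList* rest; // next list node, or nullptr
};

struct ToyNode
{
    ToyArgList* op1List; // non-null when the operands are carried as a list
    bool        hasOp1;
    bool        hasOp2;
};

static int countArgs(const ToyNode* node)
{
    if (node->op1List != nullptr)
    {
        int numArgs = 0;
        for (const ToyArgList* list = node->op1List; list != nullptr; list = list->rest)
        {
            numArgs++;
        }
        assert(numArgs >= 3); // a list is only used for 3+ operands
        return numArgs;
    }
    if (!node->hasOp1)
    {
        return 0;
    }
    return node->hasOp2 ? 2 : 1;
}

int main()
{
    ToyArgList third  = {nullptr};
    ToyArgList second = {&third};
    ToyArgList first  = {&second};

    ToyNode unary   = {nullptr, true, false};
    ToyNode ternary = {&first, false, false};

    printf("unary: %d args, ternary: %d args\n", countArgs(&unary), countArgs(&ternary));
    return 0;
}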
/*****************************************************************************
 * gsMarkPtrsAndAssignGroups
 * Walk a tree looking for assignment groups, variables whose value is used
 * in a *p store or use, and variables passed to calls.  This info is then used
 * to determine parameters which are vulnerable.
 * This function carries a state to know if it is under an assign node, call node
 * or indirection node.  It starts a new tree walk for its subtrees when the state
 * changes.
 */
Compiler::fgWalkResult Compiler::gsMarkPtrsAndAssignGroups(GenTreePtr* pTree, fgWalkData* data)
{
    struct MarkPtrsInfo* pState        = (MarkPtrsInfo*)data->pCallbackData;
    struct MarkPtrsInfo  newState      = *pState;
    Compiler*            comp          = data->compiler;
    GenTreePtr           tree          = *pTree;
    ShadowParamVarInfo*  shadowVarInfo = pState->comp->gsShadowVarInfo;
    assert(shadowVarInfo);
    bool     fIsBlk = false;
    unsigned lclNum;

    assert(!pState->isAssignSrc || pState->lvAssignDef != (unsigned)-1);

    if (pState->skipNextNode)
    {
        pState->skipNextNode = false;
        return WALK_CONTINUE;
    }

    switch (tree->OperGet())
    {
    // Indirections - look for *p uses and defs
    case GT_INITBLK:
    case GT_COPYOBJ:
    case GT_COPYBLK:
        fIsBlk = true;
        // fallthrough
    case GT_IND:
    case GT_LDOBJ:
    case GT_ARR_ELEM:
    case GT_ARR_INDEX:
    case GT_ARR_OFFSET:
    case GT_FIELD:

        newState.isUnderIndir = true;
        {
            if (fIsBlk)
            {
                // Blk nodes have implicit indirections.
                comp->fgWalkTreePre(&tree->gtOp.gtOp1, comp->gsMarkPtrsAndAssignGroups, (void*)&newState);

                if (tree->OperGet() == GT_INITBLK)
                {
                    newState.isUnderIndir = false;
                }
                comp->fgWalkTreePre(&tree->gtOp.gtOp2, comp->gsMarkPtrsAndAssignGroups, (void*)&newState);
            }
            else
            {
                newState.skipNextNode = true; // Don't have to worry about which kind of node we're dealing with
                comp->fgWalkTreePre(&tree, comp->gsMarkPtrsAndAssignGroups, (void*)&newState);
            }
        }

        return WALK_SKIP_SUBTREES;

    // local vars and param uses
    case GT_LCL_VAR:
    case GT_LCL_FLD:
        lclNum = tree->gtLclVarCommon.gtLclNum;

        if (pState->isUnderIndir)
        {
            // The variable is being dereferenced for a read or a write.
            comp->lvaTable[lclNum].lvIsPtr = 1;
        }

        if (pState->isAssignSrc)
        {
            //
            // Add lvAssignDef and lclNum to a common assign group
            if (shadowVarInfo[pState->lvAssignDef].assignGroup)
            {
                if (shadowVarInfo[lclNum].assignGroup)
                {
                    // OR both bit vectors
                    shadowVarInfo[pState->lvAssignDef].assignGroup->bitVectOr(shadowVarInfo[lclNum].assignGroup);
                }
                else
                {
                    shadowVarInfo[pState->lvAssignDef].assignGroup->bitVectSet(lclNum);
                }

                // Point both to the same bit vector
                shadowVarInfo[lclNum].assignGroup = shadowVarInfo[pState->lvAssignDef].assignGroup;
            }
            else if (shadowVarInfo[lclNum].assignGroup)
            {
                shadowVarInfo[lclNum].assignGroup->bitVectSet(pState->lvAssignDef);

                // Point both to the same bit vector
                shadowVarInfo[pState->lvAssignDef].assignGroup = shadowVarInfo[lclNum].assignGroup;
            }
            else
            {
                FixedBitVect* bv = FixedBitVect::bitVectInit(pState->comp->lvaCount, pState->comp);

                // (shadowVarInfo[pState->lvAssignDef] == NULL && shadowVarInfo[lclNum] == NULL);
                // Neither of them has an assign group yet.  Make a new one.
                shadowVarInfo[pState->lvAssignDef].assignGroup = bv;
                shadowVarInfo[lclNum].assignGroup              = bv;
                bv->bitVectSet(pState->lvAssignDef);
                bv->bitVectSet(lclNum);
            }
        }
        return WALK_CONTINUE;

    // Calls - Mark arg variables
    case GT_CALL:

        newState.isUnderIndir = false;
        newState.isAssignSrc  = false;
        {
            if (tree->gtCall.gtCallObjp)
            {
                newState.isUnderIndir = true;
                comp->fgWalkTreePre(&tree->gtCall.gtCallObjp, gsMarkPtrsAndAssignGroups, (void*)&newState);
            }

            for (GenTreeArgList* args = tree->gtCall.gtCallArgs; args; args = args->Rest())
            {
                comp->fgWalkTreePre(&args->Current(), gsMarkPtrsAndAssignGroups, (void*)&newState);
            }
            for (GenTreeArgList* args = tree->gtCall.gtCallLateArgs; args; args = args->Rest())
            {
                comp->fgWalkTreePre(&args->Current(), gsMarkPtrsAndAssignGroups, (void*)&newState);
            }

            if (tree->gtCall.gtCallType == CT_INDIRECT)
            {
                newState.isUnderIndir = true;

                // A function pointer is treated like a write-through pointer since
                // it controls what code gets executed, and so indirectly can cause
                // a write to memory.
                comp->fgWalkTreePre(&tree->gtCall.gtCallAddr, gsMarkPtrsAndAssignGroups, (void*)&newState);
            }
        }
        return WALK_SKIP_SUBTREES;

    case GT_ADDR:
        newState.isUnderIndir = false;
        // We'll assume p in "**p = " can be vulnerable because by changing 'p', someone
        // could control where **p stores to.
        {
            comp->fgWalkTreePre(&tree->gtOp.gtOp1, comp->gsMarkPtrsAndAssignGroups, (void*)&newState);
        }
        return WALK_SKIP_SUBTREES;

    default:
        // Assignments - track assign groups and *p defs.
        if (tree->OperIsAssignment())
        {
            bool isLocVar;
            bool isLocFld;

            // Walk dst side
            comp->fgWalkTreePre(&tree->gtOp.gtOp1, comp->gsMarkPtrsAndAssignGroups, (void*)&newState);

            // Now handle src side
            isLocVar = tree->gtOp.gtOp1->OperGet() == GT_LCL_VAR;
            isLocFld = tree->gtOp.gtOp1->OperGet() == GT_LCL_FLD;

            if ((isLocVar || isLocFld) && tree->gtOp.gtOp2)
            {
                lclNum               = tree->gtOp.gtOp1->gtLclVarCommon.gtLclNum;
                newState.lvAssignDef = lclNum;
                newState.isAssignSrc = true;
            }

            comp->fgWalkTreePre(&tree->gtOp.gtOp2, comp->gsMarkPtrsAndAssignGroups, (void*)&newState);

            return WALK_SKIP_SUBTREES;
        }
    }

    return WALK_CONTINUE;
}
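// Minimal standalone sketch (not JIT code) of the assign-group merging in the GT_LCL_VAR
// case above. A "group" is a bit vector over local variable numbers; when one local is
// copied into another, both must end up pointing at the same group so that shadow-copy
// decisions propagate. ToyGroup, groups, and mergeAssignGroups are hypothetical stand-ins
// for FixedBitVect, the shadowVarInfo table, and the three branches in the real code.
#include <cstdio>
#include <vector>

struct ToyGroup
{
    std::vector<bool> bits;
    explicit ToyGroup(size_t count) : bits(count, false) {}
    void set(size_t i) { bits[i] = true; }
    void orWith(const ToyGroup& other)
    {
        for (size_t i = 0; i < bits.size(); i++)
        {
            if (other.bits[i])
            {
                bits[i] = true;
            }
        }
    }
};

// groups[lcl] is the assign group of local 'lcl', or nullptr if it has none yet.
static void mergeAssignGroups(std::vector<ToyGroup*>& groups, size_t defLcl, size_t srcLcl)
{
    if (groups[defLcl] != nullptr)
    {
        if (groups[srcLcl] != nullptr)
        {
            groups[defLcl]->orWith(*groups[srcLcl]); // OR both bit vectors
        }
        else
        {
            groups[defLcl]->set(srcLcl);
        }
        groups[srcLcl] = groups[defLcl]; // point both at the same group
    }
    else if (groups[srcLcl] != nullptr)
    {
        groups[srcLcl]->set(defLcl);
        groups[defLcl] = groups[srcLcl];
    }
    else
    {
        // Neither has a group yet: make a new one containing both locals (cleanup omitted for brevity).
        ToyGroup* bv = new ToyGroup(groups.size());
        bv->set(defLcl);
        bv->set(srcLcl);
        groups[defLcl] = bv;
        groups[srcLcl] = bv;
    }
}

int main()
{
    std::vector<ToyGroup*> groups(4, nullptr);
    mergeAssignGroups(groups, 0, 1); // "V00 = V01" creates a fresh group {V00, V01}
    mergeAssignGroups(groups, 2, 0); // "V02 = V00" folds V02 into the same group
    printf("V01 and V02 share a group: %s\n", (groups[1] == groups[2]) ? "yes" : "no");
    return 0;
}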
bool RangeCheck::IsMonotonicallyIncreasing(GenTreePtr expr, SearchPath* path)
{
    JITDUMP("[RangeCheck::IsMonotonicallyIncreasing] %p\n", dspPtr(expr));
    if (path->Lookup(expr))
    {
        return true;
    }

    // Add hashtable entry for expr.
    path->Set(expr, NULL);

    // Remove hashtable entry for expr when we exit the present scope.
    auto code = [&] { path->Remove(expr); };
    jitstd::utility::scoped_code<decltype(code)> finally(code);

    // If the rhs expr is constant, then it is not part of the dependency
    // loop which has to increase monotonically.
    ValueNum vn = expr->gtVNPair.GetConservative();
    if (m_pCompiler->vnStore->IsVNConstant(vn))
    {
        return true;
    }
    // If the rhs expr is local, then try to find the def of the local.
    else if (expr->IsLocal())
    {
        Location* loc = GetDef(expr);
        if (loc == nullptr)
        {
            return false;
        }
        GenTreePtr asg = loc->parent;
        assert(asg->OperKind() & GTK_ASGOP);
        switch (asg->OperGet())
        {
        case GT_ASG:
            return IsMonotonicallyIncreasing(asg->gtGetOp2(), path);

        case GT_ASG_ADD:
            return IsBinOpMonotonicallyIncreasing(asg->gtGetOp1(), asg->gtGetOp2(), GT_ADD, path);
        }
        JITDUMP("Unknown local definition type\n");
        return false;
    }
    else if (expr->OperGet() == GT_ADD)
    {
        return IsBinOpMonotonicallyIncreasing(expr->gtGetOp1(), expr->gtGetOp2(), GT_ADD, path);
    }
    else if (expr->OperGet() == GT_PHI)
    {
        for (GenTreeArgList* args = expr->gtOp.gtOp1->AsArgList(); args != nullptr; args = args->Rest())
        {
            // If the arg is already in the path, skip.
            if (path->Lookup(args->Current()))
            {
                continue;
            }
            if (!IsMonotonicallyIncreasing(args->Current(), path))
            {
                JITDUMP("Phi argument not monotonic\n");
                return false;
            }
        }
        return true;
    }
    JITDUMP("Unknown tree type\n");
    return false;
}
// The parameter rejectNegativeConst is true when we are adding two local vars (see above)
bool RangeCheck::IsMonotonicallyIncreasing(GenTree* expr, bool rejectNegativeConst)
{
    JITDUMP("[RangeCheck::IsMonotonicallyIncreasing] [%06d]\n", Compiler::dspTreeID(expr));

    // Add hashtable entry for expr.
    bool alreadyPresent = !m_pSearchPath->Set(expr, nullptr, SearchPath::Overwrite);
    if (alreadyPresent)
    {
        return true;
    }

    // Remove hashtable entry for expr when we exit the present scope.
    auto code = [this, expr] { m_pSearchPath->Remove(expr); };
    jitstd::utility::scoped_code<decltype(code)> finally(code);

    if (m_pSearchPath->GetCount() > MAX_SEARCH_DEPTH)
    {
        return false;
    }

    // If expr is constant, then it is not part of the dependency
    // loop which has to increase monotonically.
    ValueNum vn = expr->gtVNPair.GetConservative();
    if (m_pCompiler->vnStore->IsVNInt32Constant(vn))
    {
        if (rejectNegativeConst)
        {
            int cons = m_pCompiler->vnStore->ConstantValue<int>(vn);
            return (cons >= 0);
        }
        else
        {
            return true;
        }
    }
    // If the rhs expr is local, then try to find the def of the local.
    else if (expr->IsLocal())
    {
        BasicBlock* asgBlock;
        GenTreeOp*  asg = GetSsaDefAsg(expr->AsLclVarCommon(), &asgBlock);
        return (asg != nullptr) && IsMonotonicallyIncreasing(asg->gtGetOp2(), rejectNegativeConst);
    }
    else if (expr->OperGet() == GT_ADD)
    {
        return IsBinOpMonotonicallyIncreasing(expr->AsOp());
    }
    else if (expr->OperGet() == GT_PHI)
    {
        for (GenTreeArgList* args = expr->gtOp.gtOp1->AsArgList(); args != nullptr; args = args->Rest())
        {
            // If the arg is already in the path, skip.
            if (m_pSearchPath->Lookup(args->Current()))
            {
                continue;
            }

            if (!IsMonotonicallyIncreasing(args->Current(), rejectNegativeConst))
            {
                JITDUMP("Phi argument not monotonic\n");
                return false;
            }
        }
        return true;
    }

    JITDUMP("Unknown tree type\n");
    return false;
}
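// Minimal standalone sketch (not JIT code) of the recursion above, on a toy expression
// type instead of GenTree/ValueNum. It shows why a visited set (m_pSearchPath in the real
// code) is needed: the def of a loop variable is a phi that refers back to the variable
// itself, so the walk must treat already-visited nodes as trivially monotonic. The real
// code also removes entries on scope exit and restricts the shapes it accepts; this toy
// keeps entries and handles only Const/Local/Add/Phi. ToyExpr is hypothetical.
#include <cstdio>
#include <set>
#include <vector>

struct ToyExpr
{
    enum Kind { Const, Local, Add, Phi } kind;
    int                   value = 0;       // Const: the constant value
    ToyExpr*              def   = nullptr; // Local: the expression assigned to it
    ToyExpr*              op1   = nullptr; // Add: left operand
    ToyExpr*              op2   = nullptr; // Add: right operand
    std::vector<ToyExpr*> args;            // Phi: incoming values
};

static bool isMonotonicallyIncreasing(ToyExpr* expr, std::set<ToyExpr*>& path)
{
    if (!path.insert(expr).second)
    {
        return true; // already on the path: part of the cycle being checked
    }
    switch (expr->kind)
    {
        case ToyExpr::Const:
            return expr->value >= 0; // only non-negative constants keep the sum increasing
        case ToyExpr::Local:
            return (expr->def != nullptr) && isMonotonicallyIncreasing(expr->def, path);
        case ToyExpr::Add:
            return isMonotonicallyIncreasing(expr->op1, path) && isMonotonicallyIncreasing(expr->op2, path);
        case ToyExpr::Phi:
            for (ToyExpr* arg : expr->args)
            {
                if (!isMonotonicallyIncreasing(arg, path))
                {
                    return false;
                }
            }
            return true;
    }
    return false;
}

int main()
{
    // Models: i0 = 0; loop { i1 = phi(i0, i2); i2 = i1 + 1; }
    ToyExpr zero{ToyExpr::Const}; zero.value = 0;
    ToyExpr one{ToyExpr::Const};  one.value  = 1;
    ToyExpr i0{ToyExpr::Local};   i0.def = &zero;
    ToyExpr i1{ToyExpr::Local};
    ToyExpr i2{ToyExpr::Local};
    ToyExpr add{ToyExpr::Add};    add.op1 = &i1; add.op2 = &one;
    ToyExpr phi{ToyExpr::Phi};    phi.args = {&i0, &i2};
    i1.def = &phi;
    i2.def = &add;

    std::set<ToyExpr*> path;
    printf("i1 monotonically increasing: %s\n", isMonotonicallyIncreasing(&i1, path) ? "yes" : "no");
    return 0;
}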
Compiler::fgWalkResult Rationalizer::RewriteNode(GenTree** useEdge, ArrayStack<GenTree*>& parentStack)
{
    assert(useEdge != nullptr);

    GenTree* node = *useEdge;
    assert(node != nullptr);

#ifdef DEBUG
    const bool isLateArg = (node->gtFlags & GTF_LATE_ARG) != 0;
#endif

    // First, remove any preceding list nodes, which are not otherwise visited by the tree walk.
    //
    // NOTE: GT_FIELD_LIST head nodes, and GT_LIST nodes used by phi nodes will in fact be visited.
    for (GenTree* prev = node->gtPrev; prev != nullptr && prev->OperIsAnyList() && !(prev->OperIsFieldListHead());
         prev = node->gtPrev)
    {
        BlockRange().Remove(prev);
    }

    // In addition, remove the current node if it is a GT_LIST node that is not an aggregate.
    if (node->OperIsAnyList())
    {
        GenTreeArgList* list = node->AsArgList();
        if (!list->OperIsFieldListHead())
        {
            BlockRange().Remove(list);
        }
        return Compiler::WALK_CONTINUE;
    }

    LIR::Use use;
    if (parentStack.Height() < 2)
    {
        use = LIR::Use::GetDummyUse(BlockRange(), *useEdge);
    }
    else
    {
        use = LIR::Use(BlockRange(), useEdge, parentStack.Index(1));
    }

    assert(node == use.Def());
    switch (node->OperGet())
    {
        case GT_ASG:
            RewriteAssignment(use);
            break;

        case GT_BOX:
            // GT_BOX at this level just passes through so get rid of it
            use.ReplaceWith(comp, node->gtGetOp1());
            BlockRange().Remove(node);
            break;

        case GT_ADDR:
            RewriteAddress(use);
            break;

        case GT_IND:
            // Clear the `GTF_IND_ASG_LHS` flag, which overlaps with `GTF_IND_REQ_ADDR_IN_REG`.
            node->gtFlags &= ~GTF_IND_ASG_LHS;

            if (varTypeIsSIMD(node))
            {
                RewriteSIMDOperand(use, false);
            }
            else
            {
                // Due to promotion of structs containing fields of type struct with a
                // single scalar type field, we could potentially see IR nodes of the
                // form GT_IND(GT_ADD(lclvarAddr, 0)) where 0 is an offset representing
                // a field-seq. These get folded here.
                //
                // TODO: This code can be removed once JIT implements recursive struct
                // promotion instead of lying about the type of struct field as the type
                // of its single scalar field.
                GenTree* addr = node->AsIndir()->Addr();
                if (addr->OperGet() == GT_ADD && addr->gtGetOp1()->OperGet() == GT_LCL_VAR_ADDR &&
                    addr->gtGetOp2()->IsIntegralConst(0))
                {
                    GenTreeLclVarCommon* lclVarNode = addr->gtGetOp1()->AsLclVarCommon();
                    unsigned             lclNum     = lclVarNode->GetLclNum();
                    LclVarDsc*           varDsc     = comp->lvaTable + lclNum;
                    if (node->TypeGet() == varDsc->TypeGet())
                    {
                        JITDUMP("Rewriting GT_IND(GT_ADD(LCL_VAR_ADDR,0)) to LCL_VAR\n");
                        lclVarNode->SetOper(GT_LCL_VAR);
                        lclVarNode->gtType = node->TypeGet();
                        use.ReplaceWith(comp, lclVarNode);
                        BlockRange().Remove(addr);
                        BlockRange().Remove(addr->gtGetOp2());
                        BlockRange().Remove(node);
                    }
                }
            }
            break;

        case GT_NOP:
            // fgMorph sometimes inserts NOP nodes between defs and uses
            // supposedly 'to prevent constant folding'. In this case, remove the
            // NOP.
            if (node->gtGetOp1() != nullptr)
            {
                use.ReplaceWith(comp, node->gtGetOp1());
                BlockRange().Remove(node);
            }
            break;

        case GT_COMMA:
        {
            GenTree* op1 = node->gtGetOp1();
            if ((op1->gtFlags & GTF_ALL_EFFECT) == 0)
            {
                // The LHS has no side effects. Remove it.
                bool               isClosed    = false;
                unsigned           sideEffects = 0;
                LIR::ReadOnlyRange lhsRange    = BlockRange().GetTreeRange(op1, &isClosed, &sideEffects);

                // None of the transforms performed herein violate tree order, so these
                // should always be true.
                assert(isClosed);
                assert((sideEffects & GTF_ALL_EFFECT) == 0);

                BlockRange().Delete(comp, m_block, std::move(lhsRange));
            }

            GenTree* replacement = node->gtGetOp2();
            if (!use.IsDummyUse())
            {
                use.ReplaceWith(comp, replacement);
            }
            else
            {
                // This is a top-level comma. If the RHS has no side effects we can remove
                // it as well.
                if ((replacement->gtFlags & GTF_ALL_EFFECT) == 0)
                {
                    bool               isClosed    = false;
                    unsigned           sideEffects = 0;
                    LIR::ReadOnlyRange rhsRange    = BlockRange().GetTreeRange(replacement, &isClosed, &sideEffects);

                    // None of the transforms performed herein violate tree order, so these
                    // should always be true.
                    assert(isClosed);
                    assert((sideEffects & GTF_ALL_EFFECT) == 0);

                    BlockRange().Delete(comp, m_block, std::move(rhsRange));
                }
            }

            BlockRange().Remove(node);
        }
        break;

        case GT_ARGPLACE:
            // Remove argplace and list nodes from the execution order.
            //
            // TODO: remove phi args and phi nodes as well?
            BlockRange().Remove(node);
            break;

#if defined(_TARGET_XARCH_) || defined(_TARGET_ARM_)
        case GT_CLS_VAR:
        {
            // Class vars that are the target of an assignment will get rewritten into
            // GT_STOREIND(GT_CLS_VAR_ADDR, val) by RewriteAssignment. This check is
            // not strictly necessary--the GT_IND(GT_CLS_VAR_ADDR) pattern that would
            // otherwise be generated would also be picked up by RewriteAssignment--but
            // skipping the rewrite here saves an allocation and a bit of extra work.
            const bool isLHSOfAssignment = (use.User()->OperGet() == GT_ASG) && (use.User()->gtGetOp1() == node);
            if (!isLHSOfAssignment)
            {
                GenTree* ind = comp->gtNewOperNode(GT_IND, node->TypeGet(), node);

                node->SetOper(GT_CLS_VAR_ADDR);
                node->gtType = TYP_BYREF;

                BlockRange().InsertAfter(node, ind);
                use.ReplaceWith(comp, ind);

                // TODO: JIT dump
            }
        }
        break;
#endif // _TARGET_XARCH_ || _TARGET_ARM_

        case GT_INTRINSIC:
            // Non-target intrinsics should have already been rewritten back into user calls.
            assert(Compiler::IsTargetIntrinsic(node->gtIntrinsic.gtIntrinsicId));
            break;

#ifdef FEATURE_SIMD
        case GT_BLK:
        case GT_OBJ:
        {
            // TODO-1stClassStructs: These should have been transformed to GT_INDs, but in order
            // to preserve existing behavior, we will keep this as a block node if this is the
            // lhs of a block assignment, and either:
            // - It is a "generic" TYP_STRUCT assignment, OR
            // - It is an initblk, OR
            // - Neither the lhs nor rhs is known to be of SIMD type.
            GenTree* parent  = use.User();
            bool     keepBlk = false;
            if ((parent->OperGet() == GT_ASG) && (node == parent->gtGetOp1()))
            {
                if ((node->TypeGet() == TYP_STRUCT) || parent->OperIsInitBlkOp())
                {
                    keepBlk = true;
                }
                else if (!comp->isAddrOfSIMDType(node->AsBlk()->Addr()))
                {
                    GenTree* dataSrc = parent->gtGetOp2();
                    if (!dataSrc->IsLocal() && (dataSrc->OperGet() != GT_SIMD))
                    {
                        noway_assert(dataSrc->OperIsIndir());
                        keepBlk = !comp->isAddrOfSIMDType(dataSrc->AsIndir()->Addr());
                    }
                }
            }
            RewriteSIMDOperand(use, keepBlk);
        }
        break;

        case GT_LCL_FLD:
        case GT_STORE_LCL_FLD:
            // TODO-1stClassStructs: Eliminate this.
            FixupIfSIMDLocal(node->AsLclVarCommon());
            break;

        case GT_SIMD:
        {
            noway_assert(comp->featureSIMD);
            GenTreeSIMD* simdNode = node->AsSIMD();
            unsigned     simdSize = simdNode->gtSIMDSize;
            var_types    simdType = comp->getSIMDTypeForSize(simdSize);

            // TODO-1stClassStructs: This should be handled more generally for enregistered or promoted
            // structs that are passed or returned in a different register type than their enregistered
            // type(s).
            if (simdNode->gtType == TYP_I_IMPL && simdNode->gtSIMDSize == TARGET_POINTER_SIZE)
            {
                // This happens when it is consumed by a GT_RET_EXPR.
                // It can only be a Vector2f or Vector2i.
                assert(genTypeSize(simdNode->gtSIMDBaseType) == 4);
                simdNode->gtType = TYP_SIMD8;
            }

            // Certain SIMD trees require rationalizing.
            if (simdNode->gtSIMD.gtSIMDIntrinsicID == SIMDIntrinsicInitArray)
            {
                // Rewrite this as an explicit load.
                JITDUMP("Rewriting GT_SIMD array init as an explicit load:\n");
                unsigned int baseTypeSize = genTypeSize(simdNode->gtSIMDBaseType);
                GenTree*     address = new (comp, GT_LEA) GenTreeAddrMode(TYP_BYREF, simdNode->gtOp1, simdNode->gtOp2,
                                                                          baseTypeSize, offsetof(CORINFO_Array, u1Elems));
                GenTree* ind = comp->gtNewOperNode(GT_IND, simdType, address);

                BlockRange().InsertBefore(simdNode, address, ind);
                use.ReplaceWith(comp, ind);
                BlockRange().Remove(simdNode);

                DISPTREERANGE(BlockRange(), use.Def());
                JITDUMP("\n");
            }
            else
            {
                // This code depends on the fact that NONE of the SIMD intrinsics take vector operands
                // of a different width.  If that assumption changes, we will EITHER have to make these type
                // transformations during importation, and plumb the types all the way through the JIT,
                // OR add a lot of special handling here.
                GenTree* op1 = simdNode->gtGetOp1();
                if (op1 != nullptr && op1->gtType == TYP_STRUCT)
                {
                    op1->gtType = simdType;
                }

                GenTree* op2 = simdNode->gtGetOp2IfPresent();
                if (op2 != nullptr && op2->gtType == TYP_STRUCT)
                {
                    op2->gtType = simdType;
                }
            }
        }
        break;
#endif // FEATURE_SIMD

        default:
            // JCC nodes should not be present in HIR.
            assert(node->OperGet() != GT_JCC);
            break;
    }

    // Do some extra processing on top-level nodes to remove unused local reads.
    if (node->OperIsLocalRead())
    {
        if (use.IsDummyUse())
        {
            comp->lvaDecRefCnts(node);
            BlockRange().Remove(node);
        }
        else
        {
            // Local reads are side-effect-free; clear any flags leftover from frontend transformations.
            node->gtFlags &= ~GTF_ALL_EFFECT;
        }
    }

    assert(isLateArg == ((use.Def()->gtFlags & GTF_LATE_ARG) != 0));

    return Compiler::WALK_CONTINUE;
}
//------------------------------------------------------------------------
// BuildHWIntrinsic: Set the NodeInfo for a GT_HWIntrinsic tree.
//
// Arguments:
//    intrinsicTree - The GT_HWIntrinsic node of interest
//
// Return Value:
//    The number of sources consumed by this node.
//
int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
{
    NamedIntrinsic intrinsicID = intrinsicTree->gtHWIntrinsicId;
    int            numArgs     = HWIntrinsicInfo::lookupNumArgs(intrinsicTree);
    GenTree*       op1         = intrinsicTree->gtGetOp1();
    GenTree*       op2         = intrinsicTree->gtGetOp2();
    GenTree*       op3         = nullptr;
    int            srcCount    = 0;

    if ((op1 != nullptr) && op1->OperIsList())
    {
        // op2 must be null, and there must be at least two more arguments.
        assert(op2 == nullptr);
        noway_assert(op1->AsArgList()->Rest() != nullptr);
        noway_assert(op1->AsArgList()->Rest()->Rest() != nullptr);
        assert(op1->AsArgList()->Rest()->Rest()->Rest() == nullptr);
        op2 = op1->AsArgList()->Rest()->Current();
        op3 = op1->AsArgList()->Rest()->Rest()->Current();
        op1 = op1->AsArgList()->Current();
    }

    int  dstCount       = intrinsicTree->IsValue() ? 1 : 0;
    bool op2IsDelayFree = false;
    bool op3IsDelayFree = false;

    // Create internal temps, and handle any other special requirements.
    switch (HWIntrinsicInfo::lookup(intrinsicID).form)
    {
        case HWIntrinsicInfo::Sha1HashOp:
            assert((numArgs == 3) && (op2 != nullptr) && (op3 != nullptr));
            if (!op2->isContained())
            {
                assert(!op3->isContained());
                op2IsDelayFree           = true;
                op3IsDelayFree           = true;
                setInternalRegsDelayFree = true;
            }
            buildInternalFloatRegisterDefForNode(intrinsicTree);
            break;

        case HWIntrinsicInfo::SimdTernaryRMWOp:
            assert((numArgs == 3) && (op2 != nullptr) && (op3 != nullptr));
            if (!op2->isContained())
            {
                assert(!op3->isContained());
                op2IsDelayFree = true;
                op3IsDelayFree = true;
            }
            break;

        case HWIntrinsicInfo::Sha1RotateOp:
            buildInternalFloatRegisterDefForNode(intrinsicTree);
            break;

        case HWIntrinsicInfo::SimdExtractOp:
        case HWIntrinsicInfo::SimdInsertOp:
            if (!op2->isContained())
            {
                // We need a temp to create a switch table
                buildInternalIntRegisterDefForNode(intrinsicTree);
            }
            break;

        default:
            break;
    }

    // Next, build uses
    if (numArgs > 3)
    {
        srcCount = 0;
        assert(!op2IsDelayFree && !op3IsDelayFree);
        assert(op1->OperIs(GT_LIST));
        {
            for (GenTreeArgList* list = op1->AsArgList(); list != nullptr; list = list->Rest())
            {
                srcCount += BuildOperandUses(list->Current());
            }
        }
        assert(srcCount == numArgs);
    }
    else
    {
        if (op1 != nullptr)
        {
            srcCount += BuildOperandUses(op1);
            if (op2 != nullptr)
            {
                srcCount += (op2IsDelayFree) ? BuildDelayFreeUses(op2) : BuildOperandUses(op2);
                if (op3 != nullptr)
                {
                    srcCount += (op3IsDelayFree) ? BuildDelayFreeUses(op3) : BuildOperandUses(op3);
                }
            }
        }
    }
    buildInternalRegisterUses();

    // Now defs
    if (intrinsicTree->IsValue())
    {
        BuildDef(intrinsicTree);
    }

    return srcCount;
}
//------------------------------------------------------------------------
// genHWIntrinsic: Generates the code for a given hardware intrinsic node.
//
// Arguments:
//    node - The hardware intrinsic node
//
void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
{
    NamedIntrinsic      intrinsicID = node->gtHWIntrinsicId;
    InstructionSet      isa         = Compiler::isaOfHWIntrinsic(intrinsicID);
    HWIntrinsicCategory category    = Compiler::categoryOfHWIntrinsic(intrinsicID);
    HWIntrinsicFlag     flags       = Compiler::flagsOfHWIntrinsic(intrinsicID);
    int                 ival        = Compiler::ivalOfHWIntrinsic(intrinsicID);
    int                 numArgs     = Compiler::numArgsOfHWIntrinsic(node);

    assert((flags & HW_Flag_NoCodeGen) == 0);

    if (genIsTableDrivenHWIntrinsic(category, flags))
    {
        GenTree*  op1        = node->gtGetOp1();
        GenTree*  op2        = node->gtGetOp2();
        regNumber targetReg  = node->gtRegNum;
        var_types targetType = node->TypeGet();
        var_types baseType   = node->gtSIMDBaseType;

        regNumber op1Reg = REG_NA;
        regNumber op2Reg = REG_NA;
        emitter*  emit   = getEmitter();

        assert(numArgs >= 0);
        instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
        assert(ins != INS_invalid);
        emitAttr simdSize = EA_ATTR(node->gtSIMDSize);
        assert(simdSize != 0);

        switch (numArgs)
        {
            case 1:
                genConsumeOperands(node);
                op1Reg = op1->gtRegNum;
                if (category == HW_Category_MemoryLoad)
                {
                    emit->emitIns_R_AR(ins, simdSize, targetReg, op1Reg, 0);
                }
                else if (category == HW_Category_SIMDScalar && (flags & HW_Flag_CopyUpperBits) != 0)
                {
                    emit->emitIns_SIMD_R_R_R(ins, simdSize, targetReg, op1Reg, op1Reg);
                }
                else if ((ival != -1) && varTypeIsFloating(baseType))
                {
                    emit->emitIns_R_R_I(ins, simdSize, targetReg, op1Reg, ival);
                }
                else
                {
                    emit->emitIns_R_R(ins, simdSize, targetReg, op1Reg);
                }
                break;

            case 2:
                genConsumeOperands(node);
                op1Reg = op1->gtRegNum;
                op2Reg = op2->gtRegNum;
                if (category == HW_Category_MemoryStore)
                {
                    emit->emitIns_AR_R(ins, simdSize, op2Reg, op1Reg, 0);
                }
                else if ((ival != -1) && varTypeIsFloating(baseType))
                {
                    genHWIntrinsic_R_R_RM_I(node, ins);
                }
                else if (category == HW_Category_MemoryLoad)
                {
                    emit->emitIns_SIMD_R_R_AR(ins, simdSize, targetReg, op1Reg, op2Reg);
                }
                else if (Compiler::isImmHWIntrinsic(intrinsicID, op2))
                {
                    if (intrinsicID == NI_SSE2_Extract)
                    {
                        // Extract instructions write to a GP register, so use the int size as the emit size.
                        simdSize = emitTypeSize(TYP_INT);
                    }

                    auto emitSwCase = [&](unsigned i) {
                        emit->emitIns_SIMD_R_R_I(ins, simdSize, targetReg, op1Reg, (int)i);
                    };

                    if (op2->IsCnsIntOrI())
                    {
                        ssize_t ival = op2->AsIntCon()->IconValue();
                        emitSwCase((unsigned)ival);
                    }
                    else
                    {
                        // We emit a fallback case for the scenario when the imm-op is not a constant. This should
                        // normally happen when the intrinsic is called indirectly, such as via Reflection. However, it
                        // can also occur if the consumer calls it directly and just doesn't pass a constant value.
                        regNumber baseReg = node->ExtractTempReg();
                        regNumber offsReg = node->GetSingleTempReg();
                        genHWIntrinsicJumpTableFallback(intrinsicID, op2Reg, baseReg, offsReg, emitSwCase);
                    }
                }
                else
                {
                    genHWIntrinsic_R_R_RM(node, ins);
                }
                break;

            case 3:
            {
                assert(op1->OperIsList());
                assert(op1->gtGetOp2()->OperIsList());
                assert(op1->gtGetOp2()->gtGetOp2()->OperIsList());

                GenTreeArgList* argList = op1->AsArgList();
                op1                     = argList->Current();
                genConsumeRegs(op1);
                op1Reg = op1->gtRegNum;

                argList = argList->Rest();
                op2     = argList->Current();
                genConsumeRegs(op2);
                op2Reg = op2->gtRegNum;

                argList      = argList->Rest();
                GenTree* op3 = argList->Current();
                genConsumeRegs(op3);
                regNumber op3Reg = op3->gtRegNum;

                if (Compiler::isImmHWIntrinsic(intrinsicID, op3))
                {
                    auto emitSwCase = [&](unsigned i) {
                        emit->emitIns_SIMD_R_R_R_I(ins, simdSize, targetReg, op1Reg, op2Reg, (int)i);
                    };
                    if (op3->IsCnsIntOrI())
                    {
                        ssize_t ival = op3->AsIntCon()->IconValue();
                        emitSwCase((unsigned)ival);
                    }
                    else
                    {
                        // We emit a fallback case for the scenario when the imm-op is not a constant. This should
                        // normally happen when the intrinsic is called indirectly, such as via Reflection. However, it
                        // can also occur if the consumer calls it directly and just doesn't pass a constant value.
                        regNumber baseReg = node->ExtractTempReg();
                        regNumber offsReg = node->GetSingleTempReg();
                        genHWIntrinsicJumpTableFallback(intrinsicID, op3Reg, baseReg, offsReg, emitSwCase);
                    }
                }
                else if (category == HW_Category_MemoryStore)
                {
                    assert(intrinsicID == NI_SSE2_MaskMove);
                    assert(targetReg == REG_NA);

                    // SSE2 MaskMove hardcodes the destination (op3) in DI/EDI/RDI
                    if (op3Reg != REG_EDI)
                    {
                        emit->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_EDI, op3Reg);
                    }
                    emit->emitIns_R_R(ins, simdSize, op1Reg, op2Reg);
                }
                else
                {
                    emit->emitIns_SIMD_R_R_R_R(ins, simdSize, targetReg, op1Reg, op2Reg, op3Reg);
                }
                break;
            }

            default:
                unreached();
                break;
        }
        genProduceReg(node);
        return;
    }

    switch (isa)
    {
        case InstructionSet_SSE:
            genSSEIntrinsic(node);
            break;
        case InstructionSet_SSE2:
            genSSE2Intrinsic(node);
            break;
        case InstructionSet_SSE41:
            genSSE41Intrinsic(node);
            break;
        case InstructionSet_SSE42:
            genSSE42Intrinsic(node);
            break;
        case InstructionSet_AVX:
            genAVXIntrinsic(node);
            break;
        case InstructionSet_AVX2:
            genAVX2Intrinsic(node);
            break;
        case InstructionSet_AES:
            genAESIntrinsic(node);
            break;
        case InstructionSet_BMI1:
            genBMI1Intrinsic(node);
            break;
        case InstructionSet_BMI2:
            genBMI2Intrinsic(node);
            break;
        case InstructionSet_FMA:
            genFMAIntrinsic(node);
            break;
        case InstructionSet_LZCNT:
            genLZCNTIntrinsic(node);
            break;
        case InstructionSet_PCLMULQDQ:
            genPCLMULQDQIntrinsic(node);
            break;
        case InstructionSet_POPCNT:
            genPOPCNTIntrinsic(node);
            break;
        default:
            unreached();
            break;
    }
}
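// Minimal standalone sketch (not JIT code) of the imm-operand strategy above: when the
// immediate is a compile-time constant, exactly one instruction form is emitted; when it
// is not (for example, the intrinsic was invoked via Reflection), a fallback is emitted
// that selects among all possible immediate encodings at run time, which is what
// genHWIntrinsicJumpTableFallback builds a jump table for. emitImmediateOrFallback,
// emitCase, maxImm, and the pextrw mnemonic below are hypothetical illustrations, and
// "emitting" is just printing.
#include <cstdio>
#include <functional>

static void emitImmediateOrFallback(bool immIsConstant, unsigned immValue, unsigned maxImm,
                                    const std::function<void(unsigned)>& emitCase)
{
    if (immIsConstant)
    {
        emitCase(immValue); // single instruction with the known immediate
    }
    else
    {
        // Fallback: materialize every encoding; the real codegen dispatches into this
        // table using the run-time value of the immediate operand.
        for (unsigned i = 0; i <= maxImm; i++)
        {
            emitCase(i);
        }
    }
}

int main()
{
    auto emitCase = [](unsigned i) { printf("  pextrw  eax, xmm0, %u\n", i); };

    printf("constant immediate:\n");
    emitImmediateOrFallback(true, 3, 7, emitCase);

    printf("non-constant immediate (jump-table fallback over all encodings):\n");
    emitImmediateOrFallback(false, 0, 7, emitCase);
    return 0;
}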