//------------------------------------------------------------------------ // ContainCheckIndir: Determine whether operands of an indir should be contained. // // Arguments: // indirNode - The indirection node of interest // // Notes: // This is called for both store and load indirections. // // Return Value: // None. // void Lowering::ContainCheckIndir(GenTreeIndir* indirNode) { // If this is the rhs of a block copy it will be handled when we handle the store. if (indirNode->TypeGet() == TYP_STRUCT) { return; } #ifdef FEATURE_SIMD // If indirTree is of TYP_SIMD12, don't mark addr as contained // so that it always get computed to a register. This would // mean codegen side logic doesn't need to handle all possible // addr expressions that could be contained. // // TODO-ARM64-CQ: handle other addr mode expressions that could be marked // as contained. if (indirNode->TypeGet() == TYP_SIMD12) { return; } #endif // FEATURE_SIMD GenTree* addr = indirNode->Addr(); bool makeContained = true; if ((addr->OperGet() == GT_LEA) && IsSafeToContainMem(indirNode, addr)) { GenTreeAddrMode* lea = addr->AsAddrMode(); GenTree* base = lea->Base(); GenTree* index = lea->Index(); int cns = lea->Offset(); #ifdef _TARGET_ARM_ // ARM floating-point load/store doesn't support a form similar to integer // ldr Rdst, [Rbase + Roffset] with offset in a register. The only supported // form is vldr Rdst, [Rbase + imm] with a more limited constraint on the imm. if (lea->HasIndex() || !emitter::emitIns_valid_imm_for_vldst_offset(cns)) { if (indirNode->OperGet() == GT_STOREIND) { if (varTypeIsFloating(indirNode->AsStoreInd()->Data())) { makeContained = false; } } else if (indirNode->OperGet() == GT_IND) { if (varTypeIsFloating(indirNode)) { makeContained = false; } } } #endif if (makeContained) { MakeSrcContained(indirNode, addr); } } }
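// Standalone sketch (not the JIT's emitter API) of the kind of check
// emitIns_valid_imm_for_vldst_offset performs: ARM VLDR/VSTR encode their
// offset as an 8-bit immediate scaled by 4, so only multiples of 4 within
// +/-1020 can be folded into the instruction; any other offset forces the
// address to be computed into a register, which is why containment is
// disabled above for floating-point indirections that miss this window.
#include <cstdlib>

static bool isValidVldrVstrOffsetSketch(int offset)
{
    int absOffset = std::abs(offset);
    return ((absOffset & 3) == 0) && (absOffset <= 1020); // imm8 * 4, add/subtract form
}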
//------------------------------------------------------------------------
// LowerCast: Lower GT_CAST(srcType, DstType) nodes.
//
// Arguments:
//    tree - GT_CAST node to be lowered
//
// Return Value:
//    None.
//
// Notes:
//    Casts from float/double to a smaller int type are transformed as follows:
//    GT_CAST(float/double, byte)   =   GT_CAST(GT_CAST(float/double, int32), byte)
//    GT_CAST(float/double, sbyte)  =   GT_CAST(GT_CAST(float/double, int32), sbyte)
//    GT_CAST(float/double, int16)  =   GT_CAST(GT_CAST(float/double, int32), int16)
//    GT_CAST(float/double, uint16) =   GT_CAST(GT_CAST(float/double, int32), uint16)
//
//    Note that for the overflow conversions we still depend on helper calls and
//    don't expect to see them here.
//    i) GT_CAST(float/double, int type with overflow detection)
//
void Lowering::LowerCast(GenTree* tree)
{
    assert(tree->OperGet() == GT_CAST);

    JITDUMP("LowerCast for: ");
    DISPNODE(tree);
    JITDUMP("\n");

    GenTree*  op1     = tree->gtOp.gtOp1;
    var_types dstType = tree->CastToType();
    var_types srcType = genActualType(op1->TypeGet());
    var_types tmpType = TYP_UNDEF;

    if (varTypeIsFloating(srcType))
    {
        noway_assert(!tree->gtOverflow());
        assert(!varTypeIsSmall(dstType)); // fgMorphCast creates intermediate casts when converting from float to small
                                          // int.
    }

    assert(!varTypeIsSmall(srcType));

    if (tmpType != TYP_UNDEF)
    {
        GenTree* tmp = comp->gtNewCastNode(tmpType, op1, tree->IsUnsigned(), tmpType);
        tmp->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT));

        tree->gtFlags &= ~GTF_UNSIGNED;
        tree->gtOp.gtOp1 = tmp;
        BlockRange().InsertAfter(op1, tmp);
    }

    // Now determine if we have operands that should be contained.
    ContainCheckCast(tree->AsCast());
}
unsigned InitVarDscInfo::alignReg(var_types type, unsigned requiredRegAlignment) { NYI_ARM64("alignReg"); assert(requiredRegAlignment > 0); if (requiredRegAlignment == 1) return 0; // Everything is always "1" aligned assert(requiredRegAlignment == 2); // we don't expect anything else right now int alignMask = regArgNum(type) & (requiredRegAlignment - 1); if (alignMask == 0) return 0; // We're already aligned unsigned cAlignSkipped = requiredRegAlignment - alignMask; assert(cAlignSkipped == 1); // Alignment is currently only 1 or 2, so misalignment can only be 1. #ifdef _TARGET_ARM_ if (varTypeIsFloating(type)) { fltArgSkippedRegMask |= genMapFloatRegArgNumToRegMask(floatRegArgNum); } #endif // _TARGET_ARM_ assert(regArgNum(type) + cAlignSkipped <= maxRegArgNum(type)); // if equal, then we aligned the last slot, and the arg can't be enregistered regArgNum(type) += cAlignSkipped; return cAlignSkipped; }
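// Standalone sketch with hypothetical names (not InitVarDscInfo's actual
// state) of the alignment step above: when a 2-register-aligned argument
// would otherwise start in an odd register, one argument register is skipped.
// On ARM the skipped float register is also recorded so that a later
// TYP_FLOAT argument can back-fill it.
#include <cassert>

static unsigned alignRegSketch(unsigned& nextRegArgNum, unsigned requiredRegAlignment)
{
    assert((requiredRegAlignment == 1) || (requiredRegAlignment == 2));

    unsigned misalignment = nextRegArgNum & (requiredRegAlignment - 1);
    if (misalignment == 0)
    {
        return 0; // already aligned
    }

    unsigned cAlignSkipped = requiredRegAlignment - misalignment; // always 1 for 2-alignment
    nextRegArgNum += cAlignSkipped;                               // skip the odd register
    return cAlignSkipped;
}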
//------------------------------------------------------------------------ // TreeNodeInfoInitPutArgReg: Set the NodeInfo for a PUTARG_REG. // // Arguments: // node - The PUTARG_REG node. // argReg - The register in which to pass the argument. // info - The info for the node's using call. // isVarArgs - True if the call uses a varargs calling convention. // callHasFloatRegArgs - Set to true if this PUTARG_REG uses an FP register. // // Return Value: // None. // void Lowering::TreeNodeInfoInitPutArgReg( GenTreeUnOp* node, regNumber argReg, TreeNodeInfo& info, bool isVarArgs, bool* callHasFloatRegArgs) { assert(node != nullptr); assert(node->OperIsPutArgReg()); assert(argReg != REG_NA); // Each register argument corresponds to one source. info.srcCount++; // Set the register requirements for the node. regMaskTP argMask = genRegMask(argReg); #ifdef ARM_SOFTFP // If type of node is `long` then it is actually `double`. // The actual `long` types must have been transformed as a field list with two fields. if (node->TypeGet() == TYP_LONG) { info.srcCount++; assert(genRegArgNext(argReg) == REG_NEXT(argReg)); argMask |= genRegMask(REG_NEXT(argReg)); } #endif // ARM_SOFTFP node->gtLsraInfo.setDstCandidates(m_lsra, argMask); node->gtLsraInfo.setSrcCandidates(m_lsra, argMask); // To avoid redundant moves, have the argument operand computed in the // register in which the argument is passed to the call. node->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(m_lsra, m_lsra->getUseCandidates(node)); *callHasFloatRegArgs |= varTypeIsFloating(node->TypeGet()); }
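// Illustrative sketch (RegMaskSketch is a stand-in for regMaskTP): under
// ARM_SOFTFP a TYP_LONG occupies a consecutive integer register pair, so the
// candidate mask handed to LSRA is the OR of both registers' bits, mirroring
// the argMask |= genRegMask(REG_NEXT(argReg)) step above.
#include <cstdint>

typedef uint64_t RegMaskSketch;

static RegMaskSketch regMaskSketch(unsigned regNum)
{
    return RegMaskSketch(1) << regNum;
}

static RegMaskSketch softFPLongArgMaskSketch(unsigned firstArgRegNum)
{
    // e.g. bits for r0 and r1 when the long starts in r0
    return regMaskSketch(firstArgRegNum) | regMaskSketch(firstArgRegNum + 1);
}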
void RegSet::SetLockedRegFloat(GenTree * tree, bool bValue) { regNumber reg = tree->gtRegNum; var_types type = tree->TypeGet(); assert(varTypeIsFloating(type)); regMaskTP regMask = genRegMaskFloat(reg, tree->TypeGet()); if (bValue) { JITDUMP("locking register %s\n", getRegNameFloat(reg, type)); assert((rsGetMaskUsed() & regMask) == regMask); assert((rsGetMaskLock() & regMask) == 0); rsSetMaskLock( (rsGetMaskLock() | regMask) ); } else { JITDUMP("unlocking register %s\n", getRegNameFloat(reg, type)); assert((rsGetMaskUsed() & regMask) == regMask); assert((rsGetMaskLock() & regMask) == regMask); rsSetMaskLock( (rsGetMaskLock() & ~regMask) ); } }
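// Minimal sketch of the invariants the masks above maintain: a floating-point
// register may only be locked while it is marked used and not already locked,
// and unlocking clears exactly the bits that were set. The names here are
// illustrative, not RegSet's real members.
#include <cassert>
#include <cstdint>

static void setLockedRegSketch(uint64_t usedMask, uint64_t& lockMask, uint64_t regMask, bool lockIt)
{
    assert((usedMask & regMask) == regMask); // the register must be in use either way

    if (lockIt)
    {
        assert((lockMask & regMask) == 0); // not locked yet
        lockMask |= regMask;
    }
    else
    {
        assert((lockMask & regMask) == regMask); // currently locked
        lockMask &= ~regMask;
    }
}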
//------------------------------------------------------------------------ // ContainCheckIndir: Determine whether operands of an indir should be contained. // // Arguments: // indirNode - The indirection node of interest // // Notes: // This is called for both store and load indirections. // // Return Value: // None. // void Lowering::ContainCheckIndir(GenTreeIndir* indirNode) { // If this is the rhs of a block copy it will be handled when we handle the store. if (indirNode->TypeGet() == TYP_STRUCT) { return; } GenTree* addr = indirNode->Addr(); bool makeContained = true; if ((addr->OperGet() == GT_LEA) && IsSafeToContainMem(indirNode, addr)) { GenTreeAddrMode* lea = addr->AsAddrMode(); GenTree* base = lea->Base(); GenTree* index = lea->Index(); int cns = lea->Offset(); #ifdef _TARGET_ARM_ // ARM floating-point load/store doesn't support a form similar to integer // ldr Rdst, [Rbase + Roffset] with offset in a register. The only supported // form is vldr Rdst, [Rbase + imm] with a more limited constraint on the imm. if (lea->HasIndex() || !emitter::emitIns_valid_imm_for_vldst_offset(cns)) { if (indirNode->OperGet() == GT_STOREIND) { if (varTypeIsFloating(indirNode->AsStoreInd()->Data())) { makeContained = false; } } else if (indirNode->OperGet() == GT_IND) { if (varTypeIsFloating(indirNode)) { makeContained = false; } } } #endif if (makeContained) { MakeSrcContained(indirNode, addr); } } }
bool RegSet::IsLockedRegFloat(GenTreePtr tree) { /* The value must be sitting in a register */ assert(tree); assert(tree->gtFlags & GTF_REG_VAL); assert(varTypeIsFloating(tree->TypeGet())); regMaskTP regMask = genRegMaskFloat(tree->gtRegNum, tree->TypeGet()); return (rsGetMaskLock() & regMask) == regMask; }
void CodeGen::genCodeForTreeCastFloat(GenTree *tree, RegSet::RegisterPreference *pref) { GenTreePtr op1 = tree->gtOp.gtOp1; var_types from = op1->gtType; var_types to = tree->gtType; if (varTypeIsFloating(from)) genCodeForTreeCastFromFloat(tree, pref); else genCodeForTreeCastToFloat(tree, pref); }
//------------------------------------------------------------------------ // ContainCheckStoreIndir: determine whether the sources of a STOREIND node should be contained. // // Arguments: // node - pointer to the node // void Lowering::ContainCheckStoreIndir(GenTreeIndir* node) { #ifdef _TARGET_ARM64_ GenTree* src = node->gtOp.gtOp2; if (!varTypeIsFloating(src->TypeGet()) && src->IsIntegralConst(0)) { // an integer zero for 'src' can be contained. MakeSrcContained(node, src); } #endif // _TARGET_ARM64_ ContainCheckIndir(node); }
//------------------------------------------------------------------------
// LowerCast: Lower GT_CAST(srcType, DstType) nodes.
//
// Arguments:
//    tree - GT_CAST node to be lowered
//
// Return Value:
//    None.
//
// Notes:
//    Casts from float/double to a smaller int type are transformed as follows:
//    GT_CAST(float/double, byte)   =   GT_CAST(GT_CAST(float/double, int32), byte)
//    GT_CAST(float/double, sbyte)  =   GT_CAST(GT_CAST(float/double, int32), sbyte)
//    GT_CAST(float/double, int16)  =   GT_CAST(GT_CAST(float/double, int32), int16)
//    GT_CAST(float/double, uint16) =   GT_CAST(GT_CAST(float/double, int32), uint16)
//
//    Note that for the overflow conversions we still depend on helper calls and
//    don't expect to see them here.
//    i) GT_CAST(float/double, int type with overflow detection)
//
void Lowering::LowerCast(GenTree* tree)
{
    assert(tree->OperGet() == GT_CAST);

    JITDUMP("LowerCast for: ");
    DISPNODE(tree);
    JITDUMP("\n");

    GenTreePtr op1     = tree->gtOp.gtOp1;
    var_types  dstType = tree->CastToType();
    var_types  srcType = genActualType(op1->TypeGet());
    var_types  tmpType = TYP_UNDEF;

    if (varTypeIsFloating(srcType))
    {
        noway_assert(!tree->gtOverflow());
    }

    assert(!varTypeIsSmall(srcType));

    // Case of src is a floating point type and dst is a small type.
    if (varTypeIsFloating(srcType) && varTypeIsSmall(dstType))
    {
        NYI_ARM("Lowering for cast from float to small type"); // Not tested yet.
        tmpType = TYP_INT;
    }

    if (tmpType != TYP_UNDEF)
    {
        GenTreePtr tmp = comp->gtNewCastNode(tmpType, op1, tmpType);
        tmp->gtFlags |= (tree->gtFlags & (GTF_UNSIGNED | GTF_OVERFLOW | GTF_EXCEPT));

        tree->gtFlags &= ~GTF_UNSIGNED;
        tree->gtOp.gtOp1 = tmp;
        BlockRange().InsertAfter(op1, tmp);
    }

    // Now determine if we have operands that should be contained.
    ContainCheckCast(tree->AsCast());
}
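// Minimal sketch of the transformation documented above, written directly in
// C++: a float-to-small-int cast is performed as float -> int32 followed by
// an int32 -> int16 narrowing, which is exactly what the inserted
// intermediate GT_CAST(..., TYP_INT) node models.
#include <cstdint>

static int16_t castFloatToInt16Sketch(float value)
{
    int32_t asInt32 = static_cast<int32_t>(value); // GT_CAST(float, int32)
    return static_cast<int16_t>(asInt32);          // GT_CAST(int32, int16)
}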
bool InitVarDscInfo::enoughAvailRegs(var_types type, unsigned numRegs /* = 1 */) { assert(numRegs > 0); unsigned backFillCount = 0; #ifdef _TARGET_ARM_ // Check for back-filling if (varTypeIsFloating(type) && // We only back-fill the float registers !anyFloatStackArgs && // Is it legal to back-fill? (We haven't put any FP args on the stack yet) (numRegs == 1) && // Is there a possibility we could back-fill? (fltArgSkippedRegMask != RBM_NONE)) // Is there an available back-fill slot? { backFillCount = 1; } #endif // _TARGET_ARM_ return regArgNum(type) + numRegs - backFillCount <= maxRegArgNum(type); }
void CodeGen::genCodeForTreeCastFromFloat(GenTree *tree, RegSet::RegisterPreference *pref) { GenTreePtr op1 = tree->gtOp.gtOp1; var_types from = op1->gtType; var_types final = tree->gtType; var_types intermediate = tree->CastToType(); regNumber srcReg; regNumber dstReg; assert(varTypeIsFloating(from)); // Evaluate op1 into a floating point register // if (varTypeIsFloating(final)) { genCodeForTreeFloat(op1, pref); } else {
unsigned InitVarDscInfo::allocRegArg(var_types type, unsigned numRegs /* = 1 */) { assert(numRegs > 0); unsigned resultArgNum = regArgNum(type); bool isBackFilled = false; #ifdef _TARGET_ARM_ // Check for back-filling if (varTypeIsFloating(type) && // We only back-fill the float registers !anyFloatStackArgs && // Is it legal to back-fill? (We haven't put any FP args on the stack yet) (numRegs == 1) && // Is there a possibility we could back-fill? (fltArgSkippedRegMask != RBM_NONE)) // Is there an available back-fill slot? { // We will never back-fill something greater than a single register // (TYP_FLOAT, or TYP_STRUCT HFA with a single float). This is because // we don't have any types that require > 2 register alignment, so we // can't create a > 1 register alignment hole to back-fill. // Back-fill the register regMaskTP backFillBitMask = genFindLowestBit(fltArgSkippedRegMask); fltArgSkippedRegMask &= ~backFillBitMask; // Remove the back-filled register(s) from the skipped mask resultArgNum = genMapFloatRegNumToRegArgNum(genRegNumFromMask(backFillBitMask)); assert(resultArgNum < MAX_FLOAT_REG_ARG); isBackFilled = true; } #endif // _TARGET_ARM_ if (!isBackFilled) { // We didn't back-fill a register (on ARM), so skip the number of registers that we allocated. #if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI) // For System V the reg type counters should be independent. nextReg(TYP_INT, numRegs); nextReg(TYP_FLOAT, numRegs); #else nextReg(type, numRegs); #endif } return resultArgNum; }
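// Standalone sketch of the back-filling step above: the lowest set bit of the
// skipped-register mask identifies the earliest float register that was
// skipped for alignment; it is removed from the mask and handed to the
// TYP_FLOAT argument. genFindLowestBit is modelled with the usual
// mask & (~mask + 1) idiom.
#include <cassert>
#include <cstdint>

static uint64_t backFillSkippedRegSketch(uint64_t& fltArgSkippedRegMask)
{
    assert(fltArgSkippedRegMask != 0); // caller has checked a back-fill slot exists

    uint64_t backFillBitMask = fltArgSkippedRegMask & (~fltArgSkippedRegMask + 1); // lowest set bit
    fltArgSkippedRegMask &= ~backFillBitMask; // no longer available for back-filling
    return backFillBitMask;                   // mask of the back-filled register
}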
//------------------------------------------------------------------------ // TreeNodeInfoInitPutArgReg: Set the NodeInfo for a PUTARG_REG. // // Arguments: // node - The PUTARG_REG node. // argReg - The register in which to pass the argument. // info - The info for the node's using call. // isVarArgs - True if the call uses a varargs calling convention. // callHasFloatRegArgs - Set to true if this PUTARG_REG uses an FP register. // // Return Value: // None. // void Lowering::TreeNodeInfoInitPutArgReg( GenTreeUnOp* node, regNumber argReg, TreeNodeInfo& info, bool isVarArgs, bool* callHasFloatRegArgs) { assert(node != nullptr); assert(node->OperIsPutArgReg()); assert(argReg != REG_NA); // Each register argument corresponds to one source. info.srcCount++; // Set the register requirements for the node. const regMaskTP argMask = genRegMask(argReg); node->gtLsraInfo.setDstCandidates(m_lsra, argMask); node->gtLsraInfo.setSrcCandidates(m_lsra, argMask); // To avoid redundant moves, have the argument operand computed in the // register in which the argument is passed to the call. node->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(m_lsra, m_lsra->getUseCandidates(node)); *callHasFloatRegArgs |= varTypeIsFloating(node->TypeGet()); }
void CodeGen::genComputeAddressableFloat(GenTreePtr tree, regMaskTP addrRegInt, regMaskTP addrRegFlt, RegSet::KeepReg keptReg, regMaskTP needReg, RegSet::KeepReg keepReg, bool freeOnly /* = false */) { noway_assert(genStillAddressable(tree)); noway_assert(varTypeIsFloating(tree->TypeGet())); genDoneAddressableFloat(tree, addrRegInt, addrRegFlt, keptReg); regNumber reg; if (tree->gtFlags & GTF_REG_VAL) { reg = tree->gtRegNum; if (freeOnly && !(genRegMaskFloat(reg, tree->TypeGet()) & regSet.RegFreeFloat())) { goto LOAD_REG; } } else { LOAD_REG: RegSet::RegisterPreference pref(needReg, RBM_NONE); reg = regSet.PickRegFloat(tree->TypeGet(), &pref); genLoadFloat(tree, reg); } genMarkTreeInReg(tree, reg); if (keepReg == RegSet::KEEP_REG) { regSet.SetUsedRegFloat(tree, true); } }
//------------------------------------------------------------------------ // TreeNodeInfoInitCall: Set the NodeInfo for a call. // // Arguments: // call - The call node of interest // // Return Value: // None. // void Lowering::TreeNodeInfoInitCall(GenTreeCall* call) { TreeNodeInfo* info = &(call->gtLsraInfo); LinearScan* l = m_lsra; Compiler* compiler = comp; bool hasMultiRegRetVal = false; ReturnTypeDesc* retTypeDesc = nullptr; info->srcCount = 0; if (call->TypeGet() != TYP_VOID) { hasMultiRegRetVal = call->HasMultiRegRetVal(); if (hasMultiRegRetVal) { // dst count = number of registers in which the value is returned by call retTypeDesc = call->GetReturnTypeDesc(); info->dstCount = retTypeDesc->GetReturnRegCount(); } else { info->dstCount = 1; } } else { info->dstCount = 0; } GenTree* ctrlExpr = call->gtControlExpr; if (call->gtCallType == CT_INDIRECT) { // either gtControlExpr != null or gtCallAddr != null. // Both cannot be non-null at the same time. assert(ctrlExpr == nullptr); assert(call->gtCallAddr != nullptr); ctrlExpr = call->gtCallAddr; } // set reg requirements on call target represented as control sequence. if (ctrlExpr != nullptr) { // we should never see a gtControlExpr whose type is void. assert(ctrlExpr->TypeGet() != TYP_VOID); info->srcCount++; // In case of fast tail implemented as jmp, make sure that gtControlExpr is // computed into a register. if (call->IsFastTailCall()) { NYI_ARM("tail call"); #ifdef _TARGET_ARM64_ // Fast tail call - make sure that call target is always computed in IP0 // so that epilog sequence can generate "br xip0" to achieve fast tail call. ctrlExpr->gtLsraInfo.setSrcCandidates(l, genRegMask(REG_IP0)); #endif // _TARGET_ARM64_ } } #ifdef _TARGET_ARM_ else { info->internalIntCount = 1; } #endif // _TARGET_ARM_ RegisterType registerType = call->TypeGet(); // Set destination candidates for return value of the call. #ifdef _TARGET_ARM_ if (call->IsHelperCall(compiler, CORINFO_HELP_INIT_PINVOKE_FRAME)) { // The ARM CORINFO_HELP_INIT_PINVOKE_FRAME helper uses a custom calling convention that returns with // TCB in REG_PINVOKE_TCB. fgMorphCall() sets the correct argument registers. 
info->setDstCandidates(l, RBM_PINVOKE_TCB); } else #endif // _TARGET_ARM_ if (hasMultiRegRetVal) { assert(retTypeDesc != nullptr); info->setDstCandidates(l, retTypeDesc->GetABIReturnRegs()); } else if (varTypeIsFloating(registerType)) { info->setDstCandidates(l, RBM_FLOATRET); } else if (registerType == TYP_LONG) { info->setDstCandidates(l, RBM_LNGRET); } else { info->setDstCandidates(l, RBM_INTRET); } // If there is an explicit this pointer, we don't want that node to produce anything // as it is redundant if (call->gtCallObjp != nullptr) { GenTreePtr thisPtrNode = call->gtCallObjp; if (thisPtrNode->gtOper == GT_PUTARG_REG) { l->clearOperandCounts(thisPtrNode); thisPtrNode->SetContained(); l->clearDstCount(thisPtrNode->gtOp.gtOp1); } else { l->clearDstCount(thisPtrNode); } } // First, count reg args bool callHasFloatRegArgs = false; for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext()) { assert(list->OperIsList()); GenTreePtr argNode = list->Current(); fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, argNode); assert(curArgTabEntry); if (curArgTabEntry->regNum == REG_STK) { // late arg that is not passed in a register assert(argNode->gtOper == GT_PUTARG_STK); TreeNodeInfoInitPutArgStk(argNode->AsPutArgStk(), curArgTabEntry); continue; } // A GT_FIELD_LIST has a TYP_VOID, but is used to represent a multireg struct if (argNode->OperGet() == GT_FIELD_LIST) { argNode->SetContained(); // There could be up to 2-4 PUTARG_REGs in the list (3 or 4 can only occur for HFAs) regNumber argReg = curArgTabEntry->regNum; for (GenTreeFieldList* entry = argNode->AsFieldList(); entry != nullptr; entry = entry->Rest()) { TreeNodeInfoInitPutArgReg(entry->Current()->AsUnOp(), argReg, *info, false, &callHasFloatRegArgs); // Update argReg for the next putarg_reg (if any) argReg = genRegArgNext(argReg); #if defined(_TARGET_ARM_) // A double register is modelled as an even-numbered single one if (entry->Current()->TypeGet() == TYP_DOUBLE) { argReg = genRegArgNext(argReg); } #endif // _TARGET_ARM_ } } #ifdef _TARGET_ARM_ else if (argNode->OperGet() == GT_PUTARG_SPLIT) { fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, argNode); TreeNodeInfoInitPutArgSplit(argNode->AsPutArgSplit(), *info, curArgTabEntry); } #endif else { TreeNodeInfoInitPutArgReg(argNode->AsUnOp(), curArgTabEntry->regNum, *info, false, &callHasFloatRegArgs); } } // Now, count stack args // Note that these need to be computed into a register, but then // they're just stored to the stack - so the reg doesn't // need to remain live until the call. In fact, it must not // because the code generator doesn't actually consider it live, // so it can't be spilled. 
GenTreePtr args = call->gtCallArgs; while (args) { GenTreePtr arg = args->gtOp.gtOp1; // Skip arguments that have been moved to the Late Arg list if (!(args->gtFlags & GTF_LATE_ARG)) { if (arg->gtOper == GT_PUTARG_STK) { fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, arg); assert(curArgTabEntry); assert(curArgTabEntry->regNum == REG_STK); TreeNodeInfoInitPutArgStk(arg->AsPutArgStk(), curArgTabEntry); } #ifdef _TARGET_ARM_ else if (arg->OperGet() == GT_PUTARG_SPLIT) { fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, arg); TreeNodeInfoInitPutArgSplit(arg->AsPutArgSplit(), *info, curArgTabEntry); } #endif else { TreeNodeInfo* argInfo = &(arg->gtLsraInfo); if (argInfo->dstCount != 0) { argInfo->isLocalDefUse = true; } argInfo->dstCount = 0; } } args = args->gtOp.gtOp2; } // If it is a fast tail call, it is already preferenced to use IP0. // Therefore, no need set src candidates on call tgt again. if (call->IsVarargs() && callHasFloatRegArgs && !call->IsFastTailCall() && (ctrlExpr != nullptr)) { NYI_ARM("float reg varargs"); // Don't assign the call target to any of the argument registers because // we will use them to also pass floating point arguments as required // by Arm64 ABI. ctrlExpr->gtLsraInfo.setSrcCandidates(l, l->allRegs(TYP_INT) & ~(RBM_ARG_REGS)); } #ifdef _TARGET_ARM_ if (call->NeedsNullCheck()) { info->internalIntCount++; } #endif // _TARGET_ARM_ }
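// Illustrative sketch (hypothetical enum, not GenTreeFieldList) of the
// register-advance rule used above when walking a multi-reg argument's field
// list: each field moves to the next argument register, and on ARM a
// TYP_DOUBLE field advances twice because a double is modelled as a pair of
// consecutive single-precision registers.
#include <vector>

enum class FieldKindSketch
{
    Int,
    Float,
    Double
};

static unsigned nextArgRegAfterFieldListSketch(unsigned argReg, const std::vector<FieldKindSketch>& fields)
{
    for (FieldKindSketch kind : fields)
    {
        argReg += (kind == FieldKindSketch::Double) ? 2 : 1; // next PUTARG_REG, if any
    }
    return argReg;
}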
//------------------------------------------------------------------------ // IsContainableImmed: Is an immediate encodable in-place? // // Return Value: // True if the immediate can be folded into an instruction, // for example small enough and non-relocatable. bool Lowering::IsContainableImmed(GenTree* parentNode, GenTree* childNode) { if (varTypeIsFloating(parentNode->TypeGet())) { // We can contain a floating point 0.0 constant in a compare instruction switch (parentNode->OperGet()) { default: return false; case GT_EQ: case GT_NE: case GT_LT: case GT_LE: case GT_GE: case GT_GT: if (childNode->IsIntegralConst(0)) { // TODO-ARM-Cleanup: not tested yet. NYI_ARM("ARM IsContainableImmed for floating point type"); return true; } break; } } else { // Make sure we have an actual immediate if (!childNode->IsCnsIntOrI()) return false; if (childNode->IsIconHandle() && comp->opts.compReloc) return false; ssize_t immVal = childNode->gtIntCon.gtIconVal; emitAttr attr = emitActualTypeSize(childNode->TypeGet()); emitAttr size = EA_SIZE(attr); #ifdef _TARGET_ARM_ insFlags flags = parentNode->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE; #endif switch (parentNode->OperGet()) { default: return false; case GT_ADD: case GT_SUB: #ifdef _TARGET_ARM64_ case GT_CMPXCHG: case GT_LOCKADD: case GT_XADD: return emitter::emitIns_valid_imm_for_add(immVal, size); #elif defined(_TARGET_ARM_) return emitter::emitIns_valid_imm_for_add(immVal, flags); #endif break; #ifdef _TARGET_ARM64_ case GT_EQ: case GT_NE: case GT_LT: case GT_LE: case GT_GE: case GT_GT: return emitter::emitIns_valid_imm_for_cmp(immVal, size); break; case GT_AND: case GT_OR: case GT_XOR: case GT_TEST_EQ: case GT_TEST_NE: return emitter::emitIns_valid_imm_for_alu(immVal, size); break; case GT_JCMP: assert(((parentNode->gtFlags & GTF_JCMP_TST) == 0) ? (immVal == 0) : isPow2(immVal)); return true; break; #elif defined(_TARGET_ARM_) case GT_EQ: case GT_NE: case GT_LT: case GT_LE: case GT_GE: case GT_GT: case GT_CMP: case GT_AND: case GT_OR: case GT_XOR: return emitter::emitIns_valid_imm_for_alu(immVal); break; #endif // _TARGET_ARM_ #ifdef _TARGET_ARM64_ case GT_STORE_LCL_VAR: if (immVal == 0) return true; break; #endif } } return false; }
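// Standalone sketch of the shape of immediate that AArch64 ADD/SUB accept
// (the case the GT_ADD/GT_SUB branch above delegates to
// emitter::emitIns_valid_imm_for_add): a 12-bit unsigned value, optionally
// shifted left by 12 bits. This is a simplified model, not the emitter's
// actual routine.
#include <cstdint>

static bool isArm64AddSubImmediateSketch(uint64_t value)
{
    return (value <= 0xFFF) ||                                   // uimm12
           (((value & 0xFFF) == 0) && ((value >> 12) <= 0xFFF)); // uimm12, LSL #12
}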
//------------------------------------------------------------------------ // TreeNodeInfoInitIndir: Specify register requirements for address expression // of an indirection operation. // // Arguments: // indirTree - GT_IND, GT_STOREIND, block node or GT_NULLCHECK gentree node // void Lowering::TreeNodeInfoInitIndir(GenTreePtr indirTree) { assert(indirTree->OperIsIndir()); // If this is the rhs of a block copy (i.e. non-enregisterable struct), // it has no register requirements. if (indirTree->TypeGet() == TYP_STRUCT) { return; } GenTreePtr addr = indirTree->gtGetOp1(); TreeNodeInfo* info = &(indirTree->gtLsraInfo); GenTreePtr base = nullptr; GenTreePtr index = nullptr; unsigned cns = 0; unsigned mul; bool rev; bool modifiedSources = false; bool makeContained = true; if ((addr->OperGet() == GT_LEA) && IsSafeToContainMem(indirTree, addr)) { GenTreeAddrMode* lea = addr->AsAddrMode(); base = lea->Base(); index = lea->Index(); cns = lea->gtOffset; #ifdef _TARGET_ARM_ // ARM floating-point load/store doesn't support a form similar to integer // ldr Rdst, [Rbase + Roffset] with offset in a register. The only supported // form is vldr Rdst, [Rbase + imm] with a more limited constraint on the imm. if (lea->HasIndex() || !emitter::emitIns_valid_imm_for_vldst_offset(cns)) { if (indirTree->OperGet() == GT_STOREIND) { if (varTypeIsFloating(indirTree->AsStoreInd()->Data())) { makeContained = false; } } else if (indirTree->OperGet() == GT_IND) { if (varTypeIsFloating(indirTree)) { makeContained = false; } } } #endif if (makeContained) { m_lsra->clearOperandCounts(addr); addr->SetContained(); // The srcCount is decremented because addr is now "contained", // then we account for the base and index below, if they are non-null. info->srcCount--; } } else if (comp->codeGen->genCreateAddrMode(addr, -1, true, 0, &rev, &base, &index, &mul, &cns, true /*nogen*/) && !(modifiedSources = AreSourcesPossiblyModifiedLocals(indirTree, base, index))) { // An addressing mode will be constructed that may cause some // nodes to not need a register, and cause others' lifetimes to be extended // to the GT_IND or even its parent if it's an assignment assert(base != addr); m_lsra->clearOperandCounts(addr); addr->SetContained(); // Traverse the computation below GT_IND to find the operands // for the addressing mode, marking the various constants and // intermediate results as not consuming/producing. // If the traversal were more complex, we might consider using // a traversal function, but the addressing mode is only made // up of simple arithmetic operators, and the code generator // only traverses one leg of each node. bool foundBase = (base == nullptr); bool foundIndex = (index == nullptr); GenTreePtr nextChild = nullptr; for (GenTreePtr child = addr; child != nullptr && !child->OperIsLeaf(); child = nextChild) { nextChild = nullptr; GenTreePtr op1 = child->gtOp.gtOp1; GenTreePtr op2 = (child->OperIsBinary()) ? child->gtOp.gtOp2 : nullptr; if (op1 == base) { foundBase = true; } else if (op1 == index) { foundIndex = true; } else { m_lsra->clearOperandCounts(op1); op1->SetContained(); if (!op1->OperIsLeaf()) { nextChild = op1; } } if (op2 != nullptr) { if (op2 == base) { foundBase = true; } else if (op2 == index) { foundIndex = true; } else { m_lsra->clearOperandCounts(op2); op2->SetContained(); if (!op2->OperIsLeaf()) { assert(nextChild == nullptr); nextChild = op2; } } } } assert(foundBase && foundIndex); info->srcCount--; // it gets incremented below. 
} else if (addr->gtOper == GT_ARR_ELEM) { // The GT_ARR_ELEM consumes all the indices and produces the offset. // The array object lives until the mem access. // We also consume the target register to which the address is // computed info->srcCount++; assert(addr->gtLsraInfo.srcCount >= 2); addr->gtLsraInfo.srcCount -= 1; } else { // it is nothing but a plain indir info->srcCount--; // base gets added in below base = addr; } if (!makeContained) { return; } if (base != nullptr) { info->srcCount++; } if (index != nullptr && !modifiedSources) { info->srcCount++; } // On ARM we may need a single internal register // (when both conditions are true then we still only need a single internal register) if ((index != nullptr) && (cns != 0)) { // ARM does not support both Index and offset so we need an internal register info->internalIntCount = 1; } else if (!emitter::emitIns_valid_imm_for_ldst_offset(cns, emitTypeSize(indirTree))) { // This offset can't be contained in the ldr/str instruction, so we need an internal register info->internalIntCount = 1; } }
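// Standalone sketch of the internal-register decision made above
// (offsetFitsLdStImmediate stands in for emitIns_valid_imm_for_ldst_offset):
// a temporary integer register is reserved either when the address has both
// an index and a non-zero offset, which ARM load/store forms cannot encode
// together, or when the offset does not fit the ldr/str immediate field.
static int indirInternalIntRegCountSketch(bool hasIndex, int offset, bool offsetFitsLdStImmediate)
{
    if (hasIndex && (offset != 0))
    {
        return 1; // the offset has to be added into a register first
    }
    if (!offsetFitsLdStImmediate)
    {
        return 1; // an over-large offset must be materialized in a register
    }
    return 0;
}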
//------------------------------------------------------------------------ // BuildNode: Build the RefPositions for for a node // // Arguments: // treeNode - the node of interest // // Return Value: // The number of sources consumed by this node. // // Notes: // Preconditions: // LSRA Has been initialized. // // Postconditions: // RefPositions have been built for all the register defs and uses required // for this node. // int LinearScan::BuildNode(GenTree* tree) { assert(!tree->isContained()); Interval* prefSrcInterval = nullptr; int srcCount; int dstCount = 0; regMaskTP dstCandidates = RBM_NONE; regMaskTP killMask = RBM_NONE; bool isLocalDefUse = false; // Reset the build-related members of LinearScan. clearBuildState(); RegisterType registerType = TypeGet(tree); // Set the default dstCount. This may be modified below. if (tree->IsValue()) { dstCount = 1; if (tree->IsUnusedValue()) { isLocalDefUse = true; } } else { dstCount = 0; } switch (tree->OperGet()) { default: srcCount = BuildSimple(tree); break; case GT_LCL_VAR: case GT_LCL_FLD: { // We handle tracked variables differently from non-tracked ones. If it is tracked, // we will simply add a use of the tracked variable at its parent/consumer. // Otherwise, for a use we need to actually add the appropriate references for loading // or storing the variable. // // A tracked variable won't actually get used until the appropriate ancestor tree node // is processed, unless this is marked "isLocalDefUse" because it is a stack-based argument // to a call or an orphaned dead node. // LclVarDsc* const varDsc = &compiler->lvaTable[tree->AsLclVarCommon()->gtLclNum]; if (isCandidateVar(varDsc)) { INDEBUG(dumpNodeInfo(tree, dstCandidates, 0, 1)); return 0; } srcCount = 0; #ifdef FEATURE_SIMD // Need an additional register to read upper 4 bytes of Vector3. if (tree->TypeGet() == TYP_SIMD12) { // We need an internal register different from targetReg in which 'tree' produces its result // because both targetReg and internal reg will be in use at the same time. buildInternalFloatRegisterDefForNode(tree, allSIMDRegs()); setInternalRegsDelayFree = true; buildInternalRegisterUses(); } #endif BuildDef(tree); } break; case GT_STORE_LCL_FLD: case GT_STORE_LCL_VAR: srcCount = 1; assert(dstCount == 0); srcCount = BuildStoreLoc(tree->AsLclVarCommon()); break; case GT_FIELD_LIST: // These should always be contained. We don't correctly allocate or // generate code for a non-contained GT_FIELD_LIST. noway_assert(!"Non-contained GT_FIELD_LIST"); srcCount = 0; break; case GT_LIST: case GT_ARGPLACE: case GT_NO_OP: case GT_START_NONGC: case GT_PROF_HOOK: srcCount = 0; assert(dstCount == 0); break; case GT_START_PREEMPTGC: // This kills GC refs in callee save regs srcCount = 0; assert(dstCount == 0); BuildDefsWithKills(tree, 0, RBM_NONE, RBM_NONE); break; case GT_CNS_DBL: { GenTreeDblCon* dblConst = tree->AsDblCon(); double constValue = dblConst->gtDblCon.gtDconVal; if (emitter::emitIns_valid_imm_for_fmov(constValue)) { // Directly encode constant to instructions. 
} else { // Reserve int to load constant from memory (IF_LARGELDC) buildInternalIntRegisterDefForNode(tree); buildInternalRegisterUses(); } } __fallthrough; case GT_CNS_INT: { srcCount = 0; assert(dstCount == 1); RefPosition* def = BuildDef(tree); def->getInterval()->isConstant = true; } break; case GT_BOX: case GT_COMMA: case GT_QMARK: case GT_COLON: srcCount = 0; assert(dstCount == 0); unreached(); break; case GT_RETURN: srcCount = BuildReturn(tree); break; case GT_RETFILT: assert(dstCount == 0); if (tree->TypeGet() == TYP_VOID) { srcCount = 0; } else { assert(tree->TypeGet() == TYP_INT); srcCount = 1; BuildUse(tree->gtGetOp1(), RBM_INTRET); } break; case GT_NOP: // A GT_NOP is either a passthrough (if it is void, or if it has // a child), but must be considered to produce a dummy value if it // has a type but no child. srcCount = 0; if (tree->TypeGet() != TYP_VOID && tree->gtGetOp1() == nullptr) { assert(dstCount == 1); BuildDef(tree); } else { assert(dstCount == 0); } break; case GT_JTRUE: srcCount = 0; assert(dstCount == 0); break; case GT_JMP: srcCount = 0; assert(dstCount == 0); break; case GT_SWITCH: // This should never occur since switch nodes must not be visible at this // point in the JIT. srcCount = 0; noway_assert(!"Switch must be lowered at this point"); break; case GT_JMPTABLE: srcCount = 0; assert(dstCount == 1); BuildDef(tree); break; case GT_SWITCH_TABLE: buildInternalIntRegisterDefForNode(tree); srcCount = BuildBinaryUses(tree->AsOp()); assert(dstCount == 0); break; case GT_ASG: noway_assert(!"We should never hit any assignment operator in lowering"); srcCount = 0; break; case GT_ADD: case GT_SUB: if (varTypeIsFloating(tree->TypeGet())) { // overflow operations aren't supported on float/double types. assert(!tree->gtOverflow()); // No implicit conversions at this stage as the expectation is that // everything is made explicit by adding casts. assert(tree->gtGetOp1()->TypeGet() == tree->gtGetOp2()->TypeGet()); } __fallthrough; case GT_AND: case GT_OR: case GT_XOR: case GT_LSH: case GT_RSH: case GT_RSZ: case GT_ROR: srcCount = BuildBinaryUses(tree->AsOp()); assert(dstCount == 1); BuildDef(tree); break; case GT_RETURNTRAP: // this just turns into a compare of its child with an int // + a conditional call BuildUse(tree->gtGetOp1()); srcCount = 1; assert(dstCount == 0); killMask = compiler->compHelperCallKillSet(CORINFO_HELP_STOP_FOR_GC); BuildDefsWithKills(tree, 0, RBM_NONE, killMask); break; case GT_MOD: case GT_UMOD: NYI_IF(varTypeIsFloating(tree->TypeGet()), "FP Remainder in ARM64"); assert(!"Shouldn't see an integer typed GT_MOD node in ARM64"); srcCount = 0; break; case GT_MUL: if (tree->gtOverflow()) { // Need a register different from target reg to check for overflow. buildInternalIntRegisterDefForNode(tree); setInternalRegsDelayFree = true; } __fallthrough; case GT_DIV: case GT_MULHI: case GT_UDIV: { srcCount = BuildBinaryUses(tree->AsOp()); buildInternalRegisterUses(); assert(dstCount == 1); BuildDef(tree); } break; case GT_INTRINSIC: { noway_assert((tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Abs) || (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Ceiling) || (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Floor) || (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Round) || (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Sqrt)); // Both operand and its result must be of the same floating point type. 
GenTree* op1 = tree->gtGetOp1(); assert(varTypeIsFloating(op1)); assert(op1->TypeGet() == tree->TypeGet()); BuildUse(op1); srcCount = 1; assert(dstCount == 1); BuildDef(tree); } break; #ifdef FEATURE_SIMD case GT_SIMD: srcCount = BuildSIMD(tree->AsSIMD()); break; #endif // FEATURE_SIMD #ifdef FEATURE_HW_INTRINSICS case GT_HWIntrinsic: srcCount = BuildHWIntrinsic(tree->AsHWIntrinsic()); break; #endif // FEATURE_HW_INTRINSICS case GT_CAST: assert(dstCount == 1); srcCount = BuildCast(tree->AsCast()); break; case GT_NEG: case GT_NOT: BuildUse(tree->gtGetOp1()); srcCount = 1; assert(dstCount == 1); BuildDef(tree); break; case GT_EQ: case GT_NE: case GT_LT: case GT_LE: case GT_GE: case GT_GT: case GT_TEST_EQ: case GT_TEST_NE: case GT_JCMP: srcCount = BuildCmp(tree); break; case GT_CKFINITE: srcCount = 1; assert(dstCount == 1); buildInternalIntRegisterDefForNode(tree); BuildUse(tree->gtGetOp1()); BuildDef(tree); buildInternalRegisterUses(); break; case GT_CMPXCHG: { GenTreeCmpXchg* cmpXchgNode = tree->AsCmpXchg(); srcCount = cmpXchgNode->gtOpComparand->isContained() ? 2 : 3; assert(dstCount == 1); if (!compiler->compSupports(InstructionSet_Atomics)) { // For ARMv8 exclusives requires a single internal register buildInternalIntRegisterDefForNode(tree); } // For ARMv8 exclusives the lifetime of the addr and data must be extended because // it may be used used multiple during retries // For ARMv8.1 atomic cas the lifetime of the addr and data must be extended to prevent // them being reused as the target register which must be destroyed early RefPosition* locationUse = BuildUse(tree->gtCmpXchg.gtOpLocation); setDelayFree(locationUse); RefPosition* valueUse = BuildUse(tree->gtCmpXchg.gtOpValue); setDelayFree(valueUse); if (!cmpXchgNode->gtOpComparand->isContained()) { RefPosition* comparandUse = BuildUse(tree->gtCmpXchg.gtOpComparand); // For ARMv8 exclusives the lifetime of the comparand must be extended because // it may be used used multiple during retries if (!compiler->compSupports(InstructionSet_Atomics)) { setDelayFree(comparandUse); } } // Internals may not collide with target setInternalRegsDelayFree = true; buildInternalRegisterUses(); BuildDef(tree); } break; case GT_LOCKADD: case GT_XADD: case GT_XCHG: { assert(dstCount == (tree->TypeGet() == TYP_VOID) ? 0 : 1); srcCount = tree->gtGetOp2()->isContained() ? 1 : 2; if (!compiler->compSupports(InstructionSet_Atomics)) { // GT_XCHG requires a single internal register; the others require two. 
buildInternalIntRegisterDefForNode(tree); if (tree->OperGet() != GT_XCHG) { buildInternalIntRegisterDefForNode(tree); } } assert(!tree->gtGetOp1()->isContained()); RefPosition* op1Use = BuildUse(tree->gtGetOp1()); RefPosition* op2Use = nullptr; if (!tree->gtGetOp2()->isContained()) { op2Use = BuildUse(tree->gtGetOp2()); } // For ARMv8 exclusives the lifetime of the addr and data must be extended because // it may be used used multiple during retries if (!compiler->compSupports(InstructionSet_Atomics)) { // Internals may not collide with target if (dstCount == 1) { setDelayFree(op1Use); if (op2Use != nullptr) { setDelayFree(op2Use); } setInternalRegsDelayFree = true; } buildInternalRegisterUses(); } if (dstCount == 1) { BuildDef(tree); } } break; #if FEATURE_ARG_SPLIT case GT_PUTARG_SPLIT: srcCount = BuildPutArgSplit(tree->AsPutArgSplit()); dstCount = tree->AsPutArgSplit()->gtNumRegs; break; #endif // FEATURE _SPLIT_ARG case GT_PUTARG_STK: srcCount = BuildPutArgStk(tree->AsPutArgStk()); break; case GT_PUTARG_REG: srcCount = BuildPutArgReg(tree->AsUnOp()); break; case GT_CALL: srcCount = BuildCall(tree->AsCall()); if (tree->AsCall()->HasMultiRegRetVal()) { dstCount = tree->AsCall()->GetReturnTypeDesc()->GetReturnRegCount(); } break; case GT_ADDR: { // For a GT_ADDR, the child node should not be evaluated into a register GenTree* child = tree->gtGetOp1(); assert(!isCandidateLocalRef(child)); assert(child->isContained()); assert(dstCount == 1); srcCount = 0; BuildDef(tree); } break; case GT_BLK: case GT_DYN_BLK: // These should all be eliminated prior to Lowering. assert(!"Non-store block node in Lowering"); srcCount = 0; break; case GT_STORE_BLK: case GT_STORE_OBJ: case GT_STORE_DYN_BLK: srcCount = BuildBlockStore(tree->AsBlk()); break; case GT_INIT_VAL: // Always a passthrough of its child's value. assert(!"INIT_VAL should always be contained"); srcCount = 0; break; case GT_LCLHEAP: { assert(dstCount == 1); // Need a variable number of temp regs (see genLclHeap() in codegenamd64.cpp): // Here '-' means don't care. // // Size? Init Memory? # temp regs // 0 - 0 // const and <=6 ptr words - 0 // const and <PageSize No 0 // >6 ptr words Yes 0 // Non-const Yes 0 // Non-const No 2 // GenTree* size = tree->gtGetOp1(); if (size->IsCnsIntOrI()) { assert(size->isContained()); srcCount = 0; size_t sizeVal = size->gtIntCon.gtIconVal; if (sizeVal != 0) { // Compute the amount of memory to properly STACK_ALIGN. // Note: The Gentree node is not updated here as it is cheap to recompute stack aligned size. // This should also help in debugging as we can examine the original size specified with // localloc. sizeVal = AlignUp(sizeVal, STACK_ALIGN); size_t stpCount = sizeVal / (REGSIZE_BYTES * 2); // For small allocations up to 4 'stp' instructions (i.e. 16 to 64 bytes of localloc) // if (stpCount <= 4) { // Need no internal registers } else if (!compiler->info.compInitMem) { // No need to initialize allocated stack space. 
if (sizeVal < compiler->eeGetPageSize()) { // Need no internal registers } else { // We need two registers: regCnt and RegTmp buildInternalIntRegisterDefForNode(tree); buildInternalIntRegisterDefForNode(tree); } } } } else { srcCount = 1; if (!compiler->info.compInitMem) { buildInternalIntRegisterDefForNode(tree); buildInternalIntRegisterDefForNode(tree); } } if (!size->isContained()) { BuildUse(size); } buildInternalRegisterUses(); BuildDef(tree); } break; case GT_ARR_BOUNDS_CHECK: #ifdef FEATURE_SIMD case GT_SIMD_CHK: #endif // FEATURE_SIMD { GenTreeBoundsChk* node = tree->AsBoundsChk(); // Consumes arrLen & index - has no result assert(dstCount == 0); GenTree* intCns = nullptr; GenTree* other = nullptr; srcCount = BuildOperandUses(tree->AsBoundsChk()->gtIndex); srcCount += BuildOperandUses(tree->AsBoundsChk()->gtArrLen); } break; case GT_ARR_ELEM: // These must have been lowered to GT_ARR_INDEX noway_assert(!"We should never see a GT_ARR_ELEM in lowering"); srcCount = 0; assert(dstCount == 0); break; case GT_ARR_INDEX: { srcCount = 2; assert(dstCount == 1); buildInternalIntRegisterDefForNode(tree); setInternalRegsDelayFree = true; // For GT_ARR_INDEX, the lifetime of the arrObj must be extended because it is actually used multiple // times while the result is being computed. RefPosition* arrObjUse = BuildUse(tree->AsArrIndex()->ArrObj()); setDelayFree(arrObjUse); BuildUse(tree->AsArrIndex()->IndexExpr()); buildInternalRegisterUses(); BuildDef(tree); } break; case GT_ARR_OFFSET: // This consumes the offset, if any, the arrObj and the effective index, // and produces the flattened offset for this dimension. srcCount = 2; if (!tree->gtArrOffs.gtOffset->isContained()) { BuildUse(tree->AsArrOffs()->gtOffset); srcCount++; } BuildUse(tree->AsArrOffs()->gtIndex); BuildUse(tree->AsArrOffs()->gtArrObj); assert(dstCount == 1); buildInternalIntRegisterDefForNode(tree); buildInternalRegisterUses(); BuildDef(tree); break; case GT_LEA: { GenTreeAddrMode* lea = tree->AsAddrMode(); GenTree* base = lea->Base(); GenTree* index = lea->Index(); int cns = lea->Offset(); // This LEA is instantiating an address, so we set up the srcCount here. srcCount = 0; if (base != nullptr) { srcCount++; BuildUse(base); } if (index != nullptr) { srcCount++; BuildUse(index); } assert(dstCount == 1); // On ARM64 we may need a single internal register // (when both conditions are true then we still only need a single internal register) if ((index != nullptr) && (cns != 0)) { // ARM64 does not support both Index and offset so we need an internal register buildInternalIntRegisterDefForNode(tree); } else if (!emitter::emitIns_valid_imm_for_add(cns, EA_8BYTE)) { // This offset can't be contained in the add instruction, so we need an internal register buildInternalIntRegisterDefForNode(tree); } buildInternalRegisterUses(); BuildDef(tree); } break; case GT_STOREIND: { assert(dstCount == 0); if (compiler->codeGen->gcInfo.gcIsWriteBarrierStoreIndNode(tree)) { srcCount = BuildGCWriteBarrier(tree); break; } srcCount = BuildIndir(tree->AsIndir()); if (!tree->gtGetOp2()->isContained()) { BuildUse(tree->gtGetOp2()); srcCount++; } } break; case GT_NULLCHECK: // Unlike ARM, ARM64 implements NULLCHECK as a load to REG_ZR, so no internal register // is required, and it is not a localDefUse. 
assert(dstCount == 0); assert(!tree->gtGetOp1()->isContained()); BuildUse(tree->gtGetOp1()); srcCount = 1; break; case GT_IND: assert(dstCount == 1); srcCount = BuildIndir(tree->AsIndir()); break; case GT_CATCH_ARG: srcCount = 0; assert(dstCount == 1); BuildDef(tree, RBM_EXCEPTION_OBJECT); break; case GT_CLS_VAR: srcCount = 0; // GT_CLS_VAR, by the time we reach the backend, must always // be a pure use. // It will produce a result of the type of the // node, and use an internal register for the address. assert(dstCount == 1); assert((tree->gtFlags & (GTF_VAR_DEF | GTF_VAR_USEASG)) == 0); buildInternalIntRegisterDefForNode(tree); buildInternalRegisterUses(); BuildDef(tree); break; case GT_INDEX_ADDR: assert(dstCount == 1); srcCount = BuildBinaryUses(tree->AsOp()); buildInternalIntRegisterDefForNode(tree); buildInternalRegisterUses(); BuildDef(tree); break; } // end switch (tree->OperGet()) if (tree->IsUnusedValue() && (dstCount != 0)) { isLocalDefUse = true; } // We need to be sure that we've set srcCount and dstCount appropriately assert((dstCount < 2) || tree->IsMultiRegCall()); assert(isLocalDefUse == (tree->IsValue() && tree->IsUnusedValue())); assert(!tree->IsUnusedValue() || (dstCount != 0)); assert(dstCount == tree->GetRegisterDstCount()); INDEBUG(dumpNodeInfo(tree, dstCandidates, srcCount, dstCount)); return srcCount; }
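// Illustrative model (using std::atomic purely for shape) of why, without the
// ARMv8.1 atomics extension, the operands of GT_CMPXCHG/GT_XADD/GT_XCHG are
// marked delay-free above: the operation is emitted as a load-exclusive /
// store-exclusive retry loop, so the address and data are consumed again on
// every retry and must stay live past their first use.
#include <atomic>

static int xaddRetryLoopSketch(std::atomic<int>& location, int addend)
{
    int observed = location.load();
    // compare_exchange_weak refreshes 'observed' on failure; each retry still
    // needs both the location and the addend.
    while (!location.compare_exchange_weak(observed, observed + addend))
    {
    }
    return observed; // the original value, which is what GT_XADD produces
}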
//------------------------------------------------------------------------ // BuildSIMD: Set the NodeInfo for a GT_SIMD tree. // // Arguments: // tree - The GT_SIMD node of interest // // Return Value: // The number of sources consumed by this node. // int LinearScan::BuildSIMD(GenTreeSIMD* simdTree) { int srcCount = 0; // Only SIMDIntrinsicInit can be contained if (simdTree->isContained()) { assert(simdTree->gtSIMDIntrinsicID == SIMDIntrinsicInit); } int dstCount = simdTree->IsValue() ? 1 : 0; assert(dstCount == 1); bool buildUses = true; GenTree* op1 = simdTree->gtGetOp1(); GenTree* op2 = simdTree->gtGetOp2(); switch (simdTree->gtSIMDIntrinsicID) { case SIMDIntrinsicInit: case SIMDIntrinsicCast: case SIMDIntrinsicSqrt: case SIMDIntrinsicAbs: case SIMDIntrinsicConvertToSingle: case SIMDIntrinsicConvertToInt32: case SIMDIntrinsicConvertToDouble: case SIMDIntrinsicConvertToInt64: case SIMDIntrinsicWidenLo: case SIMDIntrinsicWidenHi: // No special handling required. break; case SIMDIntrinsicGetItem: { op1 = simdTree->gtGetOp1(); op2 = simdTree->gtGetOp2(); // We have an object and an index, either of which may be contained. bool setOp2DelayFree = false; if (!op2->IsCnsIntOrI() && (!op1->isContained() || op1->OperIsLocal())) { // If the index is not a constant and the object is not contained or is a local // we will need a general purpose register to calculate the address // internal register must not clobber input index // TODO-Cleanup: An internal register will never clobber a source; this code actually // ensures that the index (op2) doesn't interfere with the target. buildInternalIntRegisterDefForNode(simdTree); setOp2DelayFree = true; } srcCount += BuildOperandUses(op1); if (!op2->isContained()) { RefPosition* op2Use = BuildUse(op2); if (setOp2DelayFree) { setDelayFree(op2Use); } srcCount++; } if (!op2->IsCnsIntOrI() && (!op1->isContained())) { // If vector is not already in memory (contained) and the index is not a constant, // we will use the SIMD temp location to store the vector. compiler->getSIMDInitTempVarNum(); } buildUses = false; } break; case SIMDIntrinsicAdd: case SIMDIntrinsicSub: case SIMDIntrinsicMul: case SIMDIntrinsicDiv: case SIMDIntrinsicBitwiseAnd: case SIMDIntrinsicBitwiseAndNot: case SIMDIntrinsicBitwiseOr: case SIMDIntrinsicBitwiseXor: case SIMDIntrinsicMin: case SIMDIntrinsicMax: case SIMDIntrinsicEqual: case SIMDIntrinsicLessThan: case SIMDIntrinsicGreaterThan: case SIMDIntrinsicLessThanOrEqual: case SIMDIntrinsicGreaterThanOrEqual: // No special handling required. break; case SIMDIntrinsicSetX: case SIMDIntrinsicSetY: case SIMDIntrinsicSetZ: case SIMDIntrinsicSetW: case SIMDIntrinsicNarrow: { // Op1 will write to dst before Op2 is free BuildUse(op1); RefPosition* op2Use = BuildUse(op2); setDelayFree(op2Use); srcCount = 2; buildUses = false; break; } case SIMDIntrinsicInitN: { var_types baseType = simdTree->gtSIMDBaseType; srcCount = (short)(simdTree->gtSIMDSize / genTypeSize(baseType)); if (varTypeIsFloating(simdTree->gtSIMDBaseType)) { // Need an internal register to stitch together all the values into a single vector in a SIMD reg. 
buildInternalFloatRegisterDefForNode(simdTree); } int initCount = 0; for (GenTree* list = op1; list != nullptr; list = list->gtGetOp2()) { assert(list->OperGet() == GT_LIST); GenTree* listItem = list->gtGetOp1(); assert(listItem->TypeGet() == baseType); assert(!listItem->isContained()); BuildUse(listItem); initCount++; } assert(initCount == srcCount); buildUses = false; break; } case SIMDIntrinsicInitArray: // We have an array and an index, which may be contained. break; case SIMDIntrinsicOpEquality: case SIMDIntrinsicOpInEquality: buildInternalFloatRegisterDefForNode(simdTree); break; case SIMDIntrinsicDotProduct: buildInternalFloatRegisterDefForNode(simdTree); break; case SIMDIntrinsicSelect: // TODO-ARM64-CQ Allow lowering to see SIMDIntrinsicSelect so we can generate BSL VC, VA, VB // bsl target register must be VC. Reserve a temp in case we need to shuffle things. // This will require a different approach, as GenTreeSIMD has only two operands. assert(!"SIMDIntrinsicSelect not yet supported"); buildInternalFloatRegisterDefForNode(simdTree); break; case SIMDIntrinsicInitArrayX: case SIMDIntrinsicInitFixed: case SIMDIntrinsicCopyToArray: case SIMDIntrinsicCopyToArrayX: case SIMDIntrinsicNone: case SIMDIntrinsicGetCount: case SIMDIntrinsicGetOne: case SIMDIntrinsicGetZero: case SIMDIntrinsicGetAllOnes: case SIMDIntrinsicGetX: case SIMDIntrinsicGetY: case SIMDIntrinsicGetZ: case SIMDIntrinsicGetW: case SIMDIntrinsicInstEquals: case SIMDIntrinsicHWAccel: case SIMDIntrinsicWiden: case SIMDIntrinsicInvalid: assert(!"These intrinsics should not be seen during register allocation"); __fallthrough; default: noway_assert(!"Unimplemented SIMD node type."); unreached(); } if (buildUses) { assert(!op1->OperIs(GT_LIST)); assert(srcCount == 0); srcCount = BuildOperandUses(op1); if ((op2 != nullptr) && !op2->isContained()) { srcCount += BuildOperandUses(op2); } } assert(internalCount <= MaxInternalCount); buildInternalRegisterUses(); if (dstCount == 1) { BuildDef(simdTree); } else { assert(dstCount == 0); } return srcCount; }
//------------------------------------------------------------------------ // IsContainableImmed: Is an immediate encodable in-place? // // Return Value: // True if the immediate can be folded into an instruction, // for example small enough and non-relocatable. // // TODO-CQ: we can contain a floating point 0.0 constant in a compare instruction // (vcmp on arm, fcmp on arm64). // bool Lowering::IsContainableImmed(GenTree* parentNode, GenTree* childNode) { if (!varTypeIsFloating(parentNode->TypeGet())) { // Make sure we have an actual immediate if (!childNode->IsCnsIntOrI()) return false; if (childNode->gtIntCon.ImmedValNeedsReloc(comp)) return false; // TODO-CrossBitness: we wouldn't need the cast below if GenTreeIntCon::gtIconVal had target_ssize_t type. target_ssize_t immVal = (target_ssize_t)childNode->gtIntCon.gtIconVal; emitAttr attr = emitActualTypeSize(childNode->TypeGet()); emitAttr size = EA_SIZE(attr); #ifdef _TARGET_ARM_ insFlags flags = parentNode->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE; #endif switch (parentNode->OperGet()) { case GT_ADD: case GT_SUB: #ifdef _TARGET_ARM64_ case GT_CMPXCHG: case GT_LOCKADD: case GT_XADD: return comp->compSupports(InstructionSet_Atomics) ? false : emitter::emitIns_valid_imm_for_add(immVal, size); #elif defined(_TARGET_ARM_) return emitter::emitIns_valid_imm_for_add(immVal, flags); #endif break; #ifdef _TARGET_ARM64_ case GT_EQ: case GT_NE: case GT_LT: case GT_LE: case GT_GE: case GT_GT: return emitter::emitIns_valid_imm_for_cmp(immVal, size); case GT_AND: case GT_OR: case GT_XOR: case GT_TEST_EQ: case GT_TEST_NE: return emitter::emitIns_valid_imm_for_alu(immVal, size); case GT_JCMP: assert(((parentNode->gtFlags & GTF_JCMP_TST) == 0) ? (immVal == 0) : isPow2(immVal)); return true; #elif defined(_TARGET_ARM_) case GT_EQ: case GT_NE: case GT_LT: case GT_LE: case GT_GE: case GT_GT: case GT_CMP: case GT_AND: case GT_OR: case GT_XOR: return emitter::emitIns_valid_imm_for_alu(immVal); #endif // _TARGET_ARM_ #ifdef _TARGET_ARM64_ case GT_STORE_LCL_FLD: case GT_STORE_LCL_VAR: if (immVal == 0) return true; break; #endif default: break; } } return false; }
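// Standalone sketch of the classic ARM-mode data-processing immediate (one of
// the patterns a check like emitIns_valid_imm_for_alu has to recognize; the
// Thumb-2 encodings accept additional patterns not modelled here): a value is
// encodable if it is some even right-rotation of an 8-bit constant, i.e. if
// rotating the value left by an even amount yields something <= 0xFF.
#include <cstdint>

static bool isArmModifiedImmediateSketch(uint32_t value)
{
    for (unsigned rotation = 0; rotation < 32; rotation += 2)
    {
        uint32_t rotatedLeft =
            (rotation == 0) ? value : ((value << rotation) | (value >> (32 - rotation)));
        if (rotatedLeft <= 0xFF)
        {
            return true;
        }
    }
    return false;
}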
void CodeGen::genLoadFloat(GenTreePtr tree, regNumber reg) { if (tree->IsRegVar()) { // if it has been spilled, unspill it.% LclVarDsc * varDsc = &compiler->lvaTable[tree->gtLclVarCommon.gtLclNum]; if (varDsc->lvSpilled) { UnspillFloat(varDsc); } inst_RV_RV(ins_FloatCopy(tree->TypeGet()), reg, tree->gtRegNum, tree->TypeGet()); } else { bool unalignedLoad = false; switch (tree->OperGet()) { case GT_IND: case GT_CLS_VAR: if (tree->gtFlags & GTF_IND_UNALIGNED) unalignedLoad = true; break; case GT_LCL_FLD: // Check for a misalignment on a Floating Point field // if (varTypeIsFloating(tree->TypeGet())) { if ((tree->gtLclFld.gtLclOffs % emitTypeSize(tree->TypeGet())) != 0) { unalignedLoad = true; } } break; default: break; } if (unalignedLoad) { // Make the target addressable // regMaskTP addrReg = genMakeAddressable(tree, 0, RegSet::KEEP_REG, true); regSet.rsLockUsedReg(addrReg); // Must prevent regSet.rsGrabReg from choosing an addrReg var_types loadType = tree->TypeGet(); assert(loadType == TYP_DOUBLE || loadType == TYP_FLOAT); // Unaligned Floating-Point Loads must be loaded into integer register(s) // and then moved over to the Floating-Point register regNumber intRegLo = regSet.rsGrabReg(RBM_ALLINT); regNumber intRegHi = REG_NA; regMaskTP tmpLockMask = genRegMask(intRegLo); if (loadType == TYP_DOUBLE) { intRegHi = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(intRegLo)); tmpLockMask |= genRegMask(intRegHi); } regSet.rsLockReg(tmpLockMask); // Temporarily lock the intRegs tree->gtType = TYP_INT; // Temporarily change the type to TYP_INT inst_RV_TT(ins_Load(TYP_INT), intRegLo, tree); regTracker.rsTrackRegTrash(intRegLo); if (loadType == TYP_DOUBLE) { inst_RV_TT(ins_Load(TYP_INT), intRegHi, tree, 4); regTracker.rsTrackRegTrash(intRegHi); } tree->gtType = loadType; // Change the type back to the floating point type regSet.rsUnlockReg(tmpLockMask); // Unlock the intRegs // move the integer register(s) over to the FP register // if (loadType == TYP_DOUBLE) getEmitter()->emitIns_R_R_R(INS_vmov_i2d, EA_8BYTE, reg, intRegLo, intRegHi); else getEmitter()->emitIns_R_R(INS_vmov_i2f, EA_4BYTE, reg, intRegLo); // Free up anything that was tied up by genMakeAddressable // regSet.rsUnlockUsedReg(addrReg); genDoneAddressable(tree, addrReg, RegSet::KEEP_REG); } else { inst_RV_TT(ins_FloatLoad(tree->TypeGet()), reg, tree); } if (((tree->OperGet() == GT_CLS_VAR) || (tree->OperGet() == GT_IND)) && (tree->gtFlags & GTF_IND_VOLATILE)) { // Emit a memory barrier instruction after the load instGen_MemoryBarrier(); } } }
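// Standalone model (assuming a little-endian target, as the ARM JIT does) of
// the unaligned-double path above: the value is fetched as two 32-bit integer
// loads (memcpy stands in for the pair of ldr instructions) and then moved
// into a double, mirroring the final INS_vmov_i2d of the two integer
// registers.
#include <cstdint>
#include <cstring>

static double loadUnalignedDoubleSketch(const unsigned char* address)
{
    uint32_t lo;
    uint32_t hi;
    std::memcpy(&lo, address, sizeof(lo));     // models "ldr intRegLo, [addr]"
    std::memcpy(&hi, address + 4, sizeof(hi)); // models "ldr intRegHi, [addr + 4]"

    uint64_t bits = (uint64_t(hi) << 32) | lo; // reassemble the 64-bit pattern
    double result;
    std::memcpy(&result, &bits, sizeof(result)); // models moving rLo/rHi into a D register
    return result;
}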
void CodeGen::genFloatAssign(GenTree* tree)
{
    var_types  type = tree->TypeGet();
    GenTreePtr op1  = tree->gtGetOp1();
    GenTreePtr op2  = tree->gtGetOp2();

    regMaskTP needRegOp1 = RBM_ALLINT;
    regMaskTP addrReg    = RBM_NONE;
    bool      volat      = false; // Is this a volatile store
    bool      unaligned  = false; // Is this an unaligned store
    regNumber op2reg     = REG_NA;

#ifdef DEBUGGING_SUPPORT
    unsigned lclVarNum = compiler->lvaCount;
    unsigned lclILoffs = DUMMY_INIT(0);
#endif

    noway_assert(tree->OperGet() == GT_ASG);

    // Is the target a floating-point local variable?
    // possibly even an enregistered floating-point local variable?
    //
    switch (op1->gtOper)
    {
        unsigned   varNum;
        LclVarDsc* varDsc;

        case GT_LCL_FLD:
            // Check for a misalignment on a Floating Point field
            //
            if (varTypeIsFloating(op1->TypeGet()))
            {
                if ((op1->gtLclFld.gtLclOffs % emitTypeSize(op1->TypeGet())) != 0)
                {
                    unaligned = true;
                }
            }
            break;

        case GT_LCL_VAR:
            varNum = op1->gtLclVarCommon.gtLclNum;
            noway_assert(varNum < compiler->lvaCount);
            varDsc = compiler->lvaTable + varNum;

#ifdef DEBUGGING_SUPPORT
            // For non-debuggable code, every definition of a lcl-var has
            // to be checked to see if we need to open a new scope for it.
            // Remember the local var info to call siCheckVarScope
            // AFTER code generation of the assignment.
            //
            if (compiler->opts.compScopeInfo && !compiler->opts.compDbgCode && (compiler->info.compVarScopesCount > 0))
            {
                lclVarNum = varNum;
                lclILoffs = op1->gtLclVar.gtLclILoffs;
            }
#endif

            // Dead Store assert (with min opts we may have dead stores)
            //
            noway_assert(!varDsc->lvTracked || compiler->opts.MinOpts() || !(op1->gtFlags & GTF_VAR_DEATH));

            // Does this variable live in a register?
            //
            if (genMarkLclVar(op1))
            {
                noway_assert(!compiler->opts.compDbgCode); // We don't enregister any floats with debug codegen

                // Get hold of the target register
                //
                regNumber op1Reg = op1->gtRegVar.gtRegNum;

                // the variable being assigned should be dead in op2
                assert(!varDsc->lvTracked ||
                       !VarSetOps::IsMember(compiler, genUpdateLiveSetForward(op2), varDsc->lvVarIndex));

                // Setup register preferencing, so that we try to target the op1 enregistered variable
                //
                regMaskTP bestMask = genRegMask(op1Reg);
                if (type == TYP_DOUBLE)
                {
                    assert((bestMask & RBM_DBL_REGS) != 0);
                    bestMask |= genRegMask(REG_NEXT(op1Reg));
                }
                RegSet::RegisterPreference pref(RBM_ALLFLOAT, bestMask);

                // Evaluate op2 into a floating point register
                //
                genCodeForTreeFloat(op2, &pref);

                noway_assert(op2->gtFlags & GTF_REG_VAL);

                // Make sure the value ends up in the right place ...
                // For example if op2 is a call that returns a result
                // in REG_F0, we will need to do a move instruction here
                //
                if ((op2->gtRegNum != op1Reg) || (op2->TypeGet() != type))
                {
                    regMaskTP spillRegs = regSet.rsMaskUsed & genRegMaskFloat(op1Reg, op1->TypeGet());
                    if (spillRegs != 0)
                        regSet.rsSpillRegs(spillRegs);

                    assert(type == op1->TypeGet());

                    inst_RV_RV(ins_FloatConv(type, op2->TypeGet()), op1Reg, op2->gtRegNum, type);
                }
                genUpdateLife(op1);
                goto DONE_ASG;
            }
            break;

        case GT_CLS_VAR:
        case GT_IND:
            // Check for a volatile/unaligned store
            //
            assert((op1->OperGet() == GT_CLS_VAR) ||
                   (op1->OperGet() == GT_IND)); // Required for GTF_IND_VOLATILE flag to be valid
            if (op1->gtFlags & GTF_IND_VOLATILE)
                volat = true;
            if (op1->gtFlags & GTF_IND_UNALIGNED)
                unaligned = true;
            break;

        default:
            break;
    }

    // Is the value being assigned an enregistered floating-point local variable?
    //
    switch (op2->gtOper)
    {
        case GT_LCL_VAR:

            if (!genMarkLclVar(op2))
                break;

            __fallthrough;

        case GT_REG_VAR:

            // We must honor the order of evaluation in case op1 reassigns our op2 register
            //
            if (tree->gtFlags & GTF_REVERSE_OPS)
                break;

            // Is there an implicit conversion that we have to insert?
            // Handle this case with the normal cases below.
            //
            if (type != op2->TypeGet())
                break;

            // Make the target addressable
            //
            addrReg = genMakeAddressable(op1, needRegOp1, RegSet::KEEP_REG, true);

            noway_assert(op2->gtFlags & GTF_REG_VAL);
            noway_assert(op2->IsRegVar());

            op2reg = op2->gtRegVar.gtRegNum;
            genUpdateLife(op2);

            goto CHK_VOLAT_UNALIGN;

        default:
            break;
    }

    // Is the op2 (RHS) more complex than op1 (LHS)?
    //
    if (tree->gtFlags & GTF_REVERSE_OPS)
    {
        regMaskTP                  bestRegs = regSet.rsNarrowHint(RBM_ALLFLOAT, ~op1->gtRsvdRegs);
        RegSet::RegisterPreference pref(RBM_ALLFLOAT, bestRegs);

        // Generate op2 (RHS) into a floating point register
        //
        genCodeForTreeFloat(op2, &pref);
        regSet.SetUsedRegFloat(op2, true);

        // Make the target addressable
        //
        addrReg = genMakeAddressable(op1, needRegOp1, RegSet::KEEP_REG, true);

        genRecoverReg(op2, RBM_ALLFLOAT, RegSet::KEEP_REG);
        noway_assert(op2->gtFlags & GTF_REG_VAL);
        regSet.SetUsedRegFloat(op2, false);
    }
    else
    {
        needRegOp1 = regSet.rsNarrowHint(needRegOp1, ~op2->gtRsvdRegs);

        // Make the target addressable
        //
        addrReg = genMakeAddressable(op1, needRegOp1, RegSet::KEEP_REG, true);

        // Generate the RHS into any floating point register
        genCodeForTreeFloat(op2);
    }
    noway_assert(op2->gtFlags & GTF_REG_VAL);

    op2reg = op2->gtRegNum;

    // Is there an implicit conversion that we have to insert?
    //
    if (type != op2->TypeGet())
    {
        regMaskTP bestMask = genRegMask(op2reg);
        if (type == TYP_DOUBLE)
        {
            if (bestMask & RBM_DBL_REGS)
            {
                bestMask |= genRegMask(REG_NEXT(op2reg));
            }
            else
            {
                bestMask |= genRegMask(REG_PREV(op2reg));
            }
        }
        RegSet::RegisterPreference op2Pref(RBM_ALLFLOAT, bestMask);
        op2reg = regSet.PickRegFloat(type, &op2Pref);

        inst_RV_RV(ins_FloatConv(type, op2->TypeGet()), op2reg, op2->gtRegNum, type);
    }

    // Make sure the LHS is still addressable
    //
    addrReg = genKeepAddressable(op1, addrReg);

CHK_VOLAT_UNALIGN:

    regSet.rsLockUsedReg(addrReg); // Must prevent the unaligned path's regSet.rsGrabReg from choosing an addrReg

    if (volat)
    {
        // Emit a memory barrier instruction before the store
        instGen_MemoryBarrier();
    }
    if (unaligned)
    {
        var_types storeType = op1->TypeGet();
        assert(storeType == TYP_DOUBLE || storeType == TYP_FLOAT);

        // Unaligned Floating-Point Stores must be done using the integer register(s)
        regNumber intRegLo    = regSet.rsGrabReg(RBM_ALLINT);
        regNumber intRegHi    = REG_NA;
        regMaskTP tmpLockMask = genRegMask(intRegLo);

        if (storeType == TYP_DOUBLE)
        {
            intRegHi = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(intRegLo));
            tmpLockMask |= genRegMask(intRegHi);
        }

        // Move the FP register over to the integer register(s)
        //
        if (storeType == TYP_DOUBLE)
        {
            getEmitter()->emitIns_R_R_R(INS_vmov_d2i, EA_8BYTE, intRegLo, intRegHi, op2reg);
            regTracker.rsTrackRegTrash(intRegHi);
        }
        else
        {
            getEmitter()->emitIns_R_R(INS_vmov_f2i, EA_4BYTE, intRegLo, op2reg);
        }
        regTracker.rsTrackRegTrash(intRegLo);

        regSet.rsLockReg(tmpLockMask); // Temporarily lock the intRegs
        op1->gtType = TYP_INT;         // Temporarily change the type to TYP_INT

        inst_TT_RV(ins_Store(TYP_INT), op1, intRegLo);
        if (storeType == TYP_DOUBLE)
        {
            inst_TT_RV(ins_Store(TYP_INT), op1, intRegHi, 4);
        }

        op1->gtType = storeType;         // Change the type back to the floating point type
        regSet.rsUnlockReg(tmpLockMask); // Unlock the intRegs
    }
    else
    {
        // Move the value into the target
        //
        inst_TT_RV(ins_Store(op1->TypeGet()), op1, op2reg);
    }

    // Free up anything that was tied up by the LHS
    //
    regSet.rsUnlockUsedReg(addrReg);
    genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);

DONE_ASG:

    genUpdateLife(tree);

#ifdef DEBUGGING_SUPPORT
    /* For non-debuggable code, every definition of a lcl-var has
     * to be checked to see if we need to open a new scope for it.
     */
    if (lclVarNum < compiler->lvaCount)
        siCheckVarScope(lclVarNum, lclILoffs);
#endif
}
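// The unaligned store path above is the mirror image of genLoadFloat's unaligned load: the FP
// value is first split into integer register(s) with vmov_f2i / vmov_d2i and then written out
// as one or two TYP_INT stores. Below is a standalone sketch, not part of the JIT (the helper
// name is invented for illustration), again assuming a little-endian target such as ARM.
#include <cstdint>
#include <cstring>

static void StoreUnalignedDouble(unsigned char* addr, double value)
{
    uint64_t bits;
    std::memcpy(&bits, &value, sizeof(bits)); // the vmov_d2i split into two 32-bit words

    uint32_t lo = static_cast<uint32_t>(bits);       // goes to intRegLo
    uint32_t hi = static_cast<uint32_t>(bits >> 32); // goes to intRegHi

    std::memcpy(addr, &lo, sizeof(lo));     // the TYP_INT store of intRegLo
    std::memcpy(addr + 4, &hi, sizeof(hi)); // the TYP_INT store of intRegHi (offset 4)
}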
//------------------------------------------------------------------------
// genHWIntrinsic: Generates the code for a given hardware intrinsic node.
//
// Arguments:
//    node - The hardware intrinsic node
//
void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
{
    NamedIntrinsic      intrinsicID = node->gtHWIntrinsicId;
    InstructionSet      isa         = Compiler::isaOfHWIntrinsic(intrinsicID);
    HWIntrinsicCategory category    = Compiler::categoryOfHWIntrinsic(intrinsicID);
    HWIntrinsicFlag     flags       = Compiler::flagsOfHWIntrinsic(intrinsicID);
    int                 ival        = Compiler::ivalOfHWIntrinsic(intrinsicID);
    int                 numArgs     = Compiler::numArgsOfHWIntrinsic(node);

    assert((flags & HW_Flag_NoCodeGen) == 0);

    if (genIsTableDrivenHWIntrinsic(category, flags))
    {
        GenTree*  op1        = node->gtGetOp1();
        GenTree*  op2        = node->gtGetOp2();
        regNumber targetReg  = node->gtRegNum;
        var_types targetType = node->TypeGet();
        var_types baseType   = node->gtSIMDBaseType;

        regNumber op1Reg = REG_NA;
        regNumber op2Reg = REG_NA;
        emitter*  emit   = getEmitter();

        assert(numArgs >= 0);
        instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
        assert(ins != INS_invalid);
        emitAttr simdSize = EA_ATTR(node->gtSIMDSize);
        assert(simdSize != 0);

        switch (numArgs)
        {
            case 1:
                genConsumeOperands(node);
                op1Reg = op1->gtRegNum;
                if (category == HW_Category_MemoryLoad)
                {
                    emit->emitIns_R_AR(ins, simdSize, targetReg, op1Reg, 0);
                }
                else if (category == HW_Category_SIMDScalar && (flags & HW_Flag_CopyUpperBits) != 0)
                {
                    emit->emitIns_SIMD_R_R_R(ins, simdSize, targetReg, op1Reg, op1Reg);
                }
                else if ((ival != -1) && varTypeIsFloating(baseType))
                {
                    emit->emitIns_R_R_I(ins, simdSize, targetReg, op1Reg, ival);
                }
                else
                {
                    emit->emitIns_R_R(ins, simdSize, targetReg, op1Reg);
                }
                break;

            case 2:
                genConsumeOperands(node);
                op1Reg = op1->gtRegNum;
                op2Reg = op2->gtRegNum;
                if (category == HW_Category_MemoryStore)
                {
                    emit->emitIns_AR_R(ins, simdSize, op2Reg, op1Reg, 0);
                }
                else if ((ival != -1) && varTypeIsFloating(baseType))
                {
                    genHWIntrinsic_R_R_RM_I(node, ins);
                }
                else if (category == HW_Category_MemoryLoad)
                {
                    emit->emitIns_SIMD_R_R_AR(ins, simdSize, targetReg, op1Reg, op2Reg);
                }
                else if (Compiler::isImmHWIntrinsic(intrinsicID, op2))
                {
                    if (intrinsicID == NI_SSE2_Extract)
                    {
                        // Extract instructions return their result in a GP register,
                        // so use the int size as the emit size.
                        simdSize = emitTypeSize(TYP_INT);
                    }

                    auto emitSwCase = [&](unsigned i) {
                        emit->emitIns_SIMD_R_R_I(ins, simdSize, targetReg, op1Reg, (int)i);
                    };
                    if (op2->IsCnsIntOrI())
                    {
                        ssize_t ival = op2->AsIntCon()->IconValue();
                        emitSwCase((unsigned)ival);
                    }
                    else
                    {
                        // We emit a fallback case for the scenario when the imm-op is not a constant. This should
                        // normally happen when the intrinsic is called indirectly, such as via Reflection. However, it
                        // can also occur if the consumer calls it directly and just doesn't pass a constant value.
                        regNumber baseReg = node->ExtractTempReg();
                        regNumber offsReg = node->GetSingleTempReg();
                        genHWIntrinsicJumpTableFallback(intrinsicID, op2Reg, baseReg, offsReg, emitSwCase);
                    }
                }
                else
                {
                    genHWIntrinsic_R_R_RM(node, ins);
                }
                break;

            case 3:
            {
                assert(op1->OperIsList());
                assert(op1->gtGetOp2()->OperIsList());
                assert(op1->gtGetOp2()->gtGetOp2()->OperIsList());

                GenTreeArgList* argList = op1->AsArgList();
                op1                     = argList->Current();
                genConsumeRegs(op1);
                op1Reg = op1->gtRegNum;

                argList = argList->Rest();
                op2     = argList->Current();
                genConsumeRegs(op2);
                op2Reg = op2->gtRegNum;

                argList      = argList->Rest();
                GenTree* op3 = argList->Current();
                genConsumeRegs(op3);
                regNumber op3Reg = op3->gtRegNum;

                if (Compiler::isImmHWIntrinsic(intrinsicID, op3))
                {
                    auto emitSwCase = [&](unsigned i) {
                        emit->emitIns_SIMD_R_R_R_I(ins, simdSize, targetReg, op1Reg, op2Reg, (int)i);
                    };
                    if (op3->IsCnsIntOrI())
                    {
                        ssize_t ival = op3->AsIntCon()->IconValue();
                        emitSwCase((unsigned)ival);
                    }
                    else
                    {
                        // We emit a fallback case for the scenario when the imm-op is not a constant. This should
                        // normally happen when the intrinsic is called indirectly, such as via Reflection. However, it
                        // can also occur if the consumer calls it directly and just doesn't pass a constant value.
                        regNumber baseReg = node->ExtractTempReg();
                        regNumber offsReg = node->GetSingleTempReg();
                        genHWIntrinsicJumpTableFallback(intrinsicID, op3Reg, baseReg, offsReg, emitSwCase);
                    }
                }
                else if (category == HW_Category_MemoryStore)
                {
                    assert(intrinsicID == NI_SSE2_MaskMove);
                    assert(targetReg == REG_NA);

                    // SSE2 MaskMove hardcodes the destination (op3) in DI/EDI/RDI
                    if (op3Reg != REG_EDI)
                    {
                        emit->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_EDI, op3Reg);
                    }
                    emit->emitIns_R_R(ins, simdSize, op1Reg, op2Reg);
                }
                else
                {
                    emit->emitIns_SIMD_R_R_R_R(ins, simdSize, targetReg, op1Reg, op2Reg, op3Reg);
                }
                break;
            }

            default:
                unreached();
                break;
        }
        genProduceReg(node);
        return;
    }

    switch (isa)
    {
        case InstructionSet_SSE:
            genSSEIntrinsic(node);
            break;
        case InstructionSet_SSE2:
            genSSE2Intrinsic(node);
            break;
        case InstructionSet_SSE41:
            genSSE41Intrinsic(node);
            break;
        case InstructionSet_SSE42:
            genSSE42Intrinsic(node);
            break;
        case InstructionSet_AVX:
            genAVXIntrinsic(node);
            break;
        case InstructionSet_AVX2:
            genAVX2Intrinsic(node);
            break;
        case InstructionSet_AES:
            genAESIntrinsic(node);
            break;
        case InstructionSet_BMI1:
            genBMI1Intrinsic(node);
            break;
        case InstructionSet_BMI2:
            genBMI2Intrinsic(node);
            break;
        case InstructionSet_FMA:
            genFMAIntrinsic(node);
            break;
        case InstructionSet_LZCNT:
            genLZCNTIntrinsic(node);
            break;
        case InstructionSet_PCLMULQDQ:
            genPCLMULQDQIntrinsic(node);
            break;
        case InstructionSet_POPCNT:
            genPOPCNTIntrinsic(node);
            break;
        default:
            unreached();
            break;
    }
}
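// For the immediate-operand intrinsics handled above, the emitSwCase lambda encodes one
// concrete immediate value. When the operand is a compile-time constant (IsCnsIntOrI), the
// lambda is invoked once with that value; otherwise genHWIntrinsicJumpTableFallback emits a
// case per possible immediate and uses baseReg/offsReg to jump to the one selected at run
// time. The standalone sketch below is not part of the JIT (the function name and the 0..255
// immediate range are assumptions for illustration); it shows the same two-path shape.
#include <functional>
#include <vector>

static void EmitWithImmediate(bool immIsConstant, unsigned immValue,
                              const std::function<void(unsigned)>& emitSwCase)
{
    if (immIsConstant)
    {
        // IsCnsIntOrI() path: the immediate is known, so encode it directly.
        emitSwCase(immValue);
        return;
    }

    // Fallback path: pre-generate one case per possible immediate value...
    std::vector<std::function<void()>> table;
    for (unsigned i = 0; i < 256; i++)
    {
        table.push_back([&emitSwCase, i]() { emitSwCase(i); });
    }

    // ...and let the run-time value select the case, as the emitted jump table does.
    table[immValue & 0xFF]();
}
// The fallback trades code size for generality: it keeps such intrinsics usable even when they
// are invoked indirectly (e.g. via Reflection) and no constant immediate is available.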