// Rewrite a SIMD indirection as GT_IND(GT_LEA(obj.op1)), or as a simple // lclVar if possible. // // Arguments: // use - A use reference for a block node // keepBlk - True if this should remain a block node if it is not a lclVar // // Return Value: // None. // // TODO-1stClassStructs: These should be eliminated earlier, once we can handle // lclVars in all the places that used to have GT_OBJ. // void Rationalizer::RewriteSIMDOperand(LIR::Use& use, bool keepBlk) { #ifdef FEATURE_SIMD // No lowering is needed for non-SIMD nodes, so early out if featureSIMD is not enabled. if (!comp->featureSIMD) { return; } GenTree* tree = use.Def(); if (!tree->OperIsIndir()) { return; } var_types simdType = tree->TypeGet(); if (!varTypeIsSIMD(simdType)) { return; } // If we have GT_IND(GT_LCL_VAR_ADDR) and the GT_LCL_VAR_ADDR is TYP_BYREF/TYP_I_IMPL, // and the var is a SIMD type, replace the expression by GT_LCL_VAR. GenTree* addr = tree->AsIndir()->Addr(); if (addr->OperIsLocalAddr() && comp->isAddrOfSIMDType(addr)) { BlockRange().Remove(tree); addr->SetOper(loadForm(addr->OperGet())); addr->gtType = simdType; use.ReplaceWith(comp, addr); } else if ((addr->OperGet() == GT_ADDR) && (addr->gtGetOp1()->OperGet() == GT_SIMD)) { // if we have GT_IND(GT_ADDR(GT_SIMD)), remove the GT_IND(GT_ADDR()), leaving just the GT_SIMD. BlockRange().Remove(tree); BlockRange().Remove(addr); use.ReplaceWith(comp, addr->gtGetOp1()); } else if (!keepBlk) { tree->SetOper(GT_IND); tree->gtType = simdType; } #endif // FEATURE_SIMD }
// Rewrite GT_OBJ of SIMD Vector as GT_IND(GT_LEA(obj.op1)) of a SIMD type. // // Arguments: // ppTree - A pointer-to-a-pointer for the GT_OBJ // fgWalkData - A pointer to tree walk data providing the context // // Return Value: // None. // // TODO-Cleanup: Once SIMD types are plumbed through the frontend, this will no longer // be required. // void Rationalizer::RewriteObj(LIR::Use& use) { #ifdef FEATURE_SIMD GenTreeObj* obj = use.Def()->AsObj(); // For UNIX struct passing, we can have Obj nodes for arguments. // For other cases, we should never see a non-SIMD type here. #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING if (!varTypeIsSIMD(obj)) { return; } #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING // Should come here only if featureSIMD is enabled noway_assert(comp->featureSIMD); // We should only call this with a SIMD type. noway_assert(varTypeIsSIMD(obj)); var_types simdType = obj->TypeGet(); // If the operand of obj is a GT_ADDR(GT_LCL_VAR) and LclVar is known to be a SIMD type, // replace obj by GT_LCL_VAR. GenTree* srcAddr = obj->gtGetOp1(); if (srcAddr->OperIsLocalAddr() && comp->isAddrOfSIMDType(srcAddr)) { BlockRange().Remove(obj); srcAddr->SetOper(loadForm(srcAddr->OperGet())); srcAddr->gtType = simdType; use.ReplaceWith(comp, srcAddr); } else { obj->SetOper(GT_IND); obj->gtType = simdType; } #else // we should never reach without feature SIMD assert(!"Unexpected obj during rationalization\n"); unreached(); #endif }
// Rewrite a SIMD indirection as GT_IND(GT_LEA(obj.op1)), or as a simple // lclVar if possible. // // Arguments: // use - A use reference for a block node // keepBlk - True if this should remain a block node if it is not a lclVar // // Return Value: // None. // // TODO-1stClassStructs: These should be eliminated earlier, once we can handle // lclVars in all the places that used to have GT_OBJ. // void Rationalizer::RewriteSIMDOperand(LIR::Use& use, bool keepBlk) { #ifdef FEATURE_SIMD // No lowering is needed for non-SIMD nodes, so early out if featureSIMD is not enabled. if (!comp->featureSIMD) { return; } GenTree* tree = use.Def(); if (!tree->OperIsIndir()) { return; } var_types simdType = tree->TypeGet(); if (!varTypeIsSIMD(simdType)) { return; } // If the operand of is a GT_ADDR(GT_LCL_VAR) and LclVar is known to be of simdType, // replace obj by GT_LCL_VAR. GenTree* addr = tree->AsIndir()->Addr(); if (addr->OperIsLocalAddr() && comp->isAddrOfSIMDType(addr)) { BlockRange().Remove(tree); addr->SetOper(loadForm(addr->OperGet())); addr->gtType = simdType; use.ReplaceWith(comp, addr); } else if (!keepBlk) { tree->SetOper(GT_IND); tree->gtType = simdType; } #endif // FEATURE_SIMD }
//------------------------------------------------------------------------ // LowerBlockStore: Set block store type // // Arguments: // blkNode - The block store node of interest // // Return Value: // None. // void Lowering::LowerBlockStore(GenTreeBlk* blkNode) { GenTree* dstAddr = blkNode->Addr(); unsigned size = blkNode->gtBlkSize; GenTree* source = blkNode->Data(); Compiler* compiler = comp; // Sources are dest address and initVal or source. GenTree* srcAddrOrFill = nullptr; bool isInitBlk = blkNode->OperIsInitBlkOp(); if (!isInitBlk) { // CopyObj or CopyBlk if ((blkNode->OperGet() == GT_STORE_OBJ) && ((blkNode->AsObj()->gtGcPtrCount == 0) || blkNode->gtBlkOpGcUnsafe)) { blkNode->SetOper(GT_STORE_BLK); } if (source->gtOper == GT_IND) { srcAddrOrFill = blkNode->Data()->gtGetOp1(); } } if (isInitBlk) { GenTree* initVal = source; if (initVal->OperIsInitVal()) { initVal->SetContained(); initVal = initVal->gtGetOp1(); } srcAddrOrFill = initVal; #ifdef _TARGET_ARM64_ if ((size != 0) && (size <= INITBLK_UNROLL_LIMIT) && initVal->IsCnsIntOrI()) { // TODO-ARM-CQ: Currently we generate a helper call for every // initblk we encounter. Later on we should implement loop unrolling // code sequences to improve CQ. // For reference see the code in LowerXArch.cpp. NYI_ARM("initblk loop unrolling is currently not implemented."); // The fill value of an initblk is interpreted to hold a // value of (unsigned int8) however a constant of any size // may practically reside on the evaluation stack. So extract // the lower byte out of the initVal constant and replicate // it to a larger constant whose size is sufficient to support // the largest width store of the desired inline expansion. ssize_t fill = initVal->gtIntCon.gtIconVal & 0xFF; if (fill == 0) { MakeSrcContained(blkNode, source); } else if (size < REGSIZE_BYTES) { initVal->gtIntCon.gtIconVal = 0x01010101 * fill; } else { initVal->gtIntCon.gtIconVal = 0x0101010101010101LL * fill; initVal->gtType = TYP_LONG; } blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll; } else #endif // _TARGET_ARM64_ { blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper; } } else { // CopyObj or CopyBlk // Sources are src and dest and size if not constant. if (blkNode->OperGet() == GT_STORE_OBJ) { // CopyObj GenTreeObj* objNode = blkNode->AsObj(); unsigned slots = objNode->gtSlots; #ifdef DEBUG // CpObj must always have at least one GC-Pointer as a member. assert(objNode->gtGcPtrCount > 0); assert(dstAddr->gtType == TYP_BYREF || dstAddr->gtType == TYP_I_IMPL); CORINFO_CLASS_HANDLE clsHnd = objNode->gtClass; size_t classSize = compiler->info.compCompHnd->getClassSize(clsHnd); size_t blkSize = roundUp(classSize, TARGET_POINTER_SIZE); // Currently, the EE always round up a class data structure so // we are not handling the case where we have a non multiple of pointer sized // struct. This behavior may change in the future so in order to keeps things correct // let's assert it just to be safe. Going forward we should simply // handle this case. assert(classSize == blkSize); assert((blkSize / TARGET_POINTER_SIZE) == slots); assert(objNode->HasGCPtr()); #endif blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll; } else // CopyBlk { // In case of a CpBlk with a constant size and less than CPBLK_UNROLL_LIMIT size // we should unroll the loop to improve CQ. // For reference see the code in lowerxarch.cpp. if ((size != 0) && (size <= CPBLK_UNROLL_LIMIT)) { blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll; } else { // In case we have a constant integer this means we went beyond // CPBLK_UNROLL_LIMIT bytes of size, still we should never have the case of // any GC-Pointers in the src struct. blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper; } } // CopyObj or CopyBlk if (source->gtOper == GT_IND) { MakeSrcContained(blkNode, source); GenTree* addr = source->AsIndir()->Addr(); if (!addr->OperIsLocalAddr()) { addr->ClearContained(); } } else if (!source->IsMultiRegCall() && !source->OperIsSIMD()) { assert(source->IsLocal()); MakeSrcContained(blkNode, source); } } }
// Transform CopyBlk involving SIMD vectors into stlclvar or stind of a SIMD type. // Transformation is done if either src or dst are known to be SIMD vectors. // // Arguments: // ppTree - A pointer-to-a-pointer for the GT_COPYBLK // fgWalkData - A pointer to tree walk data providing the context // // Return Value: // None. // // If either the source or the dst are known to be SIMD (a lclVar or SIMD intrinsic), // get the simdType (TYP_DOUBLE or a SIMD type for SSE2) from the size of the SIMD node. // // For the source: // - If it is a SIMD intrinsic or a lvSIMDType lclVar, change the node type to simdType. // - Otherwise, add a GT_IND of simdType. // For the dst: // - If it is a lclVar of a SIMD type, chanage the node type to simdType. // - Otherwise, change it to a GT_STORE_IND of simdType // // TODO-Cleanup: Once SIMD types are plumbed through the frontend, this will no longer // be required. // void Rationalizer::RewriteCopyBlk(LIR::Use& use) { #ifdef FEATURE_SIMD // No need to transofrm non-SIMD nodes, if featureSIMD is not enabled. if (!comp->featureSIMD) { return; } // See if this is a SIMD copyBlk GenTreeCpBlk* cpBlk = use.Def()->AsCpBlk(); GenTreePtr dstAddr = cpBlk->Dest(); GenTree* srcAddr = cpBlk->Source(); const bool srcIsSIMDAddr = comp->isAddrOfSIMDType(srcAddr); const bool dstIsSIMDAddr = comp->isAddrOfSIMDType(dstAddr); // Do not transform if neither src or dst is known to be a SIMD type. // If src tree type is something we cannot reason but if dst is known to be of a SIMD type // we will treat src tree as a SIMD type and vice versa. if (!srcIsSIMDAddr && !dstIsSIMDAddr) { return; } // At this point it is known to be a copyblk of SIMD vectors and we can // start transforming the original tree. Prior to this point do not perform // any modifications to the original tree. JITDUMP("\nRewriting SIMD CopyBlk\n"); DISPTREERANGE(BlockRange(), cpBlk); // There are currently only three sizes supported: 8 bytes, 12 bytes, 16 bytes or the vector register length. GenTreeIntConCommon* sizeNode = cpBlk->Size()->AsIntConCommon(); var_types simdType = comp->getSIMDTypeForSize((unsigned int)sizeNode->IconValue()); // Remove 'size' from execution order BlockRange().Remove(sizeNode); // Is destination a lclVar which is not an arg? // If yes then we can turn it to a stlcl.var, otherwise turn into stind. GenTree* simdDst = nullptr; genTreeOps oper = GT_NONE; if (dstIsSIMDAddr && dstAddr->OperIsLocalAddr()) { simdDst = dstAddr; simdDst->gtType = simdType; oper = GT_STORE_LCL_VAR; // For structs that are padded (e.g. Vector3f, Vector3i), the morpher will have marked them // as GTF_VAR_USEASG. Unmark them. simdDst->gtFlags &= ~(GTF_VAR_USEASG); } else { // Address of a non-local var simdDst = dstAddr; oper = GT_STOREIND; } GenTree* simdSrc = nullptr; if ((srcAddr->OperGet() == GT_ADDR) && varTypeIsSIMD(srcAddr->gtGetOp1())) { // Get rid of parent node of GT_ADDR(..) if its child happens to be of a SIMD type. BlockRange().Remove(srcAddr); simdSrc = srcAddr->gtGetOp1(); } else if (srcIsSIMDAddr && srcAddr->OperIsLocalAddr()) { // If the source has been rewritten into a local addr node, rewrite it back into a // local var node. simdSrc = srcAddr; simdSrc->SetOper(loadForm(srcAddr->OperGet())); } else { // Since destination is known to be a SIMD type, src must be a SIMD type too // though we cannot figure it out easily enough. Transform src into // GT_IND(src) of simdType. GenTree* indir = comp->gtNewOperNode(GT_IND, simdType, srcAddr); BlockRange().InsertAfter(srcAddr, indir); cpBlk->gtGetOp1()->gtOp.gtOp2 = indir; simdSrc = indir; } simdSrc->gtType = simdType; // Change cpblk to either a st.lclvar or st.ind. // At this point we are manipulating cpblk node with the knowledge of // its internals (i.e. op1 is the size node, and the src & dst are in a GT_LIST on op2). // This logic might need to be changed if we ever restructure cpblk node. assert(simdDst != nullptr); assert(simdSrc != nullptr); GenTree* newNode = nullptr; if (oper == GT_STORE_LCL_VAR) { newNode = simdDst; newNode->SetOper(oper); GenTreeLclVar* store = newNode->AsLclVar(); store->gtOp1 = simdSrc; store->gtType = simdType; store->gtFlags |= ((simdSrc->gtFlags & GTF_ALL_EFFECT) | GTF_ASG); BlockRange().Remove(simdDst); BlockRange().InsertAfter(simdSrc, store); } else { assert(oper == GT_STOREIND); newNode = cpBlk->gtGetOp1(); newNode->SetOper(oper); GenTreeStoreInd* storeInd = newNode->AsStoreInd(); storeInd->gtType = simdType; storeInd->gtFlags |= ((simdSrc->gtFlags & GTF_ALL_EFFECT) | GTF_ASG); storeInd->gtOp1 = simdDst; storeInd->gtOp2 = simdSrc; BlockRange().InsertBefore(cpBlk, storeInd); } use.ReplaceWith(comp, newNode); BlockRange().Remove(cpBlk); JITDUMP("After rewriting SIMD CopyBlk:\n"); DISPTREERANGE(BlockRange(), use.Def()); JITDUMP("\n"); #endif // FEATURE_SIMD }
// Rewrite InitBlk involving SIMD vector into stlcl.var of a SIMD type. // // Arguments: // ppTree - A pointer-to-a-pointer for the GT_INITBLK // fgWalkData - A pointer to tree walk data providing the context // // Return Value: // None. // // TODO-Cleanup: Once SIMD types are plumbed through the frontend, this will no longer // be required. // void Rationalizer::RewriteInitBlk(LIR::Use& use) { #ifdef FEATURE_SIMD // No lowering is needed for non-SIMD nodes, so early out if featureSIMD is not enabled. if (!comp->featureSIMD) { return; } // See if this is a SIMD initBlk that needs to be changed to a simple st.lclVar. GenTreeInitBlk* initBlk = use.Def()->AsInitBlk(); // Is the dstAddr is addr of a SIMD type lclVar? GenTree* dstAddr = initBlk->Dest(); if (!comp->isAddrOfSIMDType(dstAddr) || !dstAddr->OperIsLocalAddr()) { return; } unsigned lclNum = dstAddr->AsLclVarCommon()->gtLclNum; if (!comp->lvaTable[lclNum].lvSIMDType) { return; } var_types baseType = comp->lvaTable[lclNum].lvBaseType; CORINFO_CLASS_HANDLE typeHnd = comp->lvaTable[lclNum].lvVerTypeInfo.GetClassHandle(); unsigned simdLocalSize = comp->getSIMDTypeSizeInBytes(typeHnd); JITDUMP("Rewriting SIMD InitBlk\n"); DISPTREERANGE(BlockRange(), initBlk); assert((dstAddr->gtFlags & GTF_VAR_USEASG) == 0); // There are currently only three sizes supported: 8 bytes, 16 bytes or the vector register length. GenTreeIntConCommon* sizeNode = initBlk->Size()->AsIntConCommon(); unsigned int size = (unsigned int)roundUp(sizeNode->IconValue(), TARGET_POINTER_SIZE); var_types simdType = comp->getSIMDTypeForSize(size); assert(roundUp(simdLocalSize, TARGET_POINTER_SIZE) == size); GenTree* initVal = initBlk->InitVal(); GenTreeSIMD* simdNode = new (comp, GT_SIMD) GenTreeSIMD(simdType, initVal, SIMDIntrinsicInit, baseType, (unsigned)sizeNode->IconValue()); dstAddr->SetOper(GT_STORE_LCL_VAR); GenTreeLclVar* store = dstAddr->AsLclVar(); store->gtType = simdType; store->gtOp.gtOp1 = simdNode; store->gtFlags |= ((simdNode->gtFlags & GTF_ALL_EFFECT) | GTF_ASG); BlockRange().Remove(store); // Insert the new nodes into the block BlockRange().InsertAfter(initVal, simdNode, store); use.ReplaceWith(comp, store); // Remove the old size and GT_INITBLK nodes. BlockRange().Remove(sizeNode); BlockRange().Remove(initBlk); JITDUMP("After rewriting SIMD InitBlk:\n"); DISPTREERANGE(BlockRange(), use.Def()); JITDUMP("\n"); #endif // FEATURE_SIMD }