//------------------------------------------------------------------------ // LowerBlockStore: Set block store type // // Arguments: // blkNode - The block store node of interest // // Return Value: // None. // void Lowering::LowerBlockStore(GenTreeBlk* blkNode) { GenTree* dstAddr = blkNode->Addr(); unsigned size = blkNode->gtBlkSize; GenTree* source = blkNode->Data(); Compiler* compiler = comp; // Sources are dest address and initVal or source. GenTree* srcAddrOrFill = nullptr; bool isInitBlk = blkNode->OperIsInitBlkOp(); if (!isInitBlk) { // CopyObj or CopyBlk if ((blkNode->OperGet() == GT_STORE_OBJ) && ((blkNode->AsObj()->gtGcPtrCount == 0) || blkNode->gtBlkOpGcUnsafe)) { blkNode->SetOper(GT_STORE_BLK); } if (source->gtOper == GT_IND) { srcAddrOrFill = blkNode->Data()->gtGetOp1(); } } if (isInitBlk) { GenTree* initVal = source; if (initVal->OperIsInitVal()) { initVal->SetContained(); initVal = initVal->gtGetOp1(); } srcAddrOrFill = initVal; #ifdef _TARGET_ARM64_ if ((size != 0) && (size <= INITBLK_UNROLL_LIMIT) && initVal->IsCnsIntOrI()) { // TODO-ARM-CQ: Currently we generate a helper call for every // initblk we encounter. Later on we should implement loop unrolling // code sequences to improve CQ. // For reference see the code in LowerXArch.cpp. NYI_ARM("initblk loop unrolling is currently not implemented."); // The fill value of an initblk is interpreted to hold a // value of (unsigned int8) however a constant of any size // may practically reside on the evaluation stack. So extract // the lower byte out of the initVal constant and replicate // it to a larger constant whose size is sufficient to support // the largest width store of the desired inline expansion. ssize_t fill = initVal->gtIntCon.gtIconVal & 0xFF; if (fill == 0) { MakeSrcContained(blkNode, source); } else if (size < REGSIZE_BYTES) { initVal->gtIntCon.gtIconVal = 0x01010101 * fill; } else { initVal->gtIntCon.gtIconVal = 0x0101010101010101LL * fill; initVal->gtType = TYP_LONG; } blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll; } else #endif // _TARGET_ARM64_ { blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper; } } else { // CopyObj or CopyBlk // Sources are src and dest and size if not constant. if (blkNode->OperGet() == GT_STORE_OBJ) { // CopyObj GenTreeObj* objNode = blkNode->AsObj(); unsigned slots = objNode->gtSlots; #ifdef DEBUG // CpObj must always have at least one GC-Pointer as a member. assert(objNode->gtGcPtrCount > 0); assert(dstAddr->gtType == TYP_BYREF || dstAddr->gtType == TYP_I_IMPL); CORINFO_CLASS_HANDLE clsHnd = objNode->gtClass; size_t classSize = compiler->info.compCompHnd->getClassSize(clsHnd); size_t blkSize = roundUp(classSize, TARGET_POINTER_SIZE); // Currently, the EE always round up a class data structure so // we are not handling the case where we have a non multiple of pointer sized // struct. This behavior may change in the future so in order to keeps things correct // let's assert it just to be safe. Going forward we should simply // handle this case. assert(classSize == blkSize); assert((blkSize / TARGET_POINTER_SIZE) == slots); assert(objNode->HasGCPtr()); #endif blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll; } else // CopyBlk { // In case of a CpBlk with a constant size and less than CPBLK_UNROLL_LIMIT size // we should unroll the loop to improve CQ. // For reference see the code in lowerxarch.cpp. if ((size != 0) && (size <= CPBLK_UNROLL_LIMIT)) { blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll; } else { // In case we have a constant integer this means we went beyond // CPBLK_UNROLL_LIMIT bytes of size, still we should never have the case of // any GC-Pointers in the src struct. blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper; } } // CopyObj or CopyBlk if (source->gtOper == GT_IND) { MakeSrcContained(blkNode, source); GenTree* addr = source->AsIndir()->Addr(); if (!addr->OperIsLocalAddr()) { addr->ClearContained(); } } else if (!source->IsMultiRegCall() && !source->OperIsSIMD()) { assert(source->IsLocal()); MakeSrcContained(blkNode, source); } } }
//------------------------------------------------------------------------ // TreeNodeInfoInitBlockStore: Set the NodeInfo for a block store. // // Arguments: // blkNode - The block store node of interest // // Return Value: // None. // void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode) { GenTree* dstAddr = blkNode->Addr(); unsigned size = blkNode->gtBlkSize; GenTree* source = blkNode->Data(); LinearScan* l = m_lsra; Compiler* compiler = comp; // Sources are dest address and initVal or source. // We may require an additional source or temp register for the size. blkNode->gtLsraInfo.srcCount = 2; blkNode->gtLsraInfo.dstCount = 0; GenTreePtr srcAddrOrFill = nullptr; bool isInitBlk = blkNode->OperIsInitBlkOp(); if (!isInitBlk) { // CopyObj or CopyBlk if (source->gtOper == GT_IND) { srcAddrOrFill = blkNode->Data()->gtGetOp1(); // We're effectively setting source as contained, but can't call MakeSrcContained, because the // "inheritance" of the srcCount is to a child not a parent - it would "just work" but could be misleading. // If srcAddr is already non-contained, we don't need to change it. if (srcAddrOrFill->gtLsraInfo.getDstCount() == 0) { srcAddrOrFill->gtLsraInfo.setDstCount(1); srcAddrOrFill->gtLsraInfo.setSrcCount(source->gtLsraInfo.srcCount); } m_lsra->clearOperandCounts(source); source->SetContained(); source->AsIndir()->Addr()->ClearContained(); } else if (!source->IsMultiRegCall() && !source->OperIsSIMD()) { assert(source->IsLocal()); MakeSrcContained(blkNode, source); blkNode->gtLsraInfo.srcCount--; } } if (isInitBlk) { GenTreePtr initVal = source; if (initVal->OperIsInitVal()) { initVal->SetContained(); initVal = initVal->gtGetOp1(); } srcAddrOrFill = initVal; if (blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindUnroll) { // TODO-ARM-CQ: Currently we generate a helper call for every // initblk we encounter. Later on we should implement loop unrolling // code sequences to improve CQ. // For reference see the code in lsraxarch.cpp. NYI_ARM("initblk loop unrolling is currently not implemented."); #ifdef _TARGET_ARM64_ // No additional temporaries required ssize_t fill = initVal->gtIntCon.gtIconVal & 0xFF; if (fill == 0) { MakeSrcContained(blkNode, source); blkNode->gtLsraInfo.srcCount--; } #endif // _TARGET_ARM64_ } else { assert(blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindHelper); // The helper follows the regular ABI. dstAddr->gtLsraInfo.setSrcCandidates(l, RBM_ARG_0); initVal->gtLsraInfo.setSrcCandidates(l, RBM_ARG_1); if (size != 0) { // Reserve a temp register for the block size argument. blkNode->gtLsraInfo.setInternalCandidates(l, RBM_ARG_2); blkNode->gtLsraInfo.internalIntCount = 1; } else { // The block size argument is a third argument to GT_STORE_DYN_BLK noway_assert(blkNode->gtOper == GT_STORE_DYN_BLK); blkNode->gtLsraInfo.setSrcCount(3); GenTree* sizeNode = blkNode->AsDynBlk()->gtDynamicSize; sizeNode->gtLsraInfo.setSrcCandidates(l, RBM_ARG_2); } } } else { // CopyObj or CopyBlk // Sources are src and dest and size if not constant. if (blkNode->OperGet() == GT_STORE_OBJ) { // CopyObj // We don't need to materialize the struct size but we still need // a temporary register to perform the sequence of loads and stores. blkNode->gtLsraInfo.internalIntCount = 1; if (size >= 2 * REGSIZE_BYTES) { // We will use ldp/stp to reduce code size and improve performance // so we need to reserve an extra internal register blkNode->gtLsraInfo.internalIntCount++; } // We can't use the special Write Barrier registers, so exclude them from the mask regMaskTP internalIntCandidates = RBM_ALLINT & ~(RBM_WRITE_BARRIER_DST_BYREF | RBM_WRITE_BARRIER_SRC_BYREF); blkNode->gtLsraInfo.setInternalCandidates(l, internalIntCandidates); // If we have a dest address we want it in RBM_WRITE_BARRIER_DST_BYREF. dstAddr->gtLsraInfo.setSrcCandidates(l, RBM_WRITE_BARRIER_DST_BYREF); // If we have a source address we want it in REG_WRITE_BARRIER_SRC_BYREF. // Otherwise, if it is a local, codegen will put its address in REG_WRITE_BARRIER_SRC_BYREF, // which is killed by a StoreObj (and thus needn't be reserved). if (srcAddrOrFill != nullptr) { srcAddrOrFill->gtLsraInfo.setSrcCandidates(l, RBM_WRITE_BARRIER_SRC_BYREF); } } else { // CopyBlk short internalIntCount = 0; regMaskTP internalIntCandidates = RBM_NONE; if (blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindUnroll) { // TODO-ARM-CQ: cpblk loop unrolling is currently not implemented. // In case of a CpBlk with a constant size and less than CPBLK_UNROLL_LIMIT size // we should unroll the loop to improve CQ. // For reference see the code in lsraxarch.cpp. NYI_ARM("cpblk loop unrolling is currently not implemented."); #ifdef _TARGET_ARM64_ internalIntCount = 1; internalIntCandidates = RBM_ALLINT; if (size >= 2 * REGSIZE_BYTES) { // We will use ldp/stp to reduce code size and improve performance // so we need to reserve an extra internal register internalIntCount++; } #endif // _TARGET_ARM64_ } else { assert(blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindHelper); dstAddr->gtLsraInfo.setSrcCandidates(l, RBM_ARG_0); // The srcAddr goes in arg1. if (srcAddrOrFill != nullptr) { srcAddrOrFill->gtLsraInfo.setSrcCandidates(l, RBM_ARG_1); } if (size != 0) { // Reserve a temp register for the block size argument. internalIntCandidates |= RBM_ARG_2; internalIntCount++; } else { // The block size argument is a third argument to GT_STORE_DYN_BLK noway_assert(blkNode->gtOper == GT_STORE_DYN_BLK); blkNode->gtLsraInfo.setSrcCount(3); GenTree* blockSize = blkNode->AsDynBlk()->gtDynamicSize; blockSize->gtLsraInfo.setSrcCandidates(l, RBM_ARG_2); } } if (internalIntCount != 0) { blkNode->gtLsraInfo.internalIntCount = internalIntCount; blkNode->gtLsraInfo.setInternalCandidates(l, internalIntCandidates); } } } }
void Rationalizer::RewriteAssignment(LIR::Use& use) { assert(use.IsInitialized()); GenTreeOp* assignment = use.Def()->AsOp(); assert(assignment->OperGet() == GT_ASG); GenTree* location = assignment->gtGetOp1(); GenTree* value = assignment->gtGetOp2(); genTreeOps locationOp = location->OperGet(); if (assignment->OperIsBlkOp()) { #ifdef FEATURE_SIMD if (varTypeIsSIMD(location) && assignment->OperIsInitBlkOp()) { if (location->OperGet() == GT_LCL_VAR) { var_types simdType = location->TypeGet(); GenTree* initVal = assignment->gtOp.gtOp2; var_types baseType = comp->getBaseTypeOfSIMDLocal(location); if (baseType != TYP_UNKNOWN) { GenTreeSIMD* simdTree = new (comp, GT_SIMD) GenTreeSIMD(simdType, initVal, SIMDIntrinsicInit, baseType, genTypeSize(simdType)); assignment->gtOp.gtOp2 = simdTree; value = simdTree; initVal->gtNext = simdTree; simdTree->gtPrev = initVal; simdTree->gtNext = location; location->gtPrev = simdTree; } } } #endif // FEATURE_SIMD if ((location->TypeGet() == TYP_STRUCT) && !assignment->IsPhiDefn() && !value->IsMultiRegCall()) { if ((location->OperGet() == GT_LCL_VAR)) { // We need to construct a block node for the location. // Modify lcl to be the address form. location->SetOper(addrForm(locationOp)); LclVarDsc* varDsc = &(comp->lvaTable[location->AsLclVarCommon()->gtLclNum]); location->gtType = TYP_BYREF; GenTreeBlk* storeBlk = nullptr; unsigned int size = varDsc->lvExactSize; if (varDsc->lvStructGcCount != 0) { CORINFO_CLASS_HANDLE structHnd = varDsc->lvVerTypeInfo.GetClassHandle(); GenTreeObj* objNode = comp->gtNewObjNode(structHnd, location)->AsObj(); unsigned int slots = (unsigned)(roundUp(size, TARGET_POINTER_SIZE) / TARGET_POINTER_SIZE); objNode->SetGCInfo(varDsc->lvGcLayout, varDsc->lvStructGcCount, slots); objNode->ChangeOper(GT_STORE_OBJ); objNode->SetData(value); comp->fgMorphUnsafeBlk(objNode); storeBlk = objNode; } else { storeBlk = new (comp, GT_STORE_BLK) GenTreeBlk(GT_STORE_BLK, TYP_STRUCT, location, value, size); } storeBlk->gtFlags |= (GTF_REVERSE_OPS | GTF_ASG); storeBlk->gtFlags |= ((location->gtFlags | value->gtFlags) & GTF_ALL_EFFECT); GenTree* insertionPoint = location->gtNext; BlockRange().InsertBefore(insertionPoint, storeBlk); use.ReplaceWith(comp, storeBlk); BlockRange().Remove(assignment); JITDUMP("After transforming local struct assignment into a block op:\n"); DISPTREERANGE(BlockRange(), use.Def()); JITDUMP("\n"); return; } else { assert(location->OperIsBlk()); } } } switch (locationOp) { case GT_LCL_VAR: case GT_LCL_FLD: case GT_REG_VAR: case GT_PHI_ARG: RewriteAssignmentIntoStoreLclCore(assignment, location, value, locationOp); BlockRange().Remove(location); break; case GT_IND: { GenTreeStoreInd* store = new (comp, GT_STOREIND) GenTreeStoreInd(location->TypeGet(), location->gtGetOp1(), value); copyFlags(store, assignment, GTF_ALL_EFFECT); copyFlags(store, location, GTF_IND_FLAGS); if (assignment->IsReverseOp()) { store->gtFlags |= GTF_REVERSE_OPS; } // TODO: JIT dump // Remove the GT_IND node and replace the assignment node with the store BlockRange().Remove(location); BlockRange().InsertBefore(assignment, store); use.ReplaceWith(comp, store); BlockRange().Remove(assignment); } break; case GT_CLS_VAR: { location->SetOper(GT_CLS_VAR_ADDR); location->gtType = TYP_BYREF; assignment->SetOper(GT_STOREIND); // TODO: JIT dump } break; case GT_BLK: case GT_OBJ: case GT_DYN_BLK: { assert(varTypeIsStruct(location)); GenTreeBlk* storeBlk = location->AsBlk(); genTreeOps storeOper; switch (location->gtOper) { case GT_BLK: storeOper = GT_STORE_BLK; break; case GT_OBJ: storeOper = GT_STORE_OBJ; break; case GT_DYN_BLK: storeOper = GT_STORE_DYN_BLK; break; default: unreached(); } JITDUMP("Rewriting GT_ASG(%s(X), Y) to %s(X,Y):\n", GenTree::NodeName(location->gtOper), GenTree::NodeName(storeOper)); storeBlk->SetOperRaw(storeOper); storeBlk->gtFlags &= ~GTF_DONT_CSE; storeBlk->gtFlags |= (assignment->gtFlags & (GTF_ALL_EFFECT | GTF_REVERSE_OPS | GTF_BLK_VOLATILE | GTF_BLK_UNALIGNED | GTF_DONT_CSE)); storeBlk->gtBlk.Data() = value; // Replace the assignment node with the store use.ReplaceWith(comp, storeBlk); BlockRange().Remove(assignment); DISPTREERANGE(BlockRange(), use.Def()); JITDUMP("\n"); } break; default: unreached(); break; } }