//------------------------------------------------------------------------
// TreeNodeInfoInitBlockStore: Set the NodeInfo for a block store.
//
// Arguments:
//    blkNode - The block store node of interest
//
// Return Value:
//    None.
//
void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
{
    GenTree*    dstAddr  = blkNode->Addr();
    unsigned    size     = blkNode->gtBlkSize;
    GenTree*    source   = blkNode->Data();
    LinearScan* l        = m_lsra;
    Compiler*   compiler = comp;

    // Sources are dest address and initVal or source.
    // We may require an additional source or temp register for the size.
    blkNode->gtLsraInfo.srcCount = 2;
    blkNode->gtLsraInfo.dstCount = 0;

    GenTreePtr srcAddrOrFill = nullptr;
    bool       isInitBlk     = blkNode->OperIsInitBlkOp();

    if (!isInitBlk)
    {
        // CopyObj or CopyBlk
        if (source->gtOper == GT_IND)
        {
            srcAddrOrFill = blkNode->Data()->gtGetOp1();
            // We're effectively setting source as contained, but can't call MakeSrcContained, because the
            // "inheritance" of the srcCount is to a child not a parent - it would "just work" but could be
            // misleading. If srcAddr is already non-contained, we don't need to change it.
            if (srcAddrOrFill->gtLsraInfo.getDstCount() == 0)
            {
                srcAddrOrFill->gtLsraInfo.setDstCount(1);
                srcAddrOrFill->gtLsraInfo.setSrcCount(source->gtLsraInfo.srcCount);
            }
            m_lsra->clearOperandCounts(source);
            source->SetContained();
            source->AsIndir()->Addr()->ClearContained();
        }
        else if (!source->IsMultiRegCall() && !source->OperIsSIMD())
        {
            assert(source->IsLocal());
            MakeSrcContained(blkNode, source);
            blkNode->gtLsraInfo.srcCount--;
        }
    }

    if (isInitBlk)
    {
        GenTreePtr initVal = source;
        if (initVal->OperIsInitVal())
        {
            initVal->SetContained();
            initVal = initVal->gtGetOp1();
        }
        srcAddrOrFill = initVal;

        if (blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindUnroll)
        {
            // TODO-ARM-CQ: Currently we generate a helper call for every
            // initblk we encounter. Later on we should implement loop unrolling
            // code sequences to improve CQ.
            // For reference see the code in lsraxarch.cpp.
            NYI_ARM("initblk loop unrolling is currently not implemented.");

#ifdef _TARGET_ARM64_
            // No additional temporaries required
            ssize_t fill = initVal->gtIntCon.gtIconVal & 0xFF;
            if (fill == 0)
            {
                MakeSrcContained(blkNode, source);
                blkNode->gtLsraInfo.srcCount--;
            }
#endif // _TARGET_ARM64_
        }
        else
        {
            assert(blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindHelper);
            // The helper follows the regular ABI.
            dstAddr->gtLsraInfo.setSrcCandidates(l, RBM_ARG_0);
            initVal->gtLsraInfo.setSrcCandidates(l, RBM_ARG_1);
            if (size != 0)
            {
                // Reserve a temp register for the block size argument.
                blkNode->gtLsraInfo.setInternalCandidates(l, RBM_ARG_2);
                blkNode->gtLsraInfo.internalIntCount = 1;
            }
            else
            {
                // The block size argument is a third argument to GT_STORE_DYN_BLK.
                noway_assert(blkNode->gtOper == GT_STORE_DYN_BLK);
                blkNode->gtLsraInfo.setSrcCount(3);
                GenTree* sizeNode = blkNode->AsDynBlk()->gtDynamicSize;
                sizeNode->gtLsraInfo.setSrcCandidates(l, RBM_ARG_2);
            }
        }
    }
    else
    {
        // CopyObj or CopyBlk
        // Sources are src and dest and size if not constant.
        if (blkNode->OperGet() == GT_STORE_OBJ)
        {
            // CopyObj
            // We don't need to materialize the struct size but we still need
            // a temporary register to perform the sequence of loads and stores.
            blkNode->gtLsraInfo.internalIntCount = 1;

            if (size >= 2 * REGSIZE_BYTES)
            {
                // We will use ldp/stp to reduce code size and improve performance
                // so we need to reserve an extra internal register.
                blkNode->gtLsraInfo.internalIntCount++;
            }

            // We can't use the special Write Barrier registers, so exclude them from the mask.
            regMaskTP internalIntCandidates =
                RBM_ALLINT & ~(RBM_WRITE_BARRIER_DST_BYREF | RBM_WRITE_BARRIER_SRC_BYREF);
            blkNode->gtLsraInfo.setInternalCandidates(l, internalIntCandidates);

            // If we have a dest address we want it in RBM_WRITE_BARRIER_DST_BYREF.
            dstAddr->gtLsraInfo.setSrcCandidates(l, RBM_WRITE_BARRIER_DST_BYREF);

            // If we have a source address we want it in REG_WRITE_BARRIER_SRC_BYREF.
            // Otherwise, if it is a local, codegen will put its address in REG_WRITE_BARRIER_SRC_BYREF,
            // which is killed by a StoreObj (and thus needn't be reserved).
            if (srcAddrOrFill != nullptr)
            {
                srcAddrOrFill->gtLsraInfo.setSrcCandidates(l, RBM_WRITE_BARRIER_SRC_BYREF);
            }
        }
        else
        {
            // CopyBlk
            short     internalIntCount      = 0;
            regMaskTP internalIntCandidates = RBM_NONE;

            if (blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindUnroll)
            {
                // TODO-ARM-CQ: cpblk loop unrolling is currently not implemented.
                // In case of a CpBlk with a constant size and less than CPBLK_UNROLL_LIMIT size
                // we should unroll the loop to improve CQ.
                // For reference see the code in lsraxarch.cpp.
                NYI_ARM("cpblk loop unrolling is currently not implemented.");

#ifdef _TARGET_ARM64_
                internalIntCount      = 1;
                internalIntCandidates = RBM_ALLINT;

                if (size >= 2 * REGSIZE_BYTES)
                {
                    // We will use ldp/stp to reduce code size and improve performance
                    // so we need to reserve an extra internal register.
                    internalIntCount++;
                }
#endif // _TARGET_ARM64_
            }
            else
            {
                assert(blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindHelper);
                dstAddr->gtLsraInfo.setSrcCandidates(l, RBM_ARG_0);

                // The srcAddr goes in arg1.
                if (srcAddrOrFill != nullptr)
                {
                    srcAddrOrFill->gtLsraInfo.setSrcCandidates(l, RBM_ARG_1);
                }

                if (size != 0)
                {
                    // Reserve a temp register for the block size argument.
                    internalIntCandidates |= RBM_ARG_2;
                    internalIntCount++;
                }
                else
                {
                    // The block size argument is a third argument to GT_STORE_DYN_BLK.
                    noway_assert(blkNode->gtOper == GT_STORE_DYN_BLK);
                    blkNode->gtLsraInfo.setSrcCount(3);
                    GenTree* blockSize = blkNode->AsDynBlk()->gtDynamicSize;
                    blockSize->gtLsraInfo.setSrcCandidates(l, RBM_ARG_2);
                }
            }

            if (internalIntCount != 0)
            {
                blkNode->gtLsraInfo.internalIntCount = internalIntCount;
                blkNode->gtLsraInfo.setInternalCandidates(l, internalIntCandidates);
            }
        }
    }
}
//------------------------------------------------------------------------
// LowerBlockStore: Set block store type
//
// Arguments:
//    blkNode - The block store node of interest
//
// Return Value:
//    None.
//
void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
{
    GenTree*  dstAddr  = blkNode->Addr();
    unsigned  size     = blkNode->gtBlkSize;
    GenTree*  source   = blkNode->Data();
    Compiler* compiler = comp;

    // Sources are dest address and initVal or source.
    GenTreePtr srcAddrOrFill = nullptr;
    bool       isInitBlk     = blkNode->OperIsInitBlkOp();

    if (!isInitBlk)
    {
        // CopyObj or CopyBlk
        if ((blkNode->OperGet() == GT_STORE_OBJ) &&
            ((blkNode->AsObj()->gtGcPtrCount == 0) || blkNode->gtBlkOpGcUnsafe))
        {
            blkNode->SetOper(GT_STORE_BLK);
        }
        if (source->gtOper == GT_IND)
        {
            srcAddrOrFill = blkNode->Data()->gtGetOp1();
        }
    }

    if (isInitBlk)
    {
        GenTreePtr initVal = source;
        if (initVal->OperIsInitVal())
        {
            initVal->SetContained();
            initVal = initVal->gtGetOp1();
        }
        srcAddrOrFill = initVal;

#ifdef _TARGET_ARM64_
        if ((size != 0) && (size <= INITBLK_UNROLL_LIMIT) && initVal->IsCnsIntOrI())
        {
            // TODO-ARM-CQ: Currently we generate a helper call for every
            // initblk we encounter. Later on we should implement loop unrolling
            // code sequences to improve CQ.
            // For reference see the code in lowerxarch.cpp.
            NYI_ARM("initblk loop unrolling is currently not implemented.");

            // The fill value of an initblk is interpreted to hold a
            // value of (unsigned int8) however a constant of any size
            // may practically reside on the evaluation stack. So extract
            // the lower byte out of the initVal constant and replicate
            // it to a larger constant whose size is sufficient to support
            // the largest width store of the desired inline expansion.
            ssize_t fill = initVal->gtIntCon.gtIconVal & 0xFF;
            if (fill == 0)
            {
                MakeSrcContained(blkNode, source);
            }
            else if (size < REGSIZE_BYTES)
            {
                initVal->gtIntCon.gtIconVal = 0x01010101 * fill;
            }
            else
            {
                initVal->gtIntCon.gtIconVal = 0x0101010101010101LL * fill;
                initVal->gtType             = TYP_LONG;
            }
            blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll;
        }
        else
#endif // _TARGET_ARM64_
        {
            blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper;
        }
    }
    else
    {
        // CopyObj or CopyBlk
        // Sources are src and dest and size if not constant.
        if (blkNode->OperGet() == GT_STORE_OBJ)
        {
            // CopyObj
            GenTreeObj* objNode = blkNode->AsObj();
            unsigned    slots   = objNode->gtSlots;

#ifdef DEBUG
            // CpObj must always have at least one GC-Pointer as a member.
            assert(objNode->gtGcPtrCount > 0);
            assert(dstAddr->gtType == TYP_BYREF || dstAddr->gtType == TYP_I_IMPL);

            CORINFO_CLASS_HANDLE clsHnd    = objNode->gtClass;
            size_t               classSize = compiler->info.compCompHnd->getClassSize(clsHnd);
            size_t               blkSize   = roundUp(classSize, TARGET_POINTER_SIZE);

            // Currently, the EE always rounds up a class data structure, so we are not handling the case
            // where we have a struct whose size is not a multiple of the pointer size. This behavior may
            // change in the future, so in order to keep things correct let's assert it just to be safe.
            // Going forward we should simply handle this case.
            assert(classSize == blkSize);
            assert((blkSize / TARGET_POINTER_SIZE) == slots);
            assert(objNode->HasGCPtr());
#endif

            blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll;
        }
        else // CopyBlk
        {
            // In case of a CpBlk with a constant size and less than CPBLK_UNROLL_LIMIT size
            // we should unroll the loop to improve CQ.
            // For reference see the code in lowerxarch.cpp.
            if ((size != 0) && (size <= CPBLK_UNROLL_LIMIT))
            {
                blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll;
            }
            else
            {
                // In case we have a constant integer this means we went beyond
                // CPBLK_UNROLL_LIMIT bytes of size; still, we should never have the case of
                // any GC-Pointers in the src struct.
                blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper;
            }
        }

        // CopyObj or CopyBlk
        if (source->gtOper == GT_IND)
        {
            MakeSrcContained(blkNode, source);
        }
        else if (!source->IsMultiRegCall() && !source->OperIsSIMD())
        {
            assert(source->IsLocal());
            MakeSrcContained(blkNode, source);
        }
    }
}