//------------------------------------------------------------------------
// genFloatConst: Materialize a floating-point constant into an FP register.
//
// Arguments:
//    tree - The GT_CNS_DBL node holding the constant value.
//    pref - Register preference forwarded to RegSet::PickRegFloat when
//           choosing the destination register.
//
// Return Value:
//    None. The tree is marked as living in the chosen FP register.
//
// Notes:
//    The bit pattern of the constant is first loaded into integer register(s)
//    and then transferred to the FP register with a vmov. For TYP_DOUBLE the
//    value is split into two 32-bit words; the split deliberately uses a
//    32-bit element type rather than size_t so that it does not depend on the
//    pointer width of the machine the JIT itself runs on.
//    NOTE(review): word 0 is treated as the first vmov source exactly as the
//    previous code did, which presumes a little-endian host - confirm.
//
void CodeGen::genFloatConst(GenTree* tree, RegSet::RegisterPreference* pref)
{
    assert(tree->gtOper == GT_CNS_DBL);

    var_types type       = tree->gtType;
    double    constValue = tree->gtDblCon.gtDconVal;

    regNumber dst = regSet.PickRegFloat(type, pref);

    if (type == TYP_FLOAT)
    {
        regNumber reg = regSet.rsPickReg();

        // Narrow to float and load its raw 32-bit pattern into an integer register.
        float f = forceCastToFloat(constValue);
        genSetRegToIcon(reg, *((int*)(&f)));
        getEmitter()->emitIns_R_R(INS_vmov_i2f, EA_4BYTE, dst, reg);
    }
    else
    {
        assert(type == TYP_DOUBLE);

        // View the 64-bit double as exactly two 32-bit words.
        static_assert(sizeof(double) == 2 * sizeof(int), "double must be exactly two 32-bit words");
        int* cv = (int*)&constValue;

        regNumber reg1 = regSet.rsPickReg();
        regNumber reg2 = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(reg1));

        genSetRegToIcon(reg1, cv[0]);

        // Keep reg1 locked while reg2 is being loaded, then release it.
        regSet.rsLockReg(genRegMask(reg1));
        genSetRegToIcon(reg2, cv[1]);
        regSet.rsUnlockReg(genRegMask(reg1));

        getEmitter()->emitIns_R_R_R(INS_vmov_i2d, EA_8BYTE, dst, reg1, reg2);
    }

    genMarkTreeInReg(tree, dst);
}
//------------------------------------------------------------------------ // TreeNodeInfoInitPutArgReg: Set the NodeInfo for a PUTARG_REG. // // Arguments: // node - The PUTARG_REG node. // argReg - The register in which to pass the argument. // info - The info for the node's using call. // isVarArgs - True if the call uses a varargs calling convention. // callHasFloatRegArgs - Set to true if this PUTARG_REG uses an FP register. // // Return Value: // None. // void Lowering::TreeNodeInfoInitPutArgReg( GenTreeUnOp* node, regNumber argReg, TreeNodeInfo& info, bool isVarArgs, bool* callHasFloatRegArgs) { assert(node != nullptr); assert(node->OperIsPutArgReg()); assert(argReg != REG_NA); // Each register argument corresponds to one source. info.srcCount++; // Set the register requirements for the node. regMaskTP argMask = genRegMask(argReg); #ifdef ARM_SOFTFP // If type of node is `long` then it is actually `double`. // The actual `long` types must have been transformed as a field list with two fields. if (node->TypeGet() == TYP_LONG) { info.srcCount++; assert(genRegArgNext(argReg) == REG_NEXT(argReg)); argMask |= genRegMask(REG_NEXT(argReg)); } #endif // ARM_SOFTFP node->gtLsraInfo.setDstCandidates(m_lsra, argMask); node->gtLsraInfo.setSrcCandidates(m_lsra, argMask); // To avoid redundant moves, have the argument operand computed in the // register in which the argument is passed to the call. node->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(m_lsra, m_lsra->getUseCandidates(node)); *callHasFloatRegArgs |= varTypeIsFloating(node->TypeGet()); }
//------------------------------------------------------------------------ // Compiler::unwindPush: Record a push/save of a register. // // Arguments: // reg - The register being pushed/saved. // void Compiler::unwindPush(regNumber reg) { assert(compGeneratingProlog); FuncInfoDsc* func = funCurrentFunc(); assert(func->unwindHeader.Version == 1); // Can't call this before unwindBegProlog assert(func->unwindHeader.CountOfUnwindCodes == 0); // Can't call this after unwindReserve assert(func->unwindCodeSlot > sizeof(UNWIND_CODE)); UNWIND_CODE * code = (UNWIND_CODE*)&func->unwindCodes[func->unwindCodeSlot -= sizeof(UNWIND_CODE)]; unsigned int cbProlog = unwindGetCurrentOffset(func); noway_assert((BYTE)cbProlog == cbProlog); code->CodeOffset = (BYTE)cbProlog; if ((RBM_CALLEE_SAVED & genRegMask(reg)) #if ETW_EBP_FRAMED // In case of ETW_EBP_FRAMED defined the REG_FPBASE (RBP) // is excluded from the callee-save register list. // Make sure the register gets PUSH unwind info in this case, // since it is pushed as a frame register. || (reg == REG_FPBASE) #endif // ETW_EBP_FRAMED ) { code->UnwindOp = UWOP_PUSH_NONVOL; code->OpInfo = (BYTE)reg; } else { // Push of a volatile register is just a small stack allocation code->UnwindOp = UWOP_ALLOC_SMALL; code->OpInfo = 0; } }
//------------------------------------------------------------------------
// Compiler::unwindPushPopCFI: Emit CFI codes describing a register push/pop.
//
// Arguments:
//    reg - The register being pushed or popped.
//
// Notes:
//    While generating the prolog, a CFI_ADJUST_CFA_OFFSET code of one register
//    slot is emitted at the current prolog offset. A CFI_REL_OFFSET code is
//    emitted when the register is callee-saved (or, under UNIX_AMD64_ABI with
//    ETW_EBP_FRAMED, when it is the frame base register).
//    NOTE(review): on ARM the function asserts it runs during epilog
//    generation, in which case the CFA adjustment is skipped and offset 0 is
//    used - confirm this is intended.
//
void Compiler::unwindPushPopCFI(regNumber reg)
{
#if defined(_TARGET_ARM_)
    assert(compGeneratingEpilog);
#else
    assert(compGeneratingProlog);
#endif

    FuncInfoDsc* func       = funCurrentFunc();
    unsigned int codeOffset = 0;

    if (compGeneratingProlog)
    {
        codeOffset = unwindGetCurrentOffset(func);
        noway_assert((BYTE)codeOffset == codeOffset);

        createCfiCode(func, codeOffset, CFI_ADJUST_CFA_OFFSET, DWARF_REG_ILLEGAL, REGSIZE_BYTES == 8 ? 8 : 4);
    }

    bool recordRelOffset = (RBM_CALLEE_SAVED & genRegMask(reg)) != 0;
#if defined(UNIX_AMD64_ABI)
#if ETW_EBP_FRAMED
    // In case of ETW_EBP_FRAMED defined the REG_FPBASE (RBP)
    // is excluded from the callee-save register list.
    // Make sure the register gets PUSH unwind info in this case,
    // since it is pushed as a frame register.
    recordRelOffset = recordRelOffset || (reg == REG_FPBASE);
#endif // ETW_EBP_FRAMED
#endif // UNIX_AMD64_ABI

    if (recordRelOffset)
    {
        createCfiCode(func, codeOffset, CFI_REL_OFFSET, mapRegNumToDwarfReg(reg));
    }
}
//------------------------------------------------------------------------
// genMakeAddressableFloat: Make a floating-point operand usable as an
//    instruction operand, either in place or by evaluating it into a register.
//
// Arguments:
//    tree                     - The operand tree.
//    regMaskIntPtr            - [out] Integer registers tied up by the result; zeroed on entry.
//    regMaskFltPtr            - [out] Float registers tied up by the result; zeroed on entry.
//    bCollapseConstantDoubles - Not consulted by this function.
//
// Return Value:
//    The tree the caller should use as the operand. This is `tree` itself in
//    every case, including a GT_IND whose address had to be evaluated into an
//    integer register first.
//
GenTreePtr CodeGen::genMakeAddressableFloat(GenTreePtr tree,
                                            regMaskTP* regMaskIntPtr,
                                            regMaskTP* regMaskFltPtr,
                                            bool       bCollapseConstantDoubles)
{
    *regMaskFltPtr = 0;
    *regMaskIntPtr = 0;

    switch (tree->OperGet())
    {
        case GT_LCL_VAR:
            genMarkLclVar(tree);
            return tree;

        case GT_REG_VAR:
        case GT_LCL_FLD:
        case GT_CLS_VAR:
            // Usable as-is.
            return tree;

        case GT_IND:
        {
            // Try to make the address directly addressable
            const bool addressable = genMakeIndAddrMode(tree->gtOp.gtOp1, tree, false, RBM_ALLFLOAT,
                                                        RegSet::KEEP_REG, regMaskIntPtr, false);
            if (addressable)
            {
                genUpdateLife(tree);
                return tree;
            }

            // Otherwise evaluate the address into a register, keep that
            // register in use on behalf of the indirection, and report it.
            GenTreePtr indir    = tree;
            GenTreePtr addrTree = indir->gtOp.gtOp1;

            genCodeForTree(addrTree, 0);
            regSet.rsMarkRegUsed(addrTree, indir);

            *regMaskIntPtr = genRegMask(addrTree->gtRegNum);
            return indir;
        }

        default:
            break;
    }

    // Everything else: evaluate into a float register and report that register.
    genCodeForTreeFloat(tree);
    regSet.SetUsedRegFloat(tree, true);

    *regMaskFltPtr = genRegMaskFloat(tree->gtRegNum, tree->TypeGet());
    return tree;
}
//------------------------------------------------------------------------
// gcMarkRegPtrVal: For a GC-typed tree that lives in a register, pass that
//    register's mask to gcMarkRegSetNpt.
//
// Arguments:
//    tree - The tree to examine. Trees whose type is not a GC type are ignored.
//
// Notes:
//    A GT_LCL_VAR is first run through genMarkLclVar before the in-register
//    check is made.
//
void GCInfo::gcMarkRegPtrVal(GenTreePtr tree)
{
    if (!varTypeIsGC(tree->TypeGet()))
    {
        return;
    }

    if (tree->gtOper == GT_LCL_VAR)
    {
        compiler->codeGen->genMarkLclVar(tree);
    }

    if (!tree->InReg())
    {
        return;
    }

    gcMarkRegSetNpt(genRegMask(tree->gtRegNum));
}
//------------------------------------------------------------------------
// Compiler::unwindSaveRegCFI: Emit a CFI_REL_OFFSET code for a register save.
//
// Arguments:
//    reg    - The register being saved. Registers that are not in
//             RBM_CALLEE_SAVED produce no CFI code.
//    offset - The offset value recorded in the CFI code.
//
void Compiler::unwindSaveRegCFI(regNumber reg, unsigned offset)
{
    assert(compGeneratingProlog);

    const bool isCalleeSaved = (RBM_CALLEE_SAVED & genRegMask(reg)) != 0;
    if (!isCalleeSaved)
    {
        return;
    }

    FuncInfoDsc* func = funCurrentFunc();

    // The prolog offset must be representable in a single byte.
    unsigned int prologOffset = unwindGetCurrentOffset(func);
    noway_assert((BYTE)prologOffset == prologOffset);

    createCfiCode(func, prologOffset, CFI_REL_OFFSET, mapRegNumToDwarfReg(reg), offset);
}
//------------------------------------------------------------------------
// gcMarkRegPtrVal: Update the register GC tracking sets for a register
//    according to the type of the value it now holds.
//
// Arguments:
//    reg  - The register.
//    type - The type of the value in the register. TYP_REF goes to
//           gcMarkRegSetGCref, TYP_BYREF to gcMarkRegSetByref, and every
//           other type to gcMarkRegSetNpt.
//
void GCInfo::gcMarkRegPtrVal(regNumber reg, var_types type)
{
    const regMaskTP mask = genRegMask(reg);

    if (type == TYP_REF)
    {
        gcMarkRegSetGCref(mask);
    }
    else if (type == TYP_BYREF)
    {
        gcMarkRegSetByref(mask);
    }
    else
    {
        gcMarkRegSetNpt(mask);
    }
}
//------------------------------------------------------------------------
// Compiler::unwindPushPopMaskCFI: Call unwindPushPopCFI for every register
//    whose bit is set in a register mask.
//
// Arguments:
//    regMask - The set of registers to report.
//    isFloat - When true the walk starts at REG_FP_FIRST; otherwise it starts
//              at REG_FIRST with mask bit 1.
//
// Notes:
//    Registers are visited in increasing order, with the mask bit shifted left
//    once per register. The walk stops as soon as the current bit exceeds
//    the mask, since no higher bit can be set.
//
void Compiler::unwindPushPopMaskCFI(regMaskTP regMask, bool isFloat)
{
    regNumber curReg;
    regMaskTP curBit;

    if (isFloat)
    {
        curReg = REG_FP_FIRST;
        curBit = genRegMask(REG_FP_FIRST);
    }
    else
    {
        curReg = REG_FIRST;
        curBit = 1;
    }

    while ((curReg < REG_COUNT) && !(curBit > regMask))
    {
        if ((curBit & regMask) != 0)
        {
            unwindPushPopCFI(curReg);
        }

        curReg = REG_NEXT(curReg);
        curBit <<= 1;
    }
}
//------------------------------------------------------------------------ // Compiler::unwindSaveReg: Record a register save. // // Arguments: // reg - The register being saved. // offset - The offset from the current stack pointer where the register is being saved. // void Compiler::unwindSaveReg(regNumber reg, unsigned offset) { assert(compGeneratingProlog); FuncInfoDsc* func = funCurrentFunc(); assert(func->unwindHeader.Version == 1); // Can't call this before unwindBegProlog assert(func->unwindHeader.CountOfUnwindCodes == 0); // Can't call this after unwindReserve if (RBM_CALLEE_SAVED & genRegMask(reg)) { UNWIND_CODE * code; if (offset < 0x80000) { assert(func->unwindCodeSlot > (sizeof(UNWIND_CODE) + sizeof(USHORT))); USHORT * codedSize = (USHORT*)&func->unwindCodes[func->unwindCodeSlot -= sizeof(USHORT)]; code = (UNWIND_CODE*)&func->unwindCodes[func->unwindCodeSlot -= sizeof(UNWIND_CODE)]; // As per AMD64 ABI, if saving entire xmm reg, then offset need to be scaled by 16. if (genIsValidFloatReg(reg)) { *codedSize = (USHORT) (offset/16); code->UnwindOp = UWOP_SAVE_XMM128; } else { *codedSize = (USHORT) (offset/8); code->UnwindOp = UWOP_SAVE_NONVOL; } } else { assert(func->unwindCodeSlot > (sizeof(UNWIND_CODE) + sizeof(ULONG))); ULONG * codedSize = (ULONG*)&func->unwindCodes[func->unwindCodeSlot -= sizeof(ULONG)]; *codedSize = offset; code = (UNWIND_CODE*)&func->unwindCodes[func->unwindCodeSlot -= sizeof(UNWIND_CODE)]; code->UnwindOp = (genIsValidFloatReg(reg)) ? UWOP_SAVE_XMM128_FAR : UWOP_SAVE_NONVOL_FAR; } code->OpInfo = (BYTE)reg; unsigned int cbProlog = unwindGetCurrentOffset(func); noway_assert((BYTE)cbProlog == cbProlog); code->CodeOffset = (BYTE)cbProlog; } }
//------------------------------------------------------------------------ // TreeNodeInfoInitPutArgReg: Set the NodeInfo for a PUTARG_REG. // // Arguments: // node - The PUTARG_REG node. // argReg - The register in which to pass the argument. // info - The info for the node's using call. // isVarArgs - True if the call uses a varargs calling convention. // callHasFloatRegArgs - Set to true if this PUTARG_REG uses an FP register. // // Return Value: // None. // void Lowering::TreeNodeInfoInitPutArgReg( GenTreeUnOp* node, regNumber argReg, TreeNodeInfo& info, bool isVarArgs, bool* callHasFloatRegArgs) { assert(node != nullptr); assert(node->OperIsPutArgReg()); assert(argReg != REG_NA); // Each register argument corresponds to one source. info.srcCount++; // Set the register requirements for the node. const regMaskTP argMask = genRegMask(argReg); node->gtLsraInfo.setDstCandidates(m_lsra, argMask); node->gtLsraInfo.setSrcCandidates(m_lsra, argMask); // To avoid redundant moves, have the argument operand computed in the // register in which the argument is passed to the call. node->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(m_lsra, m_lsra->getUseCandidates(node)); *callHasFloatRegArgs |= varTypeIsFloating(node->TypeGet()); }
//------------------------------------------------------------------------ // TreeNodeInfoInitPutArgSplit: Set the NodeInfo for a GT_PUTARG_SPLIT node // // Arguments: // argNode - a GT_PUTARG_SPLIT node // // Return Value: // None. // // Notes: // Set the child node(s) to be contained // void Lowering::TreeNodeInfoInitPutArgSplit(GenTreePutArgSplit* argNode, TreeNodeInfo& info, fgArgTabEntryPtr argInfo) { assert(argNode->gtOper == GT_PUTARG_SPLIT); GenTreePtr putArgChild = argNode->gtOp.gtOp1; // Registers for split argument corresponds to source argNode->gtLsraInfo.dstCount = argInfo->numRegs; info.srcCount += argInfo->numRegs; regNumber argReg = argInfo->regNum; regMaskTP argMask = RBM_NONE; for (unsigned i = 0; i < argInfo->numRegs; i++) { argMask |= genRegMask((regNumber)((unsigned)argReg + i)); } argNode->gtLsraInfo.setDstCandidates(m_lsra, argMask); if (putArgChild->OperGet() == GT_FIELD_LIST) { // Generated code: // 1. Consume all of the items in the GT_FIELD_LIST (source) // 2. Store to target slot and move to target registers (destination) from source // argNode->gtLsraInfo.srcCount = argInfo->numRegs + argInfo->numSlots; // To avoid redundant moves, have the argument operand computed in the // register in which the argument is passed to the call. 
GenTreeFieldList* fieldListPtr = putArgChild->AsFieldList(); for (unsigned idx = 0; fieldListPtr != nullptr; fieldListPtr = fieldListPtr->Rest(), idx++) { if (idx < argInfo->numRegs) { GenTreePtr node = fieldListPtr->gtGetOp1(); node->gtLsraInfo.setSrcCandidates(m_lsra, genRegMask((regNumber)((unsigned)argReg + idx))); } } putArgChild->SetContained(); } else { assert(putArgChild->TypeGet() == TYP_STRUCT); assert(putArgChild->OperGet() == GT_OBJ); // We could use a ldr/str sequence so we need a internal register argNode->gtLsraInfo.srcCount = 1; argNode->gtLsraInfo.internalIntCount = 1; regMaskTP internalMask = RBM_ALLINT & ~argMask; argNode->gtLsraInfo.setInternalCandidates(m_lsra, internalMask); GenTreePtr objChild = putArgChild->gtOp.gtOp1; if (objChild->OperGet() == GT_LCL_VAR_ADDR) { // We will generate all of the code for the GT_PUTARG_SPLIT, the GT_OBJ and the GT_LCL_VAR_ADDR // as one contained operation // MakeSrcContained(putArgChild, objChild); putArgChild->gtLsraInfo.srcCount--; } argNode->gtLsraInfo.srcCount = putArgChild->gtLsraInfo.srcCount; MakeSrcContained(argNode, putArgChild); } }
//------------------------------------------------------------------------ // TreeNodeInfoInitCall: Set the NodeInfo for a call. // // Arguments: // call - The call node of interest // // Return Value: // None. // void Lowering::TreeNodeInfoInitCall(GenTreeCall* call) { TreeNodeInfo* info = &(call->gtLsraInfo); LinearScan* l = m_lsra; Compiler* compiler = comp; bool hasMultiRegRetVal = false; ReturnTypeDesc* retTypeDesc = nullptr; info->srcCount = 0; if (call->TypeGet() != TYP_VOID) { hasMultiRegRetVal = call->HasMultiRegRetVal(); if (hasMultiRegRetVal) { // dst count = number of registers in which the value is returned by call retTypeDesc = call->GetReturnTypeDesc(); info->dstCount = retTypeDesc->GetReturnRegCount(); } else { info->dstCount = 1; } } else { info->dstCount = 0; } GenTree* ctrlExpr = call->gtControlExpr; if (call->gtCallType == CT_INDIRECT) { // either gtControlExpr != null or gtCallAddr != null. // Both cannot be non-null at the same time. assert(ctrlExpr == nullptr); assert(call->gtCallAddr != nullptr); ctrlExpr = call->gtCallAddr; } // set reg requirements on call target represented as control sequence. if (ctrlExpr != nullptr) { // we should never see a gtControlExpr whose type is void. assert(ctrlExpr->TypeGet() != TYP_VOID); info->srcCount++; // In case of fast tail implemented as jmp, make sure that gtControlExpr is // computed into a register. if (call->IsFastTailCall()) { NYI_ARM("tail call"); #ifdef _TARGET_ARM64_ // Fast tail call - make sure that call target is always computed in IP0 // so that epilog sequence can generate "br xip0" to achieve fast tail call. ctrlExpr->gtLsraInfo.setSrcCandidates(l, genRegMask(REG_IP0)); #endif // _TARGET_ARM64_ } } #ifdef _TARGET_ARM_ else { info->internalIntCount = 1; } #endif // _TARGET_ARM_ RegisterType registerType = call->TypeGet(); // Set destination candidates for return value of the call. 
#ifdef _TARGET_ARM_ if (call->IsHelperCall(compiler, CORINFO_HELP_INIT_PINVOKE_FRAME)) { // The ARM CORINFO_HELP_INIT_PINVOKE_FRAME helper uses a custom calling convention that returns with // TCB in REG_PINVOKE_TCB. fgMorphCall() sets the correct argument registers. info->setDstCandidates(l, RBM_PINVOKE_TCB); } else #endif // _TARGET_ARM_ if (hasMultiRegRetVal) { assert(retTypeDesc != nullptr); info->setDstCandidates(l, retTypeDesc->GetABIReturnRegs()); } else if (varTypeIsFloating(registerType)) { info->setDstCandidates(l, RBM_FLOATRET); } else if (registerType == TYP_LONG) { info->setDstCandidates(l, RBM_LNGRET); } else { info->setDstCandidates(l, RBM_INTRET); } // If there is an explicit this pointer, we don't want that node to produce anything // as it is redundant if (call->gtCallObjp != nullptr) { GenTreePtr thisPtrNode = call->gtCallObjp; if (thisPtrNode->gtOper == GT_PUTARG_REG) { l->clearOperandCounts(thisPtrNode); thisPtrNode->SetContained(); l->clearDstCount(thisPtrNode->gtOp.gtOp1); } else { l->clearDstCount(thisPtrNode); } } // First, count reg args bool callHasFloatRegArgs = false; for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext()) { assert(list->OperIsList()); GenTreePtr argNode = list->Current(); fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, argNode); assert(curArgTabEntry); if (curArgTabEntry->regNum == REG_STK) { // late arg that is not passed in a register assert(argNode->gtOper == GT_PUTARG_STK); TreeNodeInfoInitPutArgStk(argNode->AsPutArgStk(), curArgTabEntry); continue; } // A GT_FIELD_LIST has a TYP_VOID, but is used to represent a multireg struct if (argNode->OperGet() == GT_FIELD_LIST) { argNode->SetContained(); // There could be up to 2-4 PUTARG_REGs in the list (3 or 4 can only occur for HFAs) regNumber argReg = curArgTabEntry->regNum; for (GenTreeFieldList* entry = argNode->AsFieldList(); entry != nullptr; entry = entry->Rest()) { TreeNodeInfoInitPutArgReg(entry->Current()->AsUnOp(), argReg, 
*info, false, &callHasFloatRegArgs); // Update argReg for the next putarg_reg (if any) argReg = genRegArgNext(argReg); #if defined(_TARGET_ARM_) // A double register is modelled as an even-numbered single one if (entry->Current()->TypeGet() == TYP_DOUBLE) { argReg = genRegArgNext(argReg); } #endif // _TARGET_ARM_ } } #ifdef _TARGET_ARM_ else if (argNode->OperGet() == GT_PUTARG_SPLIT) { fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, argNode); TreeNodeInfoInitPutArgSplit(argNode->AsPutArgSplit(), *info, curArgTabEntry); } #endif else { TreeNodeInfoInitPutArgReg(argNode->AsUnOp(), curArgTabEntry->regNum, *info, false, &callHasFloatRegArgs); } } // Now, count stack args // Note that these need to be computed into a register, but then // they're just stored to the stack - so the reg doesn't // need to remain live until the call. In fact, it must not // because the code generator doesn't actually consider it live, // so it can't be spilled. GenTreePtr args = call->gtCallArgs; while (args) { GenTreePtr arg = args->gtOp.gtOp1; // Skip arguments that have been moved to the Late Arg list if (!(args->gtFlags & GTF_LATE_ARG)) { if (arg->gtOper == GT_PUTARG_STK) { fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, arg); assert(curArgTabEntry); assert(curArgTabEntry->regNum == REG_STK); TreeNodeInfoInitPutArgStk(arg->AsPutArgStk(), curArgTabEntry); } #ifdef _TARGET_ARM_ else if (arg->OperGet() == GT_PUTARG_SPLIT) { fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, arg); TreeNodeInfoInitPutArgSplit(arg->AsPutArgSplit(), *info, curArgTabEntry); } #endif else { TreeNodeInfo* argInfo = &(arg->gtLsraInfo); if (argInfo->dstCount != 0) { argInfo->isLocalDefUse = true; } argInfo->dstCount = 0; } } args = args->gtOp.gtOp2; } // If it is a fast tail call, it is already preferenced to use IP0. // Therefore, no need set src candidates on call tgt again. 
if (call->IsVarargs() && callHasFloatRegArgs && !call->IsFastTailCall() && (ctrlExpr != nullptr)) { NYI_ARM("float reg varargs"); // Don't assign the call target to any of the argument registers because // we will use them to also pass floating point arguments as required // by Arm64 ABI. ctrlExpr->gtLsraInfo.setSrcCandidates(l, l->allRegs(TYP_INT) & ~(RBM_ARG_REGS)); } #ifdef _TARGET_ARM_ if (call->NeedsNullCheck()) { info->internalIntCount++; } #endif // _TARGET_ARM_ }
void dspRegMask(regMaskTP regMask, size_t minSiz) { const char* sep = ""; printf("["); bool inRegRange = false; regNumber regPrev = REG_NA; regNumber regHead = REG_NA; // When we start a range, remember the first register of the range, so we don't use range notation if the range contains just a single register. for (regNumber regNum = REG_INT_FIRST; regNum <= REG_INT_LAST; regNum = REG_NEXT(regNum)) { regMaskTP regBit = genRegMask(regNum); if ((regMask & regBit) != 0) { // We have a register to display. It gets displayed now if: // 1. This is the first register to display of a new range of registers (possibly because // no register has ever been displayed). // 2. This is the last register of an acceptable range (either the last integer register, // or the last of a range that is displayed with range notation). if (!inRegRange) { // It's the first register of a potential range. const char* nam = getRegName(regNum); printf("%s%s", sep, nam); minSiz -= strlen(sep) + strlen(nam); // By default, we're not starting a potential register range. sep = " "; // What kind of separator should we use for this range (if it is indeed going to be a range)? #if defined(_TARGET_AMD64_) // For AMD64, create ranges for int registers R8 through R15, but not the "old" registers. if (regNum >= REG_R8) { regHead = regNum; inRegRange = true; sep = "-"; } #elif defined(_TARGET_ARM64_) // R17 and R28 can't be the start of a range, since the range would include TEB or FP if ((regNum < REG_R17) || ((REG_R19 <= regNum) && (regNum < REG_R28))) { regHead = regNum; inRegRange = true; sep = "-"; } #elif defined(_TARGET_ARM_) if (regNum < REG_R12) { regHead = regNum; inRegRange = true; sep = "-"; } #elif defined(_TARGET_X86_) // No register ranges #else // _TARGET_* #error Unsupported or unset target architecture #endif // _TARGET_* } // We've already printed a register. Is this the end of a range? 
#if defined(_TARGET_ARM64_) else if ((regNum == REG_INT_LAST) || (regNum == REG_R17) // last register before TEB || (regNum == REG_R28)) // last register before FP #else // _TARGET_ARM64_ else if (regNum == REG_INT_LAST) #endif // _TARGET_ARM64_ { const char* nam = getRegName(regNum); printf("%s%s", sep, nam); minSiz -= strlen(sep) + strlen(nam); inRegRange = false; // No longer in the middle of a register range regHead = REG_NA; sep = " "; } } else // ((regMask & regBit) == 0) { if (inRegRange) { assert(regHead != REG_NA); if (regPrev != regHead) { // Close out the previous range, if it included more than one register. const char* nam = getRegName(regPrev); printf("%s%s", sep, nam); minSiz -= strlen(sep) + strlen(nam); } sep = " "; inRegRange = false; regHead = REG_NA; } } if (regBit > regMask) break; regPrev = regNum; }
//------------------------------------------------------------------------
// genLoadFloat: Load a floating-point value described by a tree into a
//    given FP register.
//
// Arguments:
//    tree - The source of the value: an enregistered variable, or a memory
//           operand (GT_IND, GT_CLS_VAR, GT_LCL_FLD, ...).
//    reg  - The destination floating-point register.
//
// Notes:
//    - An enregistered variable is unspilled if necessary and then copied.
//    - A load is treated as unaligned when a GT_IND/GT_CLS_VAR carries
//      GTF_IND_UNALIGNED, or when a floating-point GT_LCL_FLD has an offset
//      that is not a multiple of its size. Such loads are done as one or two
//      32-bit integer loads followed by a vmov into the FP register.
//    - A GT_CLS_VAR/GT_IND marked GTF_IND_VOLATILE gets a memory barrier
//      after the load.
//
void CodeGen::genLoadFloat(GenTreePtr tree, regNumber reg)
{
    if (tree->IsRegVar())
    {
        // if it has been spilled, unspill it.
        LclVarDsc * varDsc = &compiler->lvaTable[tree->gtLclVarCommon.gtLclNum];
        if (varDsc->lvSpilled)
        {
            UnspillFloat(varDsc);
        }

        // Register-to-register copy from the variable's home register.
        inst_RV_RV(ins_FloatCopy(tree->TypeGet()), reg, tree->gtRegNum, tree->TypeGet());
    }
    else
    {
        // Decide whether the memory operand must be treated as unaligned.
        bool unalignedLoad = false;
        switch (tree->OperGet())
        {
        case GT_IND:
        case GT_CLS_VAR:
            if (tree->gtFlags & GTF_IND_UNALIGNED)
                unalignedLoad = true;
            break;
        case GT_LCL_FLD:
            // Check for a misalignment on a Floating Point field
            //
            if (varTypeIsFloating(tree->TypeGet()))
            {
                if ((tree->gtLclFld.gtLclOffs % emitTypeSize(tree->TypeGet())) != 0)
                {
                    unalignedLoad = true;
                }
            }
            break;
        default:
            break;
        }

        if (unalignedLoad)
        {
            // Make the target addressable
            //
            regMaskTP   addrReg   = genMakeAddressable(tree, 0, RegSet::KEEP_REG, true);
            regSet.rsLockUsedReg(addrReg);  // Must prevent regSet.rsGrabReg from choosing an addrReg

            var_types loadType = tree->TypeGet();
            assert(loadType == TYP_DOUBLE || loadType == TYP_FLOAT);

            // Unaligned Floating-Point Loads must be loaded into integer register(s)
            // and then moved over to the Floating-Point register
            regNumber  intRegLo    = regSet.rsGrabReg(RBM_ALLINT);
            regNumber  intRegHi    = REG_NA;
            regMaskTP  tmpLockMask = genRegMask(intRegLo);

            if (loadType == TYP_DOUBLE)
            {
                // A double needs a second, distinct integer register for the upper word.
                intRegHi     = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(intRegLo));
                tmpLockMask |= genRegMask(intRegHi);
            }

            regSet.rsLockReg(tmpLockMask);     // Temporarily lock the intRegs
            tree->gtType = TYP_INT;            // Temporarily change the type to TYP_INT

            // Load the word at the operand's address, then (for a double) the word 4 bytes further on.
            inst_RV_TT(ins_Load(TYP_INT), intRegLo, tree);
            regTracker.rsTrackRegTrash(intRegLo);

            if (loadType == TYP_DOUBLE)
            {
                inst_RV_TT(ins_Load(TYP_INT), intRegHi, tree, 4);
                regTracker.rsTrackRegTrash(intRegHi);
            }

            tree->gtType = loadType;           // Change the type back to the floating point type
            regSet.rsUnlockReg(tmpLockMask);   // Unlock the intRegs

            // move the integer register(s) over to the FP register
            //
            if (loadType == TYP_DOUBLE)
                getEmitter()->emitIns_R_R_R(INS_vmov_i2d, EA_8BYTE, reg, intRegLo, intRegHi);
            else
                getEmitter()->emitIns_R_R(INS_vmov_i2f, EA_4BYTE, reg, intRegLo);

            // Free up anything that was tied up by genMakeAddressable
            //
            regSet.rsUnlockUsedReg(addrReg);
            genDoneAddressable(tree, addrReg, RegSet::KEEP_REG);
        }
        else
        {
            // Aligned case: a single floating-point load straight into the destination.
            inst_RV_TT(ins_FloatLoad(tree->TypeGet()), reg, tree);
        }

        if (((tree->OperGet() == GT_CLS_VAR) || (tree->OperGet() == GT_IND)) &&
            (tree->gtFlags & GTF_IND_VOLATILE))
        {
            // Emit a memory barrier instruction after the load
            instGen_MemoryBarrier();
        }
    }
}
//------------------------------------------------------------------------
// genFloatAssign: Generate code for a floating-point assignment.
//
// Arguments:
//    tree - The GT_ASG node; op1 is the target and op2 the value assigned.
//
// Notes:
//    The function handles, in order:
//      1. A target that is an enregistered local: op2 is evaluated with a
//         preference for the target's register and moved/converted there if
//         it ended up elsewhere. This path jumps straight to DONE_ASG.
//      2. A value that is already an enregistered local of the same type
//         (and the operands are not reversed): only the target is made
//         addressable and the store is done from the variable's register.
//      3. The general case: op1 is made addressable and op2 evaluated, in the
//         order dictated by GTF_REVERSE_OPS, followed by an implicit
//         float/double conversion when the types differ.
//    Stores then go through CHK_VOLAT_UNALIGN, which emits a memory barrier
//    before a volatile store and performs an unaligned store as one or two
//    32-bit integer stores.
//
void CodeGen::genFloatAssign(GenTree *tree)
{
    var_types   type        = tree->TypeGet();
    GenTreePtr  op1         = tree->gtGetOp1();
    GenTreePtr  op2         = tree->gtGetOp2();

    regMaskTP   needRegOp1  = RBM_ALLINT;
    regMaskTP   addrReg     = RBM_NONE;
    bool        volat       = false;        // Is this a volatile store
    bool        unaligned   = false;        // Is this an unaligned store
    regNumber   op2reg      = REG_NA;

#ifdef DEBUGGING_SUPPORT
    // lvaCount acts as "no local recorded"; it is overwritten below only when
    // a scope check is needed after the assignment.
    unsigned    lclVarNum   = compiler->lvaCount;
    unsigned    lclILoffs   = DUMMY_INIT(0);
#endif

    noway_assert(tree->OperGet() == GT_ASG);

    // Is the target a floating-point local variable?
    //  possibly even an enregistered floating-point local variable?
    //
    switch (op1->gtOper)
    {
        unsigned        varNum;
        LclVarDsc   *   varDsc;

    case GT_LCL_FLD:
        // Check for a misalignment on a Floating Point field
        //
        if (varTypeIsFloating(op1->TypeGet()))
        {
            if ((op1->gtLclFld.gtLclOffs % emitTypeSize(op1->TypeGet())) != 0)
            {
                unaligned = true;
            }
        }
        break;

    case GT_LCL_VAR:
        varNum = op1->gtLclVarCommon.gtLclNum;
        noway_assert(varNum < compiler->lvaCount);
        varDsc = compiler->lvaTable + varNum;

#ifdef DEBUGGING_SUPPORT
        // For non-debuggable code, every definition of a lcl-var has
        // to be checked to see if we need to open a new scope for it.
        // Remember the local var info to call siCheckVarScope
        // AFTER code generation of the assignment.
        //
        if (compiler->opts.compScopeInfo && !compiler->opts.compDbgCode && (compiler->info.compVarScopesCount > 0))
        {
            lclVarNum = varNum;
            lclILoffs = op1->gtLclVar.gtLclILoffs;
        }
#endif

        // Dead Store assert (with min opts we may have dead stores)
        //
        noway_assert(!varDsc->lvTracked || compiler->opts.MinOpts() || !(op1->gtFlags & GTF_VAR_DEATH));

        // Does this variable live in a register?
        //
        if (genMarkLclVar(op1))
        {
            noway_assert(!compiler->opts.compDbgCode);   // We don't enregister any floats with debug codegen

            // Get hold of the target register
            //
            regNumber op1Reg = op1->gtRegVar.gtRegNum;

            // the variable being assigned should be dead in op2
            assert(!varDsc->lvTracked || !VarSetOps::IsMember(compiler, genUpdateLiveSetForward(op2), varDsc->lvVarIndex));

            // Setup register preferencing, so that we try to target the op1 enregistered variable
            //
            regMaskTP bestMask = genRegMask(op1Reg);
            if (type==TYP_DOUBLE)
            {
                // A double also covers the register following op1Reg.
                assert((bestMask & RBM_DBL_REGS) != 0);
                bestMask |= genRegMask(REG_NEXT(op1Reg));
            }
            RegSet::RegisterPreference pref(RBM_ALLFLOAT, bestMask);

            // Evaluate op2 into a floating point register
            //
            genCodeForTreeFloat(op2, &pref);

            noway_assert(op2->gtFlags & GTF_REG_VAL);

            // Make sure the value ends up in the right place ...
            // For example if op2 is a call that returns a result
            // in REG_F0, we will need to do a move instruction here
            //
            if ((op2->gtRegNum != op1Reg) || (op2->TypeGet() != type))
            {
                // Spill anything currently occupying the target register(s) before overwriting them.
                regMaskTP spillRegs = regSet.rsMaskUsed & genRegMaskFloat(op1Reg, op1->TypeGet());
                if  (spillRegs != 0)
                    regSet.rsSpillRegs(spillRegs);

                assert(type == op1->TypeGet());

                inst_RV_RV(ins_FloatConv(type, op2->TypeGet()), op1Reg, op2->gtRegNum, type);
            }
            genUpdateLife(op1);
            goto DONE_ASG;
        }
        break;

    case GT_CLS_VAR:
    case GT_IND:
        // Check for a volatile/unaligned store
        //
        assert((op1->OperGet() == GT_CLS_VAR) || (op1->OperGet() == GT_IND));   // Required for GTF_IND_VOLATILE flag to be valid
        if (op1->gtFlags & GTF_IND_VOLATILE)
            volat = true;
        if (op1->gtFlags & GTF_IND_UNALIGNED)
            unaligned = true;
        break;

    default:
        break;
    }

    // Is the value being assigned an enregistered floating-point local variable?
    //
    switch (op2->gtOper)
    {
    case GT_LCL_VAR:

        if  (!genMarkLclVar(op2))
            break;

        __fallthrough;

    case GT_REG_VAR:

        // We must honor the order of evaluation in case op1 reassigns our op2 register
        //
        if  (tree->gtFlags & GTF_REVERSE_OPS)
            break;

        // Is there an implicit conversion that we have to insert?
        // Handle this case with the normal cases below.
        //
        if (type != op2->TypeGet())
            break;

        // Make the target addressable
        //
        addrReg = genMakeAddressable(op1, needRegOp1, RegSet::KEEP_REG, true);

        noway_assert(op2->gtFlags & GTF_REG_VAL);
        noway_assert(op2->IsRegVar());

        op2reg = op2->gtRegVar.gtRegNum;
        genUpdateLife(op2);

        goto CHK_VOLAT_UNALIGN;
    default:
        break;
    }

    // Is the op2 (RHS) more complex than op1 (LHS)?
    //
    if  (tree->gtFlags & GTF_REVERSE_OPS)
    {
        regMaskTP bestRegs = regSet.rsNarrowHint(RBM_ALLFLOAT, ~op1->gtRsvdRegs);
        RegSet::RegisterPreference pref(RBM_ALLFLOAT, bestRegs);

        // Generate op2 (RHS) into a floating point register
        //
        genCodeForTreeFloat(op2, &pref);
        regSet.SetUsedRegFloat(op2, true);

        // Make the target addressable
        //
        addrReg = genMakeAddressable(op1, needRegOp1, RegSet::KEEP_REG, true);

        // op2 was marked used while op1 was evaluated; make sure its value is back in a float register.
        genRecoverReg(op2, RBM_ALLFLOAT, RegSet::KEEP_REG);
        noway_assert(op2->gtFlags & GTF_REG_VAL);
        regSet.SetUsedRegFloat(op2, false);
    }
    else
    {
        needRegOp1 = regSet.rsNarrowHint(needRegOp1, ~op2->gtRsvdRegs);

        // Make the target addressable
        //
        addrReg = genMakeAddressable(op1, needRegOp1, RegSet::KEEP_REG, true);

        // Generate the RHS into any floating point register
        genCodeForTreeFloat(op2);
    }
    noway_assert(op2->gtFlags & GTF_REG_VAL);

    op2reg = op2->gtRegNum;

    // Is there an implicit conversion that we have to insert?
    //
    if (type != op2->TypeGet())
    {
        // Prefer converting in place: target op2's own register, widened to
        // the neighbouring register when the result is a double.
        regMaskTP bestMask = genRegMask(op2reg);
        if (type==TYP_DOUBLE)
        {
            if (bestMask & RBM_DBL_REGS)
            {
                bestMask |= genRegMask(REG_NEXT(op2reg));
            }
            else
            {
                bestMask |= genRegMask(REG_PREV(op2reg));
            }
        }
        RegSet::RegisterPreference op2Pref(RBM_ALLFLOAT, bestMask);
        op2reg = regSet.PickRegFloat(type, &op2Pref);

        inst_RV_RV(ins_FloatConv(type, op2->TypeGet()), op2reg, op2->gtRegNum, type);
    }

    // Make sure the LHS is still addressable
    //
    addrReg = genKeepAddressable(op1, addrReg);

CHK_VOLAT_UNALIGN:

    regSet.rsLockUsedReg(addrReg);  // Must prevent unaligned regSet.rsGrabReg from choosing an addrReg

    if (volat)
    {
        // Emit a memory barrier instruction before the store
        instGen_MemoryBarrier();
    }
    if (unaligned)
    {
        var_types storeType = op1->TypeGet();
        assert(storeType == TYP_DOUBLE || storeType == TYP_FLOAT);

        // Unaligned Floating-Point Stores must be done using the integer register(s)
        regNumber  intRegLo    = regSet.rsGrabReg(RBM_ALLINT);
        regNumber  intRegHi    = REG_NA;
        regMaskTP  tmpLockMask = genRegMask(intRegLo);

        if (storeType == TYP_DOUBLE)
        {
            // A double needs a second, distinct integer register for the upper word.
            intRegHi     = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(intRegLo));
            tmpLockMask |= genRegMask(intRegHi);
        }

        // move the FP register over to the integer register(s)
        //
        if (storeType == TYP_DOUBLE)
        {
            getEmitter()->emitIns_R_R_R(INS_vmov_d2i, EA_8BYTE, intRegLo, intRegHi, op2reg);
            regTracker.rsTrackRegTrash(intRegHi);
        }
        else
        {
            getEmitter()->emitIns_R_R(INS_vmov_f2i, EA_4BYTE, intRegLo, op2reg);
        }
        regTracker.rsTrackRegTrash(intRegLo);

        regSet.rsLockReg(tmpLockMask);     // Temporarily lock the intRegs
        op1->gtType = TYP_INT;             // Temporarily change the type to TYP_INT

        // Store the first word at the target address, then (for a double) the second word 4 bytes further on.
        inst_TT_RV(ins_Store(TYP_INT), op1, intRegLo);

        if (storeType == TYP_DOUBLE)
        {
            inst_TT_RV(ins_Store(TYP_INT), op1, intRegHi, 4);
        }

        op1->gtType = storeType;           // Change the type back to the floating point type
        regSet.rsUnlockReg(tmpLockMask);   // Unlock the intRegs
    }
    else
    {
        // Move the value into the target
        //
        inst_TT_RV(ins_Store(op1->TypeGet()), op1, op2reg);
    }

    // Free up anything that was tied up by the LHS
    //
    regSet.rsUnlockUsedReg(addrReg);
    genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);

DONE_ASG:

    genUpdateLife(tree);

#ifdef DEBUGGING_SUPPORT
    /* For non-debuggable code, every definition of a lcl-var has
     * to be checked to see if we need to open a new scope for it.
     */
    if (lclVarNum < compiler->lvaCount)
        siCheckVarScope(lclVarNum, lclILoffs);
#endif
}
//------------------------------------------------------------------------
// PickRegFloatOtherThan: Pick a floating-point register, excluding one
//    specific register from consideration.
//
// Arguments:
//    type - The type of the value the register will hold; forwarded to PickRegFloat.
//    reg  - The register that must not be chosen.
//
// Return Value:
//    The register selected by PickRegFloat.
//
// Notes:
//    The exclusion is expressed as `RBM_ALLFLOAT & ~mask(reg)`. The earlier
//    XOR form gave the same result for a float `reg`, but would have *added*
//    `reg` to the candidate set if it was not a member of RBM_ALLFLOAT.
//    Only the single register `reg` is excluded.
//    NOTE(review): for a TYP_DOUBLE pair the second half of `reg` stays a
//    candidate, as before - confirm callers do not rely on excluding both.
//
regNumber RegSet::PickRegFloatOtherThan(var_types type, regNumber reg)
{
    RegisterPreference pref(RBM_ALLFLOAT & ~genRegMask(reg), 0);
    return PickRegFloat(type, &pref);
}
// The code to set the regState for each arg is outlined for shared use // by linear scan. (It is not shared for System V AMD64 platform.) regNumber Compiler::raUpdateRegStateForArg(RegState* regState, LclVarDsc* argDsc) { regNumber inArgReg = argDsc->lvArgReg; regMaskTP inArgMask = genRegMask(inArgReg); if (regState->rsIsFloat) { noway_assert(inArgMask & RBM_FLTARG_REGS); } else // regState is for the integer registers { // This might be the fixed return buffer register argument (on ARM64) // We check and allow inArgReg to be theFixedRetBuffReg if (hasFixedRetBuffReg() && (inArgReg == theFixedRetBuffReg())) { // We should have a TYP_BYREF or TYP_I_IMPL arg and not a TYP_STRUCT arg noway_assert(argDsc->lvType == TYP_BYREF || argDsc->lvType == TYP_I_IMPL); // We should have recorded the variable number for the return buffer arg noway_assert(info.compRetBuffArg != BAD_VAR_NUM); } else // we have a regular arg { noway_assert(inArgMask & RBM_ARG_REGS); } } regState->rsCalleeRegArgMaskLiveIn |= inArgMask; #ifdef _TARGET_ARM_ if (argDsc->lvType == TYP_DOUBLE) { if (info.compIsVarArgs || opts.compUseSoftFP) { assert((inArgReg == REG_R0) || (inArgReg == REG_R2)); assert(!regState->rsIsFloat); } else { assert(regState->rsIsFloat); assert(emitter::isDoubleReg(inArgReg)); } regState->rsCalleeRegArgMaskLiveIn |= genRegMask((regNumber)(inArgReg + 1)); } else if (argDsc->lvType == TYP_LONG) { assert((inArgReg == REG_R0) || (inArgReg == REG_R2)); assert(!regState->rsIsFloat); regState->rsCalleeRegArgMaskLiveIn |= genRegMask((regNumber)(inArgReg + 1)); } #endif // _TARGET_ARM_ #if FEATURE_MULTIREG_ARGS if (varTypeIsStruct(argDsc->lvType)) { if (argDsc->lvIsHfaRegArg()) { assert(regState->rsIsFloat); unsigned cSlots = GetHfaCount(argDsc->lvVerTypeInfo.GetClassHandleForValueClass()); for (unsigned i = 1; i < cSlots; i++) { assert(inArgReg + i <= LAST_FP_ARGREG); regState->rsCalleeRegArgMaskLiveIn |= genRegMask(static_cast<regNumber>(inArgReg + i)); } } else { unsigned cSlots = 
argDsc->lvSize() / TARGET_POINTER_SIZE; for (unsigned i = 1; i < cSlots; i++) { regNumber nextArgReg = (regNumber)(inArgReg + i); if (nextArgReg > REG_ARG_LAST) { break; } assert(regState->rsIsFloat == false); regState->rsCalleeRegArgMaskLiveIn |= genRegMask(nextArgReg); } } } #endif // FEATURE_MULTIREG_ARGS return inArgReg; }