//------------------------------------------------------------------------ // IsContainableImmed: Is an immediate encodable in-place? // // Return Value: // True if the immediate can be folded into an instruction, // for example small enough and non-relocatable. // // TODO-CQ: we can contain a floating point 0.0 constant in a compare instruction // (vcmp on arm, fcmp on arm64). // bool Lowering::IsContainableImmed(GenTree* parentNode, GenTree* childNode) { if (!varTypeIsFloating(parentNode->TypeGet())) { // Make sure we have an actual immediate if (!childNode->IsCnsIntOrI()) return false; if (childNode->gtIntCon.ImmedValNeedsReloc(comp)) return false; // TODO-CrossBitness: we wouldn't need the cast below if GenTreeIntCon::gtIconVal had target_ssize_t type. target_ssize_t immVal = (target_ssize_t)childNode->gtIntCon.gtIconVal; emitAttr attr = emitActualTypeSize(childNode->TypeGet()); emitAttr size = EA_SIZE(attr); #ifdef _TARGET_ARM_ insFlags flags = parentNode->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE; #endif switch (parentNode->OperGet()) { case GT_ADD: case GT_SUB: #ifdef _TARGET_ARM64_ case GT_CMPXCHG: case GT_LOCKADD: case GT_XADD: return comp->compSupports(InstructionSet_Atomics) ? false : emitter::emitIns_valid_imm_for_add(immVal, size); #elif defined(_TARGET_ARM_) return emitter::emitIns_valid_imm_for_add(immVal, flags); #endif break; #ifdef _TARGET_ARM64_ case GT_EQ: case GT_NE: case GT_LT: case GT_LE: case GT_GE: case GT_GT: return emitter::emitIns_valid_imm_for_cmp(immVal, size); case GT_AND: case GT_OR: case GT_XOR: case GT_TEST_EQ: case GT_TEST_NE: return emitter::emitIns_valid_imm_for_alu(immVal, size); case GT_JCMP: assert(((parentNode->gtFlags & GTF_JCMP_TST) == 0) ? 
(immVal == 0) : isPow2(immVal)); return true; #elif defined(_TARGET_ARM_) case GT_EQ: case GT_NE: case GT_LT: case GT_LE: case GT_GE: case GT_GT: case GT_CMP: case GT_AND: case GT_OR: case GT_XOR: return emitter::emitIns_valid_imm_for_alu(immVal); #endif // _TARGET_ARM_ #ifdef _TARGET_ARM64_ case GT_STORE_LCL_FLD: case GT_STORE_LCL_VAR: if (immVal == 0) return true; break; #endif default: break; } } return false; }
//------------------------------------------------------------------------ // IsContainableImmed: Is an immediate encodable in-place? // // Return Value: // True if the immediate can be folded into an instruction, // for example small enough and non-relocatable. bool Lowering::IsContainableImmed(GenTree* parentNode, GenTree* childNode) { if (varTypeIsFloating(parentNode->TypeGet())) { // We can contain a floating point 0.0 constant in a compare instruction switch (parentNode->OperGet()) { default: return false; case GT_EQ: case GT_NE: case GT_LT: case GT_LE: case GT_GE: case GT_GT: if (childNode->IsIntegralConst(0)) { // TODO-ARM-Cleanup: not tested yet. NYI_ARM("ARM IsContainableImmed for floating point type"); return true; } break; } } else { // Make sure we have an actual immediate if (!childNode->IsCnsIntOrI()) return false; if (childNode->IsIconHandle() && comp->opts.compReloc) return false; ssize_t immVal = childNode->gtIntCon.gtIconVal; emitAttr attr = emitActualTypeSize(childNode->TypeGet()); emitAttr size = EA_SIZE(attr); #ifdef _TARGET_ARM_ insFlags flags = parentNode->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE; #endif switch (parentNode->OperGet()) { default: return false; case GT_ADD: case GT_SUB: #ifdef _TARGET_ARM64_ case GT_CMPXCHG: case GT_LOCKADD: case GT_XADD: return emitter::emitIns_valid_imm_for_add(immVal, size); #elif defined(_TARGET_ARM_) return emitter::emitIns_valid_imm_for_add(immVal, flags); #endif break; #ifdef _TARGET_ARM64_ case GT_EQ: case GT_NE: case GT_LT: case GT_LE: case GT_GE: case GT_GT: return emitter::emitIns_valid_imm_for_cmp(immVal, size); break; case GT_AND: case GT_OR: case GT_XOR: case GT_TEST_EQ: case GT_TEST_NE: return emitter::emitIns_valid_imm_for_alu(immVal, size); break; case GT_JCMP: assert(((parentNode->gtFlags & GTF_JCMP_TST) == 0) ? 
(immVal == 0) : isPow2(immVal)); return true; break; #elif defined(_TARGET_ARM_) case GT_EQ: case GT_NE: case GT_LT: case GT_LE: case GT_GE: case GT_GT: case GT_CMP: case GT_AND: case GT_OR: case GT_XOR: return emitter::emitIns_valid_imm_for_alu(immVal); break; #endif // _TARGET_ARM_ #ifdef _TARGET_ARM64_ case GT_STORE_LCL_VAR: if (immVal == 0) return true; break; #endif } } return false; }
//------------------------------------------------------------------------
// genFloatArith: Generate code for a floating-point GT_ADD/GT_SUB/GT_MUL/GT_DIV.
//
// Arguments:
//    tree    - the arithmetic node
//    tgtPref - optional register preference for the result; may be NULL, in
//              which case any floating point register is acceptable.
//
// Notes:
//    Evaluates the more complex operand first (per GTF_REVERSE_OPS) so the
//    cheaper operand does not hold a register across the expensive subtree.
//    (Removed unused locals varNum/varDsc/varBit that were never referenced.)
//
void CodeGen::genFloatArith(GenTreePtr tree, RegSet::RegisterPreference* tgtPref)
{
    var_types  type = tree->TypeGet();
    genTreeOps oper = tree->OperGet();
    GenTreePtr op1  = tree->gtGetOp1();
    GenTreePtr op2  = tree->gtGetOp2();
    regNumber  tgtReg;

    assert(oper == GT_ADD || oper == GT_SUB || oper == GT_MUL || oper == GT_DIV);

    RegSet::RegisterPreference defaultPref(RBM_ALLFLOAT, RBM_NONE);
    if (tgtPref == NULL)
    {
        tgtPref = &defaultPref;
    }

    // Is the op2 (RHS) more complex than op1 (LHS)?
    //
    if (tree->gtFlags & GTF_REVERSE_OPS)
    {
        // Steer op2 away from the registers op1's subtree will need.
        regMaskTP                  bestRegs = regSet.rsNarrowHint(RBM_ALLFLOAT, ~op1->gtRsvdRegs);
        RegSet::RegisterPreference pref(RBM_ALLFLOAT, bestRegs);

        // Evaluate op2 into a floating point register
        //
        genCodeForTreeFloat(op2, &pref);
        regSet.SetUsedRegFloat(op2, true);

        // Evaluate op1 into any floating point register
        //
        genCodeForTreeFloat(op1);
        regSet.SetUsedRegFloat(op1, true);

        regNumber op1Reg  = op1->gtRegNum;
        regMaskTP op1Mask = genRegMaskFloat(op1Reg, type);

        // Fix 388445 ARM JitStress WP7
        // Lock op1's register so recovering op2 cannot spill it.
        regSet.rsLockUsedReg(op1Mask);
        genRecoverReg(op2, RBM_ALLFLOAT, RegSet::KEEP_REG);
        noway_assert(op2->gtFlags & GTF_REG_VAL);
        regSet.rsUnlockUsedReg(op1Mask);

        regSet.SetUsedRegFloat(op1, false);
        regSet.SetUsedRegFloat(op2, false);
    }
    else
    {
        // Steer op1 away from the registers op2's subtree will need.
        regMaskTP                  bestRegs = regSet.rsNarrowHint(RBM_ALLFLOAT, ~op2->gtRsvdRegs);
        RegSet::RegisterPreference pref(RBM_ALLFLOAT, bestRegs);

        // Evaluate op1 into a floating point register
        //
        genCodeForTreeFloat(op1, &pref);
        regSet.SetUsedRegFloat(op1, true);

        // Evaluate op2 into any floating point register
        //
        genCodeForTreeFloat(op2);
        regSet.SetUsedRegFloat(op2, true);

        regNumber op2Reg  = op2->gtRegNum;
        regMaskTP op2Mask = genRegMaskFloat(op2Reg, type);

        // Fix 388445 ARM JitStress WP7
        // Lock op2's register so recovering op1 cannot spill it.
        regSet.rsLockUsedReg(op2Mask);
        genRecoverReg(op1, RBM_ALLFLOAT, RegSet::KEEP_REG);
        noway_assert(op1->gtFlags & GTF_REG_VAL);
        regSet.rsUnlockUsedReg(op2Mask);

        regSet.SetUsedRegFloat(op2, false);
        regSet.SetUsedRegFloat(op1, false);
    }

    tgtReg = regSet.PickRegFloat(type, tgtPref, true);

    noway_assert(op1->gtFlags & GTF_REG_VAL);
    noway_assert(op2->gtFlags & GTF_REG_VAL);

    inst_RV_RV_RV(ins_MathOp(oper, type), tgtReg, op1->gtRegNum, op2->gtRegNum, emitActualTypeSize(type));

    genCodeForTreeFloat_DONE(tree, tgtReg);
}
//------------------------------------------------------------------------
// genSSE2Intrinsic: Generates the code for an SSE2 hardware intrinsic node
//
// Arguments:
//    node - The hardware intrinsic node
//
void CodeGen::genSSE2Intrinsic(GenTreeHWIntrinsic* node)
{
    NamedIntrinsic intrinsicID = node->gtHWIntrinsicId;
    GenTree*       op1         = node->gtGetOp1();
    GenTree*       op2         = node->gtGetOp2();
    regNumber      targetReg   = node->gtRegNum;
    var_types      targetType  = node->TypeGet();
    var_types      baseType    = node->gtSIMDBaseType;
    regNumber      op1Reg      = REG_NA;
    regNumber      op2Reg      = REG_NA;
    emitter*       emit        = getEmitter();
    int            ival        = -1;

    // Operand-list forms are not consumed here; only consume simple operands.
    if ((op1 != nullptr) && !op1->OperIsList())
    {
        op1Reg = op1->gtRegNum;
        genConsumeOperands(node);
    }

    switch (intrinsicID)
    {
        // All integer overloads are handled by table codegen
        case NI_SSE2_CompareLessThan:
        {
            assert(op1 != nullptr);
            assert(op2 != nullptr);

            // The double-precision compare takes an immediate selecting the predicate.
            assert(baseType == TYP_DOUBLE);

            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
            op2Reg          = op2->gtRegNum;
            ival            = Compiler::ivalOfHWIntrinsic(intrinsicID);
            assert(ival != -1);
            emit->emitIns_SIMD_R_R_R_I(ins, emitTypeSize(TYP_SIMD16), targetReg, op1Reg, op2Reg, ival);

            break;
        }

        case NI_SSE2_CompareEqualOrderedScalar:
        case NI_SSE2_CompareEqualUnorderedScalar:
        {
            assert(baseType == TYP_DOUBLE);

            op2Reg             = op2->gtRegNum;
            regNumber   tmpReg = node->GetSingleTempReg();
            instruction ins    = Compiler::insOfHWIntrinsic(intrinsicID, baseType);

            // Ensure we aren't overwriting targetReg
            assert(tmpReg != targetReg);

            // Combine setpo (parity odd, i.e. not unordered) with sete so the result
            // is 1 only for an equal, non-NaN comparison.
            emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg);
            emit->emitIns_R(INS_setpo, EA_1BYTE, targetReg);
            emit->emitIns_R(INS_sete, EA_1BYTE, tmpReg);
            emit->emitIns_R_R(INS_and, EA_1BYTE, tmpReg, targetReg);
            emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, tmpReg);

            break;
        }

        case NI_SSE2_CompareGreaterThanOrderedScalar:
        case NI_SSE2_CompareGreaterThanUnorderedScalar:
        {
            assert(baseType == TYP_DOUBLE);

            op2Reg          = op2->gtRegNum;
            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);

            // seta reads CF==0 && ZF==0, which is "greater than" after an
            // unsigned-style SSE compare result.
            emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg);
            emit->emitIns_R(INS_seta, EA_1BYTE, targetReg);
            emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);

            break;
        }

        case NI_SSE2_CompareGreaterThanOrEqualOrderedScalar:
        case NI_SSE2_CompareGreaterThanOrEqualUnorderedScalar:
        {
            assert(baseType == TYP_DOUBLE);

            op2Reg          = op2->gtRegNum;
            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);

            emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg);
            emit->emitIns_R(INS_setae, EA_1BYTE, targetReg);
            emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);

            break;
        }

        case NI_SSE2_CompareLessThanOrderedScalar:
        case NI_SSE2_CompareLessThanUnorderedScalar:
        {
            assert(baseType == TYP_DOUBLE);

            op2Reg          = op2->gtRegNum;
            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);

            // Operands are swapped (op2, op1) so "less than" can be computed
            // with seta instead of needing a below/parity combination.
            emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op2Reg, op1Reg);
            emit->emitIns_R(INS_seta, EA_1BYTE, targetReg);
            emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);

            break;
        }

        case NI_SSE2_CompareLessThanOrEqualOrderedScalar:
        case NI_SSE2_CompareLessThanOrEqualUnorderedScalar:
        {
            assert(baseType == TYP_DOUBLE);

            op2Reg          = op2->gtRegNum;
            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);

            // Swapped operands, as in the less-than case above.
            emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op2Reg, op1Reg);
            emit->emitIns_R(INS_setae, EA_1BYTE, targetReg);
            emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);

            break;
        }

        case NI_SSE2_CompareNotEqualOrderedScalar:
        case NI_SSE2_CompareNotEqualUnorderedScalar:
        {
            assert(baseType == TYP_DOUBLE);

            op2Reg          = op2->gtRegNum;
            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
            regNumber tmpReg = node->GetSingleTempReg();

            // Ensure we aren't overwriting targetReg
            assert(tmpReg != targetReg);

            // setpe (parity even, i.e. unordered/NaN) OR setne: the result is 1
            // when the operands differ or the comparison is unordered.
            emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg);
            emit->emitIns_R(INS_setpe, EA_1BYTE, targetReg);
            emit->emitIns_R(INS_setne, EA_1BYTE, tmpReg);
            emit->emitIns_R_R(INS_or, EA_1BYTE, tmpReg, targetReg);
            emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, tmpReg);

            break;
        }

        case NI_SSE2_ConvertScalarToVector128Double:
        case NI_SSE2_ConvertScalarToVector128Single:
        {
            assert(baseType == TYP_INT || baseType == TYP_LONG || baseType == TYP_FLOAT || baseType == TYP_DOUBLE);
            assert(op1 != nullptr);
            assert(op2 != nullptr);

            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
            genHWIntrinsic_R_R_RM(node, ins);
            break;
        }

        case NI_SSE2_ConvertScalarToVector128Int64:
        case NI_SSE2_ConvertScalarToVector128UInt64:
        {
            assert(baseType == TYP_LONG || baseType == TYP_ULONG);
            assert(op1 != nullptr);
            assert(op2 == nullptr);

            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
            // TODO-XArch-CQ -> use of type size of TYP_SIMD16 leads to
            // instruction register encoding errors for SSE legacy encoding
            emit->emitIns_R_R(ins, emitTypeSize(baseType), targetReg, op1Reg);
            break;
        }

        case NI_SSE2_ConvertToDouble:
        {
            assert(op2 == nullptr);
            // No-op when source and destination registers already coincide.
            if (op1Reg != targetReg)
            {
                instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
                emit->emitIns_R_R(ins, emitTypeSize(targetType), targetReg, op1Reg);
            }
            break;
        }

        case NI_SSE2_ConvertToInt32:
        case NI_SSE2_ConvertToInt64:
        case NI_SSE2_ConvertToUInt32:
        case NI_SSE2_ConvertToUInt64:
        {
            assert(op2 == nullptr);
            assert(baseType == TYP_DOUBLE || baseType == TYP_FLOAT || baseType == TYP_INT || baseType == TYP_UINT ||
                   baseType == TYP_LONG || baseType == TYP_ULONG);
            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
            if (baseType == TYP_DOUBLE || baseType == TYP_FLOAT)
            {
                emit->emitIns_R_R(ins, emitTypeSize(targetType), targetReg, op1Reg);
            }
            else
            {
                // NOTE(review): operand order here is (op1Reg, targetReg), reversed
                // from the floating-point branch — presumably matching the chosen
                // instruction's encoding for integer sources; confirm against the
                // emitter's expectations for this ins.
                emit->emitIns_R_R(ins, emitActualTypeSize(baseType), op1Reg, targetReg);
            }
            break;
        }

        case NI_SSE2_LoadFence:
        {
            assert(baseType == TYP_VOID);
            assert(op1 == nullptr);
            assert(op2 == nullptr);
            emit->emitIns(INS_lfence);
            break;
        }

        case NI_SSE2_MemoryFence:
        {
            assert(baseType == TYP_VOID);
            assert(op1 == nullptr);
            assert(op2 == nullptr);
            emit->emitIns(INS_mfence);
            break;
        }

        case NI_SSE2_MoveMask:
        {
            assert(op2 == nullptr);
            assert(baseType == TYP_BYTE || baseType == TYP_UBYTE || baseType == TYP_DOUBLE);

            // The mask result is always a 32-bit integer.
            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
            emit->emitIns_R_R(ins, emitTypeSize(TYP_INT), targetReg, op1Reg);
            break;
        }

        case NI_SSE2_SetScalarVector128:
        {
            assert(baseType == TYP_DOUBLE);
            assert(op2 == nullptr);

            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);

            // If the source aliases the target, copy it aside first: targetReg is
            // zeroed below before the scalar is merged in.
            if (op1Reg == targetReg)
            {
                regNumber tmpReg = node->GetSingleTempReg();

                // Ensure we aren't overwriting targetReg
                assert(tmpReg != targetReg);

                emit->emitIns_R_R(INS_movapd, emitTypeSize(TYP_SIMD16), tmpReg, op1Reg);
                op1Reg = tmpReg;
            }

            // Zero the upper elements, then move the scalar into the low element.
            emit->emitIns_SIMD_R_R_R(INS_xorpd, emitTypeSize(TYP_SIMD16), targetReg, targetReg, targetReg);
            emit->emitIns_SIMD_R_R_R(ins, emitTypeSize(TYP_SIMD16), targetReg, targetReg, op1Reg);
            break;
        }

        case NI_SSE2_SetZeroVector128:
        {
            assert(baseType != TYP_FLOAT);
            assert(baseType >= TYP_BYTE && baseType <= TYP_DOUBLE);
            assert(op1 == nullptr);
            assert(op2 == nullptr);

            // xor of a register with itself yields zero without a memory load.
            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
            emit->emitIns_SIMD_R_R_R(ins, emitTypeSize(TYP_SIMD16), targetReg, targetReg, targetReg);
            break;
        }

        default:
            unreached();
            break;
    }

    genProduceReg(node);
}