Пример #1
0
//------------------------------------------------------------------------
// IsContainableImmed: Is an immediate encodable in-place?
//
// Arguments:
//    parentNode - The node that would consume the immediate; its type and oper
//                 select which encoding check is applied.
//    childNode  - The candidate operand.
//
// Return Value:
//    True if the immediate can be folded into an instruction,
//    for example small enough and non-relocatable.
//
// Notes:
//    Always returns false when parentNode has a floating-point type, when
//    childNode is not an integer constant, when the constant needs a
//    relocation, or when the parent oper is not one of those listed below.
//
// TODO-CQ: we can contain a floating point 0.0 constant in a compare instruction
// (vcmp on arm, fcmp on arm64).
//
bool Lowering::IsContainableImmed(GenTree* parentNode, GenTree* childNode)
{
    if (!varTypeIsFloating(parentNode->TypeGet()))
    {
        // Make sure we have an actual immediate
        if (!childNode->IsCnsIntOrI())
            return false;
        if (childNode->gtIntCon.ImmedValNeedsReloc(comp))
            return false;

        // TODO-CrossBitness: we wouldn't need the cast below if GenTreeIntCon::gtIconVal had target_ssize_t type.
        target_ssize_t immVal = (target_ssize_t)childNode->gtIntCon.gtIconVal;
        emitAttr       attr   = emitActualTypeSize(childNode->TypeGet());
        emitAttr       size   = EA_SIZE(attr);
#ifdef _TARGET_ARM_
        // The ARM add-immediate check depends on whether the parent sets the flags.
        insFlags flags = parentNode->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
#endif

        switch (parentNode->OperGet())
        {
            case GT_ADD:
            case GT_SUB:
#ifdef _TARGET_ARM64_
                // Plain add/sub only depend on the immediate being encodable; the
                // atomics check below must not apply to them.
                return emitter::emitIns_valid_imm_for_add(immVal, size);

            case GT_CMPXCHG:
            case GT_LOCKADD:
            case GT_XADD:
                // With the Atomics instruction set available the operand is never contained
                // (presumably because the atomic instructions only take register operands
                // -- TODO confirm); otherwise the add-immediate check applies.
                return comp->compSupports(InstructionSet_Atomics) ? false
                                                                  : emitter::emitIns_valid_imm_for_add(immVal, size);
#elif defined(_TARGET_ARM_)
                return emitter::emitIns_valid_imm_for_add(immVal, flags);
#endif
                break;

#ifdef _TARGET_ARM64_
            case GT_EQ:
            case GT_NE:
            case GT_LT:
            case GT_LE:
            case GT_GE:
            case GT_GT:
                return emitter::emitIns_valid_imm_for_cmp(immVal, size);
            case GT_AND:
            case GT_OR:
            case GT_XOR:
            case GT_TEST_EQ:
            case GT_TEST_NE:
                return emitter::emitIns_valid_imm_for_alu(immVal, size);
            case GT_JCMP:
                // A JCMP immediate is expected to be 0 when GTF_JCMP_TST is clear,
                // and a power of two when it is set.
                assert(((parentNode->gtFlags & GTF_JCMP_TST) == 0) ? (immVal == 0) : isPow2(immVal));
                return true;
#elif defined(_TARGET_ARM_)
            case GT_EQ:
            case GT_NE:
            case GT_LT:
            case GT_LE:
            case GT_GE:
            case GT_GT:
            case GT_CMP:
            case GT_AND:
            case GT_OR:
            case GT_XOR:
                return emitter::emitIns_valid_imm_for_alu(immVal);
#endif // _TARGET_ARM_

#ifdef _TARGET_ARM64_
            case GT_STORE_LCL_FLD:
            case GT_STORE_LCL_VAR:
                // Only a zero constant is contained under a local store.
                if (immVal == 0)
                    return true;
                break;
#endif

            default:
                break;
        }
    }

    return false;
}
Пример #2
0
//------------------------------------------------------------------------
// IsContainableImmed: Is an immediate encodable in-place?
//
// Arguments:
//    parentNode - The node that would consume the immediate; its type and oper
//                 select which check is applied.
//    childNode  - The candidate operand.
//
// Return Value:
//    True if the immediate can be folded into an instruction,
//    for example small enough and non-relocatable.
bool Lowering::IsContainableImmed(GenTree* parentNode, GenTree* childNode)
{
    if (varTypeIsFloating(parentNode->TypeGet()))
    {
        // Floating point parent: the only candidate is a zero constant
        // under one of the relational operators.
        switch (parentNode->OperGet())
        {
            case GT_EQ:
            case GT_NE:
            case GT_LT:
            case GT_LE:
            case GT_GE:
            case GT_GT:
                if (!childNode->IsIntegralConst(0))
                {
                    return false;
                }

                // TODO-ARM-Cleanup: not tested yet.
                NYI_ARM("ARM IsContainableImmed for floating point type");

                return true;

            default:
                return false;
        }
    }

    // Integer parent: the child must be an integer constant, and must not be
    // a handle when relocatable code is being generated.
    if (!childNode->IsCnsIntOrI() || (childNode->IsIconHandle() && comp->opts.compReloc))
    {
        return false;
    }

    ssize_t  immVal = childNode->gtIntCon.gtIconVal;
    emitAttr opSize = EA_SIZE(emitActualTypeSize(childNode->TypeGet()));
#ifdef _TARGET_ARM_
    // The ARM add-immediate check depends on whether the parent sets the flags.
    insFlags setsFlags = parentNode->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
#endif

    switch (parentNode->OperGet())
    {
        case GT_ADD:
        case GT_SUB:
#ifdef _TARGET_ARM64_
        case GT_CMPXCHG:
        case GT_LOCKADD:
        case GT_XADD:
            return emitter::emitIns_valid_imm_for_add(immVal, opSize);
#elif defined(_TARGET_ARM_)
            return emitter::emitIns_valid_imm_for_add(immVal, setsFlags);
#endif
            break;

#ifdef _TARGET_ARM64_
        case GT_EQ:
        case GT_NE:
        case GT_LT:
        case GT_LE:
        case GT_GE:
        case GT_GT:
            return emitter::emitIns_valid_imm_for_cmp(immVal, opSize);

        case GT_AND:
        case GT_OR:
        case GT_XOR:
        case GT_TEST_EQ:
        case GT_TEST_NE:
            return emitter::emitIns_valid_imm_for_alu(immVal, opSize);

        case GT_JCMP:
            // A JCMP immediate is expected to be 0 when GTF_JCMP_TST is clear,
            // and a power of two when it is set.
            assert(((parentNode->gtFlags & GTF_JCMP_TST) == 0) ? (immVal == 0) : isPow2(immVal));
            return true;

        case GT_STORE_LCL_VAR:
            // Only a zero constant is contained under a local store.
            return immVal == 0;
#elif defined(_TARGET_ARM_)
        case GT_EQ:
        case GT_NE:
        case GT_LT:
        case GT_LE:
        case GT_GE:
        case GT_GT:
        case GT_CMP:
        case GT_AND:
        case GT_OR:
        case GT_XOR:
            return emitter::emitIns_valid_imm_for_alu(immVal);
#endif // _TARGET_ARM_

        default:
            break;
    }

    return false;
}
Пример #3
0
//------------------------------------------------------------------------
// genFloatArith: Generate code for a binary floating point arithmetic node.
//
// Arguments:
//    tree    - The arithmetic node; its oper must be GT_ADD, GT_SUB, GT_MUL or GT_DIV.
//    tgtPref - Register preference for the result. When NULL, a default
//              preference of (RBM_ALLFLOAT, RBM_NONE) is used.
//
// Notes:
//    Both operands are evaluated into floating point registers. When
//    GTF_REVERSE_OPS is set on the tree, op2 is evaluated before op1;
//    otherwise op1 is evaluated first. The emitted instruction always takes
//    op1's register as its first source and op2's register as its second.
//
void CodeGen::genFloatArith (GenTreePtr tree,
                             RegSet::RegisterPreference *tgtPref)
{
    var_types       type    = tree->TypeGet();
    genTreeOps      oper    = tree->OperGet();
    GenTreePtr      op1     = tree->gtGetOp1();
    GenTreePtr      op2     = tree->gtGetOp2();

    regNumber       tgtReg;

    assert(oper == GT_ADD ||
           oper == GT_SUB ||
           oper == GT_MUL ||
           oper == GT_DIV);

    RegSet::RegisterPreference defaultPref(RBM_ALLFLOAT, RBM_NONE);
    if (tgtPref == NULL)
    {
        tgtPref = &defaultPref;
    }

    // Is the op2 (RHS) more complex than op1 (LHS)? If so it is evaluated first.
    // The two evaluation orders are mirror images of each other, so pick the
    // operand order once and run a single sequence.
    //
    const bool      reverseOps = (tree->gtFlags & GTF_REVERSE_OPS) != 0;
    GenTreePtr      firstOp    = reverseOps ? op2 : op1;
    GenTreePtr      secondOp   = reverseOps ? op1 : op2;

    // Hint the first operand away from the registers reserved by the second one.
    regMaskTP bestRegs = regSet.rsNarrowHint(RBM_ALLFLOAT, ~secondOp->gtRsvdRegs);
    RegSet::RegisterPreference firstPref(RBM_ALLFLOAT, bestRegs);

    // Evaluate the first operand into a floating point register
    //
    genCodeForTreeFloat(firstOp, &firstPref);
    regSet.SetUsedRegFloat(firstOp, true);

    // Evaluate the second operand into any floating point register
    //
    genCodeForTreeFloat(secondOp);
    regSet.SetUsedRegFloat(secondOp, true);

    regNumber  secondReg  = secondOp->gtRegNum;
    regMaskTP  secondMask = genRegMaskFloat(secondReg, type);

    // Fix 388445 ARM JitStress WP7
    // Keep the second operand's register locked while the first operand is recovered.
    regSet.rsLockUsedReg(secondMask);
    genRecoverReg(firstOp, RBM_ALLFLOAT, RegSet::KEEP_REG);
    noway_assert(firstOp->gtFlags & GTF_REG_VAL);
    regSet.rsUnlockUsedReg(secondMask);

    // Release the operands: the one evaluated last is released first.
    regSet.SetUsedRegFloat(secondOp, false);
    regSet.SetUsedRegFloat(firstOp, false);

    tgtReg = regSet.PickRegFloat(type, tgtPref, true);

    noway_assert(op1->gtFlags & GTF_REG_VAL);
    noway_assert(op2->gtFlags & GTF_REG_VAL);

    inst_RV_RV_RV(ins_MathOp(oper, type), tgtReg, op1->gtRegNum, op2->gtRegNum, emitActualTypeSize(type));

    genCodeForTreeFloat_DONE(tree, tgtReg);
}
Пример #4
0
//------------------------------------------------------------------------
// genSSE2Intrinsic: Generates the code for an SSE2 hardware intrinsic node
//
// Arguments:
//    node - The hardware intrinsic node
//
// Notes:
//    The node's operands are consumed up front only when op1 is present and
//    is not a list. Every handled intrinsic ends with genProduceReg(node);
//    an unhandled intrinsic id hits unreached().
//
void CodeGen::genSSE2Intrinsic(GenTreeHWIntrinsic* node)
{
    NamedIntrinsic intrinsicID = node->gtHWIntrinsicId;
    GenTree*       op1         = node->gtGetOp1();
    GenTree*       op2         = node->gtGetOp2();
    regNumber      targetReg   = node->gtRegNum;
    var_types      targetType  = node->TypeGet();
    var_types      baseType    = node->gtSIMDBaseType;
    emitter*       emit        = getEmitter();
    regNumber      op1Reg      = REG_NA;

    if ((op1 != nullptr) && !op1->OperIsList())
    {
        op1Reg = op1->gtRegNum;
        genConsumeOperands(node);
    }

    switch (intrinsicID)
    {
        // All integer overloads are handled by table codegen
        case NI_SSE2_CompareLessThan:
        {
            assert(op1 != nullptr);
            assert(op2 != nullptr);
            assert(baseType == TYP_DOUBLE);

            instruction ins    = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
            regNumber   op2Reg = op2->gtRegNum;
            int         ival   = Compiler::ivalOfHWIntrinsic(intrinsicID);
            assert(ival != -1);

            emit->emitIns_SIMD_R_R_R_I(ins, emitTypeSize(TYP_SIMD16), targetReg, op1Reg, op2Reg, ival);
            break;
        }

        // Equal / NotEqual scalar compares: two setcc results are combined
        // through a temporary register (and for Equal, or for NotEqual).
        case NI_SSE2_CompareEqualOrderedScalar:
        case NI_SSE2_CompareEqualUnorderedScalar:
        case NI_SSE2_CompareNotEqualOrderedScalar:
        case NI_SSE2_CompareNotEqualUnorderedScalar:
        {
            assert(baseType == TYP_DOUBLE);

            const bool isEqual = (intrinsicID == NI_SSE2_CompareEqualOrderedScalar) ||
                                 (intrinsicID == NI_SSE2_CompareEqualUnorderedScalar);

            regNumber   op2Reg = op2->gtRegNum;
            regNumber   tmpReg = node->GetSingleTempReg();
            instruction ins    = Compiler::insOfHWIntrinsic(intrinsicID, baseType);

            // Ensure we aren't overwriting targetReg
            assert(tmpReg != targetReg);

            emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg);
            emit->emitIns_R(isEqual ? INS_setpo : INS_setpe, EA_1BYTE, targetReg);
            emit->emitIns_R(isEqual ? INS_sete : INS_setne, EA_1BYTE, tmpReg);
            emit->emitIns_R_R(isEqual ? INS_and : INS_or, EA_1BYTE, tmpReg, targetReg);
            emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, tmpReg);
            break;
        }

        // Ordering scalar compares: compare, setcc into targetReg, then zero-extend.
        // The LessThan forms emit the compare with the operands swapped and reuse
        // the seta / setae conditions of the GreaterThan forms.
        case NI_SSE2_CompareGreaterThanOrderedScalar:
        case NI_SSE2_CompareGreaterThanUnorderedScalar:
        case NI_SSE2_CompareGreaterThanOrEqualOrderedScalar:
        case NI_SSE2_CompareGreaterThanOrEqualUnorderedScalar:
        case NI_SSE2_CompareLessThanOrderedScalar:
        case NI_SSE2_CompareLessThanUnorderedScalar:
        case NI_SSE2_CompareLessThanOrEqualOrderedScalar:
        case NI_SSE2_CompareLessThanOrEqualUnorderedScalar:
        {
            assert(baseType == TYP_DOUBLE);

            const bool isLessThanForm = (intrinsicID == NI_SSE2_CompareLessThanOrderedScalar) ||
                                        (intrinsicID == NI_SSE2_CompareLessThanUnorderedScalar) ||
                                        (intrinsicID == NI_SSE2_CompareLessThanOrEqualOrderedScalar) ||
                                        (intrinsicID == NI_SSE2_CompareLessThanOrEqualUnorderedScalar);
            const bool includesEqual = (intrinsicID == NI_SSE2_CompareGreaterThanOrEqualOrderedScalar) ||
                                       (intrinsicID == NI_SSE2_CompareGreaterThanOrEqualUnorderedScalar) ||
                                       (intrinsicID == NI_SSE2_CompareLessThanOrEqualOrderedScalar) ||
                                       (intrinsicID == NI_SSE2_CompareLessThanOrEqualUnorderedScalar);

            regNumber   op2Reg = op2->gtRegNum;
            instruction ins    = Compiler::insOfHWIntrinsic(intrinsicID, baseType);

            regNumber firstReg  = isLessThanForm ? op2Reg : op1Reg;
            regNumber secondReg = isLessThanForm ? op1Reg : op2Reg;

            emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), firstReg, secondReg);
            emit->emitIns_R(includesEqual ? INS_setae : INS_seta, EA_1BYTE, targetReg);
            emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);
            break;
        }

        case NI_SSE2_ConvertScalarToVector128Double:
        case NI_SSE2_ConvertScalarToVector128Single:
        {
            assert(baseType == TYP_INT || baseType == TYP_LONG || baseType == TYP_FLOAT || baseType == TYP_DOUBLE);
            assert(op1 != nullptr);
            assert(op2 != nullptr);

            genHWIntrinsic_R_R_RM(node, Compiler::insOfHWIntrinsic(intrinsicID, baseType));
            break;
        }

        case NI_SSE2_ConvertScalarToVector128Int64:
        case NI_SSE2_ConvertScalarToVector128UInt64:
        {
            assert(baseType == TYP_LONG || baseType == TYP_ULONG);
            assert(op1 != nullptr);
            assert(op2 == nullptr);

            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
            // TODO-XArch-CQ -> use of type size of TYP_SIMD16 leads to
            // instruction register encoding errors for SSE legacy encoding
            emit->emitIns_R_R(ins, emitTypeSize(baseType), targetReg, op1Reg);
            break;
        }

        case NI_SSE2_ConvertToDouble:
        {
            assert(op2 == nullptr);

            // Nothing to emit when the source already lives in the target register.
            if (op1Reg == targetReg)
            {
                break;
            }

            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
            emit->emitIns_R_R(ins, emitTypeSize(targetType), targetReg, op1Reg);
            break;
        }

        case NI_SSE2_ConvertToInt32:
        case NI_SSE2_ConvertToInt64:
        case NI_SSE2_ConvertToUInt32:
        case NI_SSE2_ConvertToUInt64:
        {
            assert(op2 == nullptr);
            assert(baseType == TYP_DOUBLE || baseType == TYP_FLOAT || baseType == TYP_INT || baseType == TYP_UINT ||
                   baseType == TYP_LONG || baseType == TYP_ULONG);

            instruction ins              = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
            const bool  isFloatingSource = (baseType == TYP_DOUBLE) || (baseType == TYP_FLOAT);

            if (isFloatingSource)
            {
                emit->emitIns_R_R(ins, emitTypeSize(targetType), targetReg, op1Reg);
            }
            else
            {
                // Integer base types pass the registers in the opposite order
                // (source first, then target).
                emit->emitIns_R_R(ins, emitActualTypeSize(baseType), op1Reg, targetReg);
            }
            break;
        }

        case NI_SSE2_LoadFence:
        case NI_SSE2_MemoryFence:
        {
            assert(baseType == TYP_VOID);
            assert(op1 == nullptr);
            assert(op2 == nullptr);

            emit->emitIns((intrinsicID == NI_SSE2_LoadFence) ? INS_lfence : INS_mfence);
            break;
        }

        case NI_SSE2_MoveMask:
        {
            assert(op2 == nullptr);
            assert(baseType == TYP_BYTE || baseType == TYP_UBYTE || baseType == TYP_DOUBLE);

            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
            emit->emitIns_R_R(ins, emitTypeSize(TYP_INT), targetReg, op1Reg);
            break;
        }

        case NI_SSE2_SetScalarVector128:
        {
            assert(baseType == TYP_DOUBLE);
            assert(op2 == nullptr);

            instruction ins       = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
            regNumber   sourceReg = op1Reg;

            // targetReg is zeroed below, so when the source shares it the value
            // is first copied out to a temporary register.
            if (sourceReg == targetReg)
            {
                regNumber tmpReg = node->GetSingleTempReg();

                // Ensure we aren't overwriting targetReg
                assert(tmpReg != targetReg);

                emit->emitIns_R_R(INS_movapd, emitTypeSize(TYP_SIMD16), tmpReg, sourceReg);
                sourceReg = tmpReg;
            }

            emit->emitIns_SIMD_R_R_R(INS_xorpd, emitTypeSize(TYP_SIMD16), targetReg, targetReg, targetReg);
            emit->emitIns_SIMD_R_R_R(ins, emitTypeSize(TYP_SIMD16), targetReg, targetReg, sourceReg);
            break;
        }

        case NI_SSE2_SetZeroVector128:
        {
            assert(baseType != TYP_FLOAT);
            assert(baseType >= TYP_BYTE && baseType <= TYP_DOUBLE);
            assert(op1 == nullptr);
            assert(op2 == nullptr);

            // The instruction is emitted with targetReg as destination and both sources.
            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
            emit->emitIns_SIMD_R_R_R(ins, emitTypeSize(TYP_SIMD16), targetReg, targetReg, targetReg);
            break;
        }

        default:
            unreached();
            break;
    }

    genProduceReg(node);
}