Пример #1
0
// Fills a mask plane by testing every pixel of the current row against the
// rows directly above and below it.
//
//   dstp / dpitch  - destination plane and its row stride in bytes
//   srcp / spitch  - source plane and its row stride in bytes
//   cthresh        - threshold, narrowed to int16_t and broadcast to a vector
//   width / height - size of the processed area
//
// The first row uses the second row as both neighbours, and the last row uses
// the second-to-last row as both neighbours (mirrored borders).
//
// NOTE(review): V and the helpers (load_half, sub_i16, mulhi, mullo, andnot,
// cmpgt_u16, store_half) are declared outside this view; presumably V is a
// SIMD register type and load_half widens sizeof(V) / 2 bytes to 16-bit
// lanes - confirm against the helper header.
// NOTE(review): the inner loop always handles a whole `step` of pixels, so
// each row presumably has to be accessible up to width rounded up to `step`.
static void __stdcall
comb_mask_1_simd(uint8_t* dstp, const uint8_t* srcp, const int dpitch,
                 const int spitch, const int cthresh, const int width,
                 const int height) noexcept
{
    // Number of pixels consumed per inner-loop iteration.
    constexpr int step = sizeof(V) / 2;

    const V threshold = set1_i16<V>(static_cast<int16_t>(cthresh));
    // Comparing a value with itself yields the helper vector that is handed
    // to cmpgt_u16 below.
    const V all_lanes = cmpeq_i8(threshold, threshold);

    const uint8_t* cur = srcp;
    const uint8_t* above = cur + spitch;   // mirrored: row 1 stands in for row -1
    const uint8_t* below = cur + spitch;

    for (int y = 0; y < height; ++y) {
        for (int x = 0; x < width; x += step) {
            V center = load_half<V>(cur + x);
            V diff_up = load_half<V>(above + x);
            V diff_down = load_half<V>(below + x);

            // Signed differences of both vertical neighbours to the center.
            diff_up = sub_i16(diff_up, center);
            diff_down = sub_i16(diff_down, center);

            // Combine the high and low halves of the 16-bit products of the
            // two differences, then test the result against the threshold.
            V mask = andnot(mulhi(diff_up, diff_down), mullo(diff_up, diff_down));
            mask = cmpgt_u16(mask, threshold, all_lanes);

            store_half(dstp + x, mask);
        }

        // Slide the three-row window down by one line.
        above = cur;
        cur = below;
        if (y < height - 2) {
            below += spitch;
        } else {
            // No further row available: step back so the last row sees the
            // row above it as its lower neighbour too.
            below -= spitch;
        }
        dstp += dpitch;
    }
}
Пример #2
0
// Vector ldexp: returns x with n added to its exponent field.
//
// The exponent bits of x are isolated with VecFloat::gen_exp_mask(), n is
// shifted left by 16+7 bits and added to them as integers, and the sum is
// OR-ed back onto x with its original exponent bits cleared.
//
// No special handling of zero, denormal, infinite or overflowing values is
// performed here - the addition works on the raw bit patterns only.
always_inline VecFloat ldexp_float(VecFloat const & x, typename VecFloat::int_vec const & n)
{
    typedef typename VecFloat::int_vec int_vec;

    const VecFloat exp_field_mask = VecFloat::gen_exp_mask();

    // Split x into "everything but the exponent" and "exponent only".
    const VecFloat without_exponent = andnot(exp_field_mask, x);
    const VecFloat exponent_bits = exp_field_mask & x;

    // Integer addition on the exponent field: shifted n plus old exponent.
    int_vec shifted_sum = slli(n, 16+7) + int_vec(exponent_bits);

    return without_exponent | VecFloat(shifted_sum);
}
Пример #3
0
// Vector frexp: splits x into a fraction (return value) and an integer
// exponent (written to `exp`).
//
//   exp    - receives (exponent bits of x >> (16+7)) - 126
//   return - x with its exponent bits replaced by VecFloat::gen_exp_mask_1()
//            (presumably the bit pattern that places the fraction in
//            [0.5, 1) - confirm against gen_exp_mask_1)
//
// Works on the raw bit pattern only; no special cases are handled here.
always_inline VecFloat frexp_float(VecFloat const & x, typename VecFloat::int_vec & exp)
{
    typedef typename VecFloat::int_vec int_vec;

    const VecFloat exp_field_mask = VecFloat::gen_exp_mask();

    // Exponent bits of x, viewed as integers.
    const VecFloat exponent_bits = exp_field_mask & x;
    const int_vec exponent_as_int(exponent_bits);

    // Move the field down to bit 0 and remove the offset.
    exp = srli(exponent_as_int, 16+7) - int_vec(126);

    // Keep sign and mantissa, install the fixed exponent pattern.
    const VecFloat without_exponent = andnot(exp_field_mask, x);
    VecFloat fraction = without_exponent | VecFloat::gen_exp_mask_1();
    return fraction;
}
Пример #4
0
// Vector cosine approximation in the style of the cephes single-precision
// routine: reduce |arg| to a small interval, evaluate two polynomials, pick
// one per lane with a mask and apply the sign bit.
//
//   arg    - input vector
//   return - per-lane polynomial approximation with the sign bit flipped
//            where required
always_inline VecType vec_cos_float(VecType const & arg)
{
    typedef typename VecType::int_vec int_vec;

    const typename VecType::float_type four_over_pi = 1.27323954473516268615107010698011489627567716592367;

    /* only the magnitude of the argument is used */
    VecType magnitude = arg & VecType::gen_abs_mask();

    /* magnitude expressed in units of pi/4 */
    VecType scaled = magnitude * four_over_pi;
    int_vec j = scaled.truncate_to_int();

    /* cephes: j=(j+1) & (~1) */
    j = (j + int_vec(1)) & int_vec(~1);
    VecType y = j.convert_to_float();

    int_vec j_minus_2 = j - int_vec(2);

    /* sign based on quadrant: the selected bit is moved to bit 31 */
    VecType sign = slli(andnot(j_minus_2, int_vec(4)), 29);

    /* polynomial mask: set where bit 1 of (j - 2) is clear */
    VecType poly_mask = VecType (mask_eq(j_minus_2 & int_vec(2), int_vec(0)));

    /* range reduction: DP1 + DP2 + DP3 is approximately pi/4 and is
       subtracted in three steps */
    static float DP1 = 0.78515625;
    static float DP2 = 2.4187564849853515625e-4;
    static float DP3 = 3.77489497744594108e-8;
    VecType reduced = ((magnitude - y * DP1) - y * DP2) - y * DP3;

    VecType z = reduced * reduced;

    /* [0..pi/4] */
    VecType poly_first = ((  2.443315711809948E-005 * z
        - 1.388731625493765E-003) * z
        + 4.166664568298827E-002) * z * z
        -0.5f * z + 1
        ;

    /* [pi/4..pi/2] */
    VecType poly_second = ((-1.9515295891E-4 * z
         + 8.3321608736E-3) * z
         - 1.6666654611E-1) * z * reduced + reduced;

    /* per-lane choice between the two polynomials, then apply the sign */
    VecType selected = select(poly_first, poly_second, poly_mask);

    return selected ^ sign;
}
Пример #5
0
// Bitwise select: combines andnot(bitmask, lhs) with (bitmask & rhs).
// Presumably this yields rhs where bitmask bits are set and lhs elsewhere -
// confirm against the andnot operand order of VecType.
always_inline VecType vec_select(VecType lhs, VecType rhs, VecType bitmask)
{
    VecType lhs_part = andnot(bitmask, lhs);
    VecType rhs_part = bitmask & rhs;
    return lhs_part | rhs_part;
}
// Bitwise select: ORs together the lhs contribution, andnot(bitmask, lhs),
// and the rhs contribution, (bitmask & rhs).
// NOTE(review): presumably picks rhs where bitmask is set and lhs where it
// is clear - depends on the andnot operand order of VecType.
always_inline VecType vec_select(VecType lhs, VecType rhs, VecType bitmask)
{
    return andnot(bitmask, lhs)
         | (bitmask & rhs);
}