always_inline VecType vec_pow(VecType arg1, VecType arg2)
{
    const VecType zero      = VecType::gen_zero();
    const VecType arg1_zero = mask_eq(arg1, zero);

    const VecType result = exp(arg2 * log(arg1));
    return select(result, zero, arg1_zero);
}
Exemplo n.º 2
0
always_inline VecType vec_sin_float(VecType const & arg)
{
    typedef typename VecType::int_vec int_vec;

    const typename VecType::float_type four_over_pi = 1.27323954473516268615107010698011489627567716592367;

    VecType sign = arg & VecType::gen_sign_mask();
    VecType abs_arg = arg & VecType::gen_abs_mask();

    VecType y = abs_arg * four_over_pi;

    int_vec j = y.truncate_to_int();

    /* cephes: j=(j+1) & (~1) */
    j = (j + int_vec(1)) & int_vec(~1);
    y = j.convert_to_float();

    /* sign based on quadrant */
    VecType swap_sign_bit = slli(j & int_vec(4), 29);
    sign = sign ^ swap_sign_bit;

    /* polynomial mask */
    VecType poly_mask = VecType (mask_eq(j & int_vec(2), int_vec(0)));

    /* black magic */
    static float DP1 = 0.78515625;
    static float DP2 = 2.4187564849853515625e-4;
    static float DP3 = 3.77489497744594108e-8;
    VecType base = ((abs_arg - y * DP1) - y * DP2) - y * DP3;

    /* [0..pi/4] */
    VecType z = base * base;
    VecType p1 = ((  2.443315711809948E-005 * z
        - 1.388731625493765E-003) * z
        + 4.166664568298827E-002) * z * z
        -0.5f * z + 1.0
        ;

    /* [pi/4..pi/2] */
    VecType p2 = ((-1.9515295891E-4 * z
         + 8.3321608736E-3) * z
         - 1.6666654611E-1) * z * base + base;

    VecType approximation =  select(p1, p2, poly_mask);

    return approximation ^ sign;
}
Exemplo n.º 3
0
always_inline VecType vec_tan_float(VecType const & arg)
{
    typedef typename VecType::int_vec int_vec;
    const typename VecType::float_type four_over_pi = 1.27323954473516268615107010698011489627567716592367;

    VecType sign = arg & VecType::gen_sign_mask();
    VecType abs_arg = arg & VecType::gen_abs_mask();

    VecType y = abs_arg * four_over_pi;
    int_vec j = y.truncate_to_int();

    /* cephes: j=(j+1) & (~1) */
    j = (j + int_vec(1)) & int_vec(~1);
    y = j.convert_to_float();

    /* approximation mask */
    VecType poly_mask = VecType (mask_eq(j & int_vec(2), int_vec(0)));

    /* black magic */
    static float DP1 = 0.78515625;
    static float DP2 = 2.4187564849853515625e-4;
    static float DP3 = 3.77489497744594108e-8;
    VecType base = ((abs_arg - y * DP1) - y * DP2) - y * DP3;

    VecType x = base; VecType x2 = x*x;

    // sollya: fpminimax(tan(x), [|3,5,7,9,11,13|], [|24...|], [-pi/4,pi/4], x);
    VecType approx =
        x + x * x2 * (0.3333315551280975341796875 + x2 * (0.1333882510662078857421875 + x2 * (5.3409568965435028076171875e-2 + x2 * (2.443529665470123291015625e-2 + x2 * (3.1127030961215496063232421875e-3 + x2 * 9.3892104923725128173828125e-3)))));

    //VecType recip = -reciprocal(approx);
    VecType recip = -1.0 / approx;

    VecType approximation = select(recip, approx, poly_mask);

    return approximation ^ sign;
}