Example #1
0
always_inline VecType vec_exp_tanh_float(VecType const & arg)
{
    typedef typename VecType::int_vec int_vec;

    /* Express e**x = e**g 2**n
     *   = e**g e**( n loge(2) )
     *   = e**( g + n loge(2) )
     */

    // black magic
    VecType x = arg;
    VecType z = round(VecType(1.44269504088896341f) * x);
    int_vec n = z.truncate_to_int();
    x -= z*VecType(0.693359375f);
    x -= z*VecType(-2.12194440e-4f);

    /* Theoretical peak relative error in [-0.5, +0.5] is 3.5e-8. */
    VecType p = 1.f +
        x * (1.00000035762786865234375f +
        x * (0.4999996721744537353515625f +
        x * (0.16665561497211456298828125f +
        x * (4.167006909847259521484375e-2f +
        x * (8.420792408287525177001953125e-3f +
        x * 1.386119984090328216552734375e-3f)))));

    /* multiply by power of 2 */
    VecType approx = ldexp_float(p, n);

    return approx;
}
Example #2
0
always_inline VecType vec_sin_float(VecType const & arg)
{
    typedef typename VecType::int_vec int_vec;

    const typename VecType::float_type four_over_pi = 1.27323954473516268615107010698011489627567716592367;

    VecType sign = arg & VecType::gen_sign_mask();
    VecType abs_arg = arg & VecType::gen_abs_mask();

    VecType y = abs_arg * four_over_pi;

    int_vec j = y.truncate_to_int();

    /* cephes: j=(j+1) & (~1) */
    j = (j + int_vec(1)) & int_vec(~1);
    y = j.convert_to_float();

    /* sign based on quadrant */
    VecType swap_sign_bit = slli(j & int_vec(4), 29);
    sign = sign ^ swap_sign_bit;

    /* polynomial mask */
    VecType poly_mask = VecType (mask_eq(j & int_vec(2), int_vec(0)));

    /* black magic */
    static float DP1 = 0.78515625;
    static float DP2 = 2.4187564849853515625e-4;
    static float DP3 = 3.77489497744594108e-8;
    VecType base = ((abs_arg - y * DP1) - y * DP2) - y * DP3;

    /* [0..pi/4] */
    VecType z = base * base;
    VecType p1 = ((  2.443315711809948E-005 * z
        - 1.388731625493765E-003) * z
        + 4.166664568298827E-002) * z * z
        -0.5f * z + 1.0
        ;

    /* [pi/4..pi/2] */
    VecType p2 = ((-1.9515295891E-4 * z
         + 8.3321608736E-3) * z
         - 1.6666654611E-1) * z * base + base;

    VecType approximation =  select(p1, p2, poly_mask);

    return approximation ^ sign;
}
always_inline VecType vec_exp_float(VecType const & arg)
{
    typedef typename VecType::int_vec int_vec;

    /* Express e**x = e**g 2**n
     *   = e**g e**( n loge(2) )
     *   = e**( g + n loge(2) )
     */

    // black magic
    VecType x = arg;
    VecType z = round(VecType(1.44269504088896341f) * x);
    int_vec n = z.truncate_to_int();
    x -= z*VecType(0.693359375f);
    x -= z*VecType(-2.12194440e-4f);

    /* Theoretical peak relative error in [-0.5, +0.5] is 3.5e-8. */
    VecType p = VecType(VecType::gen_one()) +
        x * (1.00000035762786865234375f +
        x * (0.4999996721744537353515625f +
        x * (0.16665561497211456298828125f +
        x * (4.167006909847259521484375e-2f +
        x * (8.420792408287525177001953125e-3f +
        x * 1.386119984090328216552734375e-3f)))));

    /* multiply by power of 2 */
    VecType approx = ldexp_float(p, n);

    /* handle min/max boundaries */
    const VecType maxlogf(88.72283905206835f);
//    const VecType minlogf(-103.278929903431851103f);
	const VecType minlogf = -maxlogf;
    const VecType max_float(std::numeric_limits<float>::max());
    const VecType zero = VecType::gen_zero();

    VecType too_large = mask_gt(arg, maxlogf);
    VecType too_small = mask_lt(arg, minlogf);

    VecType ret = select(approx, max_float, too_large);
    ret = select(ret, zero, too_small);

    return ret;
}
Example #4
0
always_inline VecType vec_tan_float(VecType const & arg)
{
    typedef typename VecType::int_vec int_vec;
    const typename VecType::float_type four_over_pi = 1.27323954473516268615107010698011489627567716592367;

    VecType sign = arg & VecType::gen_sign_mask();
    VecType abs_arg = arg & VecType::gen_abs_mask();

    VecType y = abs_arg * four_over_pi;
    int_vec j = y.truncate_to_int();

    /* cephes: j=(j+1) & (~1) */
    j = (j + int_vec(1)) & int_vec(~1);
    y = j.convert_to_float();

    /* approximation mask */
    VecType poly_mask = VecType (mask_eq(j & int_vec(2), int_vec(0)));

    /* black magic */
    static float DP1 = 0.78515625;
    static float DP2 = 2.4187564849853515625e-4;
    static float DP3 = 3.77489497744594108e-8;
    VecType base = ((abs_arg - y * DP1) - y * DP2) - y * DP3;

    VecType x = base; VecType x2 = x*x;

    // sollya: fpminimax(tan(x), [|3,5,7,9,11,13|], [|24...|], [-pi/4,pi/4], x);
    VecType approx =
        x + x * x2 * (0.3333315551280975341796875 + x2 * (0.1333882510662078857421875 + x2 * (5.3409568965435028076171875e-2 + x2 * (2.443529665470123291015625e-2 + x2 * (3.1127030961215496063232421875e-3 + x2 * 9.3892104923725128173828125e-3)))));

    //VecType recip = -reciprocal(approx);
    VecType recip = -1.0 / approx;

    VecType approximation = select(recip, approx, poly_mask);

    return approximation ^ sign;
}