Ejemplo n.º 1
0
always_inline VecType vec_atan_float(VecType const & arg)
{
    // The approximation is evaluated on |arg|; the sign bit is xor-ed back
    // onto the (non-negative) result at the very end.
    const VecType sign_bits = arg & VecType::gen_sign_mask();
    const VecType magnitude = arg & VecType::gen_abs_mask();
    const VecType unity     = VecType::gen_one();
    const VecType no_offset = VecType::gen_zero();

    // Interval boundaries: tan(pi/8) and tan(3*pi/8).
    const VecType above_lower = mask_gt(magnitude, 0.41421356237309504880168872420969807856967187537695);
    const VecType above_upper = mask_gt(magnitude, 2.41421356237309504880168872420969807856967187537698);

    // Reduced polynomial argument per interval:
    //   |arg| <= tan(pi/8)              : |arg|
    //   tan(pi/8) < |arg| <= tan(3pi/8) : (|arg| - 1) / (|arg| + 1)
    //   |arg| > tan(3pi/8)              : -1 / |arg|
    // The upper mask implies the lower one, so applying the two selects in
    // sequence picks the same lane values as a nested select would.
    VecType reduced = select(magnitude, (magnitude - unity) / (magnitude + unity), above_lower);
    reduced = select(reduced, -unity / magnitude, above_upper);

    // Constant added back for each interval: 0, pi/4, pi/2.
    const VecType quarter_pi = 0.78539816339744830961566084581987572104929234984377;
    const VecType half_pi    = 1.57079632679489661923132169163975144209858469968754;
    VecType offset = select(no_offset, quarter_pi, above_lower);
    offset = select(offset, half_pi, above_upper);

    // Odd polynomial in the reduced argument: t + t^3 * P(t^2).
    const VecType t  = reduced;
    const VecType t2 = t * t;
    const VecType tail = -0.333329498767852783203125 + t2 * (0.19977732002735137939453125 + t2 * (-0.1387787759304046630859375 + t2 * 8.054284751415252685546875e-2));

    const VecType unsigned_result = offset + t + t * t2 * tail;

    return unsigned_result ^ sign_bits;
}
Ejemplo n.º 2
0
always_inline VecType vec_acos_float(VecType const & arg)
{
    // Vectorised single-precision arc cosine built on vec_asin_float.
    //
    //   |arg| <= 0.5 : acos(x) = pi/2 - asin(x)
    //   arg  >  0.5  : acos(x) = 2 * asin( sqrt((1 - x) / 2) )
    //   arg  < -0.5  : acos(x) = pi - 2 * asin( sqrt((1 - |x|) / 2) )
    //   |arg| >  1   : returns 0
    const VecType abs_arg = arg & VecType::gen_abs_mask();
    const VecType one  = VecType::gen_one();
    const VecType half = VecType::gen_05();
    const VecType zero = VecType::gen_zero();

    // Choose the argument handed to asin: the input itself in the central
    // interval, the reduced square-root form outside of it.
    const VecType arg_greater_05 = mask_gt(abs_arg, half);
    const VecType asin_arg_greater_05 = sqrt((one - abs_arg) * half);
    const VecType asin_arg = select(arg, asin_arg_greater_05, arg_greater_05);

    const VecType asin = vec_asin_float(asin_arg);
    const VecType two_asin = asin + asin;

    const VecType ret_m1_m05 = 3.1415927410125732421875 - two_asin;
    const VecType ret_m05_05 = 1.57079637050628662109375 - asin;
    const VecType ret_05_1 = two_asin;

    // The result formula must be picked with the SAME mask that picked the
    // asin argument.  Comparing arg against -0.5 with a strict greater-than
    // sent arg == -0.5 to the "pi - 2*asin" formula although asin had been
    // evaluated on arg itself, yielding 4*pi/3 instead of 2*pi/3.
    const VecType arg_negative = mask_gt(zero, arg);
    const VecType ret_outer = select(ret_05_1, ret_m1_m05, arg_negative);
    VecType ret = select(ret_m05_05, ret_outer, arg_greater_05);

    // |arg| > 1: return 0
    ret = select(ret, zero, mask_gt(abs_arg, one));
    return ret;
}
Ejemplo n.º 3
0
always_inline VecType vec_asin_float(VecType const & arg)
{
    // Vectorised single-precision arc sine.
    //
    // The polynomial is evaluated on |arg| (or on the reduced argument) and
    // the sign bit of the input is xor-ed back onto the result afterwards.
    // Inputs with |arg| > 1 yield 0.
    const VecType abs_arg = arg & VecType::gen_abs_mask();
    const VecType sign = arg & VecType::gen_sign_mask();
    const VecType one = VecType::gen_one();
    const VecType half = VecType::gen_05();
    const VecType zero = VecType::gen_zero();

    // range reduction: asin(x) = pi/2 - 2 asin( sqrt( (1-x)/2 ) ) for |arg| > 0.5
    const VecType arg_greater_05 = mask_gt(abs_arg, half);
    const VecType arg_reduced = sqrt((one - abs_arg) * half);
    const VecType approx_arg = select(abs_arg, arg_reduced, arg_greater_05);

    const VecType x = approx_arg;
    const VecType x2 = x * x;
    // sollya: fpminimax(asin(x), [|3,5,7,9,11|], [|24...|], [0.000000000000000000001,0.5], x);
    const VecType approx_poly = x + x * x2 * (0.166667520999908447265625 + x2 * (7.4953101575374603271484375e-2 + x2 * (4.54690195620059967041015625e-2 + x2 * (2.418550290167331695556640625e-2 + x2 * 4.21570129692554473876953125e-2))));

    // Undo the range reduction on the lanes where it was applied.
    const VecType approx_poly_reduced = 1.57079637050628662109375 - approx_poly - approx_poly;
    VecType approx = select(approx_poly, approx_poly_reduced, arg_greater_05);

    approx = approx ^ sign;
    // |arg| > 1: return 0
    const VecType ret = select(approx, zero, mask_gt(abs_arg, one));
    return ret;
}
Ejemplo n.º 4
0
always_inline VecType vec_floor_float(VecType const & arg)
{
    // Start from vec_round_float(arg); wherever that value ended up above the
    // input, step it down by one.
    const VecType nearest = vec_round_float(arg);

    // Lanes where the rounded value overshot the input; and-ing the mask with
    // 1.0 gives a correction of 1.0 on those lanes and 0.0 elsewhere.
    const VecType overshoot  = mask_gt(nearest, arg);
    const VecType correction = overshoot & VecType::gen_one();

    return nearest - correction;
}
Ejemplo n.º 5
0
always_inline VecType vec_tanh_float(VecType const & arg)
{
    /* Three regimes, selected on |arg|:
     *   |arg| > 22      : result saturates to +-1
     *   |arg| < 0.625   : odd polynomial in arg
     *   otherwise       : 1 - 2 / (exp(2|arg|) + 1), sign re-applied
     * The branches are computed in the order large -> small -> medium, which
     * was noted to be the most efficient ordering on sse. */

    const VecType sign_bit  = arg & VecType::gen_sign_mask();
    const VecType magnitude = arg ^ sign_bit;
    const VecType unity     = VecType::gen_one();
    const VecType twice (2.f);
    const VecType saturation_limit (22.f);
    const VecType polynomial_limit (0.625f);

    /* large values: the magnitude of the result is simply one */
    const VecType is_large = mask_gt(magnitude, saturation_limit);

    /* small values: arg + arg * arg^2 * P(arg^2), P evaluated by Horner steps */
    const VecType sq = magnitude * magnitude;

    VecType poly(static_cast<float>(-5.70498872745e-3));
    poly = poly * sq + VecType(static_cast<float>(2.06390887954e-2));
    poly = poly * sq + VecType(static_cast<float>(-5.37397155531e-2));
    poly = poly * sq + VecType(static_cast<float>(1.33314422036e-1));
    poly = poly * sq + VecType(static_cast<float>(-3.33332819422e-1));

    const VecType small_result = poly * sq * arg + arg;
    const VecType is_small = mask_lt(magnitude, polynomial_limit);

    /* medium values, computed on the magnitude */
    const VecType exp_2x = vec_exp_tanh_float(magnitude + magnitude);
    const VecType medium_magnitude = unity - twice / (exp_2x + unity);

    /* merge large and medium magnitudes, then put the sign bit back */
    const VecType lm_magnitude = select(medium_magnitude, unity, is_large);
    const VecType lm_signed    = lm_magnitude | sign_bit;

    /* the polynomial branch already carries the sign of arg */
    return select(lm_signed, small_result, is_small);
}
Ejemplo n.º 6
0
always_inline VecType vec_exp_float(VecType const & arg)
{
    /* e**x is rewritten as e**r * 2**k:
     *   k = round(x * log2(e))
     *   r = x - k * ln(2)
     * ln(2) is subtracted in two pieces (0.693359375 and -2.12194440e-4)
     * whose sum is ln(2).
     */
    VecType k = round(VecType(1.44269504088896341f) * arg);
    typename VecType::int_vec exponent = k.truncate_to_int();

    VecType r = arg;
    r -= k * VecType(0.693359375f);
    r -= k * VecType(-2.12194440e-4f);

    /* Degree-6 polynomial for e**r, evaluated by Horner steps from the
     * highest coefficient down.
     * Theoretical peak relative error in [-0.5, +0.5] is 3.5e-8. */
    VecType poly = 8.420792408287525177001953125e-3f + r * 1.386119984090328216552734375e-3f;
    poly = 4.167006909847259521484375e-2f + r * poly;
    poly = 0.16665561497211456298828125f + r * poly;
    poly = 0.4999996721744537353515625f + r * poly;
    poly = 1.00000035762786865234375f + r * poly;

    VecType mantissa = VecType(VecType::gen_one()) + r * poly;

    /* multiply by power of 2 */
    VecType scaled = ldexp_float(mantissa, exponent);

    /* Clamp at the boundaries: inputs above the upper cutoff give the largest
     * finite float, inputs below the lower cutoff give zero.  The lower
     * cutoff is the negated upper cutoff (a value of
     * -103.278929903431851103f was used previously). */
    const VecType upper_cutoff(88.72283905206835f);
    const VecType lower_cutoff = -upper_cutoff;
    const VecType largest_float(std::numeric_limits<float>::max());

    const VecType overflowed  = mask_gt(arg, upper_cutoff);
    const VecType underflowed = mask_lt(arg, lower_cutoff);

    const VecType clamped_high = select(scaled, largest_float, overflowed);
    return select(clamped_high, VecType::gen_zero(), underflowed);
}